From b9107325c1f13eba2dba8df637a83550cb22eaf1 Mon Sep 17 00:00:00 2001 From: lizhijin Date: Tue, 5 Sep 2023 00:56:23 +0800 Subject: [PATCH] [AArch64][CodeGen] Fix wrong operand order when creating vcmla intrinsic --- .../Target/AArch64/AArch64ISelLowering.cpp | 2 +- ...-deinterleaving-add-mull-fixed-contract.ll | 48 ++++++++-------- ...plex-deinterleaving-add-mull-fixed-fast.ll | 56 +++++++++---------- .../AArch64/complex-deinterleaving-f16-mul.ll | 32 +++++------ .../AArch64/complex-deinterleaving-f32-mul.ll | 32 +++++------ .../AArch64/complex-deinterleaving-f64-mul.ll | 28 +++++----- .../complex-deinterleaving-mixed-cases.ll | 56 +++++++++---------- .../complex-deinterleaving-multiuses.ll | 46 +++++++-------- .../complex-deinterleaving-reductions.ll | 32 +++++------ .../AArch64/complex-deinterleaving-splat.ll | 32 +++++------ .../complex-deinterleaving-uniform-cases.ll | 16 +++--- 11 files changed, 190 insertions(+), 190 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c65c52e39201ac..fbadda1a8fd446 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26216,7 +26216,7 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR( return B.CreateIntrinsic(IdMap[(int)Rotation], Ty, - {Accumulator, InputB, InputA}); + {Accumulator, InputA, InputB}); } if (OperationType == ComplexDeinterleavingOperation::CAdd) { diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll index c684a18a7e0773..09672d1be21613 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll @@ -48,14 +48,14 @@ define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double> ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #90 ; CHECK-NEXT: fadd v1.2d, v18.2d, v17.2d ; CHECK-NEXT: fadd v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret @@ -94,14 +94,14 @@ define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double> ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #90 ; CHECK-NEXT: fsub v1.2d, v18.2d, v17.2d ; CHECK-NEXT: fsub v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret @@ -140,14 +140,14 @@ define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #270 -; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #270 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #270 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #270 ; CHECK-NEXT: fadd v1.2d, v18.2d, v17.2d ; CHECK-NEXT: fadd v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll index 9b6a9e7adf796f..7692b1cf0aaae1 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll @@ -7,10 +7,10 @@ target triple = "aarch64" define <4 x double> @mull_add(<4 x double> %a, <4 x double> %b, <4 x double> %c) { ; CHECK-LABEL: mull_add: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmla v4.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v5.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v4.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v5.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #90 +; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #90 ; CHECK-NEXT: mov v0.16b, v4.16b ; CHECK-NEXT: mov v1.16b, v5.16b ; CHECK-NEXT: ret @@ -39,14 +39,14 @@ define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double> ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90 ; CHECK-NEXT: mov v0.16b, v17.16b ; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret @@ -83,14 +83,14 @@ define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double> ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #270 -; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #270 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #180 -; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #180 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #270 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #270 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #180 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #180 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90 ; CHECK-NEXT: mov v0.16b, v17.16b ; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret @@ -127,14 +127,14 @@ define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #270 -; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #270 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v0.2d, v2.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #270 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #270 ; CHECK-NEXT: mov v0.16b, v17.16b ; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll index 40433e2e076aa7..fbe913e5472cc2 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll @@ -37,8 +37,8 @@ define <4 x half> @complex_mul_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-LABEL: complex_mul_v4f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcmla v2.4h, v0.4h, v1.4h, #0 -; CHECK-NEXT: fcmla v2.4h, v0.4h, v1.4h, #90 +; CHECK-NEXT: fcmla v2.4h, v1.4h, v0.4h, #0 +; CHECK-NEXT: fcmla v2.4h, v1.4h, v0.4h, #90 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: @@ -61,8 +61,8 @@ define <8 x half> @complex_mul_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: complex_mul_v8f16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.8h, v0.8h, v1.8h, #0 -; CHECK-NEXT: fcmla v2.8h, v0.8h, v1.8h, #90 +; CHECK-NEXT: fcmla v2.8h, v1.8h, v0.8h, #0 +; CHECK-NEXT: fcmla v2.8h, v1.8h, v0.8h, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -86,10 +86,10 @@ define <16 x half> @complex_mul_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: fcmla v5.8h, v0.8h, v2.8h, #0 -; CHECK-NEXT: fcmla v4.8h, v1.8h, v3.8h, #0 -; CHECK-NEXT: fcmla v5.8h, v0.8h, v2.8h, #90 -; CHECK-NEXT: fcmla v4.8h, v1.8h, v3.8h, #90 +; CHECK-NEXT: fcmla v5.8h, v2.8h, v0.8h, #0 +; CHECK-NEXT: fcmla v4.8h, v3.8h, v1.8h, #0 +; CHECK-NEXT: fcmla v5.8h, v2.8h, v0.8h, #90 +; CHECK-NEXT: fcmla v4.8h, v3.8h, v1.8h, #90 ; CHECK-NEXT: mov v0.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v4.16b ; CHECK-NEXT: ret @@ -116,14 +116,14 @@ define <32 x half> @complex_mul_v32f16(<32 x half> %a, <32 x half> %b) { ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.8h, v0.8h, v4.8h, #0 -; CHECK-NEXT: fcmla v18.8h, v1.8h, v5.8h, #0 -; CHECK-NEXT: fcmla v17.8h, v3.8h, v7.8h, #0 -; CHECK-NEXT: fcmla v19.8h, v2.8h, v6.8h, #0 -; CHECK-NEXT: fcmla v16.8h, v0.8h, v4.8h, #90 -; CHECK-NEXT: fcmla v18.8h, v1.8h, v5.8h, #90 -; CHECK-NEXT: fcmla v17.8h, v3.8h, v7.8h, #90 -; CHECK-NEXT: fcmla v19.8h, v2.8h, v6.8h, #90 +; CHECK-NEXT: fcmla v16.8h, v4.8h, v0.8h, #0 +; CHECK-NEXT: fcmla v18.8h, v5.8h, v1.8h, #0 +; CHECK-NEXT: fcmla v17.8h, v7.8h, v3.8h, #0 +; CHECK-NEXT: fcmla v19.8h, v6.8h, v2.8h, #0 +; CHECK-NEXT: fcmla v16.8h, v4.8h, v0.8h, #90 +; CHECK-NEXT: fcmla v18.8h, v5.8h, v1.8h, #90 +; CHECK-NEXT: fcmla v17.8h, v7.8h, v3.8h, #90 +; CHECK-NEXT: fcmla v19.8h, v6.8h, v2.8h, #90 ; CHECK-NEXT: mov v0.16b, v16.16b ; CHECK-NEXT: mov v1.16b, v18.16b ; CHECK-NEXT: mov v3.16b, v17.16b diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll index 05f07f6fd1c2c8..5f30d9642ce8b0 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll @@ -8,8 +8,8 @@ define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: complex_mul_v2f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d2, #0000000000000000 -; CHECK-NEXT: fcmla v2.2s, v0.2s, v1.2s, #0 -; CHECK-NEXT: fcmla v2.2s, v0.2s, v1.2s, #90 +; CHECK-NEXT: fcmla v2.2s, v1.2s, v0.2s, #0 +; CHECK-NEXT: fcmla v2.2s, v1.2s, v0.2s, #90 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: @@ -32,8 +32,8 @@ define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: complex_mul_v4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -57,10 +57,10 @@ define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: fcmla v5.4s, v0.4s, v2.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v3.4s, #0 -; CHECK-NEXT: fcmla v5.4s, v0.4s, v2.4s, #90 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v3.4s, #90 +; CHECK-NEXT: fcmla v5.4s, v2.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v3.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v5.4s, v2.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v3.4s, v1.4s, #90 ; CHECK-NEXT: mov v0.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v4.16b ; CHECK-NEXT: ret @@ -87,14 +87,14 @@ define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) { ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #0 -; CHECK-NEXT: fcmla v18.4s, v1.4s, v5.4s, #0 -; CHECK-NEXT: fcmla v17.4s, v3.4s, v7.4s, #0 -; CHECK-NEXT: fcmla v19.4s, v2.4s, v6.4s, #0 -; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #90 -; CHECK-NEXT: fcmla v18.4s, v1.4s, v5.4s, #90 -; CHECK-NEXT: fcmla v17.4s, v3.4s, v7.4s, #90 -; CHECK-NEXT: fcmla v19.4s, v2.4s, v6.4s, #90 +; CHECK-NEXT: fcmla v16.4s, v4.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v18.4s, v5.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v17.4s, v7.4s, v3.4s, #0 +; CHECK-NEXT: fcmla v19.4s, v6.4s, v2.4s, #0 +; CHECK-NEXT: fcmla v16.4s, v4.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v18.4s, v5.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v17.4s, v7.4s, v3.4s, #90 +; CHECK-NEXT: fcmla v19.4s, v6.4s, v2.4s, #90 ; CHECK-NEXT: mov v0.16b, v16.16b ; CHECK-NEXT: mov v1.16b, v18.16b ; CHECK-NEXT: mov v3.16b, v17.16b diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll index 6df59951e2143c..6d7b156c3b64c9 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll @@ -8,8 +8,8 @@ define <2 x double> @complex_mul_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.2d, v0.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v2.2d, v0.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v2.2d, v1.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v2.2d, v1.2d, v0.2d, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -33,10 +33,10 @@ define <4 x double> @complex_mul_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v4.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v4.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v4.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v4.2d, v3.2d, v1.2d, #90 ; CHECK-NEXT: mov v0.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v4.16b ; CHECK-NEXT: ret @@ -63,14 +63,14 @@ define <8 x double> @complex_mul_v8f64(<8 x double> %a, <8 x double> %b) { ; CHECK-NEXT: movi v17.2d, #0000000000000000 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v2.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v4.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v1.2d, v5.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v19.2d, v2.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v4.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v5.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v4.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v5.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v2.2d, #90 ; CHECK-NEXT: mov v0.16b, v16.16b ; CHECK-NEXT: mov v1.16b, v18.16b ; CHECK-NEXT: mov v3.16b, v17.16b diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll index f7837b2367671d..1ed9cf2db24f72 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll @@ -9,10 +9,10 @@ define <4 x float> @mul_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #90 -; CHECK-NEXT: fcmla v3.4s, v4.4s, v2.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v4.4s, v2.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #90 ; CHECK-NEXT: mov v0.16b, v3.16b ; CHECK-NEXT: ret entry: @@ -130,10 +130,10 @@ define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v3.4s, #0 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v3.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -166,15 +166,15 @@ define <4 x float> @mul_diamond(<4 x float> %a, <4 x float> %b, <4 x float> %c, ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v5.2d, #0000000000000000 ; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v6.4s, v2.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #90 ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fcmla v6.4s, v2.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v5.4s, v4.4s, v3.4s, #0 -; CHECK-NEXT: fcmla v5.4s, v4.4s, v3.4s, #90 -; CHECK-NEXT: fcmla v1.4s, v6.4s, v5.4s, #0 -; CHECK-NEXT: fcmla v1.4s, v6.4s, v5.4s, #90 +; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #90 +; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #0 +; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #90 +; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #0 +; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #90 ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: @@ -220,10 +220,10 @@ define <4 x float> @mul_add90_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.4s, v2.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v2.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90 ; CHECK-NEXT: fcadd v0.4s, v4.4s, v3.4s, #90 ; CHECK-NEXT: ret entry: @@ -358,8 +358,8 @@ entry: define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-LABEL: mul_addequal: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -386,8 +386,8 @@ define <4 x float> @mul_subequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) ; CHECK-LABEL: mul_subequal: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 ; CHECK-NEXT: fsub v0.4s, v3.4s, v2.4s ; CHECK-NEXT: ret entry: @@ -415,8 +415,8 @@ define <4 x float> @mul_mulequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) ; CHECK-LABEL: mul_mulequal: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 ; CHECK-NEXT: fmul v0.4s, v3.4s, v2.4s ; CHECK-NEXT: ret entry: @@ -484,8 +484,8 @@ define <4 x float> @mul_negequal(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: mul_negequal: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #180 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #270 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #180 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #270 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll index 16dec1af60c1ca..039025dafa0d6e 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll @@ -10,11 +10,11 @@ define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b, ptr %p) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v3.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #0 ; CHECK-NEXT: str q3, [x0] -; CHECK-NEXT: fcmla v2.4s, v0.4s, v3.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -112,10 +112,10 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> % ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s ; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s -; CHECK-NEXT: fcmla v0.4s, v3.4s, v2.4s, #0 +; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0 ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fneg v16.2s, v5.2s -; CHECK-NEXT: fcmla v0.4s, v3.4s, v2.4s, #90 +; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #90 ; CHECK-NEXT: fmla v16.2s, v1.2s, v6.2s ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1] ; CHECK-NEXT: ret @@ -179,8 +179,8 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5] ; CHECK-NEXT: ldr q1, [x3] -; CHECK-NEXT: fcmla v0.4s, v1.4s, v5.4s, #0 -; CHECK-NEXT: fcmla v0.4s, v1.4s, v5.4s, #90 +; CHECK-NEXT: fcmla v0.4s, v5.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v0.4s, v5.4s, v1.4s, #90 ; CHECK-NEXT: ret entry: %a = load <4 x float>, ptr %ptr_a @@ -300,31 +300,31 @@ define void @mul_add_common_mul_add_mul(<4 x double> %a, <4 x double> %b, <4 x d ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: ldr q19, [sp, #112] +; CHECK-NEXT: ldp q18, q20, [sp, #80] +; CHECK-NEXT: ldr q21, [sp, #64] ; CHECK-NEXT: movi v22.2d, #0000000000000000 -; CHECK-NEXT: ldp q21, q18, [sp, #96] -; CHECK-NEXT: ldp q20, q19, [sp, #64] -; CHECK-NEXT: fcmla v22.2d, v3.2d, v1.2d, #0 ; CHECK-NEXT: fcmla v16.2d, v18.2d, v19.2d, #0 ; CHECK-NEXT: fcmla v17.2d, v21.2d, v20.2d, #0 -; CHECK-NEXT: fcmla v22.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: ldr q1, [sp, #48] -; CHECK-NEXT: ldr q3, [sp] +; CHECK-NEXT: fcmla v22.2d, v1.2d, v3.2d, #0 ; CHECK-NEXT: fcmla v16.2d, v18.2d, v19.2d, #90 ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: fcmla v17.2d, v21.2d, v20.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #90 -; CHECK-NEXT: ldp q0, q2, [sp, #16] +; CHECK-NEXT: fcmla v22.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v0.2d, v2.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: ldp q3, q0, [sp, #32] +; CHECK-NEXT: ldp q2, q1, [sp] ; CHECK-NEXT: fsub v4.2d, v22.2d, v16.2d -; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #0 ; CHECK-NEXT: fsub v5.2d, v18.2d, v17.2d +; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #0 ; CHECK-NEXT: fcmla v17.2d, v3.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #90 ; CHECK-NEXT: stp q5, q4, [x0] +; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #90 ; CHECK-NEXT: fcmla v17.2d, v3.2d, v2.2d, #90 ; CHECK-NEXT: stp q17, q16, [x1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll index d245c0a0e4823d..40fd7a392c83b9 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll @@ -25,10 +25,10 @@ define dso_local %"struct.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: ldp q3, q2, [x9] ; CHECK-NEXT: cmp x8, #1600 ; CHECK-NEXT: ldp q5, q4, [x10] -; CHECK-NEXT: fcmla v0.2d, v3.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v2.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v3.2d, v5.2d, #90 -; CHECK-NEXT: fcmla v1.2d, v2.2d, v4.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #90 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v2.2d, v0.2d, v1.2d @@ -92,10 +92,10 @@ define %"struct.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: ldp q3, q2, [x9] ; CHECK-NEXT: cmp x8, #1600 ; CHECK-NEXT: ldp q5, q4, [x10] -; CHECK-NEXT: fcmla v1.2d, v3.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v3.2d, v5.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v4.2d, #90 +; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #90 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v2.2d, v1.2d, v0.2d @@ -159,14 +159,14 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) { ; CHECK-NEXT: ldp q7, q6, [x10] ; CHECK-NEXT: ldp q17, q16, [x9, #32] ; CHECK-NEXT: ldp q19, q18, [x10, #32] -; CHECK-NEXT: fcmla v1.2d, v5.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v2.2d, v17.2d, v19.2d, #0 -; CHECK-NEXT: fcmla v3.2d, v16.2d, v18.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v2.2d, v17.2d, v19.2d, #90 -; CHECK-NEXT: fcmla v3.2d, v16.2d, v18.2d, #90 +; CHECK-NEXT: fcmla v1.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v2.2d, v19.2d, v17.2d, #0 +; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v7.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #90 +; CHECK-NEXT: fcmla v2.2d, v19.2d, v17.2d, #90 +; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #90 ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v4.2d, v2.2d, v3.2d diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll index 0dbc2ecc8b008d..8de2ac5a140c68 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll @@ -13,16 +13,16 @@ define <4 x double> @complex_mul_const(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: movi v5.2d, #0000000000000000 ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: fcmla v6.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v6.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v6.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v6.2d, v1.2d, v3.2d, #90 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #90 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.2d, v1.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v1.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v4.2d, v1.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v1.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v4.2d, v6.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v5.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v4.2d, v6.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v5.2d, v1.2d, #90 ; CHECK-NEXT: mov v1.16b, v4.16b ; CHECK-NEXT: ret entry: @@ -58,15 +58,15 @@ define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2 ; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 ; CHECK-NEXT: mov v4.d[1], v5.d[0] ; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: fcmla v7.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v7.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v7.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v6.2d, v0.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v7.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v6.2d, v0.2d, v2.2d, #90 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: fcmla v5.2d, v4.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v5.2d, v4.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v5.2d, v7.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v5.2d, v7.2d, v4.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #90 ; CHECK-NEXT: mov v1.16b, v5.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll index 81a8631a1691b5..2cbc8ed3192de4 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll @@ -8,8 +8,8 @@ define <4 x float> @simple_mul(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: simple_mul: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #0 -; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #90 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: @@ -65,10 +65,10 @@ define <4 x float> @three_way_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v4.4s, v2.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v4.4s, v2.4s, #90 ; CHECK-NEXT: mov v0.16b, v3.16b ; CHECK-NEXT: ret entry: @@ -177,8 +177,8 @@ define <4 x float> @mul_mul_with_fneg(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: mul_mul_with_fneg: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #270 -; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #180 +; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #270 +; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #180 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: