-
Notifications
You must be signed in to change notification settings - Fork 11.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SystemZ] Fix codegen for _[u]128 intrinsics
PR #74625 introduced a regression in the code generated for the following set of intrinsic: vec_add_u128, vec_addc_u128, vec_adde_u128, vec_addec_u128 vec_sub_u128, vec_subc_u128, vec_sube_u128, vec_subec_u128 vec_sum_u128, vec_msum_u128 vec_gfmsum_128, vec_gfmsum_accum_128 This is because the new code incorrectly assumed that a cast from "unsigned __int128" to "vector unsigned char" would simply be a bitcast re-interpretation; instead, this cast actually truncates the __int128 to char and splats the result. Fixed by adding an intermediate cast via a single-element 128-bit integer vector. Fixes: #109113 (cherry picked from commit baf9b7d)
- Loading branch information
Showing
2 changed files
with
188 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 | ||
// REQUIRES: systemz-registered-target | ||
// RUN: %clang_cc1 -target-cpu z14 -triple s390x-linux-gnu \ | ||
// RUN: -O2 -fzvector -flax-vector-conversions=none \ | ||
// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s | ||
|
||
#include <vecintrin.h> | ||
|
||
volatile vector unsigned char vuc; | ||
volatile vector unsigned short vus; | ||
volatile vector unsigned int vui; | ||
volatile vector unsigned long long vul; | ||
|
||
// CHECK-LABEL: define dso_local void @test( | ||
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3:![0-9]+]] | ||
// CHECK-NEXT: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 | ||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]] | ||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128 | ||
// CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128 | ||
// CHECK-NEXT: [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]]) | ||
// CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128 | ||
// CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128 | ||
// CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128 | ||
// CHECK-NEXT: [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]]) | ||
// CHECK-NEXT: [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128 | ||
// CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i8> [[TMP20]] to i128 | ||
// CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128 | ||
// CHECK-NEXT: [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]]) | ||
// CHECK-NEXT: [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128 | ||
// CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128 | ||
// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]] | ||
// CHECK-NEXT: [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i8> [[TMP32]] to i128 | ||
// CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128 | ||
// CHECK-NEXT: [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]]) | ||
// CHECK-NEXT: [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128 | ||
// CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128 | ||
// CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128 | ||
// CHECK-NEXT: [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]]) | ||
// CHECK-NEXT: [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128 | ||
// CHECK-NEXT: [[TMP50:%.*]] = bitcast <16 x i8> [[TMP47]] to i128 | ||
// CHECK-NEXT: [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128 | ||
// CHECK-NEXT: [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]]) | ||
// CHECK-NEXT: [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]]) | ||
// CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]]) | ||
// CHECK-NEXT: [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]]) | ||
// CHECK-NEXT: [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128 | ||
// CHECK-NEXT: [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]]) | ||
// CHECK-NEXT: [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128 | ||
// CHECK-NEXT: [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0) | ||
// CHECK-NEXT: [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128 | ||
// CHECK-NEXT: [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4) | ||
// CHECK-NEXT: [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128 | ||
// CHECK-NEXT: [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8) | ||
// CHECK-NEXT: [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128 | ||
// CHECK-NEXT: [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12) | ||
// CHECK-NEXT: [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8> | ||
// CHECK-NEXT: store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]]) | ||
// CHECK-NEXT: store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[TBAA3]] | ||
// CHECK-NEXT: ret void | ||
// | ||
void test(void) { | ||
vuc = vec_add_u128(vuc, vuc); | ||
vuc = vec_addc_u128(vuc, vuc); | ||
vuc = vec_adde_u128(vuc, vuc, vuc); | ||
vuc = vec_addec_u128(vuc, vuc, vuc); | ||
|
||
vuc = vec_sub_u128(vuc, vuc); | ||
vuc = vec_subc_u128(vuc, vuc); | ||
vuc = vec_sube_u128(vuc, vuc, vuc); | ||
vuc = vec_subec_u128(vuc, vuc, vuc); | ||
|
||
vuc = vec_sum_u128(vui, vui); | ||
vuc = vec_sum_u128(vul, vul); | ||
|
||
vuc = vec_gfmsum_128(vul, vul); | ||
vuc = vec_gfmsum_accum_128(vul, vul, vuc); | ||
|
||
vuc = vec_msum_u128(vul, vul, vuc, 0); | ||
vuc = vec_msum_u128(vul, vul, vuc, 4); | ||
vuc = vec_msum_u128(vul, vul, vuc, 8); | ||
vuc = vec_msum_u128(vul, vul, vuc, 12); | ||
|
||
vul = vec_bperm_u128(vuc, vuc); | ||
} | ||
//. | ||
// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} | ||
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} | ||
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} | ||
//. |