Skip to content

Commit

Permalink
[llvm][NVPTX] Strip unneeded '+0' in PTX load/store (#113017)
Browse files Browse the repository at this point in the history
Remove the extraneous '+0' immediate offset part in PTX load/stores, to
improve readability of output PTX code.
  • Loading branch information
JOE1994 authored Oct 19, 2024
1 parent 5aec88f commit 0f0a96b
Show file tree
Hide file tree
Showing 66 changed files with 1,225 additions and 1,209 deletions.
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/bf16.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
__device__ __bf16 test_ret( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
return in;
// CHECK: st.param.b16 [func_retval0+0], %[[R]]
// CHECK: st.param.b16 [func_retval0], %[[R]]
// CHECK: ret;
}

Expand All @@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
__device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
// CHECK: st.param.b16 [param0+0], %[[R]];
// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
// CHECK: call.uni (retval0),
// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
// CHECK: st.param.b16 [func_retval0], %[[RET]]
// CHECK: ret;
}
10 changes: 10 additions & 0 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,16 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
}
}

void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
auto &Op = MI->getOperand(OpNum);
assert(Op.isImm() && "Invalid operand");
if (Op.getImm() != 0) {
O << "+";
printOperand(MI, OpNum, O);
}
}

void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MCOperand &Op = MI->getOperand(OpNum);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
Expand Down
114 changes: 59 additions & 55 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
let PrintMethod = "printMmaCode";
}

def Offseti32imm : Operand<i32> {
let PrintMethod = "printOffseti32imm";
}

def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

Expand Down Expand Up @@ -2482,21 +2486,21 @@ def ProxyReg :

let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
!strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
[]>;

class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
!strconcat("ld.param.v2", opstr,
" \t{{$dst, $dst2}}, [retval0+$b];"), []>;
" \t{{$dst, $dst2}}, [retval0$b];"), []>;

class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins i32imm:$b),
(ins Offseti32imm:$b),
!strconcat("ld.param.v4", opstr,
" \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
" \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
[]>;
}

Expand All @@ -2512,8 +2516,8 @@ let mayStore = true in {
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
: NVPTXInst<(outs),
(ins op:$val, i32imm:$a, i32imm:$b),
"st.param" # opstr # " \t[param$a+$b], $val;",
(ins op:$val, i32imm:$a, Offseti32imm:$b),
"st.param" # opstr # " \t[param$a$b], $val;",
[]>;
}

Expand All @@ -2524,8 +2528,8 @@ let mayStore = true in {
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2,
i32imm:$a, i32imm:$b),
"st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
i32imm:$a, Offseti32imm:$b),
"st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
[]>;
}

Expand All @@ -2541,29 +2545,29 @@ let mayStore = true in {

: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
i32imm:$a, i32imm:$b),
i32imm:$a, Offseti32imm:$b),
"st.param.v4" # opstr #
" \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
" \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
[]>;
}

class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
!strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
!strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
[]>;

class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
!strconcat("st.param.v2", opstr,
" \t[func_retval0+$a], {{$val, $val2}};"),
" \t[func_retval0$a], {{$val, $val2}};"),
[]>;

class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a),
regclass:$val4, Offseti32imm:$a),
!strconcat("st.param.v4", opstr,
" \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
" \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}

Expand Down Expand Up @@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
"\t$dst, [$addr$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
"\t$dst, [$addr$offset];", []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
"\t$dst, [$addr$offset];", []>;
}

let mayLoad=1, hasSideEffects=0 in {
Expand Down Expand Up @@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
i32imm:$offset),
Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
" \t[$addr$offset], $src;", []>;
def _ari_64 : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
i32imm:$offset),
Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
" \t[$addr$offset], $src;", []>;
def _asi : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
i32imm:$offset),
Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
" \t[$addr$offset], $src;", []>;
}

let mayStore=1, hasSideEffects=0 in {
Expand Down Expand Up @@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
Expand All @@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
Expand Down Expand Up @@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
Int32Regs:$addr, i32imm:$offset),
Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
"\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
Int64Regs:$addr, i32imm:$offset),
Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
"\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
imem:$addr, i32imm:$offset),
imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
"\t[$addr$offset], {{$src1, $src2}};", []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
Expand All @@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
"$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
}

let mayStore=1, hasSideEffects=0 in {
Expand Down Expand Up @@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
Requires<[hasPTX<60>, hasSM<70>]>;
def atomic_thread_fence_acq_rel_cta :
NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
Requires<[hasPTX<60>, hasSM<70>]>;
Requires<[hasPTX<60>, hasSM<70>]>;
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ define i32 @f(ptr %p) {
; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0];
; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
; ENABLED-NEXT: add.s32 %r3, %r1, %r2;
; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
; ENABLED-NEXT: st.param.b32 [func_retval0], %r3;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: f(
Expand All @@ -32,7 +32,7 @@ define i32 @f(ptr %p) {
; DISABLED-NEXT: ld.u32 %r1, [%rd1];
; DISABLED-NEXT: ld.u32 %r2, [%rd1+4];
; DISABLED-NEXT: add.s32 %r3, %r1, %r2;
; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
; DISABLED-NEXT: st.param.b32 [func_retval0], %r3;
; DISABLED-NEXT: ret;
%p.1 = getelementptr i32, ptr %p, i32 1
%v0 = load i32, ptr %p, align 8
Expand Down Expand Up @@ -68,7 +68,7 @@ define half @fh(ptr %p) {
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: fh(
Expand Down Expand Up @@ -100,7 +100,7 @@ define half @fh(ptr %p) {
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr half, ptr %p, i32 1
%p.2 = getelementptr half, ptr %p, i32 2
Expand Down Expand Up @@ -132,7 +132,7 @@ define float @ff(ptr %p) {
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
; ENABLED-NEXT: st.param.f32 [func_retval0], %f9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: ff(
Expand All @@ -151,7 +151,7 @@ define float @ff(ptr %p) {
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
; DISABLED-NEXT: st.param.f32 [func_retval0], %f9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr float, ptr %p, i32 1
%p.2 = getelementptr float, ptr %p, i32 2
Expand Down
Loading

0 comments on commit 0f0a96b

Please sign in to comment.