Skip to content

Commit

Permalink
Update SIMD assembler instructions for z/OS (#2974)
Browse files (browse the repository at this point in the history)
Signed-off-by: Mike Essenmacher <[email protected]>
  • Loading branch information
mikeessen authored Oct 11, 2024
1 parent c3dbcf8 commit 29e8476
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 18 deletions.
5 changes: 0 additions & 5 deletions src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ void configurePassesNNPA() {
// TODO: remove this if zDNN adds support for saturation.
if (nnpaEnableSaturation)
nnpaEnableCompilerStickUnstick = true;
// Currently nnpaEnableCompilerStickUnstick not supported on zOS.
// TODO enable on zOS
if (mtriple == "s390x-ibm-zos") {
nnpaEnableCompilerStickUnstick = false;
}
}

void addONNXToZHighPasses(mlir::PassManager &pm) {
Expand Down
34 changes: 23 additions & 11 deletions src/Accelerators/NNPA/Conversion/ZLowToLLVM/ZLowToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1542,10 +1542,12 @@ class ZLowDLF16ToF32Lowering : public ConvertToLLVMPattern {
Type vecTypeI32 = LLVM::getFixedVectorType(i32Ty, 4);
Type vecTypeF32 = LLVM::getFixedVectorType(f32Ty, 4);

// SIMD instruction in string for z/Linux.
// SIMD instruction in string for z/Linux and z/OS.
// Convert and lengthen from DLF16: VCLFN(H/L) V1,V2,M3,M4
// M3 = 2 = FP32, M4 = 0 = DLF16
const char *asmStr = "VCLFNH $0,$2,2,0 \n\t VCLFNL $1,$2,2,0 \n\t";
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " VCLFNH $0,$2,2,0 \n\t"
" VCLFNL $1,$2,2,0 \n\t";
const char *asmConstraints = "=&v,=v,v";

// Prepare the input vector.
Expand Down Expand Up @@ -1710,10 +1712,11 @@ class ZLowF32ToDLF16Lowering : public ConvertToLLVMPattern {
Type vecTypeI16 = LLVM::getFixedVectorType(i16Ty, 8);
Type vecTypeF16 = LLVM::getFixedVectorType(f16Ty, 8);

// SIMD instruction in string for z/Linux.
// SIMD instruction in string for z/Linux and z/OS.
// Convert and round to DLF16: VCRNF V1,V2,V3,M4,M5
// M4 = 0 = DLF16, M5 = 2 = FP32
const char *asmStr = "VCRNF $0,$1,$2,0,2";
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " VCRNF $0,$1,$2,0,2 \n\t";
const char *asmConstraints = "=v,v,v";

// Prepare two input vectors: each for left/right four elements.
Expand Down Expand Up @@ -1864,10 +1867,15 @@ class ZLowDLF16ToF32VectorLowering : public ConvertToLLVMPattern {
Value inputVecI16 =
create.llvm.bitcast(vecTypeI16, operandAdaptor.getInput());

// Emit SIMD instruction for conversion.
// TODO: check if z/OS uses the same or different instruction.
const char *asmStr = ".insn vrr,0xe60000000056,$0,$2,0,2,0,0 \n\t"
".insn vrr,0xe6000000005E,$1,$2,0,2,0,0 \n\t";
// SIMD instruction in string for z/Linux and z/OS.
// Note this .insn version of asmStr was used previously for z/Linux.
// const char *asmStr = ".insn vrr,0xe60000000056,$0,$2,0,2,0,0 \n\t"
// ".insn vrr,0xe6000000005E,$1,$2,0,2,0,0 \n\t";
// Convert and lengthen from DLF16: VCLFN(H/L) V1,V2,M3,M4
// M3 = 2 = FP32, M4 = 0 = DLF16
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " VCLFNH $0,$2,2,0 \n\t"
" VCLFNL $1,$2,2,0 \n\t";
const char *asmConstraints = "=&v,=v,v";
SmallVector<Value> asmVals{inputVecI16};
Value outVecI32Struct =
Expand Down Expand Up @@ -1920,9 +1928,13 @@ class ZLowF32ToDLF16VectorLowering : public ConvertToLLVMPattern {
Value vecI32H = create.llvm.bitcast(vecTypeI32, operandAdaptor.getInput1());
Value vecI32L = create.llvm.bitcast(vecTypeI32, operandAdaptor.getInput2());

// Emit SIMD instruction for conversion.
// TODO: check if z/OS uses the same or different instruction.
const char *asmStr = ".insn vrr,0xe60000000075,$0,$1,$2,0,2,0";
// SIMD instruction in string for z/Linux and z/OS.
// Note this .insn version of asmStr was used previously for z/Linux.
// asmStr = ".insn vrr,0xe60000000075,$0,$1,$2,0,2,0";
// Convert and round to DLF16: VCRNF V1,V2,V3,M4,M5
// M4 = 0 = DLF16, M5 = 2 = FP32
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " VCRNF $0,$1,$2,0,2 \n\t";
const char *asmConstraints = "=v,v,v";
SmallVector<Value> asmVals{vecI32H, vecI32L};

Expand Down
6 changes: 4 additions & 2 deletions src/Conversion/KrnlToLLVM/KrnlRoundEven.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ class KrnlRoundEvenOpLowering : public ConversionPattern {
Value inputVecI32 = create.llvm.bitcast(vecTypeI32, input);
SmallVector<Value> asmVals{inputVecI32};
// SIMD ASM round to nearest even (M5=4) op
const char *asmStr = "VFISB $0,$1,0,4";
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " VFISB $0,$1,0,4 \n\t";
const char *asmConstraints = "=v,v";
Value outVecI32 =
rewriter
Expand All @@ -87,7 +88,8 @@ class KrnlRoundEvenOpLowering : public ConversionPattern {
Type typeF32 = rewriter.getF32Type();
SmallVector<Value> asmVals{input};
// Scalar ASM round to the nearest even (M3=4) op.
const char *asmStr = "FIEBR $0,4,$1";
// Note the spaces are required by the z/OS assembler.
const char *asmStr = " FIEBR $0,4,$1 \n\t";
const char *asmConstraints = "=f,f";
Value outF32 =
rewriter
Expand Down

0 comments on commit 29e8476

Please sign in to comment.