From 50340ce294d564d6528f306e1c66d5797183bdad Mon Sep 17 00:00:00 2001 From: Sarwat Shaheen Date: Tue, 3 Oct 2023 11:22:15 -0400 Subject: [PATCH] Enable vector instructions for short format floats in selectEvaluator This commit addresses the use of vector instructions to handle short format in the **select** evaluator. Previously, the use of vector instructions for short format in the select evaluator was disabled, even though on z14 and newer platforms, it is supported. The issue was caused by not correctly converting the condition code from GPR to FPR for short format. Changes for enabling vector instructions for short format: - Use of LLGFR instruction for long format for zero-extending a 32 bit conditionReg to 64 bits - Use of separate SLLG instruction for short format floats to preserve the float representation of the first 32 bits as it is later moved into FPR - Addition of mask values in the VFCE instruction to get the element size mask for floats and doubles respectively Closes: #5002 Signed-off-by: Sarwat Shaheen sarwat.shaheen@yahoo.com --- compiler/z/codegen/ControlFlowEvaluator.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/compiler/z/codegen/ControlFlowEvaluator.cpp b/compiler/z/codegen/ControlFlowEvaluator.cpp index 232a7ba1898..e1341ce2e48 100644 --- a/compiler/z/codegen/ControlFlowEvaluator.cpp +++ b/compiler/z/codegen/ControlFlowEvaluator.cpp @@ -2669,18 +2669,31 @@ OMR::Z::TreeEvaluator::dselectEvaluator(TR::Node *node, TR::CodeGenerator *cg) TR::Register *resultReg = cg->gprClobberEvaluate(trueValueNode); TR::Register *conditionReg = cg->evaluate(conditionNode); TR::Register *falseValReg = cg->evaluate(falseValueNode); - if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble()) + if ((cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble()) + || (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14) && 
node->getOpCode().isFloat())) { TR::Register *vectorSelReg = cg->allocateRegister(TR_VRF); TR::Register *tempReg = cg->allocateRegister(TR_FPR); TR::Register *vzeroReg = cg->allocateRegister(TR_VRF); - // Convert 32 Bit register to 64 Bit (Comparison Child of the select node is 32 bit) - generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg); + if (node->getOpCode().isDouble()) + { + // Convert 32 Bit register to 64 Bit for Doubles using zero-extension (Comparison Child of the select node is 32 bit) + generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg); + } + else + { + // Shift left the 32 least significant bits for preserving the float representation as the hardware only operates on the first 32 bits in a FPR + generateRSInstruction(cg, TR::InstOpCode::SLLG, node, conditionReg, 32); + } // convert to floating point generateRRInstruction(cg, TR::InstOpCode::LDGR, node, tempReg, conditionReg); // generate compare with zero generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vzeroReg, 0, 0); - generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 1, 0, 3); + // Mask values used for VFCE instruction: + // M4 - Floating-point-format control = getVectorElementSizeMask(node->getSize()) - gets the element size mask for doubles/floats respectively + // M5 - Single-Element-Control = 0x8, setting bit 0 to one, controlling the operation to take place only on the zero-indexed element in the vector + // M6 - Condition Code Set = 0, the Condition Code is not set and remains unchanged + generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 0, 0x8, getVectorElementSizeMask(node->getSize())); // generate select - if condition == 0, vectorSelReg will contain all 1s, so false and true are swapped generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, resultReg, falseValReg, resultReg, vectorSelReg); cg->stopUsingRegister(tempReg);