From 50340ce294d564d6528f306e1c66d5797183bdad Mon Sep 17 00:00:00 2001
From: Sarwat Shaheen <sarwat.shaheen@yahoo.com>
Date: Tue, 3 Oct 2023 11:22:15 -0400
Subject: [PATCH] Enable vector instructions for short format floats in
 selectEvaluator

This commit enables the use of vector instructions for short format floats in the **select** evaluator.
Previously, vector instructions were disabled for short format in the select evaluator, even though they are supported on z14 and newer platforms. The underlying issue was that the condition value was not converted correctly from a GPR to an FPR for short format. The changes that enable vector instructions for short format are:

- Use the LLGFR instruction for long format to zero-extend the 32-bit conditionReg to 64 bits
- Use a separate SLLG instruction for short format floats to shift the 32-bit value into the leftmost 32 bits of the register, preserving the float representation when it is later moved into an FPR
- Pass explicit mask values to the VFCE instruction, including the element size mask for floats or doubles as appropriate

Closes: #5002

Signed-off-by: Sarwat Shaheen <sarwat.shaheen@yahoo.com>
---
 compiler/z/codegen/ControlFlowEvaluator.cpp | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/compiler/z/codegen/ControlFlowEvaluator.cpp b/compiler/z/codegen/ControlFlowEvaluator.cpp
index 232a7ba1898..e1341ce2e48 100644
--- a/compiler/z/codegen/ControlFlowEvaluator.cpp
+++ b/compiler/z/codegen/ControlFlowEvaluator.cpp
@@ -2669,18 +2669,31 @@ OMR::Z::TreeEvaluator::dselectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
    TR::Register *resultReg = cg->gprClobberEvaluate(trueValueNode);
    TR::Register *conditionReg = cg->evaluate(conditionNode);
    TR::Register *falseValReg = cg->evaluate(falseValueNode);
-   if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble())
+   if ((cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble())
+    || (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14) && node->getOpCode().isFloat()))
       {
       TR::Register *vectorSelReg = cg->allocateRegister(TR_VRF);
       TR::Register *tempReg = cg->allocateRegister(TR_FPR);
       TR::Register *vzeroReg = cg->allocateRegister(TR_VRF);
-      // Convert 32 Bit register to 64 Bit (Comparison Child of the select node is 32 bit)
-      generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg);
+      if (node->getOpCode().isDouble())
+         {
+         // Convert 32 Bit register to 64 Bit for Doubles using zero-extension (Comparison Child of the select node is 32 bit)
+         generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg);
+         }
+      else
+         {
+         // Shift left the 32 least significant bits for preserving the float representation as the hardware only operates on the first 32 bits in an FPR
+         generateRSInstruction(cg, TR::InstOpCode::SLLG, node, conditionReg, 32);
+         }
       // convert to floating point
       generateRRInstruction(cg, TR::InstOpCode::LDGR, node, tempReg, conditionReg);
       // generate compare with zero
       generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vzeroReg, 0, 0);
-      generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 1, 0, 3);
+      // Mask values used for VFCE instruction:
+      // M4 - Floating-point-format control = getVectorElementSizeMask(node->getSize()) - gets the element size mask for doubles/floats respectively
+      // M5 - Single-Element-Control = 0x8, setting bit 0 to one, controlling the operation to take place only on the zero-indexed element in the vector
+      // M6 - Condition Code Set = 0, the Condition Code is not set and remains unchanged
+      generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 0, 0x8, getVectorElementSizeMask(node->getSize()));
       // generate select - if condition == 0, vectorSelReg will contain all 1s, so false and true are swapped
       generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, resultReg, falseValReg, resultReg, vectorSelReg);
       cg->stopUsingRegister(tempReg);