dragonwell-project · JinZhonghui · Feb 23, 2023
diff --git a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "c2_intelJccErratum_x86.hpp"
+#include "opto/cfgnode.hpp"
+#include "opto/compile.hpp"
+#include "opto/machnode.hpp"
+#include "opto/node.hpp"
+#include "opto/regalloc.hpp"
+#include "utilities/align.hpp"
+#include "utilities/debug.hpp"
+
+// Map an address to the index of the 32 byte aligned window that contains it
+uintptr_t IntelJccErratum::boundary(uintptr_t addr) {
+  return addr / 32;
+}
+
+bool IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc) {
+  const int size_in_bytes = static_cast<int>(end_pc - start_pc); // only needed for the sanity check below
+  assert(size_in_bytes <= largest_jcc_size(), "invalid jcc size: %d", size_in_bytes);
+  return boundary(end_pc) != boundary(start_pc); // true when both ends lie in different 32 byte windows
+}
+
+bool IntelJccErratum::is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index) {
+  // Non-Java calls always count; apart from those only the last node of a block does.
+  const bool is_non_java_call = node->is_MachCall() && !node->is_MachCallJava();
+  const uint last_index = block->number_of_nodes() - 1;
+  return is_non_java_call || node_index == last_index;
+}
+
+int IntelJccErratum::jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc) {
+  node->add_flag(Node::Flag_intel_jcc_erratum); // tag the node as affected by the erratum
+  return node->size(regalloc);                  // caller accumulates this into its nop estimate
+}
+
+int IntelJccErratum::tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc) {
+  ResourceMark rm;
+  int nop_size = 0;         // conservative estimate: sum of the sizes of all tainted nodes
+  MachNode* last_m = NULL;  // most recent non-branch mach node, NULL right after a jcc erratum branch
+
+  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
+    const Block* const block = cfg->get_block(i);
+    for (uint j = 0; j < block->number_of_nodes(); ++j) {
+      const Node* const node = block->get_node(j);
+      if (!node->is_Mach()) {
+        continue;
+      }
+      MachNode* m = node->as_Mach();
+      if (is_jcc_erratum_branch(block, m, j)) {
+        // Found a root jcc erratum branch, flag it as problematic
+        nop_size += jcc_erratum_taint_node(m, regalloc);
+
+        if (last_m != NULL && !m->is_MachReturn() && !m->is_MachCall()) { // a NULL input must never match a NULL last_m
+          // We might fuse a problematic jcc erratum branch with a preceding
+          // ALU instruction - we must catch such problematic macro fusions
+          // and flag the ALU instruction as problematic too.
+          for (uint k = 1; k < m->req(); ++k) {
+            const Node* const use = m->in(k);
+            if (use == last_m) {
+              // Flag fused conditions too
+              nop_size += jcc_erratum_taint_node(last_m, regalloc);
+            }
+          }
+        }
+        last_m = NULL;
+      } else {
+        last_m = m;
+      }
+    }
+  }
+  return nop_size;
+}
+
+int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc) {
+  int jcc_size = mach->size(regalloc);
+  if (index_in_block < block->number_of_nodes() - 1) {
+    Node* next = block->get_node(index_in_block + 1);
+    if (next->is_Mach() && (next->as_Mach()->flags() & Node::Flag_intel_jcc_erratum)) {
+      jcc_size += next->as_Mach()->size(regalloc); // pad for the pair: add the size of the following tainted node
+    }
+  }
+  if (jcc_size > largest_jcc_size()) {
+    // Let's not try fixing this for nodes that seem unreasonably large
+    return 0;
+  }
+  if (is_crossing_or_ending_at_32_byte_boundary(current_offset, current_offset + jcc_size)) {
+    return int(align_up(current_offset, 32) - current_offset); // nop bytes up to the next 32 byte boundary
+  } else {
+    return 0;
+  }
+}
+
+#define __ _masm.
+
+uintptr_t IntelJccErratumAlignment::pc() {
+  return reinterpret_cast<uintptr_t>(__ pc()); // current emission position of _masm as an integer
+}
+
+IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size) :
+    _masm(masm),
+    _start_pc(pc()) {
+  if (!VM_Version::has_intel_jcc_erratum()) {
+    return; // unaffected CPU: emit nothing
+  }
+  if (Compile::current()->in_scratch_emit_size()) {
+    // When we measure the size of this 32 byte alignment, we apply a conservative guess.
+    __ nop(jcc_size);
+    return;
+  }
+  if (!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, _start_pc + jcc_size)) {
+    return; // the estimated range neither crosses nor ends at a 32 byte boundary
+  }
+  // The branch could be slowed down by micro code mitigations; place nops until the next 32 byte boundary.
+  const int pad_bytes = (int)(align_up(_start_pc, 32) - _start_pc);
+  __ nop(pad_bytes);
+  _start_pc = pc();
+}
+
+IntelJccErratumAlignment::~IntelJccErratumAlignment() {
+  // Nothing to verify on unaffected CPUs or while only measuring sizes.
+  const bool check_emitted_code = VM_Version::has_intel_jcc_erratum() &&
+                                  !Compile::current()->in_scratch_emit_size();
+  if (check_emitted_code) {
+    assert(!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, pc()), "Invalid jcc_size estimate");
+  }
+}
diff --git a/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp b/src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_INTELJCCERRATUM_X86_HPP
+#define CPU_X86_INTELJCCERRATUM_X86_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+class Block;
+class Compile;
+class MachNode;
+class MacroAssembler;
+class PhaseCFG;
+class PhaseRegAlloc;
+
+class IntelJccErratum : public AllStatic {
+private:
+  // Compute which 32 byte boundary an address corresponds to
+  static uintptr_t boundary(uintptr_t addr);
+  static int jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc); // sets Flag_intel_jcc_erratum on node, returns its size
+
+public:
+  static bool is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc); // true if start_pc and end_pc map to different boundaries
+  static bool is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index); // true for non-Java calls and for the last node of a block
+  // Analyze JCC erratum branches. Affected nodes get tagged with Flag_intel_jcc_erratum.
+  // The function returns a conservative estimate of all required nops on all mach nodes.
+  static int tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc);
+  // Computes the exact padding (number of nop bytes) for a mach node placed at current_offset
+  static int compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc);
+  static int largest_jcc_size() { return 20; } // compute_padding does not pad sequences larger than this
+};
+
+class IntelJccErratumAlignment {
+private:
+  MacroAssembler& _masm;     // assembler the guarded instructions are emitted with
+  uintptr_t       _start_pc; // pc at construction, moved past the alignment nops when padding is emitted
+
+  uintptr_t pc();            // current pc of _masm
+
+public:
+  IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size); // may emit nops; jcc_size is the caller's size estimate for the guarded code
+  ~IntelJccErratumAlignment();                                   // asserts the guarded code neither crosses nor ends at a 32 byte boundary
+};
+
+#endif // CPU_X86_INTELJCCERRATUM_X86_HPP
+
diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad
@@ -30,16 +30,29 @@ source_hpp %{
 
 source %{
 
+#include "c2_intelJccErratum_x86.hpp"
+
 static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
   ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
-  __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
-  __ jcc(Assembler::notZero, *stub->entry());
+  { // scope ends right after the jcc, so intel_alignment is destroyed (and its check runs) there
+    IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */); // NOTE(review): 10 presumably bounds the size of testptr + jcc below - confirm
+    __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
+    __ jcc(Assembler::notZero, *stub->entry()); // take the stub when ref has bits of the bad mask set
+  }
   __ bind(*stub->continuation());
 }
 
-static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
+static void z_load_barrier_cmpxchg(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
   ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
-  __ jmp(*stub->entry());
+  { // first guarded sequence: the bad mask test and its conditional branch
+    IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */); // NOTE(review): 10 presumably bounds the size of testptr + jcc below - confirm
+    __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
+    __ jcc(Assembler::zero, good); // no bad mask bits set in ref: branch to the caller's label, skipping the stub
+  }
+  { // second guarded sequence: the unconditional jump into the stub
+    IntelJccErratumAlignment intel_alignment(_masm, 5 /* jcc_size */); // NOTE(review): 5 presumably is the size of the jmp below - confirm
+    __ jmp(*stub->entry());
+  }
   __ bind(*stub->continuation());
 }
 
@@ -101,9 +114,7 @@ instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP t
     __ cmpxchgptr($newval$$Register, $mem$$Address);
     if (barrier_data() != ZLoadBarrierElided) {
       Label good;
-      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
-      __ jcc(Assembler::zero, good);
-      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
+      z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
       __ movptr($oldval$$Register, $tmp$$Register);
       __ lock();
       __ cmpxchgptr($newval$$Register, $mem$$Address);
@@ -133,9 +144,7 @@ instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlags
     __ cmpxchgptr($newval$$Register, $mem$$Address);
     if (barrier_data() != ZLoadBarrierElided) {
       Label good;
-      __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
-      __ jcc(Assembler::zero, good);
-      z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
+      z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
       __ movptr($oldval$$Register, $tmp$$Register);
       __ lock();
       __ cmpxchgptr($newval$$Register, $mem$$Address);

diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -39,6 +39,7 @@
 int VM_Version::_cpu;
 int VM_Version::_model;
 int VM_Version::_stepping;
+bool VM_Version::_has_intel_jcc_erratum;
 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
 
 // Address of instruction which causes SEGV
@@ -745,6 +746,8 @@ void VM_Version::get_processor_features() {
     }
   }
 
+  _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
+
   char buf[512];
   jio_snprintf(buf, sizeof(buf),
                "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x"
@@ -1674,6 +1677,70 @@ bool VM_Version::use_biased_locking() {
   return UseBiasedLocking;
 }
 
+bool VM_Version::compute_has_intel_jcc_erratum() {
+  // Only Intel CPUs are affected.
+  if (!is_intel_family_core()) {
+    return false;
+  }
+  // The table of affected CPUs below (model | stepping | product) follows this document released by Intel:
+  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
+  switch (_model) {
+  case 0x8E:
+    // 06_8EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
+    // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U
+    // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U 23e
+    // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Y
+    // 06_8EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake U43e
+    // 06_8EH | B | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
+    // 06_8EH | C | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
+    // 06_8EH | C | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U42
+    // 06_8EH | C | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
+    return _stepping >= 0x9 && _stepping <= 0xC;
+  case 0x4E:
+    // 06_4E  | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake U
+    // 06_4E  | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake U23e
+    // 06_4E  | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake Y
+    return _stepping == 0x3;
+  case 0x55:
+    // 06_55H | 4 | Intel® Xeon® Processor D Family based on microarchitecture code name Skylake D, Bakerville
+    // 06_55H | 4 | Intel® Xeon® Scalable Processors based on microarchitecture code name Skylake Server
+    // 06_55H | 4 | Intel® Xeon® Processor W Family based on microarchitecture code name Skylake W
+    // 06_55H | 4 | Intel® Core™ X-series Processors based on microarchitecture code name Skylake X
+    // 06_55H | 4 | Intel® Xeon® Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
+    // 06_55  | 7 | 2nd Generation Intel® Xeon® Scalable Processors based on microarchitecture code name Cascade Lake (server)
+    return _stepping == 0x4 || _stepping == 0x7;
+  case 0x5E:
+    // 06_5E  | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake H
+    // 06_5E  | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake S
+    return _stepping == 0x3;
+  case 0x9E:
+    // 06_9EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake G
+    // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake H
+    // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake S
+    // 06_9EH | 9 | Intel® Core™ X-series Processors based on microarchitecture code name Kaby Lake X
+    // 06_9EH | 9 | Intel® Xeon® Processor E3 v6 Family Kaby Lake Xeon E3
+    // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H
+    // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S
+    // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
+    // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
+    // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
+    // 06_9EH | B | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (4+2)
+    // 06_9EH | B | Intel® Celeron® Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
+    // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H (8+2)
+    // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (8+2)
+    return (_stepping >= 0x9 && _stepping <= 0xB) || _stepping == 0xD;
+  case 0xA6:
+    // 06_A6H | 0  | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U62
+    return _stepping == 0x0;
+  case 0xAE:
+    // 06_AEH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
+    return _stepping == 0xA;
+  default:
+    // If we are running on another Intel machine not recognized in the table, we are okay.
+    return false;
+  }
+}
+
 // On Xen, the cpuid instruction returns
 //  eax / registers[0]: Version of Xen
 //  ebx / registers[1]: chars 'XenV'

diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -290,6 +290,8 @@ class VM_Version : public Abstract_VM_Version {
   static int _model;
   static int _stepping;
 
+  static bool _has_intel_jcc_erratum;
+
   static address   _cpuinfo_segv_addr; // address of instruction which causes SEGV
   static address   _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
 
@@ -492,6 +494,8 @@ class VM_Version : public Abstract_VM_Version {
     return result;
   }
 
+  static bool compute_has_intel_jcc_erratum();
+
   static uint64_t feature_flags() {
     uint64_t result = 0;
     if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
@@ -872,6 +876,12 @@ class VM_Version : public Abstract_VM_Version {
     return false;
   }
 
+  // Returns the value cached by get_processor_features(): whether this CPU is potentially
+  // affected by an erratum on Intel CPUs (SKX102) that causes unpredictable behaviour when jcc
+  // crosses 64 byte boundaries. Its microcode mitigation causes regressions when jumps or fused
+  // conditional branches cross or end at 32 byte boundaries.
+  static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; }
+
   // AMD features
   static bool supports_3dnow_prefetch()    { return (_features & CPU_3DNOW_PREFETCH) != 0; }
   static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }