Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport] 8234160: Enable optimized mitigation for Intel jcc erratum in C2 #430

Open
wants to merge 1 commit into
base: VectorAPI_dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "c2_intelJccErratum_x86.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/machnode.hpp"
#include "opto/node.hpp"
#include "opto/regalloc.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"

// Map an address to the index of the 32 byte window that contains it.
// Two addresses lie in the same window exactly when their indices are equal.
uintptr_t IntelJccErratum::boundary(uintptr_t addr) {
  const uintptr_t window_size = 32;
  return addr / window_size;
}

// Returns true when start_pc and end_pc fall into different 32 byte windows.
// Callers pass end_pc as start_pc plus the size of the code in question, so a
// sequence that ends exactly on a 32 byte boundary is reported as well as one
// that straddles it.
// Debug builds assert that the distance between the two addresses does not
// exceed largest_jcc_size().
bool IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc) {
  const int jcc_size = int(end_pc - start_pc);
  assert(jcc_size <= largest_jcc_size(), "invalid jcc size: %d", jcc_size);
  const uintptr_t first_window = boundary(start_pc);
  const uintptr_t last_window = boundary(end_pc);
  return first_window != last_window;
}

bool IntelJccErratum::is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index) {
if (node->is_MachCall() && !node->is_MachCallJava()) {
return true;
}
return node_index == (block->number_of_nodes() - 1);
}

// Marks node with Node::Flag_intel_jcc_erratum and reports the node's size as
// computed by MachNode::size() for the given register allocation.
int IntelJccErratum::jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc) {
  node->add_flag(Node::Flag_intel_jcc_erratum);
  const int node_size = node->size(regalloc);
  return node_size;
}

// Walks every mach node of every block in cfg and tags the nodes that need
// jcc erratum handling with Node::Flag_intel_jcc_erratum:
//  - every node accepted by is_jcc_erratum_branch(), and
//  - the most recently visited non-branch mach node, when that node is also
//    an input of such a branch (a candidate for macro fusion with it).
// Returns the summed size of all tagged nodes, which serves as a conservative
// estimate of the nops that may have to be emitted.
// The parameter C is not used by this function.
int IntelJccErratum::tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc) {
  ResourceMark rm;
  int nop_size = 0;
  // Most recent mach node that was not a jcc erratum branch. It is NULL before
  // the first such node has been seen and right after a branch was handled.
  MachNode* last_m = NULL;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }
      MachNode* const m = node->as_Mach();
      if (!is_jcc_erratum_branch(block, m, j)) {
        last_m = m;
        continue;
      }

      // Found a root jcc erratum branch, flag it as problematic
      nop_size += jcc_erratum_taint_node(m, regalloc);

      // We might fuse a problematic jcc erratum branch with a preceding
      // ALU instruction - we must catch such problematic macro fusions
      // and flag the ALU instruction as problematic too.
      // Without a preceding candidate there is nothing to fuse with. The NULL
      // check also keeps a NULL input of m from matching a NULL last_m, which
      // would otherwise be dereferenced by jcc_erratum_taint_node().
      if (last_m != NULL && !m->is_MachReturn() && !m->is_MachCall()) {
        for (uint k = 1; k < m->req(); ++k) {
          if (m->in(k) == last_m) {
            // Flag fused conditions too
            nop_size += jcc_erratum_taint_node(last_m, regalloc);
          }
        }
      }
      last_m = NULL;
    }
  }
  return nop_size;
}

int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc) {
int jcc_size = mach->size(regalloc);
if (index_in_block < block->number_of_nodes() - 1) {
Node* next = block->get_node(index_in_block + 1);
if (next->is_Mach() && (next->as_Mach()->flags() & Node::Flag_intel_jcc_erratum)) {
jcc_size += mach->size(regalloc);
}
}
if (jcc_size > largest_jcc_size()) {
// Let's not try fixing this for nodes that seem unreasonably large
return false;
}
if (is_crossing_or_ending_at_32_byte_boundary(current_offset, current_offset + jcc_size)) {
return int(align_up(current_offset, 32) - current_offset);
} else {
return 0;
}
}

#define __ _masm.

uintptr_t IntelJccErratumAlignment::pc() {
return (uintptr_t)__ pc();
}

// Records the assembler's current pc and, on CPUs reported as affected by
// VM_Version::has_intel_jcc_erratum(), emits nops in front of the code that
// the caller is about to generate. jcc_size is the caller's estimate of the
// size of that code.
IntelJccErratumAlignment::IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size) :
    _masm(masm),
    _start_pc(pc()) {
  if (!VM_Version::has_intel_jcc_erratum()) {
    return;
  }

  if (Compile::current()->in_scratch_emit_size()) {
    // Only the size is being measured here, so assume the worst case and
    // account for jcc_size bytes of nops.
    __ nop(jcc_size);
    return;
  }

  if (!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, _start_pc + jcc_size)) {
    return;
  }

  // The branch would cross or end at a 32 byte boundary and might be slowed
  // down by the microcode mitigation. Pad with nops up to the next 32 byte
  // boundary and restart the measurement from there.
  const int alignment_nops = (int)(align_up(_start_pc, 32) - _start_pc);
  __ nop(alignment_nops);
  _start_pc = pc();
}

// On affected CPUs, and outside of scratch size measurement, verifies in debug
// builds that the code emitted since construction neither crosses nor ends at
// a 32 byte boundary, i.e. that the jcc_size passed to the constructor was
// large enough.
IntelJccErratumAlignment::~IntelJccErratumAlignment() {
  const bool must_verify = VM_Version::has_intel_jcc_erratum() &&
                           !Compile::current()->in_scratch_emit_size();
  if (must_verify) {
    assert(!IntelJccErratum::is_crossing_or_ending_at_32_byte_boundary(_start_pc, pc()), "Invalid jcc_size estimate");
  }
}
68 changes: 68 additions & 0 deletions src/hotspot/cpu/x86/c2_intelJccErratum_x86.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef CPU_X86_INTELJCCERRATUM_X86_HPP
#define CPU_X86_INTELJCCERRATUM_X86_HPP

#include "memory/allocation.hpp"
#include "utilities/globalDefinitions.hpp"

class Block;
class Compile;
class MachNode;
class MacroAssembler;
class PhaseCFG;
class PhaseRegAlloc;

// Static helpers used by C2 to find the mach nodes that need handling for the
// Intel jcc erratum and to compute the padding that keeps them from crossing
// or ending at a 32 byte boundary.
class IntelJccErratum : public AllStatic {
private:
// Compute which 32 byte boundary an address corresponds to
static uintptr_t boundary(uintptr_t addr);
// Tags node with Node::Flag_intel_jcc_erratum and returns the size of the node.
static int jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc);

public:
// Returns true if start_pc and end_pc belong to different 32 byte boundaries.
static bool is_crossing_or_ending_at_32_byte_boundary(uintptr_t start_pc, uintptr_t end_pc);
// Returns true if node is a non-Java call, or if node_index is the index of
// the last node in block.
static bool is_jcc_erratum_branch(const Block* block, const MachNode* node, uint node_index);
// Analyze JCC erratum branches. Affected nodes get tagged with Flag_intel_jcc_erratum.
// The function returns a conservative estimate of all required nops on all mach nodes.
static int tag_affected_machnodes(Compile* C, PhaseCFG* cfg, PhaseRegAlloc* regalloc);
// Computes the exact padding for a mach node
static int compute_padding(uintptr_t current_offset, const MachNode* mach, Block* block, uint index_in_block, PhaseRegAlloc* regalloc);
// Largest code size for which padding is computed; larger sizes are rejected
// by compute_padding() and by an assert in
// is_crossing_or_ending_at_32_byte_boundary().
static int largest_jcc_size() { return 20; }
};

// Scoped helper for hand-written assembly: declare an instance immediately
// before emitting a branch sequence and let it go out of scope right after.
// The constructor emits alignment nops when needed; the destructor asserts
// that the emitted sequence did not cross or end at a 32 byte boundary.
class IntelJccErratumAlignment {
private:
// Assembler that the nops are emitted into.
MacroAssembler& _masm;
// pc at which the guarded sequence starts (updated after padding).
uintptr_t _start_pc;

// Current pc of _masm as an integer.
uintptr_t pc();

public:
// jcc_size is the expected size of the code emitted while the object is alive.
IntelJccErratumAlignment(MacroAssembler& masm, int jcc_size);
~IntelJccErratumAlignment();
};

#endif // CPU_X86_INTELJCCERRATUM_X86_HPP

29 changes: 19 additions & 10 deletions src/hotspot/cpu/x86/gc/z/z_x86_64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,29 @@ source_hpp %{

source %{

#include "c2_intelJccErratum_x86.hpp"

static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::notZero, *stub->entry());
{
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::notZero, *stub->entry());
}
__ bind(*stub->continuation());
}

static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) {
static void z_load_barrier_cmpxchg(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, Label& good) {
ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */);
__ jmp(*stub->entry());
{
IntelJccErratumAlignment intel_alignment(_masm, 10 /* jcc_size */);
__ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
}
{
IntelJccErratumAlignment intel_alignment(_masm, 5 /* jcc_size */);
__ jmp(*stub->entry());
}
__ bind(*stub->continuation());
}

Expand Down Expand Up @@ -101,9 +114,7 @@ instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP t
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
Expand Down Expand Up @@ -133,9 +144,7 @@ instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlags
__ cmpxchgptr($newval$$Register, $mem$$Address);
if (barrier_data() != ZLoadBarrierElided) {
Label good;
__ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
__ jcc(Assembler::zero, good);
z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register);
z_load_barrier_cmpxchg(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register, good);
__ movptr($oldval$$Register, $tmp$$Register);
__ lock();
__ cmpxchgptr($newval$$Register, $mem$$Address);
Expand Down
67 changes: 67 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Address of instruction which causes SEGV
Expand Down Expand Up @@ -745,6 +746,8 @@ void VM_Version::get_processor_features() {
}
}

_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();

char buf[512];
jio_snprintf(buf, sizeof(buf),
"(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x"
Expand Down Expand Up @@ -1674,6 +1677,70 @@ bool VM_Version::use_biased_locking() {
return UseBiasedLocking;
}

// Determines from the CPU family, model and stepping whether this processor
// is listed as affected by the Intel jcc erratum.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The table of affected CPUs below follows this document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  // Each row reads: family_model | stepping | product description.
  switch (_model) {
    case 0x8E:
      // 06_8EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
      // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U
      // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake U 23e
      // 06_8EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Y
      // 06_8EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake U43e
      // 06_8EH | B | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
      // 06_8EH | C | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Amber Lake Y
      // 06_8EH | C | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U42
      // 06_8EH | C | 8th Generation Intel® Core™ Processors based on microarchitecture code name Whiskey Lake U
      switch (_stepping) {
        case 0x9:
        case 0xA:
        case 0xB:
        case 0xC:
          return true;
        default:
          return false;
      }
    case 0x4E:
      // 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake U
      // 06_4E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake U23e
      // 06_4E | 3 | 6th Generation Intel® Core™ Processors based on microarchitecture code name Skylake Y
      return _stepping == 0x3;
    case 0x55:
      // 06_55H | 4 | Intel® Xeon® Processor D Family based on microarchitecture code name Skylake D, Bakerville
      // 06_55H | 4 | Intel® Xeon® Scalable Processors based on microarchitecture code name Skylake Server
      // 06_55H | 4 | Intel® Xeon® Processor W Family based on microarchitecture code name Skylake W
      // 06_55H | 4 | Intel® Core™ X-series Processors based on microarchitecture code name Skylake X
      // 06_55H | 4 | Intel® Xeon® Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
      // 06_55 | 7 | 2nd Generation Intel® Xeon® Scalable Processors based on microarchitecture code name Cascade Lake (server)
      switch (_stepping) {
        case 0x4:
        case 0x7:
          return true;
        default:
          return false;
      }
    case 0x5E:
      // 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake H
      // 06_5E | 3 | 6th Generation Intel® Core™ Processor Family based on microarchitecture code name Skylake S
      return _stepping == 0x3;
    case 0x9E:
      // 06_9EH | 9 | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake G
      // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake H
      // 06_9EH | 9 | 7th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake S
      // 06_9EH | 9 | Intel® Core™ X-series Processors based on microarchitecture code name Kaby Lake X
      // 06_9EH | 9 | Intel® Xeon® Processor E3 v6 Family Kaby Lake Xeon E3
      // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H
      // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S
      // 06_9EH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
      // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
      // 06_9EH | A | Intel® Xeon® Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
      // 06_9EH | B | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (4+2)
      // 06_9EH | B | Intel® Celeron® Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
      // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake H (8+2)
      // 06_9EH | D | 9th Generation Intel® Core™ Processor Family based on microarchitecture code name Coffee Lake S (8+2)
      switch (_stepping) {
        case 0x9:
        case 0xA:
        case 0xB:
        case 0xD:
          return true;
        default:
          return false;
      }
    case 0xA6:
      // 06_A6H | 0 | 10th Generation Intel® Core™ Processor Family based on microarchitecture code name Comet Lake U62
      return _stepping == 0x0;
    case 0xAE:
      // 06_AEH | A | 8th Generation Intel® Core™ Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
      return _stepping == 0xA;
    default:
      // Any other Intel model is not in the table and is treated as unaffected.
      return false;
  }
}

// On Xen, the cpuid instruction returns
// eax / registers[0]: Version of Xen
// ebx / registers[1]: chars 'XenV'
Expand Down
10 changes: 10 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ class VM_Version : public Abstract_VM_Version {
static int _model;
static int _stepping;

static bool _has_intel_jcc_erratum;

static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV

Expand Down Expand Up @@ -492,6 +494,8 @@ class VM_Version : public Abstract_VM_Version {
return result;
}

static bool compute_has_intel_jcc_erratum();

static uint64_t feature_flags() {
uint64_t result = 0;
if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
Expand Down Expand Up @@ -872,6 +876,12 @@ class VM_Version : public Abstract_VM_Version {
return false;
}

// This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102)
// that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode
// mitigation causes regressions when jumps or fused conditional branches cross or end at
// 32 byte boundaries.
// Returns the value that get_processor_features() stored from
// compute_has_intel_jcc_erratum(); no detection work is done on this call.
static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; }

// AMD features
static bool supports_3dnow_prefetch() { return (_features & CPU_3DNOW_PREFETCH) != 0; }
static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
Expand Down
Loading