Skip to content

Commit

Permalink
-mtune=/-mcpu= support for x86 AMD CPUs
Browse files Browse the repository at this point in the history
  • Loading branch information
LebedevRI committed Mar 19, 2022
1 parent 49db215 commit 3f0595e
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 2 deletions.
12 changes: 12 additions & 0 deletions python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,18 @@ void define_enums(py::module &m) {
.value("RVV", Target::Feature::RVV)
.value("ARMv81a", Target::Feature::ARMv81a)
.value("SanitizerCoverage", Target::Feature::SanitizerCoverage)
.value("TuneK8", Target::Feature::TuneK8)
.value("TuneK8_SSE3", Target::Feature::TuneK8_SSE3)
.value("TuneAMDFam10", Target::Feature::TuneAMDFam10)
.value("TuneBtVer1", Target::Feature::TuneBtVer1)
.value("TuneBdVer1", Target::Feature::TuneBdVer1)
.value("TuneBdVer2", Target::Feature::TuneBdVer2)
.value("TuneBdVer3", Target::Feature::TuneBdVer3)
.value("TuneBdVer4", Target::Feature::TuneBdVer4)
.value("TuneBtVer2", Target::Feature::TuneBtVer2)
.value("TuneZnVer1", Target::Feature::TuneZnVer1)
.value("TuneZnVer2", Target::Feature::TuneZnVer2)
.value("TuneZnVer3", Target::Feature::TuneZnVer3)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
28 changes: 28 additions & 0 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,34 @@ void CodeGen_X86::visit(const Store *op) {
}

string CodeGen_X86::mcpu() const {
// First, check if any explicit request for tuning exists.
if (target.has_feature(Target::TuneK8)) {
return "k8";
} else if (target.has_feature(Target::TuneK8_SSE3)) {
return "k8-sse3";
} else if (target.has_feature(Target::TuneAMDFam10)) {
return "amdfam10";
} else if (target.has_feature(Target::TuneBtVer1)) {
return "btver1";
} else if (target.has_feature(Target::TuneBdVer1)) {
return "bdver1";
} else if (target.has_feature(Target::TuneBdVer2)) {
return "bdver2";
} else if (target.has_feature(Target::TuneBdVer3)) {
return "bdver3";
} else if (target.has_feature(Target::TuneBdVer4)) {
return "bdver4";
} else if (target.has_feature(Target::TuneBtVer2)) {
return "btver2";
} else if (target.has_feature(Target::TuneZnVer1)) {
return "znver1";
} else if (target.has_feature(Target::TuneZnVer2)) {
return "znver2";
} else if (target.has_feature(Target::TuneZnVer3)) {
return "znver3";
}

// And only after that, perform an ad-hoc guess for the tune given features.
if (target.has_feature(Target::AVX512_SapphireRapids)) {
return "sapphirerapids";
} else if (target.has_feature(Target::AVX512_Cannonlake)) {
Expand Down
23 changes: 21 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,18 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"rvv", Target::RVV},
{"armv81a", Target::ARMv81a},
{"sanitizer_coverage", Target::SanitizerCoverage},
{"tune_k8", Target::TuneK8},
{"tune_k8_sse3", Target::TuneK8_SSE3},
{"tune_amdfam10", Target::TuneAMDFam10},
{"tune_btver1", Target::TuneBtVer1},
{"tune_bdver1", Target::TuneBdVer1},
{"tune_bdver2", Target::TuneBdVer2},
{"tune_bdver3", Target::TuneBdVer3},
{"tune_bdver4", Target::TuneBdVer4},
{"tune_btver2", Target::TuneBtVer2},
{"tune_znver1", Target::TuneZnVer1},
{"tune_znver2", Target::TuneZnVer2},
{"tune_znver3", Target::TuneZnVer3},
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
};

Expand Down Expand Up @@ -453,7 +465,7 @@ bool merge_string(Target &t, const std::string &target) {
}
tokens.push_back(rest);

bool os_specified = false, arch_specified = false, bits_specified = false, features_specified = false;
bool os_specified = false, arch_specified = false, bits_specified = false, tune_specified = false, features_specified = false;
bool is_host = false;

for (size_t i = 0; i < tokens.size(); i++) {
Expand Down Expand Up @@ -484,6 +496,13 @@ bool merge_string(Target &t, const std::string &target) {
}
os_specified = true;
} else if (lookup_feature(tok, feature)) {
if (tok.substr(0, std::strlen("tune_")) == "tune_") {
if (tune_specified) {
// Only a single tune makes sense.
return false;
}
tune_specified = true;
}
t.set_feature(feature);
features_specified = true;
} else if (tok == "trace_all") {
Expand Down Expand Up @@ -979,7 +998,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
// clang-format on

// clang-format off
const std::array<Feature, 14> intersection_features = {{
const std::array<Feature, 15> intersection_features = {{
ARMv7s,
ARMv81a,
AVX,
Expand Down
12 changes: 12 additions & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,18 @@ struct Target {
RVV = halide_target_feature_rvv,
ARMv81a = halide_target_feature_armv81a,
SanitizerCoverage = halide_target_feature_sanitizer_coverage,
TuneK8 = halide_target_feature_tune_k8,
TuneK8_SSE3 = halide_target_feature_tune_k8_sse3,
TuneAMDFam10 = halide_target_feature_tune_amdfam10,
TuneBtVer1 = halide_target_feature_tune_btver1,
TuneBdVer1 = halide_target_feature_tune_bdver1,
TuneBdVer2 = halide_target_feature_tune_bdver2,
TuneBdVer3 = halide_target_feature_tune_bdver3,
TuneBdVer4 = halide_target_feature_tune_bdver4,
TuneBtVer2 = halide_target_feature_tune_btver2,
TuneZnVer1 = halide_target_feature_tune_znver1,
TuneZnVer2 = halide_target_feature_tune_znver2,
TuneZnVer3 = halide_target_feature_tune_znver3,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down
12 changes: 12 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,18 @@ typedef enum halide_target_feature_t {
halide_target_feature_rvv, ///< Enable RISCV "V" Vector Extension
halide_target_feature_armv81a, ///< Enable ARMv8.1-a instructions
halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support.
halide_target_feature_tune_k8, ///< Tune specifically for the AMD K8 CPU.
halide_target_feature_tune_k8_sse3, ///< Tune specifically for the AMD K8 w/SSE3 CPU.
halide_target_feature_tune_amdfam10, ///< Tune specifically for the AMD FAM10 CPU.
halide_target_feature_tune_btver1, ///< Tune specifically for the AMD BtVer1 CPU.
halide_target_feature_tune_bdver1, ///< Tune specifically for the AMD BdVer1 CPU.
halide_target_feature_tune_bdver2, ///< Tune specifically for the AMD BdVer2 CPU.
halide_target_feature_tune_bdver3, ///< Tune specifically for the AMD BdVer3 CPU.
halide_target_feature_tune_bdver4, ///< Tune specifically for the AMD BdVer4 CPU.
halide_target_feature_tune_btver2, ///< Tune specifically for the AMD BtVer2 CPU.
halide_target_feature_tune_znver1, ///< Tune specifically for the AMD ZnVer1 CPU.
halide_target_feature_tune_znver2, ///< Tune specifically for the AMD ZnVer2 CPU.
halide_target_feature_tune_znver3, ///< Tune specifically for the AMD ZnVer3 CPU.
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down

0 comments on commit 3f0595e

Please sign in to comment.