diff --git a/src/runtime/threading_backend.cc b/src/runtime/threading_backend.cc index fd6c2f70d6c6..14b5f27dd495 100644 --- a/src/runtime/threading_backend.cc +++ b/src/runtime/threading_backend.cc @@ -33,6 +33,9 @@ #include #endif #if defined(__hexagon__) +extern "C" { +#include +} #include #include #include @@ -381,15 +384,26 @@ int MaxConcurrency() { #if defined(_M_X64) || defined(__x86_64__) max_concurrency /= 2; // ignore hyper-threading #elif defined(__hexagon__) + // Ideally max_concurrency is set to the total count of 128B + // HVX units available. This prevents threads unable to lock + // an HVX unit from scheduling work on the Scalar cores instead + // of HVX. + int num_hvx128_contexts = (qurt_hvx_get_units() >> 8) & 0xFF; // With unsigned PDs, getting the number of available hardware threads - // is not supported in earlier versions of QuRT. In such cases assume 4. - // If running on simulator, set max_concurrency to 1. + // is not supported in earlier versions of QuRT. In such cases assume + // the number of HVX units available. If running on simulator, set + // max_concurrency to 1. if (max_concurrency == 0) { if (dlsym(RTLD_DEFAULT, "running_in_sim_dev_17bc90206f6cf5a7")) { max_concurrency = 1; } else { - max_concurrency = 4; + max_concurrency = num_hvx128_contexts; } + } else { + // If the hardware_concurrency has already set the max_concurrency to + // a non-zero value then make sure it is not greater than the number + // of HVX units available. + max_concurrency = std::min(num_hvx128_contexts, max_concurrency); } #endif }