diff --git a/src/runtime/threading_backend.cc b/src/runtime/threading_backend.cc
index 7974ff768591a..01c92037837ae 100644
--- a/src/runtime/threading_backend.cc
+++ b/src/runtime/threading_backend.cc
@@ -133,9 +133,7 @@ class ThreadGroup::Impl {
 #endif
     }
     if (exclude_worker0) {  // master thread run task
-#if defined(__ANDROID__)
-      SetFullCpuAffinity();
-#else
+#if defined(__linux__) || defined(__ANDROID__)
       // if we set TVM_BIND_MASTER_THREAD to be 1, we will bind master thread
       // to core 0.
       const char* bind_master_thread = getenv("TVM_BIND_MASTER_THREAD");
@@ -148,19 +146,26 @@ class ThreadGroup::Impl {
           CPU_SET(sorted_order_[0], &cpuset);
         }
         pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+      } else {
+        SetFullCpuAffinity(reverse);
       }
-      pthread_atfork(nullptr, nullptr, ThreadGroup::Impl::SetFullCpuAffinity);
 #endif
     }
 #endif
   }
 
-  static void SetFullCpuAffinity() {
+  void SetFullCpuAffinity(bool reverse) {
 #if defined(__linux__) || defined(__ANDROID__)
     cpu_set_t cpuset;
     CPU_ZERO(&cpuset);
-    for (unsigned i = 0; i < std::thread::hardware_concurrency(); i++) {
-      CPU_SET(i, &cpuset);
+    if (reverse) {
+      for (int i = 0; i < little_count_; i++) {
+        CPU_SET(sorted_order_[sorted_order_.size() - i - 1], &cpuset);
+      }
+    } else {
+      for (int i = 0; i < big_count_; i++) {
+        CPU_SET(sorted_order_[i], &cpuset);
+      }
     }
 #if defined(__ANDROID__)
     sched_setaffinity(pthread_self(), sizeof(cpu_set_t), &cpuset);