borglab · dellaert · May 6, 2020 · Jan 11, 2020 · Jan 11, 2020 · Jan 11, 2020
diff --git a/examples/SolverComparer.cpp b/examples/SolverComparer.cpp
@@ -58,9 +58,8 @@
 #include <iostream>
 
 #ifdef GTSAM_USE_TBB
-#include <tbb/tbb.h>
-#undef max // TBB seems to include windows.h and we don't want these macros
-#undef min
+#include <tbb/task_arena.h> // tbb::task_arena
+#include <tbb/task_group.h> // tbb::task_group
 #endif
 
 using namespace std;
@@ -206,10 +205,11 @@ int main(int argc, char *argv[]) {
   }
 
 #ifdef GTSAM_USE_TBB
-  std::unique_ptr<tbb::task_scheduler_init> init;
+  tbb::task_arena arena;
+  tbb::task_group tg;
   if(nThreads > 0) {
     cout << "Using " << nThreads << " threads" << endl;
-    init.reset(new tbb::task_scheduler_init(nThreads));
+    arena.initialize(nThreads);
   } else
     cout << "Using threads for all processors" << endl;
 #else
@@ -219,6 +219,10 @@ int main(int argc, char *argv[]) {
   }
 #endif
 
+#ifdef GTSAM_USE_TBB
+  arena.execute([&]{
+  tg.run_and_wait([&]{
+#endif
   // Run mode
   if(incremental)
     runIncremental();
@@ -230,6 +234,10 @@ int main(int argc, char *argv[]) {
     runPerturb();
   else if(stats)
     runStats();
+#ifdef GTSAM_USE_TBB
+  });
+  });
+#endif
 
   return 0;
 }

diff --git a/examples/TimeTBB.cpp b/examples/TimeTBB.cpp
@@ -28,9 +28,12 @@ using boost::assign::list_of;
 
 #ifdef GTSAM_USE_TBB
 
-#include <tbb/tbb.h>
-#undef max // TBB seems to include windows.h and we don't want these macros
-#undef min
+#include <tbb/blocked_range.h>           // tbb::blocked_range
+#include <tbb/tick_count.h>              // tbb::tick_count
+#include <tbb/parallel_for.h>            // tbb::parallel_for
+#include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
+#include <tbb/task_arena.h>              // tbb::task_arena
+#include <tbb/task_group.h>              // tbb::task_group
 
 static const DenseIndex numberOfProblems = 1000000;
 static const DenseIndex problemSize = 4;
@@ -67,10 +70,14 @@ struct WorkerWithoutAllocation
 };
 
 /* ************************************************************************* */
-map<int, double> testWithoutMemoryAllocation()
+map<int, double> testWithoutMemoryAllocation(int num_threads)
 {
   // A function to do some matrix operations without allocating any memory
 
+  // Create task_arena and task_group
+  tbb::task_arena arena(num_threads);
+  tbb::task_group tg;
+
   // Now call it
   vector<double> results(numberOfProblems);
 
@@ -79,7 +86,14 @@ map<int, double> testWithoutMemoryAllocation()
   for(size_t grainSize: grainSizes)
   {
     tbb::tick_count t0 = tbb::tick_count::now();
-    tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
+
+    // Run inside the task arena; grainSize (must be > 0) is passed to the range so each iteration differs
+    arena.execute([&]{
+      tg.run_and_wait([&]{
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems, grainSize), WorkerWithoutAllocation(results));
+      });
+    });
+
     tbb::tick_count t1 = tbb::tick_count::now();
     cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
     timingResults[(int)grainSize] = (t1 - t0).seconds();
@@ -120,10 +134,14 @@ struct WorkerWithAllocation
 };
 
 /* ************************************************************************* */
-map<int, double> testWithMemoryAllocation()
+map<int, double> testWithMemoryAllocation(int num_threads)
 {
   // A function to do some matrix operations with allocating memory
 
+  // Create task_arena and task_group
+  tbb::task_arena arena(num_threads);
+  tbb::task_group tg;
+
   // Now call it
   vector<double> results(numberOfProblems);
 
@@ -132,7 +150,14 @@ map<int, double> testWithMemoryAllocation()
   for(size_t grainSize: grainSizes)
   {
     tbb::tick_count t0 = tbb::tick_count::now();
-    tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
+
+    // Run inside the task arena; grainSize (must be > 0) is passed to the range so each iteration differs
+    arena.execute([&]{
+      tg.run_and_wait([&]{
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems, grainSize), WorkerWithAllocation(results));
+      });
+    });
+
     tbb::tick_count t1 = tbb::tick_count::now();
     cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
     timingResults[(int)grainSize] = (t1 - t0).seconds();
@@ -153,9 +178,8 @@ int main(int argc, char* argv[])
   for(size_t n: numThreads)
   {
     cout << "With " << n << " threads:" << endl;
-    tbb::task_scheduler_init init((int)n);
-    results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation();
-    results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation();
+    results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
+    results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
     cout << endl;
   }
 

diff --git a/gtsam/base/ThreadsafeException.h b/gtsam/base/ThreadsafeException.h
@@ -11,7 +11,7 @@
 
 /**
  * @file     ThreadSafeException.h
- * @brief    Base exception type that uses tbb_exception if GTSAM is compiled with TBB
+ * @brief    Base exception type whose String typedef uses tbb::tbb_allocator if GTSAM is compiled with TBB
  * @author   Richard Roberts
  * @date     Aug 21, 2010
  * @addtogroup base
@@ -25,34 +25,28 @@
 #include <gtsam/dllexport.h>
 #include <string>
 #include <typeinfo>
+#include <exception>
 
 #ifdef GTSAM_USE_TBB
 #include <tbb/tbb_allocator.h>
-#include <tbb/tbb_exception.h>
 #include <tbb/scalable_allocator.h>
 #include <iostream>
 #endif
 
 namespace gtsam {
 
-/// Base exception type that uses tbb_exception if GTSAM is compiled with TBB.
+/// Base exception type whose String typedef uses tbb::tbb_allocator if GTSAM is compiled with TBB.
 template<class DERIVED>
 class ThreadsafeException:
-#ifdef GTSAM_USE_TBB
-    public tbb::tbb_exception
-#else
 public std::exception
-#endif
 {
-#ifdef GTSAM_USE_TBB
 private:
-  typedef tbb::tbb_exception Base;
+  typedef std::exception Base;
+#ifdef GTSAM_USE_TBB
 protected:
   typedef std::basic_string<char, std::char_traits<char>,
       tbb::tbb_allocator<char> > String;
 #else
-private:
-  typedef std::exception Base;
 protected:
   typedef std::string String;
 #endif
@@ -82,36 +76,6 @@ public std::exception
   }
 
 public:
-  // Implement functions for tbb_exception
-#ifdef GTSAM_USE_TBB
-  virtual tbb::tbb_exception* move() throw () {
-    void* cloneMemory = scalable_malloc(sizeof(DERIVED));
-    if (!cloneMemory) {
-      std::cerr << "Failed allocating memory to copy thrown exception, exiting now." << std::endl;
-      exit(-1);
-    }
-    DERIVED* clone = ::new(cloneMemory) DERIVED(static_cast<DERIVED&>(*this));
-    clone->dynamic_ = true;
-    return clone;
-  }
-
-  virtual void destroy() throw () {
-    if (dynamic_) {
-      DERIVED* derivedPtr = static_cast<DERIVED*>(this);
-      derivedPtr->~DERIVED();
-      scalable_free(derivedPtr);
-    }
-  }
-
-  virtual void throw_self() {
-    throw *static_cast<DERIVED*>(this);
-  }
-
-  virtual const char* name() const throw () {
-    return typeid(DERIVED).name();
-  }
-#endif
-
   virtual const char* what() const throw () {
     return description_ ? description_->c_str() : "";
   }

diff --git a/gtsam/base/debug.cpp b/gtsam/base/debug.cpp
@@ -20,29 +20,29 @@
 #include <gtsam/config.h> // for GTSAM_USE_TBB
 
 #ifdef GTSAM_USE_TBB
-#include <tbb/mutex.h>
+#include <mutex> // std::mutex and its RAII lock wrappers
 #endif
 
 namespace gtsam {
 
 GTSAM_EXPORT FastMap<std::string, ValueWithDefault<bool, false> > debugFlags;
 
 #ifdef GTSAM_USE_TBB
-tbb::mutex debugFlagsMutex;
+std::mutex debugFlagsMutex;
 #endif
 
 /* ************************************************************************* */
 bool guardedIsDebug(const std::string& s) {
 #ifdef GTSAM_USE_TBB
-  tbb::mutex::scoped_lock lock(debugFlagsMutex);
+  std::lock_guard<std::mutex> lock(debugFlagsMutex); // held until return; no unlock/defer needed
 #endif
   return gtsam::debugFlags[s];
 }
 
 /* ************************************************************************* */
 void guardedSetDebug(const std::string& s, const bool v) {
 #ifdef GTSAM_USE_TBB
-  tbb::mutex::scoped_lock lock(debugFlagsMutex);
+  std::lock_guard<std::mutex> lock(debugFlagsMutex); // held until return; no unlock/defer needed
 #endif
   gtsam::debugFlags[s] = v;
 }

diff --git a/gtsam/base/treeTraversal/parallelTraversalTasks.h b/gtsam/base/treeTraversal/parallelTraversalTasks.h
@@ -22,11 +22,8 @@
 #include <boost/make_shared.hpp>
 
 #ifdef GTSAM_USE_TBB
-#  include <tbb/tbb.h>
-#  include <tbb/scalable_allocator.h>
-#  undef max // TBB seems to include windows.h and we don't want these macros
-#  undef min
-#  undef ERROR
+#include <tbb/task.h>               // tbb::task, tbb::task_list
+#include <tbb/scalable_allocator.h> // tbb::scalable_allocator
 
 namespace gtsam {
 

diff --git a/gtsam/base/types.h b/gtsam/base/types.h
@@ -27,9 +27,9 @@
 #include <cstddef>
 #include <cstdint>
 
+#include <exception>
+
 #ifdef GTSAM_USE_TBB
-#include <tbb/task_scheduler_init.h>
-#include <tbb/tbb_exception.h>
 #include <tbb/scalable_allocator.h>
 #endif
 

diff --git a/gtsam/geometry/Unit3.cpp b/gtsam/geometry/Unit3.cpp
@@ -84,7 +84,7 @@ const Matrix32& Unit3::basis(OptionalJacobian<6, 2> H) const {
   // NOTE(hayk): At some point it seemed like this reproducably resulted in
   // deadlock. However, I don't know why and I can no longer reproduce it.
   // It either was a red herring or there is still a latent bug left to debug.
-  tbb::mutex::scoped_lock lock(B_mutex_);
+  std::unique_lock<std::mutex> lock(B_mutex_);
 #endif
 
   const bool cachedBasis = static_cast<bool>(B_);

diff --git a/gtsam/geometry/Unit3.h b/gtsam/geometry/Unit3.h
@@ -33,7 +33,7 @@
 #include <string>
 
 #ifdef GTSAM_USE_TBB
-#include <tbb/mutex.h>
+#include <mutex> // std::mutex
 #endif
 
 namespace gtsam {
@@ -48,7 +48,7 @@ class Unit3 {
   mutable boost::optional<Matrix62> H_B_; ///< Cached basis derivative
 
 #ifdef GTSAM_USE_TBB
-  mutable tbb::mutex B_mutex_; ///< Mutex to protect the cached basis.
+  mutable std::mutex B_mutex_; ///< Mutex to protect the cached basis.
 #endif
 
 public: