cassie_bench: Improve allocation tracking

* expose allocation count from LimitMalloc * track allocations with some simple stats * separate first run to expose steady state allocation * lower some Limitmalloc ceilings from observations
RobotLocomotion · Aug 25, 2020 · 5c78ab4 · 5c78ab4
1 parent 59de1fd
commit 5c78ab4
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 9 deletions.
diff --git a/common/test_utilities/limit_malloc.cc b/common/test_utilities/limit_malloc.cc
@@ -44,6 +44,8 @@ class Monitor {
     }
   }
 
+  int num_allocations() const { return observed_num_allocations_.load(); }
+
  private:
   void ObserveAllocation();
 
@@ -174,6 +176,10 @@ LimitMalloc::LimitMalloc(LimitMallocParams args) {
   }
 }
 
+int LimitMalloc::num_allocations() const {
+  return ActiveMonitor::load()->num_allocations();
+}
+
 LimitMalloc::~LimitMalloc() {
   // De-activate our Monitor.
   auto prior = ActiveMonitor::reset();

diff --git a/common/test_utilities/limit_malloc.h b/common/test_utilities/limit_malloc.h
@@ -55,6 +55,9 @@ class LimitMalloc final {
   /// Undoes this object's malloc limits.
   ~LimitMalloc();
 
+  /// Returns the number of allocations observed so far.
+  int num_allocations() const;
+
   // We write this out by hand, to avoid depending on Drake *at all*.
   /// @name Does not allow copy, move, or assignment
   //@{

diff --git a/common/test_utilities/test/limit_malloc_test.cc b/common/test_utilities/test/limit_malloc_test.cc
@@ -61,6 +61,8 @@ TEST_P(LimitMallocTest, UnlimitedTest) {
   Allocate();  // Malloc is OK.
   Allocate();  // Malloc is OK.
   Allocate();  // Malloc is OK.
+  // Allocations are counted.
+  EXPECT_EQ(3, guard.num_allocations());
 }
 
 TEST_P(LimitMallocTest, BasicTest) {

diff --git a/examples/multibody/cassie_benchmark/test/cassie_bench.cc b/examples/multibody/cassie_benchmark/test/cassie_bench.cc
@@ -96,33 +96,76 @@ class CassieDoubleFixture : public benchmark::Fixture {
   VectorXd x_{};
 };
 
+// Track and report simple streaming statistics on allocations. Variance
+// tracking is adapted from:
+// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
+class AllocationTracker {
+ public:
+  explicit AllocationTracker(benchmark::State* state) : state_(state) {}
+
+  ~AllocationTracker() {
+    state_->counters["Allocs.min"] = min_;
+    state_->counters["Allocs.max"] = max_;
+    state_->counters["Allocs.mean"] = mean_;
+    state_->counters["Allocs.stddev"] = [&]() {
+      if (updates_ < 2) {
+        return std::numeric_limits<double>::quiet_NaN();
+      } else {
+        return std::sqrt(m2_ / (updates_ - 1));
+      }
+    }();
+  }
+
+  void Update(int allocs) {
+    min_ = std::min(min_, allocs);
+    max_ = std::max(max_, allocs);
+    updates_++;
+    double delta = allocs - mean_;
+    mean_ += delta / updates_;
+    m2_ += delta * (allocs - mean_);
+  }
+
+ private:
+  benchmark::State* state_;
+  int min_{std::numeric_limits<int>::max()};
+  int max_{std::numeric_limits<int>::min()};
+  int updates_{};
+  double mean_{};
+  double m2_{};
+};
+
 // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
 BENCHMARK_F(CassieDoubleFixture, DoubleMassMatrix)(benchmark::State& state) {
+  AllocationTracker tracker(&state);
   MatrixXd M(nv_, nv_);
   for (auto _ : state) {
     // @see LimitMalloc note above.
     LimitMalloc guard(LimitReleaseOnly(175));
     InvalidateState();
     plant_->CalcMassMatrix(*context_, &M);
+    tracker.Update(guard.num_allocations());
   }
 }
 
 BENCHMARK_F(CassieDoubleFixture, DoubleInverseDynamics)
     // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
     (benchmark::State& state) {
+  AllocationTracker tracker(&state);
   VectorXd desired_vdot = VectorXd::Zero(nv_);
   multibody::MultibodyForces<double> external_forces(*plant_);
   for (auto _ : state) {
     // @see LimitMalloc note above.
     LimitMalloc guard(LimitReleaseOnly(3));
     InvalidateState();
     plant_->CalcInverseDynamics(*context_, desired_vdot, external_forces);
+    tracker.Update(guard.num_allocations());
   }
 }
 
 BENCHMARK_F(CassieDoubleFixture, DoubleForwardDynamics)
     // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
     (benchmark::State& state) {
+  AllocationTracker tracker(&state);
   auto derivatives = plant_->AllocateTimeDerivatives();
   auto& port_value =
       plant_->get_actuation_input_port().FixValue(context_.get(), u_);
@@ -132,6 +175,7 @@ BENCHMARK_F(CassieDoubleFixture, DoubleForwardDynamics)
     InvalidateState();
     port_value.GetMutableData();  // Invalidates caching of inputs.
     plant_->CalcTimeDerivatives(*context_, derivatives.get());
+    tracker.Update(guard.num_allocations());
   }
 }
 
@@ -160,21 +204,34 @@ class CassieAutodiffFixture : public CassieDoubleFixture {
 BENCHMARK_F(CassieAutodiffFixture, AutodiffMassMatrix)
     // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
     (benchmark::State& state) {
+  AllocationTracker tracker(&state);
   MatrixX<AutoDiffXd> M_autodiff(nv_, nv_);
   auto x_autodiff = math::initializeAutoDiff(x_);
   plant_autodiff_->SetPositionsAndVelocities(context_autodiff_.get(),
       x_autodiff);
-  for (auto _ : state) {
-    // @see LimitMalloc note above.
-    LimitMalloc guard(LimitReleaseOnly(62476));
+
+  auto compute = [&]() {
     InvalidateState();
     plant_autodiff_->CalcMassMatrix(*context_autodiff_, &M_autodiff);
+  };
+
+  // The first iteration allocates more memory than subsequent runs.
+  compute();
+
+  for (auto _ : state) {
+    // @see LimitMalloc note above.
+    LimitMalloc guard(LimitReleaseOnly(61405));
+
+    compute();
+
+    tracker.Update(guard.num_allocations());
   }
 }
 
 BENCHMARK_F(CassieAutodiffFixture, AutodiffInverseDynamics)
     // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
     (benchmark::State& state) {
+  AllocationTracker tracker(&state);
   VectorXd desired_vdot = VectorXd::Zero(nv_);
   multibody::MultibodyForces<AutoDiffXd> external_forces_autodiff(
       *plant_autodiff_);
@@ -183,34 +240,56 @@ BENCHMARK_F(CassieAutodiffFixture, AutodiffInverseDynamics)
       math::initializeAutoDiff(desired_vdot, nq_ + 2 * nv_, nq_ + nv_);
   plant_autodiff_->SetPositionsAndVelocities(context_autodiff_.get(),
       x_autodiff);
-  for (auto _ : state) {
-    // @see LimitMalloc note above.
-    LimitMalloc guard(LimitReleaseOnly(70301));
+
+  auto compute = [&]() {
     InvalidateState();
     plant_autodiff_->CalcInverseDynamics(*context_autodiff_,
                                          vdot_autodiff,
                                          external_forces_autodiff);
+  };
+
+  // The first iteration allocates more memory than subsequent runs.
+  compute();
+
+  for (auto _ : state) {
+    // @see LimitMalloc note above.
+    LimitMalloc guard(LimitReleaseOnly(68798));
+
+    compute();
+
+    tracker.Update(guard.num_allocations());
   }
 }
 
 BENCHMARK_F(CassieAutodiffFixture, AutodiffForwardDynamics)
     // NOLINTNEXTLINE(runtime/references) cpplint disapproves of gbench choices.
     (benchmark::State& state) {
+  AllocationTracker tracker(&state);
   auto derivatives_autodiff = plant_autodiff_->AllocateTimeDerivatives();
   auto u_autodiff = math::initializeAutoDiff(u_, nq_ + nv_ + nu_, nq_ + nv_);
   auto& port_value = plant_autodiff_->get_actuation_input_port().FixValue(
       context_autodiff_.get(), u_autodiff);
   auto x_autodiff = math::initializeAutoDiff(x_, nq_ + nv_ + nu_);
   plant_autodiff_->SetPositionsAndVelocities(context_autodiff_.get(),
       x_autodiff);
-  for (auto _ : state) {
-    // @see LimitMalloc note above.
-    LimitMalloc guard(LimitReleaseOnly(105675));
 
+  auto compute = [&]() {
     InvalidateState();
     port_value.GetMutableData();  // Invalidates caching of inputs.
     plant_autodiff_->CalcTimeDerivatives(*context_autodiff_,
                                          derivatives_autodiff.get());
+  };
+
+  // The first iteration allocates more memory than subsequent runs.
+  compute();
+
+  for (auto _ : state) {
+    // @see LimitMalloc note above.
+    LimitMalloc guard(LimitReleaseOnly(102904));
+
+    compute();
+
+    tracker.Update(guard.num_allocations());
   }
 }