Revert "Factoring out allocator cache"

This reverts commit 64988ae.
STEllAR-GROUP · Oct 17, 2024 · 108efca · 108efca
1 parent 64988ae
commit 108efca
Show file tree

Hide file tree

Showing 32 changed files with 221 additions and 433 deletions.
diff --git a/libs/core/allocator_support/CMakeLists.txt b/libs/core/allocator_support/CMakeLists.txt
@@ -42,7 +42,7 @@ set(allocator_support_compat_headers
 )
 # cmake-format: on
 
-set(allocator_support_sources thread_local_caching_allocator.cpp)
+set(allocator_support_sources)
 
 include(HPX_AddModule)
 add_hpx_module(
@@ -52,7 +52,6 @@ add_hpx_module(
   HEADERS ${allocator_support_headers}
   COMPAT_HEADERS ${allocator_support_compat_headers}
   DEPENDENCIES hpx_dependencies_allocator
-  MODULE_DEPENDENCIES hpx_assertion hpx_concepts hpx_config hpx_preprocessor
-                      hpx_type_support
+  MODULE_DEPENDENCIES hpx_concepts hpx_config hpx_preprocessor hpx_type_support
   CMAKE_SUBDIRS examples tests
 )
diff --git a/libs/core/allocator_support/include/hpx/allocator_support/thread_local_caching_allocator.hpp b/libs/core/allocator_support/include/hpx/allocator_support/thread_local_caching_allocator.hpp
@@ -8,11 +8,8 @@
 
 #include <hpx/config.hpp>
 #include <hpx/allocator_support/config/defines.hpp>
-#include <hpx/assert.hpp>
 
 #include <cstddef>
-#include <cstdint>
-#include <functional>
 #include <memory>
 #include <new>
 #include <type_traits>
@@ -24,41 +21,15 @@ namespace hpx::util {
     !((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) ||                       \
         defined(HPX_HAVE_HIP))
 
-    namespace detail {
-
-        HPX_CORE_EXPORT void init_allocator_cache(
-            std::size_t, std::function<void()>&& clear_cache);
-        HPX_CORE_EXPORT std::pair<void*, std::size_t> allocate_from_cache(
-            std::size_t) noexcept;
-        [[nodiscard]] HPX_CORE_EXPORT bool cache_empty(std::size_t) noexcept;
-        HPX_CORE_EXPORT void return_to_cache(
-            std::size_t, void* p, std::size_t n);
-
-        // maximal number of caches [0...max)
-        inline constexpr int max_number_of_caches = 16;
-
-        ///////////////////////////////////////////////////////////////////////
-        constexpr int next_power_of_two(std::int64_t n) noexcept
-        {
-            int i = 0;
-            for (--n; n > 0; n >>= 1)
-            {
-                ++i;
-            }
-            return i;
-        }
-    }    // namespace detail
-
     ///////////////////////////////////////////////////////////////////////////
-    template <typename T = char, typename Allocator = std::allocator<T>>
+    template <template <typename, typename> class Stack, typename T = char,
+        typename Allocator = std::allocator<T>>
     struct thread_local_caching_allocator
     {
         HPX_NO_UNIQUE_ADDRESS Allocator alloc;
 
-    private:
         using traits = std::allocator_traits<Allocator>;
 
-    public:
         using value_type = typename traits::value_type;
         using pointer = typename traits::pointer;
         using const_pointer = typename traits::const_pointer;
@@ -68,7 +39,7 @@ namespace hpx::util {
         template <typename U>
         struct rebind
         {
-            using other = thread_local_caching_allocator<U,
+            using other = thread_local_caching_allocator<Stack, U,
                 typename traits::template rebind_alloc<U>>;
         };
 
@@ -80,43 +51,91 @@ namespace hpx::util {
         using propagate_on_container_swap =
             typename traits::propagate_on_container_swap;
 
-        explicit thread_local_caching_allocator(
-            Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
-                is_nothrow_copy_constructible_v<Allocator>))
-          : alloc(alloc)
+    private:
+        struct allocated_cache
         {
-            // Note: capturing the allocator will be ok only as long as it
-            // doesn't have any state as this lambda will be possibly called
-            // very late during destruction of the thread_local cache.
-            static_assert(std::is_empty_v<Allocator>,
-                "Please don't use allocators with state in conjunction with "
-                "the thread_local_caching_allocator");
-
-            constexpr std::size_t num_cache =
-                detail::next_power_of_two(sizeof(T));
-
-            static_assert(num_cache < detail::max_number_of_caches,
-                "This allocator does not support allocating objects larger "
-                "than 2^16 bytes");
-
-            auto f = [=]() mutable {
-                while (!detail::cache_empty(num_cache))
+            explicit allocated_cache(Allocator const& a) noexcept(
+                noexcept(std::is_nothrow_copy_constructible_v<Allocator>))
+              : alloc(a)
+              , data(0)
+            {
+            }
+
+            allocated_cache(allocated_cache const&) = delete;
+            allocated_cache(allocated_cache&&) = delete;
+            allocated_cache& operator=(allocated_cache const&) = delete;
+            allocated_cache& operator=(allocated_cache&&) = delete;
+
+            ~allocated_cache()
+            {
+                clear_cache();
+            }
+
+            pointer allocate(size_type n)
+            {
+                pointer p;
+                std::pair<T*, size_type> pair;
+                if (data.pop(pair))
+                {
+                    p = pair.first;
+                }
+                else
                 {
-                    auto [p, n] = detail::allocate_from_cache(num_cache);
-                    if (p != nullptr)
+                    p = traits::allocate(alloc, n);
+                    if (p == nullptr)
                     {
-                        traits::deallocate(const_cast<Allocator&>(alloc),
-                            static_cast<char*>(p), n);
+                        throw std::bad_alloc();
                     }
                 }
-            };
 
-            detail::init_allocator_cache(num_cache, HPX_MOVE(f));
+                ++allocated;
+                return p;
+            }
+
+            void deallocate(pointer p, size_type n) noexcept
+            {
+                data.push(std::make_pair(p, n));
+                if (++deallocated > 2 * (allocated + 16))
+                {
+                    clear_cache();
+                    allocated = 0;
+                    deallocated = 0;
+                }
+            }
+
+        private:
+            void clear_cache() noexcept
+            {
+                std::pair<T*, size_type> p;
+                while (data.pop(p))
+                {
+                    traits::deallocate(alloc, p.first, p.second);
+                }
+            }
+
+            HPX_NO_UNIQUE_ADDRESS Allocator alloc;
+            Stack<std::pair<T*, size_type>, Allocator> data;
+            std::size_t allocated = 0;
+            std::size_t deallocated = 0;
+        };
+
+        allocated_cache& cache()
+        {
+            thread_local allocated_cache allocated_data(alloc);
+            return allocated_data;
+        }
+
+    public:
+        explicit thread_local_caching_allocator(
+            Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
+                is_nothrow_copy_constructible_v<Allocator>))
+          : alloc(alloc)
+        {
         }
 
         template <typename U, typename Alloc>
         explicit thread_local_caching_allocator(
-            thread_local_caching_allocator<U, Alloc> const&
+            thread_local_caching_allocator<Stack, U, Alloc> const&
                 rhs) noexcept(noexcept(std::
                 is_nothrow_copy_constructible_v<Alloc>))
           : alloc(rhs.alloc)
@@ -136,32 +155,16 @@ namespace hpx::util {
 
         [[nodiscard]] pointer allocate(size_type n, void const* = nullptr)
         {
-            constexpr std::size_t num_cache =
-                detail::next_power_of_two(sizeof(T));
-            std::size_t N = n * (1ull << num_cache);
-
-            if (max_size() < N)
+            if (max_size() < n)
             {
                 throw std::bad_array_new_length();
             }
-
-            auto [p, _] = detail::allocate_from_cache(num_cache);
-            if (p == nullptr)
-            {
-                p = traits::allocate(alloc, N);
-                if (p == nullptr)
-                {
-                    throw std::bad_alloc();
-                }
-            }
-            return static_cast<pointer>(p);
+            return cache().allocate(n);
         }
 
-        void deallocate(pointer p, size_type n)
+        void deallocate(pointer p, size_type n) noexcept
         {
-            constexpr std::size_t num_cache =
-                detail::next_power_of_two(sizeof(T));
-            detail::return_to_cache(num_cache, p, n * (1ull << num_cache));
+            cache().deallocate(p, n);
         }
 
         [[nodiscard]] constexpr size_type max_size() noexcept
@@ -196,7 +199,8 @@ namespace hpx::util {
         }
     };
 #else
-    template <typename T = char, typename Allocator = std::allocator<T>>
+    template <template <typename, typename> class Stack, typename T = char,
+        typename Allocator = std::allocator<T>>
     using thread_local_caching_allocator = Allocator;
 #endif
 }    // namespace hpx::util
diff --git a/libs/core/allocator_support/src/thread_local_caching_allocator.cpp b/libs/core/allocator_support/src/thread_local_caching_allocator.cpp
diff --git a/libs/core/async_base/CMakeLists.txt b/libs/core/async_base/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022 The STE||AR-Group
+# Copyright (c) 2020-2024 The STE||AR-Group
 #
 # SPDX-License-Identifier: BSL-1.0
 # Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -34,6 +34,6 @@ add_hpx_module(
   COMPAT_HEADERS ${async_base_compat_headers}
   SOURCES ${async_base_sources}
   MODULE_DEPENDENCIES hpx_allocator_support hpx_concepts hpx_config
-                      hpx_coroutines hpx_tag_invoke
+                      hpx_concurrency hpx_coroutines hpx_tag_invoke
   CMAKE_SUBDIRS examples tests
 )