diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index a91056dfa..111103dfd 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -65,12 +66,36 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. */ - explicit aligned_resource_adaptor(Upstream* upstream, + explicit aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + RMM_EXPECTS(rmm::is_supported_alignment(alignment), + "Allocation alignment is not a power of 2."); + } + + /** + * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * @throws rmm::logic_error if `allocation_alignment` is not a power of 2 + * + * @param upstream The resource used for allocating/deallocating device memory. + * @param alignment The size used for allocation alignment. + * @param alignment_threshold Only allocations with a size larger than or equal to this threshold + * are aligned. 
+ */ + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()}, + alignment_{alignment}, + alignment_threshold_{alignment_threshold} + { RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -90,11 +115,6 @@ class aligned_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief The default alignment used by the adaptor. */ @@ -104,8 +124,8 @@ class aligned_resource_adaptor final : public device_memory_resource { using lock_guard = std::lock_guard; /** - * @brief Allocates memory of size at least `bytes` using the upstream resource with the specified - * alignment. + * @brief Allocates memory of size at least `bytes` using the upstream resource with the + * specified alignment. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled * by the upstream resource. 
@@ -117,10 +137,10 @@ class aligned_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, 1, stream); } auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); + void* pointer = get_upstream_resource().allocate_async(size, 1, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); auto const aligned_address = rmm::align_up(address, alignment_); @@ -143,7 +163,7 @@ class aligned_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, 1, stream); } else { { lock_guard lock(mtx_); @@ -153,7 +173,7 @@ class aligned_resource_adaptor final : public device_memory_resource { pointers_.erase(iter); } } - upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); + get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream); } } @@ -174,8 +194,8 @@ class aligned_resource_adaptor final : public device_memory_resource { } /** - * @brief Calculate the allocation size needed from upstream to account for alignments of both the - * size and the base pointer. + * @brief Calculate the allocation size needed from upstream to account for alignments of both + * the size and the base pointer. * * @param bytes The requested allocation size. * @return Allocation size needed from upstream to align both the size and the base pointer. 
@@ -186,7 +206,8 @@ class aligned_resource_adaptor final : public device_memory_resource { return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + /// The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index b274e0c18..0860a46d5 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -80,6 +81,26 @@ namespace rmm::mr { template class arena_memory_resource final : public device_memory_resource { public: + /** + * @brief Construct an `arena_memory_resource`. + * + * @param upstream_mr The memory resource from which to allocate blocks for the global arena. + * @param arena_size Size in bytes of the global arena. Defaults to half of the available + * memory on the current device. + * @param dump_log_on_failure If true, dump memory log when running out of memory. + */ + explicit arena_memory_resource(device_async_resource_ref upstream_mr, + std::optional arena_size = std::nullopt, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + { + if (dump_log_on_failure_) { + logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + // Set the level to `debug` for more detailed output. + logger_->set_level(spdlog::level::info); + } + } + /** * @brief Construct an `arena_memory_resource`. 
* @@ -93,7 +114,13 @@ class arena_memory_resource final : public device_memory_resource { explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, bool dump_log_on_failure = false) - : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + : global_arena_{[upstream_mr]() { + RMM_EXPECTS(upstream_mr != nullptr, + "Unexpected null upstream memory resource."); + return device_async_resource_ref{*upstream_mr}; + }(), + arena_size}, + dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); @@ -111,8 +138,8 @@ class arena_memory_resource final : public device_memory_resource { arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; private: - using global_arena = rmm::mr::detail::arena::global_arena; - using arena = rmm::mr::detail::arena::arena; + using global_arena = rmm::mr::detail::arena::global_arena; + using arena = rmm::mr::detail::arena::arena; /** * @brief Allocates memory of size at least `bytes`. @@ -272,7 +299,7 @@ class arena_memory_resource final : public device_memory_resource { std::unique_lock lock(map_mtx_); auto thread_arena = std::make_shared(global_arena_); thread_arenas_.emplace(thread_id, thread_arena); - thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; return *thread_arena; } } diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp index 1483925de..74af8679a 100644 --- a/include/rmm/mr/device/callback_memory_resource.hpp +++ b/include/rmm/mr/device/callback_memory_resource.hpp @@ -84,12 +84,13 @@ class callback_memory_resource final : public device_memory_resource { * It is the caller's responsibility to maintain the lifetime of the pointed-to data * for the duration of the lifetime of the `callback_memory_resource`. 
*/ - callback_memory_resource(allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg = nullptr, - void* deallocate_callback_arg = nullptr) noexcept - : allocate_callback_(allocate_callback), - deallocate_callback_(deallocate_callback), + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg = nullptr, // NOLINT(bugprone-easily-swappable-parameters) + void* deallocate_callback_arg = nullptr) noexcept + : allocate_callback_(std::move(allocate_callback)), + deallocate_callback_(std::move(deallocate_callback)), allocate_callback_arg_(allocate_callback_arg), deallocate_callback_arg_(deallocate_callback_arg) { diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index c7965ca34..324b9c928 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -492,7 +493,6 @@ inline auto max_free_size(std::set const& superblocks) * @tparam Upstream Memory resource to use for allocating the arena. Implements * rmm::mr::device_memory_resource interface. */ -template class global_arena final { public: /** @@ -504,10 +504,9 @@ class global_arena final { * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory * on the current device. 
*/ - global_arena(Upstream* upstream_mr, std::optional arena_size) + global_arena(device_async_resource_ref upstream_mr, std::optional arena_size) : upstream_mr_{upstream_mr} { - RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); auto const size = rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, @@ -528,7 +527,7 @@ class global_arena final { ~global_arena() { std::lock_guard lock(mtx_); - upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); + upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size()); } /** @@ -537,7 +536,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return bool True if the allocation should be handled by the global arena. */ - bool handles(std::size_t size) const { return size > superblock::minimum_size; } + static bool handles(std::size_t size) { return size > superblock::minimum_size; } /** * @brief Acquire a superblock that can fit a block of the given size. @@ -608,7 +607,7 @@ class global_arena final { * @param stream Stream on which to perform deallocation. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate_async(void* ptr, std::size_t size, cuda_stream_view stream) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); @@ -690,7 +689,7 @@ class global_arena final { * @brief Default size of the global arena if unspecified. * @return the default global arena size. 
*/ - constexpr std::size_t default_size() const + static std::size_t default_size() { auto const [free, total] = rmm::available_device_memory(); return free / 2; @@ -703,7 +702,7 @@ class global_arena final { */ void initialize(std::size_t size) { - upstream_block_ = {upstream_mr_->allocate(size), size}; + upstream_block_ = {upstream_mr_.allocate(size), size}; superblocks_.emplace(upstream_block_.pointer(), size); } @@ -775,7 +774,7 @@ class global_arena final { } /// The upstream resource to allocate memory from. - Upstream* upstream_mr_; + device_async_resource_ref upstream_mr_; /// Block allocated from upstream so that it can be quickly freed. block upstream_block_; /// Address-ordered set of superblocks. @@ -793,7 +792,6 @@ class global_arena final { * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. */ -template class arena { public: /** @@ -801,7 +799,7 @@ class arena { * * @param global_arena The global arena from which to allocate superblocks. */ - explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} + explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} // Disable copy (and move) semantics. arena(arena const&) = delete; @@ -835,7 +833,9 @@ class arena { */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.handles(size) && global_arena_.deallocate(ptr, size, stream)) { return true; } + if (global_arena::handles(size) && global_arena_.deallocate_async(ptr, size, stream)) { + return true; + } return deallocate(ptr, size); } @@ -959,7 +959,7 @@ class arena { } /// The global arena to allocate superblocks from. - global_arena& global_arena_; + global_arena& global_arena_; /// Acquired superblocks. std::set superblocks_; /// Mutex for exclusive lock. @@ -974,10 +974,9 @@ class arena { * @tparam Upstream Memory resource to use for allocating the global arena. 
Implements * rmm::mr::device_memory_resource interface. */ -template class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr> const& arena) : arena_(arena) {} + explicit arena_cleaner(std::shared_ptr const& arena) : arena_(arena) {} // Disable copy (and move) semantics. arena_cleaner(arena_cleaner const&) = delete; @@ -995,7 +994,7 @@ class arena_cleaner { private: /// A non-owning pointer to the arena that may need cleaning. - std::weak_ptr> arena_; + std::weak_ptr arena_; }; } // namespace rmm::mr::detail::arena diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index 53bc572c2..880517807 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -103,12 +104,33 @@ class failure_callback_resource_adaptor final : public device_memory_resource { * @param callback Callback function @see failure_callback_t * @param callback_arg Extra argument passed to `callback` */ - failure_callback_resource_adaptor(Upstream* upstream, + failure_callback_resource_adaptor(device_async_resource_ref upstream, failure_callback_t callback, void* callback_arg) : upstream_{upstream}, callback_{std::move(callback)}, callback_arg_{callback_arg} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new `failure_callback_resource_adaptor` using `upstream` to satisfy + * allocation requests. 
+ * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param callback Callback function @see failure_callback_t + * @param callback_arg Extra argument passed to `callback` + */ + failure_callback_resource_adaptor(Upstream* upstream, + failure_callback_t callback, + void* callback_arg) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()}, + callback_{std::move(callback)}, + callback_arg_{callback_arg} + { } failure_callback_resource_adaptor() = delete; @@ -128,11 +150,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -151,7 +168,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { while (true) { try { - ret = upstream_->allocate(bytes, stream); + ret = get_upstream_resource().allocate_async(bytes, stream); break; } catch (exception_type const& e) { if (!callback_(bytes, callback_arg_)) { throw; } @@ -169,7 +186,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -183,11 +200,12 @@ class failure_callback_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } - 
Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; failure_callback_t callback_; void* callback_arg_; }; diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 76a5a31c1..0ca74720a 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param allocation_limit Maximum memory allowed for this allocator * @param alignment Alignment in bytes for the start of each allocated buffer */ - limiting_resource_adaptor(Upstream* upstream, + limiting_resource_adaptor(device_async_resource_ref upstream, std::size_t allocation_limit, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, @@ -61,7 +62,29 @@ class limiting_resource_adaptor final : public device_memory_resource { alignment_(alignment), upstream_{upstream} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new limiting resource adaptor using `upstream` to satisfy + * allocation requests and limiting the total allocation amount possible. 
+ * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param allocation_limit Maximum memory allowed for this allocator + * @param alignment Alignment in bytes for the start of each allocated buffer + */ + limiting_resource_adaptor(Upstream* upstream, + std::size_t allocation_limit, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) + : allocation_limit_{allocation_limit}, + allocated_bytes_(0), + alignment_(alignment), + upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} + { } limiting_resource_adaptor() = delete; @@ -81,11 +104,6 @@ class limiting_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Query the number of bytes that have been allocated. Note that * this can not be used to know how large of an allocation is possible due @@ -126,7 +144,7 @@ class limiting_resource_adaptor final : public device_memory_resource { auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, stream); } catch (...) 
{ allocated_bytes_ -= proposed_size; throw; @@ -147,7 +165,7 @@ class limiting_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { std::size_t allocated_size = rmm::align_up(bytes, alignment_); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -162,7 +180,7 @@ class limiting_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto const* cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -175,8 +193,8 @@ class limiting_resource_adaptor final : public device_memory_resource { // todo: should be some way to ask the upstream... std::size_t alignment_; - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests + // The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; }; /** diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index a56a784a1..f6839f6c7 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -19,29 +19,29 @@ #include #include #include -#include +#include #include #include /** * @file per_device_resource.hpp - * @brief Management of per-device `device_memory_resource`s - * - * One might wish to construct a `device_memory_resource` and use it for (de)allocation - * without explicit dependency injection, i.e., passing a reference to that object to all places it - * is to be used. Instead, one might want to set their resource as a "default" and have it be used - * in all places where another resource has not been explicitly specified. 
In applications with - * multiple GPUs in the same process, it may also be necessary to maintain independent default - * resources for each device. To this end, the `set_per_device_resource` and - * `get_per_device_resource` functions enable mapping a CUDA device id to a `device_memory_resource` - * pointer. - * - * For example, given a pointer, `mr`, to a `device_memory_resource` object, calling + * @brief Management of per-device memory resources + * + * One might wish to construct a memory resource and use it for (de)allocation without explicit + * dependency injection, i.e., passing a reference to that object to all places it is to be used. + * Instead, one might want to set their resource as a "default" and have it be used in all places + * where another resource has not been explicitly specified. In applications with multiple GPUs in + * the same process, it may also be necessary to maintain independent default resources for each + * device. To this end, the `set_per_device_resource` and `get_per_device_resource` functions enable + * mapping a CUDA device id to a memory resource. + * + * For example, `rmm::device_async_resource_ref`, `mr`, to a memory resource object, calling * `set_per_device_resource(cuda_device_id{0}, mr)` will establish a mapping between CUDA device 0 * and `mr` such that all future calls to `get_per_device_resource(cuda_device_id{0})` will return - * the same pointer, `mr`. In this way, all places that use the resource returned from - * `get_per_device_resource` for (de)allocation will use the user provided resource, `mr`. + * the same `rmm::device_async_resource_ref`, `mr`. In this way, all places that use the resource + * returned from `get_per_device_resource` for (de)allocation will use the user provided resource, + * `mr`. * * @note `device_memory_resource`s make CUDA API calls without setting the current CUDA device. 
* Therefore a memory resource should only be used with the current CUDA device set to the device @@ -49,16 +49,16 @@ * is only valid if `id` refers to the CUDA device that was active when `mr` was created. * * If no resource was explicitly set for a given device specified by `id`, then - * `get_per_device_resource(id)` will return a pointer to a `cuda_memory_resource`. + * `get_per_device_resource(id)` will return a reference to a `cuda_memory_resource`. * * To fetch and modify the resource for the current CUDA device, `get_current_device_resource()` and * `set_current_device_resource()` will automatically use the current CUDA device id from * `cudaGetDevice()`. * - * Creating a device_memory_resource for each device requires care to set the current device - * before creating each resource, and to maintain the lifetime of the resources as long as they - * are set as per-device resources. Here is an example loop that creates `unique_ptr`s to - * pool_memory_resource objects for each device and sets them as the per-device resource for that + * Creating a memory resource for each device requires care to set the current device before + * creating each resource, and to maintain the lifetime of the resources as long as they are set as + * per-device resources. Here is an example loop that creates `unique_ptr`s to + * `pool_memory_resource` objects for each device and sets them as the per-device resource for that * device. * * @code{.cpp} @@ -102,6 +102,18 @@ inline device_memory_resource* initial_resource() return &mr; } +/** + * @brief Returns a reference to the initial resource. + * + * Returns a global instance of a `cuda_memory_resource` as a function local static. 
+ *
+ * @return Reference to the static cuda_memory_resource used as the initial, default resource
+ */
+inline device_async_resource_ref initial_resource_ref()
+{
+  return device_async_resource_ref{*initial_resource()};
+}
+
 /**
  * @briefreturn{Reference to the lock}
  */
@@ -111,13 +123,13 @@ inline std::mutex& map_lock()
 {
   return map_lock;
 }
 
 // This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826
 /**
  * @briefreturn{Reference to the map from device id -> resource}
  */
 RMM_EXPORT inline auto& get_map()
 {
-  static std::map device_id_to_resource;
+  static std::map device_id_to_resource;
   return device_id_to_resource;
 }
@@ -144,26 +156,28 @@ RMM_EXPORT inline auto& get_map()
  * @param device_id The id of the target device
- * @return Pointer to the current `device_memory_resource` for device `id`
+ * @return `device_async_resource_ref` for the current resource of device `id`
  */
-inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
+inline device_async_resource_ref get_per_device_resource(cuda_device_id device_id)
 {
   std::lock_guard lock{detail::map_lock()};
   auto& map = detail::get_map();
   // If a resource was never set for `id`, set to the initial resource
   auto const found = map.find(device_id.value());
-  return (found == map.end()) ?
-           (map[device_id.value()] = detail::initial_resource())
-         : found->second;
+  // Note: resource_ref is not default-constructible, so we cannot use std::map::operator[]
+  if (found == map.end()) {
+    auto item = map.insert({device_id.value(), detail::initial_resource_ref()});
+    return item.first->second;
+  }
+  return found->second;
 }
 
 /**
  * @brief Set the `device_memory_resource` for the specified device.
 *
- * If `new_mr` is not `nullptr`, sets the memory resource pointer for the device specified by `id`
- * to `new_mr`. Otherwise, resets `id`s resource to the initial `cuda_memory_resource`.
+ * Sets the memory resource pointer for the device specified by `id` to `new_mr`.
 *
 * `id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is undefined.
 *
- * The object pointed to by `new_mr` must outlive the last use of the resource, otherwise behavior
- * is undefined. It is the caller's responsibility to maintain the lifetime of the resource
+ * The memory resource referenced by `new_mr` must outlive the last use of the resource, otherwise
+ * behavior is undefined. It is the caller's responsibility to maintain the lifetime of the resource
 * object.
 *
 * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`,
 * `get_per_device_resource`, `get_current_device_resource`, and `set_current_device_resource`.
 * Concurrent calls to any of these functions will result in a valid state, but the order of
 * execution is undefined.
 *
 * @note The resource passed in `new_mr` must have been created when device `id` was the current
 * CUDA device (e.g. set using `cudaSetDevice()`). The behavior of a memory resource is
 * undefined if used while the active CUDA device is a different device from the one that was active
@@ -177,26 +191,104 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
 * when the device_memory_resource was created.
 *
 * @param device_id The id of the target device
- * @param new_mr If not `nullptr`, pointer to new `device_memory_resource` to use as new resource
- * for `id`
- * @return Pointer to the previous memory resource for `id`
+ * @param new_mr Reference to `device_memory_resource` to use as new resource for `id`
+ * @return Reference to the previous memory resource for `id`
 */
-inline device_memory_resource* set_per_device_resource(cuda_device_id device_id,
-                                                       device_memory_resource* new_mr)
+inline device_async_resource_ref set_per_device_resource(cuda_device_id device_id,
+                                                         device_async_resource_ref new_mr)
 {
   std::lock_guard lock{detail::map_lock()};
   auto& map = detail::get_map();
   auto const old_itr = map.find(device_id.value());
   // If a resource didn't previously exist for `id`, return pointer to initial_resource
-  auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
-  map[device_id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr;
-  return old_mr;
+  // Note: because resource_ref is not default-constructible, we can't use std::map::operator[]
+  if (old_itr == map.end()) {
+    map.insert({device_id.value(), new_mr});
+    return detail::initial_resource_ref();
+  }
+
+  auto old_resource_ref = old_itr->second;
+  old_itr->second = new_mr;  // update map directly via iterator
+  return old_resource_ref;
+}
+
+/**
+ * @brief Reset the memory resource for the specified device to the initial resource.
+ *
+ * Resets to a reference to the initial `cuda_memory_resource`.
+ *
+ * `device_id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is
+ * undefined.
+ *
+ * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`,
+ * `get_per_device_resource`, `get_current_device_resource`, `set_current_device_resource`,
+ * `reset_per_device_resource` and `reset_current_device_resource`. Concurrent calls to any of these
+ * functions will result in a valid state, but the order of execution is undefined.
+ *
+ * @param device_id The id of the target device
+ * @return Previous `device_async_resource_ref` for `device_id`
+ */
+inline device_async_resource_ref reset_per_device_resource(cuda_device_id device_id)
+{
+  return set_per_device_resource(device_id, detail::initial_resource_ref());
+}
+
+/**
+ * @brief Reset the memory resource for the current device to the initial resource.
+ *
+ * Resets to a reference to the initial `cuda_memory_resource`. The "current device" is the device
+ * returned by `cudaGetDevice`.
+ *
+ * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`,
+ * `get_per_device_resource`, `get_current_device_resource`, `set_current_device_resource`,
+ * `reset_per_device_resource` and `reset_current_device_resource`. Concurrent calls to any of these
+ * functions will result in a valid state, but the order of execution is undefined.
+ *
+ * @return Previous `device_async_resource_ref` for the current device
+ */
+inline device_async_resource_ref reset_current_device_resource()
+{
+  return reset_per_device_resource(rmm::get_current_cuda_device());
+}
+
+/**
+ * @brief Set the `device_memory_resource` for the specified device.
+ *
+ * If `new_mr` is not `nullptr`, sets the memory resource for the device specified by `id`
+ * to `new_mr`. Otherwise, resets `id`s resource to the initial `cuda_memory_resource`.
+ * + * `id.value()` must be in the range `[0, cudaGetDeviceCount())`, otherwise behavior is undefined. + * + * The object pointed to by `new_mr` must outlive the last use of the resource, otherwise behavior + * is undefined. It is the caller's responsibility to maintain the lifetime of the resource + * object. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`, + * `get_per_device_resource`, `get_current_device_resource`, and `set_current_device_resource`. + * Concurrent calls to any of these functions will result in a valid state, but the order of + * execution is undefined. + * + * @note The resource passed in `new_mr` must have been created when device `id` was the current + * CUDA device (e.g. set using `cudaSetDevice()`). The behavior of a memory resource is + * undefined if used while the active CUDA device is a different device from the one that was active + * when the memory resource was created. + * + * @param device_id The id of the target device + * @param new_mr If not `nullptr`, pointer to new `device_memory_resource` to use as new resource + * for `id` + * @return Pointer to the previous memory resource for `id` + */ +inline device_async_resource_ref set_per_device_resource(cuda_device_id device_id, + device_memory_resource* new_mr) +{ + if (new_mr == nullptr) { return reset_per_device_resource(device_id); } + return set_per_device_resource(device_id, *new_mr); } /** * @brief Get the memory resource for the current device. * - * Returns a pointer to the resource set for the current device. The initial resource is a + * Returns a reference to the resource set for the current device. The initial resource is a * `cuda_memory_resource`. * * The "current device" is the device returned by `cudaGetDevice`. 
@@ -206,19 +323,47 @@ inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, * Concurrent calls to any of these functions will result in a valid state, but the order of * execution is undefined. * - * @note The returned `device_memory_resource` should only be used with the current CUDA device. + * @note The returned memory resource should only be used with the current CUDA device. * Changing the current device (e.g. using `cudaSetDevice()`) and then using the returned resource - * can result in undefined behavior. The behavior of a device_memory_resource is undefined if used + * can result in undefined behavior. The behavior of a memory resource is undefined if used * while the active CUDA device is a different device from the one that was active when the - * device_memory_resource was created. + * memory resource was created. * * @return Pointer to the resource for the current device */ -inline device_memory_resource* get_current_device_resource() +inline device_async_resource_ref get_current_device_resource() { return get_per_device_resource(rmm::get_current_cuda_device()); } +/** + * @brief Set the memory resource for the current device. + * + * Sets the memory resource for the current device to `new_mr`. + * + * The "current device" is the device returned by `cudaGetDevice`. + * + * The object pointed to by `new_mr` must outlive the last use of the resource, otherwise behavior + * is undefined. It is the caller's responsibility to maintain the lifetime of the resource + * object. + * + * This function is thread-safe with respect to concurrent calls to `set_per_device_resource`, + * `get_per_device_resource`, `get_current_device_resource`, and `set_current_device_resource`. + * Concurrent calls to any of these functions will result in a valid state, but the order of + * execution is undefined. + * + * @note The resource passed in `new_mr` must have been created for the current CUDA device. 
The + * behavior of a memory resource is undefined if used while the active CUDA device is a + * different device from the one that was active when the memory resource was created. + * + * @param new_mr New resource reference to use for the current device + * @return Previous resource reference for the current device + */ +inline device_async_resource_ref set_current_device_resource(device_async_resource_ref new_mr) +{ + return set_per_device_resource(rmm::get_current_cuda_device(), new_mr); +} + /** * @brief Set the memory resource for the current device. * @@ -237,15 +382,16 @@ inline device_memory_resource* get_current_device_resource() * execution is undefined. * * @note The resource passed in `new_mr` must have been created for the current CUDA device. The - * behavior of a device_memory_resource is undefined if used while the active CUDA device is a - * different device from the one that was active when the device_memory_resource was created. + * behavior of a memory resource is undefined if used while the active CUDA device is a + * different device from the one that was active when the memory resource was created. 
 * * @param new_mr If not `nullptr`, pointer to new resource to use for the current device * @return Pointer to the previous resource for the current device */ -inline device_memory_resource* set_current_device_resource(device_memory_resource* new_mr) { - return set_per_device_resource(rmm::get_current_cuda_device(), new_mr); + return set_per_device_resource(rmm::get_current_cuda_device(), *new_mr); } + /** @} */ // end of group } // namespace rmm::mr diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index a3a972904..5e76aaf74 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,34 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. 
+ */ + explicit pool_memory_resource(device_async_resource_ref upstream_mr, + std::size_t initial_pool_size, + std::optional maximum_pool_size = std::nullopt) + : upstream_mr_{upstream_mr} + { + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Initial pool size required to be a multiple of 256 bytes"); + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Maximum pool size required to be a multiple of 256 bytes"); + + initialize_pool(initial_pool_size, maximum_pool_size); + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -132,7 +161,7 @@ class pool_memory_resource final std::optional maximum_pool_size = std::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); - return upstream_mr; + return device_async_resource_ref{*upstream_mr}; }()} { RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), @@ -182,16 +211,11 @@ class pool_memory_resource final /** * @briefreturn{rmm::device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Computes the size of the current pool * @@ -464,7 +488,8 @@ class pool_memory_resource final } private: - Upstream* upstream_mr_; // The "heap" to allocate the pool from + // The "heap" to allocate the pool from + device_async_resource_ref upstream_mr_; std::size_t current_pool_size_{}; std::optional maximum_pool_size_{}; diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp 
index cbf1b87d2..ed0c5a7dc 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -111,6 +112,16 @@ class statistics_resource_adaptor final : public device_memory_resource { } }; + /** + * @brief Construct a new statistics resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream Reference to the resource used for allocating/deallocating device memory + */ + statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new statistics resource adaptor using `upstream` to satisfy * allocation requests. @@ -119,9 +130,12 @@ class statistics_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory */ - statistics_resource_adaptor(Upstream* upstream) : upstream_{upstream} + statistics_resource_adaptor(Upstream* upstream) + : upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } statistics_resource_adaptor() = delete; @@ -141,11 +155,6 @@ class statistics_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Returns a `counter` struct for this adaptor containing the current, * peak, and total number of allocated bytes for this @@ -224,7 +233,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, 
stream); + void* ptr = upstream_.allocate_async(bytes, stream); // increment the stats { @@ -247,7 +256,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + upstream_.deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -269,7 +278,7 @@ class statistics_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -277,14 +286,14 @@ class statistics_resource_adaptor final : public device_memory_resource { // Invariant: the stack always contains at least one entry std::stack> counter_stack_{{std::make_pair(counter{}, counter{})}}; std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; }; /** * @brief Convenience factory to return a `statistics_resource_adaptor` around the * upstream resource `upstream`. * - * @tparam Upstream Type of the upstream `device_memory_resource`. 
* @param upstream Pointer to the upstream resource * @return The new statistics resource adaptor */ diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 0d3046973..31fc46d32 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -90,10 +91,26 @@ class tracking_resource_adaptor final : public device_memory_resource { * @param upstream The resource used for allocating/deallocating device memory * @param capture_stacks If true, capture stacks for allocation calls */ - tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) + tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks = false) : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + } + + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy + * allocation requests. 
+ * + * @throws rmm::logic_error if `upstream == nullptr` + * + * @param upstream The resource used for allocating/deallocating device memory + * @param capture_stacks If true, capture stacks for allocation calls + */ + tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) + : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{[upstream]() { + RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + return device_async_resource_ref{*upstream}; + }()} + { } tracking_resource_adaptor() = delete; @@ -113,11 +130,6 @@ class tracking_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Get the outstanding allocations map * @@ -197,8 +209,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); - + void* ptr = upstream_.allocate_async(bytes, stream); // track it. 
{ write_lock_t lock(mtx_); @@ -218,7 +229,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + upstream_.deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -263,7 +274,7 @@ class tracking_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -271,7 +282,9 @@ class tracking_resource_adaptor final : public device_memory_resource { std::map allocations_; // map of active allocations std::atomic allocated_bytes_; // number of bytes currently allocated std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_{ + rmm::mr::get_current_device_resource()}; // the upstream resource used for satisfying + // allocation requests }; /** diff --git a/python/rmm/rmm/_cuda/stream.pxd b/python/rmm/rmm/_cuda/stream.pxd index 3c3d3aa6f..66107c4d8 100644 --- a/python/rmm/rmm/_cuda/stream.pxd +++ b/python/rmm/rmm/_cuda/stream.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm._lib.resource_ref cimport stream_ref cdef class Stream: @@ -27,6 +28,7 @@ cdef class Stream: cdef Stream _from_cudaStream_t(cudaStream_t s, object owner=*) cdef cuda_stream_view view(self) except * nogil + cdef stream_ref ref(self) except * nogil cdef void c_synchronize(self) except * nogil cdef bool c_is_default(self) except * nogil cdef void _init_with_new_cuda_stream(self) except * diff --git a/python/rmm/rmm/_cuda/stream.pyx b/python/rmm/rmm/_cuda/stream.pyx index 4d5ff5232..19d91d624 100644 --- a/python/rmm/rmm/_cuda/stream.pyx +++ b/python/rmm/rmm/_cuda/stream.pyx @@ -23,6 +23,7 @@ from rmm._lib.cuda_stream_view cimport ( cuda_stream_per_thread, cuda_stream_view, ) +from rmm._lib.resource_ref cimport stream_ref cdef class Stream: @@ -63,6 +64,12 @@ cdef class Stream: """ return cuda_stream_view((self._cuda_stream)) + cdef stream_ref ref(self) except * nogil: + """ + Generate a cuda::stream_ref from this Stream instance + """ + return stream_ref((self._cuda_stream)) + cdef void c_synchronize(self) except * nogil: """ Synchronize the CUDA stream. diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp index dc92e4639..ab36c2150 100644 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ b/python/rmm/rmm/_lib/_torch_allocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,7 +40,8 @@ extern "C" void* allocate(std::size_t size, int device, void* stream) rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; auto mr = rmm::mr::get_per_device_resource(device_id); - return mr->allocate(size, rmm::cuda_stream_view{static_cast(stream)}); + return mr.allocate_async( + size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); } /** @@ -56,5 +57,8 @@ extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; auto mr = rmm::mr::get_per_device_resource(device_id); - mr->deallocate(ptr, size, rmm::cuda_stream_view{static_cast(stream)}); + mr.deallocate_async(ptr, + size, + rmm::CUDA_ALLOCATION_ALIGNMENT, + rmm::cuda_stream_view{static_cast(stream)}); } diff --git a/python/rmm/rmm/_lib/extern_memory_resources.pxd b/python/rmm/rmm/_lib/extern_memory_resources.pxd new file mode 100644 index 000000000..fb6df19c0 --- /dev/null +++ b/python/rmm/rmm/_lib/extern_memory_resources.pxd @@ -0,0 +1,192 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from libc.stdint cimport int8_t, int64_t +from libcpp cimport bool +from libcpp.optional cimport optional + + +cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) + ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) + +cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + ctypedef bool (*failure_callback_t)(size_t, void*) + +# TODO: when we adopt Cython 3.0 use enum class +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ + nogil: + enum allocation_handle_type \ + "rmm::mr::cuda_async_memory_resource::allocation_handle_type": + none + posix_file_descriptor + win32 + win32_kmt + +cdef extern from "rmm/mr/device/device_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + + device_memory_resource() except + + +cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass cuda_memory_resource(device_memory_resource): + + cuda_memory_resource() except + + +cdef extern from "rmm/mr/device/managed_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass managed_memory_resource(device_memory_resource): + + managed_memory_resource() except + + +cdef extern from "rmm/mr/device/system_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass system_memory_resource(device_memory_resource): + + system_memory_resource() except + + +cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass sam_headroom_memory_resource(device_memory_resource): + + sam_headroom_memory_resource(size_t headroom) except + + +cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass 
cuda_async_memory_resource(device_memory_resource): + + cuda_async_memory_resource( + optional[size_t] initial_pool_size, + optional[size_t] release_threshold, + optional[allocation_handle_type] export_handle_type + ) except + + +cdef extern from "rmm/mr/device/pool_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass pool_memory_resource[Upstream](device_memory_resource): + + size_t pool_size() + + pool_memory_resource( + device_async_resource_ref upstream_mr, + size_t initial_pool_size, + optional[size_t] maximum_pool_size + ) except + + +cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): + + fixed_size_memory_resource( + Upstream* upstream_mr, + size_t block_size, + size_t block_to_preallocate + ) except + + +cdef extern from "rmm/mr/device/binning_memory_resource.hpp" namespace "rmm::mr" nogil: + cdef cppclass binning_memory_resource[Upstream](device_memory_resource): + + binning_memory_resource( + Upstream* upstream_mr, + int8_t min_size_exponent, + int8_t max_size_exponent) except + + + void add_bin(size_t allocation_size) except + + void add_bin( + size_t allocation_size, + device_memory_resource* bin_resource) except + + + binning_memory_resource(Upstream* upstream_mr) except + + +cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass callback_memory_resource(device_memory_resource): + + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg, + void* deallocate_callback_arg + ) except + + +cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): + + size_t get_allocated_bytes() except + + size_t get_allocation_limit() except + + + limiting_resource_adaptor( + Upstream* 
upstream_mr, + size_t allocation_limit + ) except + + +cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): + + void flush() except + + + logging_resource_adaptor( + Upstream* upstream_mr, + string filename + ) except + + +cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): + + struct counter: + counter() + + int64_t value + int64_t peak + int64_t total + + counter get_bytes_counter() except + + counter get_allocations_counter() except + + pair[counter, counter] pop_counters() except + + pair[counter, counter] push_counters() except + + + statistics_resource_adaptor(Upstream* upstream_mr) except + + +cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): + + size_t get_allocated_bytes() except + + string get_outstanding_allocations_str() except + + void log_outstanding_allocations() except + + + tracking_resource_adaptor( + Upstream* upstream_mr, + bool capture_stacks + ) except + + +cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass failure_callback_resource_adaptor[Upstream](device_memory_resource): + + failure_callback_resource_adaptor( + Upstream* upstream_mr, + failure_callback_t callback, + void* callback_arg + ) except + + +cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): + + prefetch_resource_adaptor(Upstream* upstream_mr) except + diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 000a3fe1e..ecaa36edd 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ 
b/python/rmm/rmm/_lib/memory_resource.pxd @@ -13,61 +13,106 @@ # limitations under the License. from libc.stdint cimport int8_t -from libcpp.memory cimport shared_ptr +from libcpp.memory cimport make_shared, shared_ptr from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.vector cimport vector from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm._lib.resource_ref cimport ( + CUDA_ALLOCATION_ALIGNMENT, + device_async_resource_ref, + stream_ref, +) +include "extern_memory_resources.pxd" + +cdef extern from *: + """ + template + rmm::device_async_resource_ref as_ref(T *p) { return p; } + """ + + device_async_resource_ref as_ref[T](T *p) noexcept nogil -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - void* allocate(size_t bytes) except + - void* allocate(size_t bytes, cuda_stream_view stream) except + - void deallocate(void* ptr, size_t bytes) except + - void deallocate( - void* ptr, - size_t bytes, - cuda_stream_view stream - ) except + cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: size_t percent_of_free_device_memory(int percent) except + pair[size_t, size_t] available_device_memory() except + + cdef class DeviceMemoryResource: - cdef shared_ptr[device_memory_resource] c_obj cdef device_memory_resource* get_mr(self) noexcept nogil + cdef shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil + cdef class UpstreamResourceAdaptor(DeviceMemoryResource): cdef readonly DeviceMemoryResource upstream_mr - cpdef DeviceMemoryResource get_upstream(self) + cdef class CudaMemoryResource(DeviceMemoryResource): - pass + cdef shared_ptr[cuda_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class 
ManagedMemoryResource(DeviceMemoryResource): - pass + cdef shared_ptr[managed_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class SystemMemoryResource(DeviceMemoryResource): - pass + cdef shared_ptr[system_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class SamHeadroomMemoryResource(DeviceMemoryResource): - pass + cdef shared_ptr[sam_headroom_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class CudaAsyncMemoryResource(DeviceMemoryResource): - pass + cdef shared_ptr[cuda_async_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class PoolMemoryResource(UpstreamResourceAdaptor): - pass + cdef shared_ptr[pool_memory_resource[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): - pass + cdef shared_ptr[fixed_size_memory_resource[device_memory_resource]] c_obj + cdef inline 
device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class BinningMemoryResource(UpstreamResourceAdaptor): + cdef shared_ptr[binning_memory_resource[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) cdef readonly list _bin_mrs @@ -77,27 +122,66 @@ cdef class BinningMemoryResource(UpstreamResourceAdaptor): DeviceMemoryResource bin_resource=*) cdef class CallbackMemoryResource(DeviceMemoryResource): + cdef shared_ptr[callback_memory_resource] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef object _allocate_func cdef object _deallocate_func cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): - pass + cdef shared_ptr[limiting_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): + cdef shared_ptr[logging_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef object _log_file_name cpdef 
get_file_name(self) cpdef flush(self) cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor): - pass + cdef shared_ptr[statistics_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor): - pass + cdef shared_ptr[tracking_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor): + cdef shared_ptr[failure_callback_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cdef object _callback + cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor): - pass + cdef shared_ptr[prefetch_resource_adaptor[device_memory_resource]] c_obj + cdef inline device_memory_resource* get_mr(self) noexcept nogil: + return self.c_obj.get() + cdef inline shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + return make_shared[device_async_resource_ref](as_ref(self.c_obj.get())) + cpdef DeviceMemoryResource get_current_device_resource() diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 5030c5d2d..602c85d20 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -22,9 +22,8 @@ from collections import defaultdict cimport cython 
from cython.operator cimport dereference as deref from libc.stddef cimport size_t -from libc.stdint cimport int8_t, int64_t, uintptr_t +from libc.stdint cimport int8_t, uintptr_t from libcpp cimport bool -from libcpp.memory cimport make_unique, unique_ptr from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.string cimport string @@ -32,7 +31,9 @@ from libcpp.string cimport string from cuda.cudart import cudaError_t from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice + from rmm._cuda.stream cimport Stream + from rmm._cuda.stream import DEFAULT_STREAM from rmm._lib.cuda_stream_view cimport cuda_stream_view @@ -44,6 +45,7 @@ from rmm._lib.per_device_resource cimport ( cuda_device_id, set_per_device_resource as cpp_set_per_device_resource, ) + from rmm.statistics import Statistics # Transparent handle of a C++ exception @@ -82,164 +84,16 @@ cdef extern from *: void throw_cpp_except(CppExcept) nogil -# NOTE: Keep extern declarations in .pyx file as much as possible to avoid -# leaking dependencies when importing RMM Cython .pxd files -cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass cuda_memory_resource(device_memory_resource): - cuda_memory_resource() except + - -cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass managed_memory_resource(device_memory_resource): - managed_memory_resource() except + - -cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass system_memory_resource(device_memory_resource): - system_memory_resource() except + - -cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass sam_headroom_memory_resource(device_memory_resource): - sam_headroom_memory_resource(size_t headroom) except + - -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - - cdef 
cppclass cuda_async_memory_resource(device_memory_resource): - cuda_async_memory_resource( - optional[size_t] initial_pool_size, - optional[size_t] release_threshold, - optional[allocation_handle_type] export_handle_type) except + - -# TODO: when we adopt Cython 3.0 use enum class -cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ - namespace \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \ - nogil: - enum allocation_handle_type \ - "rmm::mr::cuda_async_memory_resource::allocation_handle_type": - none - posix_file_descriptor - win32 - win32_kmt - - -cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass pool_memory_resource[Upstream](device_memory_resource): - pool_memory_resource( - Upstream* upstream_mr, - size_t initial_pool_size, - optional[size_t] maximum_pool_size) except + - size_t pool_size() - -cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource): - fixed_size_memory_resource( - Upstream* upstream_mr, - size_t block_size, - size_t block_to_preallocate) except + - -cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) - ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) - - cdef cppclass callback_memory_resource(device_memory_resource): - callback_memory_resource( - allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg, - void* deallocate_callback_arg - ) except + - -cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass binning_memory_resource[Upstream](device_memory_resource): - binning_memory_resource(Upstream* upstream_mr) except + - binning_memory_resource( - Upstream* upstream_mr, - int8_t 
min_size_exponent, - int8_t max_size_exponent) except + - - void add_bin(size_t allocation_size) except + - void add_bin( - size_t allocation_size, - device_memory_resource* bin_resource) except + - -cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource): - limiting_resource_adaptor( - Upstream* upstream_mr, - size_t allocation_limit) except + - - size_t get_allocated_bytes() except + - size_t get_allocation_limit() except + - -cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): - logging_resource_adaptor( - Upstream* upstream_mr, - string filename) except + - - void flush() except + - -cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource): - struct counter: - counter() - - int64_t value - int64_t peak - int64_t total - - statistics_resource_adaptor(Upstream* upstream_mr) except + - - counter get_bytes_counter() except + - counter get_allocations_counter() except + - pair[counter, counter] pop_counters() except + - pair[counter, counter] push_counters() except + - -cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource): - tracking_resource_adaptor( - Upstream* upstream_mr, - bool capture_stacks) except + - - size_t get_allocated_bytes() except + - string get_outstanding_allocations_str() except + - void log_outstanding_allocations() except + - -cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - ctypedef bool (*failure_callback_t)(size_t, void*) - cdef cppclass failure_callback_resource_adaptor[Upstream]( - device_memory_resource - ): - 
failure_callback_resource_adaptor( - Upstream* upstream_mr, - failure_callback_t callback, - void* callback_arg - ) except + - -cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource): - prefetch_resource_adaptor(Upstream* upstream_mr) except + - - cdef class DeviceMemoryResource: + # TODO this will be removed once the C++ base class is removed + # it is only used for passing upstream resources to C++, which will + # become resource_ref (type erased) cdef device_memory_resource* get_mr(self) noexcept nogil: - """Get the underlying C++ memory resource object.""" - return self.c_obj.get() + pass + + cdef shared_ptr[device_async_resource_ref] get_ref(self) noexcept nogil: + pass def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM): """Allocate ``nbytes`` bytes of memory. @@ -251,7 +105,11 @@ cdef class DeviceMemoryResource: stream : Stream Optional stream for the allocation """ - return self.c_obj.get().allocate(nbytes, stream.view()) + return deref(self.get_ref()).allocate_async( + nbytes, + CUDA_ALLOCATION_ALIGNMENT, + stream.ref() + ) def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM): """Deallocate memory pointed to by ``ptr`` of size ``nbytes``. @@ -265,7 +123,12 @@ cdef class DeviceMemoryResource: stream : Stream Optional stream for the deallocation """ - self.c_obj.get().deallocate((ptr), nbytes, stream.view()) + deref(self.get_ref()).deallocate_async( + (ptr), + nbytes, + CUDA_ALLOCATION_ALIGNMENT, + stream.ref() + ) # See the note about `no_gc_clear` in `device_buffer.pyx`. 
@@ -285,10 +148,6 @@ cdef class UpstreamResourceAdaptor(DeviceMemoryResource): self.upstream_mr = upstream_mr - def __dealloc__(self): - # Must cleanup the base MR before any upstream MR - self.c_obj.reset() - cpdef DeviceMemoryResource get_upstream(self): return self.upstream_mr @@ -435,7 +294,7 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): ) self.c_obj.reset( new pool_memory_resource[device_memory_resource]( - upstream_mr.get_mr(), + deref(upstream_mr.get_ref()), c_initial_pool_size, c_maximum_pool_size ) @@ -465,10 +324,7 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): pass def pool_size(self): - cdef pool_memory_resource[device_memory_resource]* c_mr = ( - (self.get_mr()) - ) - return c_mr.pool_size() + return self.c_obj.get().pool_size() cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor): def __cinit__( @@ -1169,12 +1025,7 @@ cpdef set_per_device_resource(int device, DeviceMemoryResource mr): global _per_device_mrs _per_device_mrs[device] = mr - # Since cuda_device_id does not have a default constructor, it must be heap - # allocated - cdef unique_ptr[cuda_device_id] device_id = \ - make_unique[cuda_device_id](device) - - cpp_set_per_device_resource(deref(device_id), mr.get_mr()) + cpp_set_per_device_resource(cuda_device_id(device), deref(mr.get_ref())) cpdef set_current_device_resource(DeviceMemoryResource mr): diff --git a/python/rmm/rmm/_lib/per_device_resource.pxd b/python/rmm/rmm/_lib/per_device_resource.pxd index c33217622..7b9370006 100644 --- a/python/rmm/rmm/_lib/per_device_resource.pxd +++ b/python/rmm/rmm/_lib/per_device_resource.pxd @@ -1,23 +1,31 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from rmm._lib.memory_resource cimport device_memory_resource +from rmm._lib.resource_ref cimport device_async_resource_ref -cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: cdef cppclass cuda_device_id: ctypedef int value_type - + cuda_device_id() cuda_device_id(value_type id) - value_type value() cdef extern from "rmm/mr/device/per_device_resource.hpp" \ namespace "rmm::mr" nogil: - cdef device_memory_resource* set_current_device_resource( - device_memory_resource* new_mr - ) - cdef device_memory_resource* get_current_device_resource() - cdef device_memory_resource* set_per_device_resource( - cuda_device_id id, device_memory_resource* new_mr - ) - cdef device_memory_resource* get_per_device_resource ( - cuda_device_id id + cdef device_async_resource_ref set_per_device_resource( + cuda_device_id id, + device_async_resource_ref new_mr ) diff --git a/python/rmm/rmm/_lib/resource_ref.pxd b/python/rmm/rmm/_lib/resource_ref.pxd new file mode 100644 index 000000000..1c2c94325 --- /dev/null +++ b/python/rmm/rmm/_lib/resource_ref.pxd @@ -0,0 +1,39 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t + + +cdef extern from "cuda/memory_resource" namespace "cuda" nogil: + cdef cppclass stream_ref: + stream_ref() except + + stream_ref(cudaStream_t stream) except + + +cdef extern from "rmm/aligned.hpp" namespace "rmm" nogil: + cdef size_t CUDA_ALLOCATION_ALIGNMENT + +cdef extern from "rmm/resource_ref.hpp" namespace "rmm" nogil: + cdef cppclass device_async_resource_ref: + void* allocate(size_t bytes, size_t alignment) except + + void deallocate(void* ptr, size_t bytes, size_t alignment) except + + void* allocate_async( + size_t bytes, + size_t alignment, + stream_ref stream) except + + void deallocate_async( + void* ptr, + size_t bytes, + size_t alignment, + stream_ref stream + ) except + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 75b15a90b..b65c39060 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -41,6 +41,8 @@ function(ConfigureTestInternal TEST_NAME) target_compile_options(${TEST_NAME} PUBLIC $<$:-Wall -Werror -Wno-error=deprecated-declarations>) + target_compile_options(${TEST_NAME} PRIVATE "$<$:-O0>") + if(DISABLE_DEPRECATION_WARNING) target_compile_options( ${TEST_NAME} PUBLIC $<$:-Xcompiler=-Wno-deprecated-declarations>) diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index e58ba53a2..7413dd2dc 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -42,9 +42,9 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) 
{ rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource(check_mr); { if constexpr (std::is_same_v>) { @@ -69,7 +69,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource(); auto check_mr = device_check_resource_adaptor{orig_mr}; rmm::mr::set_current_device_resource(&check_mr); @@ -109,7 +109,7 @@ TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource(); auto check_mr = device_check_resource_adaptor{orig_mr}; rmm::mr::set_current_device_resource(&check_mr); @@ -132,7 +132,7 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource(); auto check_mr = device_check_resource_adaptor{orig_mr}; rmm::mr::set_current_device_resource(&check_mr); diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 1cc068434..ec7e6c3e9 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,6 +96,6 @@ TEST_F(CudaStreamDeathTest, TestSyncNoThrow) // should assert here or in `~cuda_stream()` stream_a.synchronize_no_throw(); }; - EXPECT_DEATH(test(), "Assertion"); + EXPECT_DEATH(test(), ""); } #endif diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp index fcb578fdf..c77c078db 100644 --- a/tests/device_check_resource_adaptor.hpp +++ b/tests/device_check_resource_adaptor.hpp @@ -17,13 +17,14 @@ #include #include #include +#include #include #include class device_check_resource_adaptor final : public rmm::mr::device_memory_resource { public: - device_check_resource_adaptor(rmm::mr::device_memory_resource* upstream) + device_check_resource_adaptor(rmm::device_async_resource_ref upstream) : device_id{rmm::get_current_cuda_device()}, upstream_(upstream) { } @@ -36,11 +37,6 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour return upstream_; } - /** - * @briefreturn{device_memory_resource* to the upstream memory resource} - */ - [[nodiscard]] device_memory_resource* get_upstream() const noexcept { return upstream_; } - private: [[nodiscard]] bool check_device_id() const { return device_id == rmm::get_current_cuda_device(); } @@ -48,7 +44,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { return upstream_->allocate(bytes, stream); } + if (is_correct_device) { return get_upstream_resource().allocate_async(bytes, stream); } return nullptr; } @@ -56,7 +52,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { upstream_->deallocate(ptr, bytes, stream); } + if 
(is_correct_device) { get_upstream_resource().deallocate_async(ptr, bytes, stream); } } [[nodiscard]] bool do_is_equal( @@ -64,10 +60,10 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { if (this == &other) { return true; } auto const* cast = dynamic_cast(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } rmm::cuda_device_id device_id; - rmm::mr::device_memory_resource* upstream_{}; + rmm::device_async_resource_ref upstream_{rmm::mr::get_current_device_resource()}; }; diff --git a/tests/mock_resource.hpp b/tests/mock_resource.hpp index e06148d3a..555cf0d74 100644 --- a/tests/mock_resource.hpp +++ b/tests/mock_resource.hpp @@ -25,7 +25,12 @@ class mock_resource : public rmm::mr::device_memory_resource { public: MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override)); + bool operator==(mock_resource const&) const noexcept { return true; } + bool operator!=(mock_resource const&) const { return false; } + friend void get_property(mock_resource const&, cuda::mr::device_accessible) noexcept {} using size_pair = std::pair; }; +static_assert(cuda::mr::async_resource_with); + } // namespace rmm::test diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index b9ecbc8ca..85262c29d 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -59,13 +59,13 @@ TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) TEST(AlignedTest, SupportsGetMemInfo) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; } TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; cuda_stream_view stream; void* const pointer = int_to_address(123); diff --git 
a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 1eb38888e..6b7468d74 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -15,6 +15,7 @@ */ #include "../../byte_literals.hpp" +#include "cuda/stream_ref" #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include @@ -37,15 +39,22 @@ namespace { class mock_memory_resource { public: - MOCK_METHOD(void*, allocate, (std::size_t)); - MOCK_METHOD(void, deallocate, (void*, std::size_t)); + MOCK_METHOD(void*, allocate, (std::size_t, std::size_t)); + MOCK_METHOD(void, deallocate, (void*, std::size_t, std::size_t)); + MOCK_METHOD(void*, allocate_async, (std::size_t, std::size_t, cuda::stream_ref)); + MOCK_METHOD(void, deallocate_async, (void*, std::size_t, std::size_t, cuda::stream_ref)); + bool operator==(mock_memory_resource const&) const noexcept { return true; } + bool operator!=(mock_memory_resource const&) const { return false; } + friend void get_property(mock_memory_resource const&, cuda::mr::device_accessible) noexcept {} }; +static_assert(cuda::mr::async_resource_with); + using rmm::mr::detail::arena::block; using rmm::mr::detail::arena::byte_span; using rmm::mr::detail::arena::superblock; -using global_arena = rmm::mr::detail::arena::global_arena; -using arena = rmm::mr::detail::arena::arena; +using global_arena = rmm::mr::detail::arena::global_arena; +using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; @@ -59,9 +68,10 @@ auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2) struct ArenaTest : public ::testing::Test { void SetUp() override { - EXPECT_CALL(mock_mr, allocate(arena_size)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size)); - global = std::make_unique(&mock_mr, arena_size); + EXPECT_CALL(mock_mr, allocate(arena_size, ::testing::_)).WillOnce(Return(fake_address3)); + 
EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size, ::testing::_)); + + global = std::make_unique(mock_mr, arena_size); per_thread = std::make_unique(*global); } @@ -293,13 +303,6 @@ TEST_F(ArenaTest, SuperblockMaxFreeSizeWhenFull) // NOLINT /** * Test global_arena. */ - -TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT -{ - auto construct_nullptr = []() { global_arena global{nullptr, std::nullopt}; }; - EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) -} - TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { auto const sblk = global->acquire(256); @@ -378,7 +381,7 @@ TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - global->deallocate(ptr, superblock::minimum_size * 2, {}); + global->deallocate_async(ptr, superblock::minimum_size * 2, {}); ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } @@ -387,8 +390,8 @@ TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size + 256); auto* ptr2 = global->allocate(superblock::minimum_size + 512); - global->deallocate(ptr, superblock::minimum_size + 256, {}); - global->deallocate(ptr2, superblock::minimum_size + 512, {}); + global->deallocate_async(ptr, superblock::minimum_size + 256, {}); + global->deallocate_async(ptr2, superblock::minimum_size + 512, {}); EXPECT_EQ(global->allocate(arena_size), fake_address3); } diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp index 34a2cc8cc..0b66e925b 100644 --- a/tests/mr/device/callback_mr_tests.cpp +++ b/tests/mr/device/callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,22 +36,24 @@ using ::testing::_; TEST(CallbackTest, TestCallbacksAreInvoked) { - auto base_mr = mock_resource(); + auto base_mr = mock_resource(); + auto base_ref = device_async_resource_ref{base_mr}; EXPECT_CALL(base_mr, do_allocate(10_MiB, cuda_stream_view{})).Times(1); EXPECT_CALL(base_mr, do_deallocate(_, 10_MiB, cuda_stream_view{})).Times(1); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_ref, &base_ref); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); } TEST(CallbackTest, LoggingTest) @@ -61,22 +63,23 @@ TEST(CallbackTest, LoggingTest) auto base_mr = rmm::mr::get_current_device_resource(); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Allocating " << size << " bytes" << std::endl; - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Deallocating " << size << " bytes" << std::endl; - auto base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, 
stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, base_mr, base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); - std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", 10_MiB, 10_MiB); + std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", size, size); ASSERT_EQ(expect, output); } diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 777ce9428..5ef914dd4 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,19 +25,19 @@
 namespace rmm::test {
 namespace {
 
-using Limiting_adaptor = rmm::mr::limiting_resource_adaptor<rmm::mr::device_memory_resource>;
+using limiting_adaptor = rmm::mr::limiting_resource_adaptor<rmm::mr::device_memory_resource>;
 
 TEST(LimitingTest, ThrowOnNullUpstream)
 {
   auto const max_size{5_MiB};
-  auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; };
+  auto construct_nullptr = []() { limiting_adaptor mr{nullptr, max_size}; };
   EXPECT_THROW(construct_nullptr(), rmm::logic_error);
 }
 
 TEST(LimitingTest, TooBig)
 {
   auto const max_size{5_MiB};
-  Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
+  limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
   EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory);
 }
 
@@ -45,15 +45,15 @@ TEST(LimitingTest, UpstreamFailure)
 {
   auto const max_size_1{2_MiB};
   auto const max_size_2{5_MiB};
-  Limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1};
-  Limiting_adaptor mr2{&mr1, max_size_2};
+  limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1};
+  limiting_adaptor mr2{&mr1, max_size_2};
   EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory);
 }
 
 TEST(LimitingTest, UnderLimitDueToFrees)
 {
   auto const max_size{10_MiB};
-  Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
+  limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
   auto const size1{4_MiB};
   auto* ptr1           = mr.allocate(size1);
   auto allocated_bytes = size1;
@@ -81,7 +81,7 @@ TEST(LimitingTest, UnderLimitDueToFrees)
 TEST(LimitingTest, OverLimit)
 {
   auto const max_size{10_MiB};
-  Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
+  limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size};
   auto const size1{4_MiB};
   auto* ptr1           = mr.allocate(size1);
   auto allocated_bytes = size1;
diff --git a/tests/mr/device/mr_ref_multithreaded_tests.cpp b/tests/mr/device/mr_ref_multithreaded_tests.cpp
index 3ba32445f..2a7c2904c 100644
--- a/tests/mr/device/mr_ref_multithreaded_tests.cpp
+++ b/tests/mr/device/mr_ref_multithreaded_tests.cpp
@@ -69,42 +69,40 @@ void spawn(Task task, Arguments&&... args)
 
 TEST(DefaultTest, UseCurrentDeviceResource_mt) { spawn(test_get_current_device_resource); }
 
-TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt)
-{
-  spawn([]() {
-    EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
-    EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{}));
-  });
-}
-
 TEST(DefaultTest, GetCurrentDeviceResource_mt)
 {
   spawn([]() {
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource();
-    EXPECT_NE(nullptr, mr);
-    EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{}));
+    EXPECT_EQ(rmm::mr::get_current_device_resource(),
+              rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()});
   });
 }
 
-// Disable until we support resource_ref with set_current_device_resource
-/*TEST_P(mr_ref_test_mt, SetCurrentDeviceResource_mt)
+TEST_P(mr_ref_test_mt, SetCurrentDeviceResource_mt)
 {
   // single thread changes default resource, then multiple threads use it
+  rmm::mr::cuda_memory_resource cuda_mr{};
+  auto cuda_ref = rmm::device_async_resource_ref{cuda_mr};
 
-  rmm::mr::device_memory_resource* old = rmm::mr::set_current_device_resource(this->mr.get());
-  EXPECT_NE(nullptr, old);
+  rmm::mr::set_current_device_resource(cuda_ref);
+  auto old = rmm::mr::set_current_device_resource(this->ref);
 
-  spawn([mr = this->mr.get()]() {
+  spawn([mr = this->ref]() {
     EXPECT_EQ(mr, rmm::mr::get_current_device_resource());
     test_get_current_device_resource();  // test allocating with the new default resource
   });
 
-  // setting default resource w/ nullptr should reset to initial
-  rmm::mr::set_current_device_resource(nullptr);
-  EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource()));
-}*/
+  // old mr should equal a cuda mr
+  EXPECT_EQ(old, cuda_ref);
+
+  // Resetting should reset to initial cuda resource
+  rmm::mr::reset_current_device_resource();
+  EXPECT_EQ(rmm::mr::get_current_device_resource(),
+            rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()});
+}
 
-/*TEST_P(mr_ref_test_mt, SetCurrentDeviceResourcePerThread_mt)
+TEST_P(mr_ref_test_mt, SetCurrentDeviceResourcePerThread_mt)
 {
   int num_devices{};
   RMM_CUDA_TRY(cudaGetDeviceCount(&num_devices));
@@ -112,31 +110,29 @@ TEST(DefaultTest, GetCurrentDeviceResource_mt)
   std::vector<std::thread> threads;
   threads.reserve(num_devices);
   for (int i = 0; i < num_devices; ++i) {
-    threads.emplace_back(std::thread{[mr = this->mr.get()](auto dev_id) {
-                                       RMM_CUDA_TRY(cudaSetDevice(dev_id));
-                                       rmm::mr::device_memory_resource* old =
-                                         rmm::mr::set_current_device_resource(mr);
-                                       EXPECT_NE(nullptr, old);
-                                       // initial resource for this device should be CUDA mr
-                                       EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{}));
-                                       // get_current_device_resource should equal the resource we
-                                       // just set
-                                       EXPECT_EQ(mr, rmm::mr::get_current_device_resource());
-                                       // Setting current dev resource to nullptr should reset to
-                                       // cuda MR and return the MR we previously set
-                                       old = rmm::mr::set_current_device_resource(nullptr);
-                                       EXPECT_NE(nullptr, old);
-                                       EXPECT_EQ(old, mr);
-                                       EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(
-                                         rmm::mr::cuda_memory_resource{}));
-                                     },
-                                     i});
+    threads.emplace_back(
+      [mr = this->ref](auto dev_id) {
+        rmm::mr::cuda_memory_resource cuda_mr{};
+        auto cuda_ref = rmm::device_async_resource_ref{cuda_mr};
+
+        rmm::mr::set_current_device_resource(cuda_ref);
+        auto old = rmm::mr::set_current_device_resource(mr);
+        // old mr should equal a cuda mr
+        EXPECT_EQ(old, cuda_ref);
+        // current dev resource should equal this resource
+        EXPECT_EQ(mr, rmm::mr::get_current_device_resource());
+        // Resetting should reset to initial cuda resource
+        rmm::mr::reset_current_device_resource();
+        EXPECT_EQ(rmm::mr::get_current_device_resource(),
+                  rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()});
+      },
+      i);
   }
   for (auto& thread : threads) { thread.join(); }
-}*/
+}
 
 TEST_P(mr_ref_test_mt, Allocate) { spawn(test_various_allocations, this->ref); }
diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp
index df0045d2b..8398da4c4 100644
--- a/tests/mr/device/mr_ref_test.hpp
+++ b/tests/mr/device/mr_ref_test.hpp
@@ -65,12 +65,11 @@ struct allocation {
 
 inline void test_get_current_device_resource()
 {
-  EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
-  void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB);
+  void* ptr = rmm::mr::get_current_device_resource().allocate(1_MiB);
   EXPECT_NE(nullptr, ptr);
   EXPECT_TRUE(is_properly_aligned(ptr));
   EXPECT_TRUE(is_device_accessible_memory(ptr));
-  rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB);
+  rmm::mr::get_current_device_resource().deallocate(ptr, 1_MiB);
 }
 
 inline void test_allocate(resource_ref ref, std::size_t bytes)
diff --git a/tests/mr/device/mr_ref_tests.cpp b/tests/mr/device/mr_ref_tests.cpp
index d94817bef..c7e49d84b 100644
--- a/tests/mr/device/mr_ref_tests.cpp
+++ b/tests/mr/device/mr_ref_tests.cpp
@@ -55,40 +55,36 @@ INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests,
                          "Binning"),
                          [](auto const& info) { return info.param; });
 
-TEST(DefaultTest, CurrentDeviceResourceIsCUDA)
-{
-  EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
-  EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{}));
-}
-
 TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource(); }
 
 TEST(DefaultTest, GetCurrentDeviceResource)
 {
-  auto* mr = rmm::mr::get_current_device_resource();
-  EXPECT_NE(nullptr, mr);
-  EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{}));
+  EXPECT_EQ(rmm::mr::get_current_device_resource(),
+            rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()});
 }
 
-// Disable until we support resource_ref with set_current_device_resource
-/*TEST_P(mr_ref_test, SetCurrentDeviceResource)
+TEST_P(mr_ref_test, SetCurrentDeviceResource)
 {
-  rmm::mr::device_memory_resource* old{};
-  old = rmm::mr::set_current_device_resource(this->mr.get());
-  EXPECT_NE(nullptr, old);
+  rmm::mr::cuda_memory_resource cuda_mr{};
+  auto cuda_ref = rmm::device_async_resource_ref{cuda_mr};
+
+  rmm::mr::set_current_device_resource(cuda_ref);
+  auto old = rmm::mr::set_current_device_resource(this->ref);
   // old mr should equal a cuda mr
-  EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{}));
+  EXPECT_EQ(old, cuda_ref);
   // current dev resource should equal this resource
-  EXPECT_TRUE(this->mr->is_equal(*rmm::mr::get_current_device_resource()));
+  EXPECT_EQ(this->ref, rmm::mr::get_current_device_resource());
   test_get_current_device_resource();
-  // setting to `nullptr` should reset to initial cuda resource
-  rmm::mr::set_current_device_resource(nullptr);
-  EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{}));
-}*/
+  // Resetting should reset to initial cuda resource
+  rmm::mr::reset_current_device_resource();
+  EXPECT_EQ(rmm::mr::get_current_device_resource(),
+            rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()});
+}
 
 TEST_P(mr_ref_test, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); }
diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp
index c63a61844..d82464735 100644
--- a/tests/mr/device/pool_mr_tests.cpp
+++ b/tests/mr/device/pool_mr_tests.cpp
@@ -61,7 +61,7 @@ TEST(PoolTest, ReferenceThrowMaxLessThanInitial)
   auto max_less_than_initial = []() {
     const auto initial{1024};
     const auto maximum{256};
-    pool_mr mr{*rmm::mr::get_current_device_resource(), initial, maximum};
+    pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum};
   };
   EXPECT_THROW(max_less_than_initial(), rmm::logic_error);
 }
diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp
index 6c5700f0b..c88a2ddca 100644
--- a/tests/mr/device/statistics_mr_tests.cpp
+++ b/tests/mr/device/statistics_mr_tests.cpp
@@ -127,7 +127,7 @@ TEST(StatisticsTest, PeakAllocations)
 
 TEST(StatisticsTest, MultiTracking)
 {
-  auto* orig_device_resource = rmm::mr::get_current_device_resource();
+  auto orig_device_resource = rmm::mr::get_current_device_resource();
   statistics_adaptor mr{orig_device_resource};
   rmm::mr::set_current_device_resource(&mr);
 
diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp
index 7c2532c60..a2d0e41e7 100644
--- a/tests/mr/device/tracking_mr_tests.cpp
+++ b/tests/mr/device/tracking_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks)
 
 TEST(TrackingTest, MultiTracking)
 {
-  auto* orig_device_resource = rmm::mr::get_current_device_resource();
+  auto orig_device_resource = rmm::mr::get_current_device_resource();
   tracking_adaptor mr{orig_device_resource, true};
   rmm::mr::set_current_device_resource(&mr);
 
diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp
index 8445ab1f5..071739575 100644
--- a/tests/mr/host/mr_ref_tests.cpp
+++ b/tests/mr/host/mr_ref_tests.cpp
@@ -233,14 +233,17 @@ TYPED_TEST(MRRefTest, UnsupportedAlignmentTest)
   for (std::size_t num_trials = 0; num_trials < NUM_TRIALS; ++num_trials) {
     for (std::size_t alignment = MinTestedAlignment; alignment <= MaxTestedAlignment;
          alignment *= TestedAlignmentMultiplier) {
+#ifdef NDEBUG
       auto allocation_size = size_distribution(generator);
       void* ptr{nullptr};
       // An unsupported alignment (like an odd number) should result in an
       // alignment of `alignof(std::max_align_t)`
       auto const bad_alignment = alignment + 1;
+
       EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment));
       EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t)));
       EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment));
+#endif
     }
   }
 }