Skip to content

Commit

Permalink
Rewrite relocation to be compliant with P1144 (#67)
Browse files Browse the repository at this point in the history
[Relocation](https://quuxplusone.github.io/blog/2018/07/18/announcing-trivially-relocatable/)
now mostly follows the API proposed in
[P1144](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p1144r9.html).
Parlay will also defer the implementation of relocation operations to
the compiler/library if they are present, which currently works on
Arthur O'Dwyer's LLVM fork
[here](https://github.com/Quuxplusone/llvm-project).
  • Loading branch information
DanielLiamAnderson authored Feb 17, 2024
1 parent c655b8f commit 36459f4
Show file tree
Hide file tree
Showing 25 changed files with 922 additions and 659 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
# -------------------------------------------------------------------

cmake_minimum_required(VERSION 3.14)
project(PARLAY VERSION 2.2.4

project(PARLAY VERSION 2.3.1
DESCRIPTION "A collection of parallel algorithms and other support for parallelism in C++"
LANGUAGES CXX)

Expand Down
58 changes: 58 additions & 0 deletions benchmark/bench_sequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,61 @@ static void bench_short_subscript(benchmark::State& state) {
}
}

// Benchmarks growing a parlay::sequence<int64_t>. On every iteration the
// sequence is rebuilt (constructed with the argument 10000000) while the
// timer is paused, so only the reserve() call that requests one element
// more than the current capacity is measured.
static void bench_grow_int64(benchmark::State& state) {
  constexpr int initial_size = 10000000;
  parlay::sequence<int64_t> seq;
  for (auto _ : state) {
    // Setup: excluded from the measurement
    state.PauseTiming();
    seq = parlay::sequence<int64_t>(initial_size);
    state.ResumeTiming();

    // Asking for more than the current capacity triggers a grow
    const auto grown_capacity = seq.capacity() + 1;
    seq.reserve(grown_capacity);
  }
}

// Benchmark element type that owns a std::unique_ptr<int> and declares no
// move constructor or destructor of its own. The static_assert further down
// requires parlay::is_trivially_relocatable_v to hold for it.
//
// No annotation needed since this one should be detectable; when
// PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE is defined, the guarded
// block below supplies the annotation explicitly instead.
struct Relocatable {
std::unique_ptr<int> x;
Relocatable() = default;
// Allocates an int initialized to x_. Not marked explicit, so an int
// converts to Relocatable implicitly.
Relocatable(int x_) : x(std::make_unique<int>(x_)) { }
};

// When PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE is defined, annotate
// Relocatable explicitly with PARLAY_ASSUME_TRIVIALLY_RELOCATABLE inside
// namespace parlay.
// NOTE(review): presumably the guard macro is set when the compiler cannot
// detect trivial relocatability on its own, and the annotation macro expands
// to an explicit specialization of the parlay trait (hence the template<>
// prefix) -- confirm against parlay/type_traits.h.
#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE)
namespace parlay {
template<>
PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(Relocatable);
}
#endif

// With or without the explicit annotation, Relocatable must be reported as
// trivially relocatable, otherwise the benchmark would not measure that path.
static_assert(parlay::is_trivially_relocatable_v<Relocatable>);

// Counterpart of Relocatable with the same data member, but with a
// user-provided move constructor and a user-provided (empty) destructor.
// The static_assert below requires that parlay does NOT treat this type as
// trivially relocatable.
// NOTE(review): the user-provided special members are presumably what
// disables the detection (as in P1144) -- do not default or remove them.
struct NotRelocatable {
std::unique_ptr<int> x;
NotRelocatable() = default;
// Allocates an int initialized to x_. Not marked explicit, so an int
// converts to NotRelocatable implicitly.
NotRelocatable(int x_) : x(std::make_unique<int>(x_)) { }
// Takes over the pointer held by other.
NotRelocatable(NotRelocatable&& other) noexcept : x(std::move(other.x)) { }
// Intentionally user-provided even though the body is empty.
~NotRelocatable() { }
};
static_assert(!parlay::is_trivially_relocatable_v<NotRelocatable>);

// Benchmarks growing a parlay::sequence<Relocatable>. On every iteration the
// sequence is rebuilt (constructed with the argument 10000000) while the
// timer is paused, so only the reserve() call that requests one element
// more than the current capacity is measured.
static void bench_grow_relocatable(benchmark::State& state) {
  constexpr int initial_size = 10000000;
  parlay::sequence<Relocatable> seq;
  for (auto _ : state) {
    // Setup: excluded from the measurement
    state.PauseTiming();
    seq = parlay::sequence<Relocatable>(initial_size);
    state.ResumeTiming();

    // Asking for more than the current capacity triggers a grow
    const auto grown_capacity = seq.capacity() + 1;
    seq.reserve(grown_capacity);
  }
}

// Benchmarks growing a parlay::sequence<NotRelocatable>. On every iteration
// the sequence is rebuilt (constructed with the argument 10000000) while the
// timer is paused, so only the reserve() call that requests one element
// more than the current capacity is measured.
static void bench_grow_nonrelocatable(benchmark::State& state) {
  constexpr int initial_size = 10000000;
  parlay::sequence<NotRelocatable> seq;
  for (auto _ : state) {
    // Setup: excluded from the measurement
    state.PauseTiming();
    seq = parlay::sequence<NotRelocatable>(initial_size);
    state.ResumeTiming();

    // Asking for more than the current capacity triggers a grow
    const auto grown_capacity = seq.capacity() + 1;
    seq.reserve(grown_capacity);
  }
}

// ------------------------- Registration -------------------------------

#define BENCH(NAME) BENCHMARK(bench_ ## NAME) \
Expand All @@ -35,3 +90,6 @@ static void bench_short_subscript(benchmark::State& state) {

BENCH(subscript);
BENCH(short_subscript);
BENCH(grow_int64);
BENCH(grow_relocatable);
BENCH(grow_nonrelocatable);
7 changes: 5 additions & 2 deletions include/parlay/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,11 @@ struct allocator {
template <class U> /* implicit */ constexpr allocator(const allocator<U>&) noexcept { }
};

template<typename T>
struct is_trivially_relocatable<allocator<T>> : std::true_type {};
// Allocator should be trivially copyable since it is stateless and has no user-provided copy
// constructor. This should guarantee that it is also trivially relocatable.
static_assert(std::is_trivially_copyable_v<allocator<int>>);
static_assert(is_trivially_relocatable_v<allocator<int>>);


template <class T, class U>
bool operator==(const allocator<T>&, const allocator<U>&) { return true; }
Expand Down
4 changes: 2 additions & 2 deletions include/parlay/internal/bucket_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void radix_step_(slice<InIterator, InIterator> A,

for (size_t j = n; j > 0; j--) {
auto x = --counts[keys[j-1]];
uninitialized_relocate(&B[x], &A[j-1]);
relocate_at(&A[j - 1], &B[x]);
}
}

Expand Down Expand Up @@ -128,7 +128,7 @@ void base_sort(slice<InIterator, InIterator> in,
else {
quicksort(in.begin(), in.size(), f);
if (!inplace) {
uninitialized_relocate_n(out.begin(), in.begin(), in.size());
parlay::uninitialized_relocate(in.begin(), in.end(), out.begin());
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/collect_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ auto seq_collect_reduce_sparse(Slice A, Helper const &helper) {
auto r = r_s.begin();
size_t j = 0;
for (size_t i = 0; i < table_size; i++)
if (flags[i]) uninitialized_relocate(&r[j++], &table[i]);
if (flags[i]) relocate_at(&table[i], &r[j++]);
assert(j == count);
return r_s;
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/counting_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ auto count_sort_inplace(slice<InIterator, InIterator> In, KeyS const& Keys, size
using value_type = typename slice<InIterator, InIterator>::value_type;
auto Tmp = uninitialized_sequence<value_type>(In.size());
auto a = count_sort<uninitialized_relocate_tag>(In, make_slice(Tmp), make_slice(Keys), num_buckets);
uninitialized_relocate_n(In.begin(), Tmp.begin(), In.size());
parlay::uninitialized_relocate(Tmp.begin(), Tmp.end(), In.begin());
return a.first;
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/delayed/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ struct block_delayed_filter_t :
}
}
auto res = sequence<It>::uninitialized(n);
uninitialized_relocate_n(res.begin(), temp.begin(), n);
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
return res;
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/delayed/filter_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct block_delayed_filter_op_t :
}
}
auto res = sequence<result_type>::uninitialized(n);
uninitialized_relocate_n(res.begin(), temp.begin(), n);
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
return res;
}

Expand Down
14 changes: 7 additions & 7 deletions include/parlay/internal/integer_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ void seq_radix_sort_(slice<InIterator, InIterator> In,
}

if (swapped && inplace) {
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
}
else if (!swapped && !inplace) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
}

Expand All @@ -105,10 +105,10 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
size_t n = In.size();
if (odd) {
// We could just use assign_dispatch(Tmp[i], In[i]) for each i, but we
// can optimize better by calling destructive_move_slice, since this
// can optimize better by calling uninitialized_relocate, since this
// has the ability to memcpy multiple elements at once
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Tmp.begin(), In.begin(), Tmp.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Tmp.begin());
}
else {
for (size_t i = 0; i < n; i++)
Expand All @@ -117,7 +117,7 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
seq_radix_sort_(Tmp, Out, g, key_bits, false);
} else {
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
else {
for (size_t i = 0; i < n; i++)
Expand Down Expand Up @@ -219,7 +219,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,
// uninitialized_relocate, which can memcpy multiple elements at a time
// to save on performing every copy individually.
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
else {
parallel_for(0, In.size(), [&](size_t i) {
Expand Down Expand Up @@ -248,7 +248,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,

if constexpr (inplace_tag::value == true) {
if (!one_bucket) {
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
}
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/merge_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void merge_sort_(slice<InIterator, InIterator> In,
insertion_sort(In.begin(), In.size(), f);
if (!inplace) {
for (size_t i = 0; i < In.size(); i++) {
uninitialized_relocate(&Out[i], &In[i]);
relocate_at(&In[i], &Out[i]);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/sample_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void sample_sort_inplace_(slice<InIterator, InIterator> In,

// Sample block is already sorted, so we don't need to sort it again.
// We can just move it straight over into the other sorted blocks
uninitialized_relocate_n(Tmp.begin(), sample_set.begin(), sample_set_size);
parlay::uninitialized_relocate(sample_set.begin(), sample_set.end(), Tmp.begin());

// move data from blocks to buckets
auto bucket_offsets =
Expand Down
11 changes: 10 additions & 1 deletion include/parlay/internal/sequence_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,16 @@ struct alignas(uint64_t) sequence_base {
auto n = size();
auto dest_buffer = new_buffer.data();
auto current_buffer = data();
uninitialized_relocate_n_a(dest_buffer, current_buffer, n, *this);

if constexpr (is_trivial_allocator_v<T_allocator_type, T>) {
parlay::uninitialized_relocate_n(current_buffer, n, dest_buffer);
}
else {
parallel_for(0, n, [&](size_t i){
std::allocator_traits<T_allocator_type>::construct(alloc, std::addressof(dest_buffer[i]), std::move(current_buffer[i]));
std::allocator_traits<T_allocator_type>::destroy(alloc, std::addressof(current_buffer[i]));
});
}

// Destroy the old stuff
if (!is_small()) {
Expand Down
159 changes: 159 additions & 0 deletions include/parlay/internal/uninitialized_iterator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@

#ifndef PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_
#define PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_

#include <iterator>
#include <type_traits>

#include "../range.h"
#include "../type_traits.h"

namespace parlay {
namespace internal {

// Given a container of uninitialized<T>, you can wrap its iterators with
// uninitialized_iterator_adaptor to get an iterator whose value type is T.
// Dereferencing the adaptor yields the `value` member of the element that
// the wrapped iterator refers to.
//
// The resulting iterator will have the same iterator category as Iterator.
// Operations beyond construction, dereference, pre-increment and swap are
// member templates that are only enabled (via SFINAE on the wrapped iterator
// type) when Iterator belongs to a category that supports them.
template<typename Iterator>
class uninitialized_iterator_adaptor {
 public:
  using iterator_category = parlay::iterator_category_t<Iterator>;
  using difference_type = parlay::iterator_difference_type_t<Iterator>;
  // Declared type of the `value` member of the wrapped iterator's value type
  using value_type = decltype(std::declval<parlay::iterator_value_type_t<Iterator>>().value);
  using reference = std::add_lvalue_reference_t<value_type>;
  using pointer = std::add_pointer_t<value_type>;

  // Wraps the given underlying iterator
  explicit uninitialized_iterator_adaptor(Iterator it_) : it(it_) {}

  // Returns a reference to the `value` member of the current element
  reference operator*() const { return it->value; }

  // Returns a pointer to the `value` member of the current element
  pointer operator->() const { return std::addressof(it->value); }

  uninitialized_iterator_adaptor& operator++() {
    ++it;
    return *this;
  }

  friend void swap(uninitialized_iterator_adaptor& left, uninitialized_iterator_adaptor& right) noexcept {
    std::swap(left.it, right.it);
  }

  // ------------------------ Enabled if input iterator ------------------------

  template<typename It = Iterator>
  auto operator++(int)
      -> std::enable_if_t<parlay::is_input_iterator_v<It>, uninitialized_iterator_adaptor> {
    auto tmp = *this;
    ++(*this);
    return tmp;
  }

  template<typename It = Iterator>
  auto operator==(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_input_iterator_v<It>, bool> {
    return it == other.it;
  }

  template<typename It = Iterator>
  auto operator!=(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_input_iterator_v<It>, bool> {
    return it != other.it;
  }

  // ------------------------ Enabled if forward iterator ------------------------

  // Can't SFINAE special member functions so this is close enough until C++20
  template<typename It = Iterator, std::enable_if_t<parlay::is_forward_iterator_v<It>, int> = 0>
  uninitialized_iterator_adaptor() : it{} {}

  // ------------------------ Enabled if bidirectional iterator ------------------------

  template<typename It = Iterator>
  auto operator--() -> std::enable_if_t<parlay::is_bidirectional_iterator_v<It>, uninitialized_iterator_adaptor&> {
    --it;
    return *this;
  }

  template<typename It = Iterator>
  auto operator--(int) -> std::enable_if_t<parlay::is_bidirectional_iterator_v<It>, uninitialized_iterator_adaptor> {
    auto tmp = *this;
    --(*this);
    return tmp;
  }

  // ------------------------ Enabled if random-access iterator ------------------------

  // The arithmetic operations below apply +=, -= and binary - to the wrapped
  // iterator, so they must only participate in overload resolution when the
  // wrapped iterator is random access. Gating them on the bidirectional trait
  // would advertise them for iterators that cannot support them.

  template<typename It = Iterator>
  auto operator+=(difference_type diff)
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, uninitialized_iterator_adaptor&> {
    it += diff;
    return *this;
  }

  template<typename It = Iterator>
  auto operator+(difference_type diff) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, uninitialized_iterator_adaptor> {
    auto result = *this;
    result += diff;
    return result;
  }

  template<typename It = Iterator>
  auto operator-=(difference_type diff)
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, uninitialized_iterator_adaptor&> {
    it -= diff;
    return *this;
  }

  template<typename It = Iterator>
  auto operator-(difference_type diff) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, uninitialized_iterator_adaptor> {
    auto result = *this;
    result -= diff;
    return result;
  }

  // Distance between the two wrapped iterators
  template<typename It = Iterator>
  auto operator-(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, difference_type> {
    return it - other.it;
  }

  // Returns a reference to the `value` member of the element at offset p
  template<typename It = Iterator>
  auto operator[](std::size_t p) const -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, reference> {
    return it[p].value;
  }

  template<typename It = Iterator>
  auto operator<(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, bool> {
    return it < other.it;
  }

  template<typename It = Iterator>
  auto operator<=(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, bool> {
    return it <= other.it;
  }

  template<typename It = Iterator>
  auto operator>(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, bool> {
    return it > other.it;
  }

  template<typename It = Iterator>
  auto operator>=(const uninitialized_iterator_adaptor& other) const
      -> std::enable_if_t<parlay::is_random_access_iterator_v<It>, bool> {
    return it >= other.it;
  }

 private:
  // The wrapped iterator over the underlying uninitialized<T> elements
  Iterator it;
};

} // namespace internal
} // namespace parlay

#endif // PARLAY_INTERNAL_UNINITIALIZED_ITERATOR_H_
Loading

0 comments on commit 36459f4

Please sign in to comment.