Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add meta dimensions specifiers to cudax::launch #2001

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
caf08d3
Kinda working at_least specifier
pciolkosz Jul 10, 2024
e0fa74f
Rebase to launch
pciolkosz Jul 10, 2024
03c579c
Hierarchy type transformation trait
pciolkosz Jul 10, 2024
e8ba03e
Occupancy meta specifier
pciolkosz Jul 10, 2024
1475b6d
Example fixes
pciolkosz Jul 11, 2024
ddba659
Add max_coresident and finalize functor overload
pciolkosz Jul 12, 2024
e47fb4e
Take into account cluster size in max_coresident
pciolkosz Jul 13, 2024
e3c8120
Add finalize to launch, take dyn smem into account
pciolkosz Jul 15, 2024
bf40e09
Add tests for dynamic smem handling in finalize
pciolkosz Jul 15, 2024
9b10ec7
Minor improvements
pciolkosz Jul 16, 2024
70d8799
Add stream argument to finalize
pciolkosz Jul 17, 2024
a42eef2
Rename transform to finalize
pciolkosz Jul 17, 2024
a6c8ce0
Rename transform to finalize
pciolkosz Jul 17, 2024
08c8717
Inline docs and some comments
pciolkosz Jul 17, 2024
ae74327
Fix at_least
pciolkosz Jul 17, 2024
fe78bab
Remove accidental swp file
pciolkosz Jul 17, 2024
1ab1793
Add safety TODO
pciolkosz Jul 17, 2024
eefbc06
Compilation fixes
pciolkosz Jul 17, 2024
818a1bf
More compilation fixes
pciolkosz Jul 18, 2024
d9375a0
Some more fixes for MSVC
pciolkosz Jul 18, 2024
b0bf125
Add pragma for warning in is_function
pciolkosz Jul 18, 2024
9121424
Help confused MSVC
pciolkosz Jul 18, 2024
1b14e37
Fix typo in a comment
pciolkosz Jul 19, 2024
4ccc364
Merge branch 'main' into dims_meta_specifier
pciolkosz Aug 4, 2024
5cc3f2f
Fix formatting
pciolkosz Aug 4, 2024
2a8f57a
Fix formatting
pciolkosz Aug 4, 2024
e675d1a
Change best_occupancy to max_occupancy
pciolkosz Aug 21, 2024
899f72e
Merge remote-tracking branch 'upstream/main' into dims_meta_specifier
pciolkosz Aug 21, 2024
1392a97
Fix extra comma, add missing empty kernel
pciolkosz Aug 22, 2024
bba433b
Push context once when launching a draft
pciolkosz Aug 22, 2024
ce653b6
Remove constexpr from finalize
pciolkosz Aug 22, 2024
a8b78a1
Unused parameter
pciolkosz Aug 22, 2024
ab263a1
Pragma for is_function
pciolkosz Aug 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ _CCCL_NODISCARD constexpr auto make_hierarchy_fragment_reversable(L1&& l1, Level
"Provided levels can't create a valid hierarchy when stacked in the provided order or reversed");
if constexpr (can_stack)
{
return hierarchy_dimensions_fragment(LUnit{}, ::cuda::std::forward<L1>(l1), ::cuda::std::forward<Levels>(ls)...);
return hierarchy_dimensions_fragment(
LUnit{}, std::make_tuple(::cuda::std::forward<L1>(l1), ::cuda::std::forward<Levels>(ls)...));
}
else
{
Expand Down Expand Up @@ -213,7 +214,7 @@ _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto dims_to_count(const dimensions<
template <typename... Levels>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto get_level_counts_helper(const Levels&... ls)
{
return ::cuda::std::make_tuple(dims_to_count(ls.dims)...);
return ::cuda::std::make_tuple(dims_to_count(ls.dims_for_query())...);
}

template <typename Unit, typename Level, typename Dims>
Expand Down Expand Up @@ -244,15 +245,20 @@ struct hierarchy_extents_helper
using TopLevel = typename LTopDims::level_type;
if constexpr (sizeof...(Levels) == 0)
{
return replace_with_intrinsics_or_constexpr<BottomUnit, TopLevel>(ltop.dims);
return replace_with_intrinsics_or_constexpr<BottomUnit, TopLevel>(ltop.dims_for_query());
}
else
{
using Unit = typename detail::get_first_level_type<typename Levels::level_type...>::type;
return dims_product<typename TopLevel::product_type>(
replace_with_intrinsics_or_constexpr<Unit, TopLevel>(ltop.dims), (*this)(levels...));
replace_with_intrinsics_or_constexpr<Unit, TopLevel>(ltop.dims_for_query()), (*this)(levels...));
}
}

// Overload for an empty list of levels: returns a default-constructed
// hierarchy_query_result<dimensions_index_type, 1, 1, 1>.
// NOTE(review): presumably reached when the selected level range is an empty
// tuple (Unit and Level are the same type) — confirm against the callers.
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto operator()() noexcept
{
return hierarchy_query_result<dimensions_index_type, 1, 1, 1>();
}
};

template <typename T, size_t... Extents>
Expand All @@ -271,17 +277,22 @@ struct index_helper
using TopLevel = typename LTopDims::level_type;
if constexpr (sizeof...(Levels) == 0)
{
return static_index_hint(ltop.dims, dims_helper<BottomUnit, TopLevel>::index());
return static_index_hint(ltop.dims_for_query(), dims_helper<BottomUnit, TopLevel>::index());
}
else
{
using Unit = typename detail::get_first_level_type<typename Levels::level_type...>::type;
auto hinted_index = static_index_hint(ltop.dims, dims_helper<Unit, TopLevel>::index());
auto hinted_index = static_index_hint(ltop.dims_for_query(), dims_helper<Unit, TopLevel>::index());
return dims_sum<typename TopLevel::product_type>(
dims_product<typename TopLevel::product_type>(hinted_index, hierarchy_extents_helper<BottomUnit>()(levels...)),
index_helper<BottomUnit>()(levels...));
}
}

// Overload for an empty list of levels: returns a default-constructed
// hierarchy_query_result<dimensions_index_type, 1, 1, 1>.
// NOTE(review): this is the same value the extents helper returns for the
// empty case; an *index* of a unit within itself would more commonly be all
// zeros — confirm that 1, 1, 1 is intended here.
_CCCL_NODISCARD _CCCL_DEVICE constexpr auto operator()() noexcept
{
return hierarchy_query_result<dimensions_index_type, 1, 1, 1>();
}
};

template <typename BottomUnit>
Expand All @@ -293,18 +304,23 @@ struct rank_helper
using TopLevel = typename LTopDims::level_type;
if constexpr (sizeof...(Levels) == 0)
{
auto hinted_index = static_index_hint(ltop.dims, dims_helper<BottomUnit, TopLevel>::index());
return detail::index_to_linear<typename TopLevel::product_type>(hinted_index, ltop.dims);
auto hinted_index = static_index_hint(ltop.dims_for_query(), dims_helper<BottomUnit, TopLevel>::index());
return detail::index_to_linear<typename TopLevel::product_type>(hinted_index, ltop.dims_for_query());
}
else
{
using Unit = typename detail::get_first_level_type<typename Levels::level_type...>::type;
auto hinted_index = static_index_hint(ltop.dims, dims_helper<Unit, TopLevel>::index());
auto level_rank = detail::index_to_linear<typename TopLevel::product_type>(hinted_index, ltop.dims);
auto hinted_index = static_index_hint(ltop.dims_for_query(), dims_helper<Unit, TopLevel>::index());
auto level_rank = detail::index_to_linear<typename TopLevel::product_type>(hinted_index, ltop.dims_for_query());
return level_rank * dims_to_count(hierarchy_extents_helper<BottomUnit>()(levels...))
+ rank_helper<BottomUnit>()(levels...);
}
}

// Overload for an empty list of levels: returns the constant 1.
// NOTE(review): the other overloads build the rank from index_to_linear of a
// hinted index; a zero-based rank for the empty selection would be 0 —
// confirm that 1 is the intended value.
_CCCL_NODISCARD _CCCL_DEVICE constexpr dimensions_index_type operator()() noexcept
{
return 1;
}
};
} // namespace detail

Expand Down Expand Up @@ -344,19 +360,6 @@ struct hierarchy_dimensions_fragment
static_assert(::cuda::std::is_base_of_v<hierarchy_level, BottomUnit> || ::cuda::std::is_same_v<BottomUnit, void>);
::cuda::std::tuple<Levels...> levels;

_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const Levels&... ls) noexcept
: levels(ls...)
{}
_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(Levels&&... ls) noexcept
: levels(::cuda::std::forward<Levels>(ls)...)
{}
_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const BottomUnit&, const Levels&... ls) noexcept
: levels(ls...)
{}
_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const BottomUnit&, Levels&&... ls) noexcept
: levels(::cuda::std::forward<Levels>(ls)...)
{}

_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const ::cuda::std::tuple<Levels...>& ls) noexcept
: levels(ls)
{}
Expand All @@ -365,11 +368,11 @@ struct hierarchy_dimensions_fragment
{}

_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(
const BottomUnit& unit, const ::cuda::std::tuple<Levels...>& ls) noexcept
const BottomUnit&, const ::cuda::std::tuple<Levels...>& ls) noexcept
: levels(ls)
{}
_CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(
const BottomUnit& unit, ::cuda::std::tuple<Levels...>&& ls) noexcept
const BottomUnit&, ::cuda::std::tuple<Levels...>&& ls) noexcept
: levels(::cuda::std::forward<::cuda::std::tuple<Levels...>>(ls))
{}

Expand All @@ -378,10 +381,17 @@ private:
template <typename Unit, typename Level>
_CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr auto levels_range_static(const decltype(levels)& levels) noexcept
{
static_assert(has_level<Level, hierarchy_dimensions_fragment<BottomUnit, Levels...>>);
static_assert(has_level_or_unit<Level, hierarchy_dimensions_fragment<BottomUnit, Levels...>>);
static_assert(has_level_or_unit<Unit, hierarchy_dimensions_fragment<BottomUnit, Levels...>>);
static_assert(detail::legal_unit_for_level<Unit, Level>);
return ::cuda::std::apply(detail::get_levels_range<Level, Unit, Levels...>, levels);
if constexpr (::cuda::std::is_same_v<Unit, Level>)
{
return ::cuda::std::make_tuple();
}
else
{
static_assert(detail::legal_unit_for_level<Unit, Level>);
return ::cuda::std::apply(detail::get_levels_range<Level, Unit, Levels...>, levels);
}
}

// TODO is this useful enough to expose?
Expand All @@ -397,7 +407,7 @@ private:
template <typename... Selected>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto operator()(const Selected&... levels) const noexcept
{
return hierarchy_dimensions_fragment<Unit, Selected...>(levels...);
return hierarchy_dimensions_fragment<Unit, Selected...>(std::make_tuple(levels...));
}
};

Expand Down Expand Up @@ -478,6 +488,8 @@ public:
_CCCL_HOST_DEVICE constexpr auto extents(const Unit& = Unit(), const Level& = Level()) const noexcept
{
auto selected = levels_range<Unit, Level>();
static_assert(detail::usable_for_queries<decltype(selected)>,
"Dimensions type is not usable for queries, finalize the dimensions first");
return detail::convert_to_query_result(::cuda::std::apply(detail::hierarchy_extents_helper<Unit>{}, selected));
}

Expand Down Expand Up @@ -612,6 +624,8 @@ public:
_CCCL_DEVICE constexpr auto index(const Unit& = Unit(), const Level& = Level()) const noexcept
{
auto selected = levels_range<Unit, Level>();
static_assert(detail::usable_for_queries<decltype(selected)>,
"Dimensions type is not usable for queries, finalize the dimensions first");
return detail::convert_to_query_result(::cuda::std::apply(detail::index_helper<Unit>{}, selected));
}

Expand Down Expand Up @@ -655,6 +669,8 @@ public:
_CCCL_DEVICE constexpr auto rank(const Unit& = Unit(), const Level& = Level()) const noexcept
{
auto selected = levels_range<Unit, Level>();
static_assert(detail::usable_for_queries<decltype(selected)>,
"Dimensions type is not usable for queries, finalize the dimensions first");
return ::cuda::std::apply(detail::rank_helper<Unit>{}, selected);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,16 @@ struct extents_corrected : public ::cuda::std::extents<T, Extents...> {
};
*/

// Common base of the dimensions_handler specializations. It supplies
// permissive defaults for the two compile-time flags that the *_dims factory
// functions static_assert on; a handler can shadow either flag to restrict it.
struct base_dimensions_handler
{
// TODO: Should these two be merged into one bool?
// Default: the dimensions type itself is accepted.
static constexpr bool is_type_supported = true;
// Default: the dimensions type is accepted for every hierarchy Level.
template <typename Level>
static constexpr bool is_level_supported = true;
};

template <typename Dims>
struct dimensions_handler
struct dimensions_handler : public base_dimensions_handler
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remark: struct base classes are public by default.

Suggested change
struct dimensions_handler : public base_dimensions_handler
struct dimensions_handler : base_dimensions_handler

I don't know why this triggers me enough to write a comment. Feel free to ignore!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We tend to prefer explicitness about these things. Makes the code more accessible to less experienced contributors.

{
static constexpr bool is_type_supported = ::cuda::std::is_integral_v<Dims>;

Expand All @@ -51,10 +59,8 @@ struct dimensions_handler
};

template <>
struct dimensions_handler<dim3>
struct dimensions_handler<dim3> : public base_dimensions_handler
{
static constexpr bool is_type_supported = true;

_CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr auto translate(const dim3& d) noexcept
{
return dimensions<dimensions_index_type,
Expand All @@ -65,15 +71,25 @@ struct dimensions_handler<dim3>
};

template <typename Dims, Dims Val>
struct dimensions_handler<::cuda::std::integral_constant<Dims, Val>>
struct dimensions_handler<::cuda::std::integral_constant<Dims, Val>> : public base_dimensions_handler
{
static constexpr bool is_type_supported = true;

_CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr auto translate(const Dims& d) noexcept
{
return dimensions<dimensions_index_type, size_t(d), 1, 1>();
}
};

// Compile-time trait: whether a dimensions type may be used by hierarchy
// queries. The primary template is false; types that are not usable are
// rejected with "finalize the dimensions first" by the query entry points.
// needs_finalization or similar might be a better name
template <typename Dims>
inline constexpr bool usable_for_queries = false;

// Any dimensions<T, Extents...> instantiation is usable for queries.
template <typename T, size_t... Extents>
inline constexpr bool usable_for_queries<dimensions<T, Extents...>> = true;
Comment on lines +83 to +87
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remark: I love variable templates as traits instead of structs. They are shorter and more to the point. They require C++14 though, which is why we don't see many of those around here.


// Tuple specialization: true only when, for every element type, the decayed
// type of its `dims` member is itself usable_for_queries. The fold over &&
// yields true for an empty tuple.
template <typename... LevelDims>
inline constexpr bool usable_for_queries<::cuda::std::tuple<LevelDims...>> =
(... && usable_for_queries<::cuda::std::decay_t<decltype(::cuda::std::declval<LevelDims>().dims)>>);

} // namespace detail

/**
Expand Down Expand Up @@ -113,11 +129,27 @@ template <typename Level, typename Dimensions>
struct level_dimensions
{
static_assert(::cuda::std::is_base_of_v<hierarchy_level, Level>);
using level_type = Level;
using level_type = Level;
using dimensions_type = Dimensions;

// Needs alignas to work around an issue with tuple
alignas(16) const Dimensions dims; // Unit for dimensions is implicit

// TODO might be deleted once we are confident the rest of the code properly checks usable_for_queries
// Checked accessor for `dims`, used by the query helpers instead of reading
// the member directly. Returns `dims` by const reference and fails to compile
// when Dimensions is not usable_for_queries.
// NOTE(review): the unused template parameter presumably keeps the
// static_assert from firing until the function is actually called — confirm.
template <typename T = void>
_CCCL_HOST_DEVICE constexpr const Dimensions& dims_for_query() const
{
static_assert(detail::usable_for_queries<Dimensions>,
"Dimensions type is not usable for queries, finalize the dimensions first");
return dims;
}

// Returns a level_dimensions for the same Level that holds `new_dims`.
// The dimensions stored in this object are not read; only the Level type is
// carried over to the result.
template <typename NewDims>
_CCCL_NODISCARD _CCCL_HOST_DEVICE level_dimensions<Level, NewDims> finalize(const NewDims& new_dims) const
{
return level_dimensions<Level, NewDims>(new_dims);
}

_CCCL_HOST_DEVICE constexpr level_dimensions(const Dimensions& d)
: dims(d)
{}
Expand Down Expand Up @@ -148,6 +180,8 @@ template <typename T>
_CCCL_HOST_DEVICE constexpr auto grid_dims(T t) noexcept
{
static_assert(detail::dimensions_handler<T>::is_type_supported);
static_assert(detail::dimensions_handler<T>::template is_level_supported<grid_level>,
"This level type does not support the provided type of dimensions");
auto dims = detail::dimensions_handler<T>::translate(t);
return level_dimensions<grid_level, decltype(dims)>(dims);
}
Expand All @@ -172,6 +206,8 @@ template <typename T>
_CCCL_HOST_DEVICE constexpr auto cluster_dims(T t) noexcept
{
static_assert(detail::dimensions_handler<T>::is_type_supported);
static_assert(detail::dimensions_handler<T>::template is_level_supported<cluster_level>,
"This level type does not support the provided type of dimensions");
auto dims = detail::dimensions_handler<T>::translate(t);
return level_dimensions<cluster_level, decltype(dims)>(dims);
}
Expand All @@ -196,6 +232,8 @@ template <typename T>
_CCCL_HOST_DEVICE constexpr auto block_dims(T t) noexcept
{
static_assert(detail::dimensions_handler<T>::is_type_supported);
static_assert(detail::dimensions_handler<T>::template is_level_supported<block_level>,
"This level type does not support the provided type of dimensions");
auto dims = detail::dimensions_handler<T>::translate(t);
return level_dimensions<block_level, decltype(dims)>(dims);
}
Expand Down
25 changes: 19 additions & 6 deletions cudax/include/cuda/experimental/__launch/configuration.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ enum class launch_option_kind
struct option_not_found
{};

// TODO could this return references, so we don't copy the options
template <detail::launch_option_kind Kind>
struct find_option_in_tuple_impl
{
template <typename Option, typename... Options>
_CCCL_DEVICE auto& operator()(const Option& opt, const Options&... rest)
_CCCL_HOST_DEVICE auto operator()(const Option& opt, const Options&... rest)
{
if constexpr (Option::kind == Kind)
{
Expand All @@ -68,14 +69,14 @@ struct find_option_in_tuple_impl
}
}

_CCCL_DEVICE auto operator()()
_CCCL_HOST_DEVICE auto operator()()
{
return option_not_found();
}
};

template <detail::launch_option_kind Kind, typename... Options>
_CCCL_DEVICE auto& find_option_in_tuple(const ::cuda::std::tuple<Options...>& tuple)
_CCCL_HOST_DEVICE const auto find_option_in_tuple(const ::cuda::std::tuple<Options...>& tuple)
{
return ::cuda::std::apply(find_option_in_tuple_impl<Kind>(), tuple);
}
Expand Down Expand Up @@ -209,11 +210,23 @@ struct dynamic_shared_memory_option : public detail::launch_option
friend cudaError_t detail::apply_kernel_config(
const kernel_config<Dimensions, Options...>& config, cudaLaunchConfig_t& cuda_config, void* kernel) noexcept;

// Size in bytes described by this option: the element count multiplied by
// sizeof(Content). The count is the runtime member `size` when Extent is
// ::cuda::std::dynamic_extent, and the compile-time Extent otherwise.
_CCCL_HOST_DEVICE constexpr std::size_t size_bytes() const
{
if constexpr (Extent == ::cuda::std::dynamic_extent)
{
// Element count is only known at run time.
return size * sizeof(Content);
}
else
{
// Element count is fixed by the template argument.
return Extent * sizeof(Content);
}
}

private:
_CCCL_NODISCARD cudaError_t apply(cudaLaunchConfig_t& config, void* kernel) const noexcept
{
cudaFuncAttributes attrs;
int size_needed = static_cast<int>(size * sizeof(Content));
int size_needed = static_cast<int>(size_bytes());
cudaError_t status = cudaFuncGetAttributes(&attrs, kernel);

if ((size_needed > attrs.maxDynamicSharedSizeBytes) && NonPortableSize)
Expand Down Expand Up @@ -441,7 +454,7 @@ _CCCL_DEVICE _CCCL_NODISCARD static char* get_smem_ptr() noexcept
template <typename Dimensions, typename... Options>
_CCCL_DEVICE auto& dynamic_smem_ref(const kernel_config<Dimensions, Options...>& config) noexcept
{
auto& option = detail::find_option_in_tuple<detail::launch_option_kind::dynamic_shared_memory>(config.options);
auto option = detail::find_option_in_tuple<detail::launch_option_kind::dynamic_shared_memory>(config.options);
using option_type = ::cuda::std::remove_reference_t<decltype(option)>;
static_assert(!::cuda::std::is_same_v<option_type, detail::option_not_found>,
"Dynamic shared memory option not found in the kernel configuration");
Expand All @@ -461,7 +474,7 @@ _CCCL_DEVICE auto& dynamic_smem_ref(const kernel_config<Dimensions, Options...>&
template <typename Dimensions, typename... Options>
_CCCL_DEVICE auto dynamic_smem_span(const kernel_config<Dimensions, Options...>& config) noexcept
{
auto& option = detail::find_option_in_tuple<detail::launch_option_kind::dynamic_shared_memory>(config.options);
auto option = detail::find_option_in_tuple<detail::launch_option_kind::dynamic_shared_memory>(config.options);
using option_type = ::cuda::std::remove_reference_t<decltype(option)>;
static_assert(!::cuda::std::is_same_v<option_type, detail::option_not_found>,
"Dynamic shared memory option not found in the kernel configuration");
Expand Down
Loading
Loading