Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support ACTIVITY_DOMAIN_ROCTX #87

Merged
merged 1 commit into from
Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions source/lib/omnitrace/library/components/fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ TIMEMORY_DEFINE_NS_API(category, device_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_hip)
TIMEMORY_DEFINE_NS_API(category, rocm_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_smi)
TIMEMORY_DEFINE_NS_API(category, rocm_roctx)
TIMEMORY_DEFINE_NS_API(category, kokkos)
TIMEMORY_DEFINE_NS_API(category, mpi)
TIMEMORY_DEFINE_NS_API(category, ompt)
Expand All @@ -84,6 +85,7 @@ TIMEMORY_DEFINE_NAME_TRAIT("user", category::user);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_roctx", category::rocm_roctx);
TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling);
TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling);
TIMEMORY_DEFINE_NAME_TRAIT("kokkos", category::kokkos);
Expand Down
7 changes: 5 additions & 2 deletions source/lib/omnitrace/library/components/roctracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,11 @@ roctracer::setup()

ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API,
hip_api_callback, nullptr));
// ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
// hip_api_callback, nullptr));
if(get_use_roctx())
{
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
roctx_api_callback, nullptr));
}
// Enable HIP activity tracing
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));

Expand Down
16 changes: 16 additions & 0 deletions source/lib/omnitrace/library/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ configure_settings(bool _init)
"Enable sampling GPU power, temp, utilization, and memory usage", true, "backend",
"rocm_smi", "rocm");

OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCTX",
"Enable ROCtx API. Warning! Out-of-order ranges may corrupt perfetto flamegraph",
false, "backend", "roctracer", "rocm", "roctx");

OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_SAMPLING",
"Enable statistical sampling of call-stack", false,
"backend", "sampling");
Expand Down Expand Up @@ -1198,6 +1203,17 @@ get_use_rocm_smi()
#endif
}

// Query whether ROCtx API tracing was requested via OMNITRACE_USE_ROCTX.
// Builds without roctracer support always report false.
bool
get_use_roctx()
{
#if !(defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0)
    return false;
#else
    // the lookup is performed once and the iterator is cached for later calls
    static auto _itr     = get_config()->find("OMNITRACE_USE_ROCTX");
    auto&       _setting = static_cast<tim::tsettings<bool>&>(*_itr->second);
    return _setting.get();
#endif
}

bool&
get_use_sampling()
{
Expand Down
3 changes: 3 additions & 0 deletions source/lib/omnitrace/library/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ get_use_rocprofiler() OMNITRACE_HOT;
bool
get_use_rocm_smi() OMNITRACE_HOT;

// Returns the value of the OMNITRACE_USE_ROCTX setting; always false when
// omnitrace is built without roctracer support.
bool
get_use_roctx();

bool&
get_use_sampling() OMNITRACE_HOT;

Expand Down
1 change: 1 addition & 0 deletions source/lib/omnitrace/library/perfetto.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
.SetDescription("Device-side functions submitted via HIP API"), \
perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \
perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \
perfetto::Category("rocm_roctx").SetDescription("Host-side ROCTX labels"), \
perfetto::Category("device_busy") \
.SetDescription("Busy percentage of a GPU device"), \
perfetto::Category("device_temp") \
Expand Down
87 changes: 87 additions & 0 deletions source/lib/omnitrace/library/roctracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "library/roctracer.hpp"
#include "library.hpp"
#include "library/components/fwd.hpp"
#include "library/config.hpp"
#include "library/critical_trace.hpp"
#include "library/debug.hpp"
Expand All @@ -44,6 +45,7 @@
#include <roctracer_ext.h>
#include <roctracer_hcc.h>
#include <roctracer_hip.h>
#include <roctracer_roctx.h>
#include <string>

#define AMD_INTERNAL_BUILD 1
#include <roctracer_hsa.h>
Expand Down Expand Up @@ -432,6 +434,91 @@ namespace
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
}

void
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
void* /*arg*/)
{
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
return;

OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);

if(domain != ACTIVITY_DOMAIN_ROCTX) return;

static auto _range_map = std::unordered_map<roctx_range_id_t, std::string_view>{};
static auto _range_lock = std::mutex{};
const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);

switch(cid)
{
case ROCTX_API_ID_roctxRangePushA:
{
if(get_use_perfetto())
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);

if(get_use_timemory()) tracing::push_timemory(_data->args.message);

break;
}
case ROCTX_API_ID_roctxRangePop:
{
if(get_use_timemory()) tracing::pop_timemory(_data->args.message);
if(get_use_perfetto())
tracing::pop_perfetto(category::rocm_roctx{}, _data->args.message);
break;
}
case ROCTX_API_ID_roctxRangeStartA:
{
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
_range_map.emplace(roctx_range_id_t{ _data->args.id },
std::string_view{ _data->args.message });
}

if(get_use_perfetto())
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);

if(get_use_timemory()) tracing::push_timemory(_data->args.message);
break;
}
case ROCTX_API_ID_roctxRangeStop:
{
std::string_view _message = {};
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
OMNITRACE_CI_THROW(itr == _range_map.end(),
"Error! could not find range with id %lu\n",
_data->args.id);
if(itr == _range_map.end())
{
OMNITRACE_VERBOSE(0, "Warning! could not find range with id %lu\n",
_data->args.id);
return;
}
else
{
_message = itr->second;
}
}

if(!_message.empty())
{
if(get_use_timemory()) tracing::pop_timemory(_message.data());
if(get_use_perfetto())
tracing::pop_perfetto(category::rocm_roctx{}, _message.data());
}

break;
}
case ROCTX_API_ID_roctxMarkA:
// we do nothing with marker events...for now
default: break;
}
}

// HIP API callback function
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
Expand Down
3 changes: 3 additions & 0 deletions source/lib/omnitrace/library/roctracer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ hip_exec_activity_callbacks(int64_t _tid);
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);

// ROCTX API callback function: handles ACTIVITY_DOMAIN_ROCTX range push/pop and
// range start/stop events (the last argument is unused)
void
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);

// Activity tracing callback
void
hip_activity_callback(const char* begin, const char* end, void*);
Expand Down