Skip to content

Commit

Permalink
Correctly handle callbacks from newer drivers during profiling.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Apr 26, 2024
1 parent 3352f6a commit ad565da
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 47 deletions.
86 changes: 43 additions & 43 deletions lib/cupti/error.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,96 +15,96 @@ function name(err::CUPTIError)
unsafe_string(str_ref[])
end

@enum_without_prefix CUptiResult CUPTI_

## COV_EXCL_START
"""
    description(err)

Return a human-readable explanation of the CUPTI result code stored in `err.code`.

The code is compared against the `CUptiResult` values through their prefix-less
aliases (`SUCCESS`, `ERROR_INVALID_PARAMETER`, ...). A code that matches none of the
listed values yields `"unknown status"`.
"""
function description(err)
    code = err.code

    # One condition per message: the prefix-less names are the aliases that
    # `@enum_without_prefix CUptiResult CUPTI_` creates for the `CUPTI_*` values.
    if code == SUCCESS
        return "no error"
    elseif code == ERROR_INVALID_PARAMETER
        return "one or more of the parameters is invalid"
    elseif code == ERROR_INVALID_DEVICE
        return "the device does not correspond to a valid CUDA device"
    elseif code == ERROR_INVALID_CONTEXT
        return "the context is NULL or not valid"
    elseif code == ERROR_INVALID_EVENT_DOMAIN_ID
        return "the event domain id is invalid"
    elseif code == ERROR_INVALID_EVENT_ID
        return "the event id is invalid"
    elseif code == ERROR_INVALID_EVENT_NAME
        return "the event name is invalid"
    elseif code == ERROR_INVALID_OPERATION
        return "the current operation cannot be performed due to dependency on other factors"
    elseif code == ERROR_OUT_OF_MEMORY
        return "unable to allocate enough memory to perform the requested operation"
    elseif code == ERROR_HARDWARE
        return "an error occurred on the performance monitoring hardware"
    elseif code == ERROR_PARAMETER_SIZE_NOT_SUFFICIENT
        return "the output buffer size is not sufficient to return all requested data"
    elseif code == ERROR_API_NOT_IMPLEMENTED
        return "aPI is not implemented"
    elseif code == ERROR_MAX_LIMIT_REACHED
        return "the maximum limit is reached"
    elseif code == ERROR_NOT_READY
        return "the object is not yet ready to perform the requested operation"
    elseif code == ERROR_NOT_COMPATIBLE
        return "the current operation is not compatible with the current state of the object"
    elseif code == ERROR_NOT_INITIALIZED
        return "CUPTI is unable to initialize its connection to the CUDA driver"
    elseif code == ERROR_INVALID_METRIC_ID
        return "the metric id is invalid"
    elseif code == ERROR_INVALID_METRIC_NAME
        return "the metric name is invalid"
    elseif code == ERROR_QUEUE_EMPTY
        return "the queue is empty"
    elseif code == ERROR_INVALID_HANDLE
        return "invalid handle (internal?)"
    elseif code == ERROR_INVALID_STREAM
        return "invalid stream"
    elseif code == ERROR_INVALID_KIND
        return "invalid kind"
    elseif code == ERROR_INVALID_EVENT_VALUE
        return "invalid event value"
    elseif code == ERROR_DISABLED
        return "CUPTI is disabled due to conflicts with other enabled profilers"
    elseif code == ERROR_INVALID_MODULE
        return "invalid module"
    elseif code == ERROR_INVALID_METRIC_VALUE
        return "invalid metric value"
    elseif code == ERROR_HARDWARE_BUSY
        return "the performance monitoring hardware is in use by other client"
    elseif code == ERROR_NOT_SUPPORTED
        return "the attempted operation is not supported on the current system or device"
    elseif code == ERROR_UM_PROFILING_NOT_SUPPORTED
        return "unified memory profiling is not supported on the system. Potential reason could be unsupported OS or architecture"
    elseif code == ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE
        return "unified memory profiling is not supported on the device"
    elseif code == ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES
        return "unified memory profiling is not supported on a multi-GPU configuration without P2P support between any pair of devices"
    elseif code == ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS
        return "unified memory profiling is not supported under the Multi-Process Service (MPS) environment with CUDA 7.5"
    elseif code == ERROR_CDP_TRACING_NOT_SUPPORTED
        return "Devices with compute capability 7.0 don't support CDP tracing"
    elseif code == ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED
        return "profiling on virtualized GPU is not supported"
    elseif code == ERROR_CUDA_COMPILER_NOT_COMPATIBLE
        return "Profiling results might be incorrect for CUDA applications compiled with nvcc version older than 9.0 for devices with compute capability 6.0 and 6.1"
    elseif code == ERROR_INSUFFICIENT_PRIVILEGES
        # Continuation lines share one indent, which the triple-quoted literal strips.
        return """Insufficient privileges: You don't have permissions to profile GPU code.
               Please configure your system to allow all users to profile, or run Julia with
               elevated permissions: https://developer.nvidia.com/ERR_NVGPUCTRPERM#SolnAdminTag"""
    elseif code == ERROR_OLD_PROFILER_API_INITIALIZED
        return "old profiling api's are not supported with new profiling api's"
    elseif code == ERROR_OPENACC_UNDEFINED_ROUTINE
        return "missing definition of the OpenACC API routine in the linked OpenACC library"
    elseif code == ERROR_LEGACY_PROFILER_NOT_SUPPORTED
        return "an unknown internal error has occurred. Legacy CUPTI Profiling is not supported on devices with Compute Capability 7.5 or higher (Turing+)"
    elseif code == ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED
        return "CUPTI doesn't allow multiple callback subscribers. Only a single subscriber can be registered at a time."
    elseif code == ERROR_UNKNOWN
        return "an unknown error has occurred"
    else
        # Fallback for any code without a dedicated message above.
        return "unknown status"
    end
end
## COV_EXCL_STOP

@enum_without_prefix CUptiResult CUPTI_
16 changes: 12 additions & 4 deletions src/profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -373,16 +373,24 @@ function capture(cfg)

name = if record.kind == CUPTI.CUPTI_ACTIVITY_KIND_DRIVER
ref = Ref{Cstring}(C_NULL)
CUPTI.cuptiGetCallbackName(CUPTI.CUPTI_CB_DOMAIN_DRIVER_API,
record.cbid, ref)
res = CUPTI.unchecked_cuptiGetCallbackName(CUPTI.CUPTI_CB_DOMAIN_DRIVER_API,
record.cbid, ref)
if res == CUPTI.SUCCESS
unsafe_string(ref[])
elseif res == CUPTI.ERROR_INVALID_PARAMETER
# this can happen when using a driver that's newer than the toolkit
"<unknown driver API>"
else
CUPTI.throw_api_error(res)
end
unsafe_string(ref[])
elseif record.kind == CUPTI.CUPTI_ACTIVITY_KIND_RUNTIME
ref = Ref{Cstring}(C_NULL)
CUPTI.cuptiGetCallbackName(CUPTI.CUPTI_CB_DOMAIN_RUNTIME_API,
record.cbid, ref)
record.cbid, ref)
unsafe_string(ref[])
else
"<unknown>"
"<unknown activity kind>"
end

push!(host_trace, (; id, start=t0, stop=t1, name,
Expand Down

0 comments on commit ad565da

Please sign in to comment.