Skip to content
This repository has been archived by the owner on Mar 12, 2021. It is now read-only.

Commit

Permalink
Simplify reporting to get rid of additional lock in the common case.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Mar 13, 2020
1 parent a3176d6 commit 743f9a6
Showing 1 changed file with 22 additions and 38 deletions.
60 changes: 22 additions & 38 deletions src/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ mutable struct AllocStats
pool_nfree::Int
## in bytes
pool_alloc::Int
pool_free::Int

# actual CUDA allocations
actual_nalloc::Int
Expand All @@ -32,7 +31,7 @@ mutable struct AllocStats
actual_time::Float64
end

const alloc_stats = AllocStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
const alloc_stats = AllocStats(0, 0, 0, 0, 0, 0, 0, 0, 0)

Base.copy(alloc_stats::AllocStats) =
AllocStats((getfield(alloc_stats, field) for field in fieldnames(AllocStats))...)
Expand Down Expand Up @@ -149,7 +148,7 @@ const pool = Ref{Module}(BinnedPool)

export OutOfGPUMemoryError

const requested = Dict{CuPtr{Nothing},Tuple{Int,Vector}}()
const requested = Dict{CuPtr{Nothing},Vector}()

"""
OutOfGPUMemoryError()
Expand Down Expand Up @@ -181,17 +180,12 @@ a [`OutOfGPUMemoryError`](@ref) if the allocation request cannot be satisfied.
end
ptr === nothing && throw(OutOfGPUMemoryError(sz))

# record the backtrace
bt = if Base.JLOptions().debug_level >= 2
backtrace()
else
[]
end

# record the allocation
@lock memory_lock begin
@assert !haskey(requested, ptr)
requested[ptr] = (sz,bt)
if Base.JLOptions().debug_level >= 2
@lock memory_lock begin
@assert !haskey(requested, ptr)
requested[ptr] = backtrace()
end
end

alloc_stats.pool_time += time
Expand All @@ -211,11 +205,11 @@ Releases a buffer pointed to by `ptr` to the memory pool.
ptr == CU_NULL && return

# record the allocation
sz, bt = @lock memory_lock begin
@assert haskey(requested, ptr)
sz, bt = requested[ptr]
delete!(requested, ptr)
sz, bt
if Base.JLOptions().debug_level >= 2
@lock memory_lock begin
@assert haskey(requested, ptr)
delete!(requested, ptr)
end
end

time = Base.@elapsed begin
Expand All @@ -224,7 +218,6 @@ Releases a buffer pointed to by `ptr` to the memory pool.

alloc_stats.pool_time += time
alloc_stats.pool_nfree += 1
alloc_stats.pool_free += sz

return
end
Expand Down Expand Up @@ -382,12 +375,11 @@ function memory_status(io::IO=stdout)
free_bytes, total_bytes = CUDAdrv.Mem.info()
used_bytes = total_bytes - free_bytes
used_ratio = used_bytes / total_bytes

@printf(io, "Effective GPU memory usage: %.2f%% (%s/%s)\n",
100*used_ratio, Base.format_bytes(used_bytes),
Base.format_bytes(total_bytes))

@printf(io, "CuArrays GPU memory usage: %s", Base.format_bytes(usage[]))
@printf(io, "CuArrays allocator usage: %s", Base.format_bytes(usage[]))
if usage_limit[] !== nothing
@printf(io, " (capped at %s)", Base.format_bytes(usage_limit[]))
end
Expand All @@ -396,28 +388,20 @@ function memory_status(io::IO=stdout)
alloc_used_bytes = pool[].used_memory()
alloc_cached_bytes = pool[].cached_memory()
alloc_total_bytes = alloc_used_bytes + alloc_cached_bytes

@printf(io, "%s usage: %s (%s allocated, %s cached)\n", nameof(pool[]),
Base.format_bytes(alloc_total_bytes), Base.format_bytes(alloc_used_bytes),
Base.format_bytes(alloc_cached_bytes))

@lock memory_lock begin
requested_bytes = mapreduce(first, +, values(requested); init=0)

@printf(io, "%s efficiency: %.2f%% (%s requested, %s allocated)\n", nameof(pool[]),
100*requested_bytes/usage[],
Base.format_bytes(requested_bytes),
Base.format_bytes(usage[]))

# check if the memory usage as counted by the CUDA allocator wrapper
# matches what is reported by the pool implementation
discrepancy = abs(usage[] - alloc_total_bytes)
if discrepancy != 0
@debug "Discrepancy of $(Base.format_bytes(discrepancy)) between memory pool and allocator"
end
# check if the memory usage as counted by the CUDA allocator wrapper
# matches what is reported by the pool implementation
discrepancy = abs(usage[] - alloc_total_bytes)
if discrepancy != 0
println(io, "Discrepancy of $(Base.format_bytes(discrepancy)) between memory pool and allocator!")
end

if Base.JLOptions().debug_level >= 2
for (ptr, (sz,bt)) in requested
if Base.JLOptions().debug_level >= 2
@lock memory_lock begin
for (ptr, bt) in requested
@printf(io, "\nOutstanding memory allocation of %s at %p",
Base.format_bytes(sz), Int(ptr))
stack = stacktrace(bt, false)
Expand Down

0 comments on commit 743f9a6

Please sign in to comment.