Skip to content
This repository has been archived by the owner on Mar 12, 2021. It is now read-only.

Commit

Permalink
Merge pull request #693 from JuliaGPU/tb/disable_gc
Browse files Browse the repository at this point in the history
Disable the GC after taking pool-related spinlocks.
  • Loading branch information
maleadt authored Apr 28, 2020
2 parents 16c0080 + a145023 commit 2258a24
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 28 deletions.
22 changes: 20 additions & 2 deletions src/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ using Base.Threads: SpinLock
# each allocator needs to lock its own resources separately too.
const memory_lock = SpinLock()

# the above spinlocks are taken around code that might gc, which might cause a deadlock
# if we try to acquire them from a finalizer too. avoid that by temporarily disabling
# finalizers from running concurrently on this thread.
# Toggle whether finalizers may run on the current thread.
# Forwards to the runtime's `jl_gc_enable_finalizers`, passing this thread's
# ptls (per-thread local state) pointer; returns `nothing`.
function enable_finalizers(on::Bool)
    return ccall(:jl_gc_enable_finalizers, Cvoid, (Ptr{Cvoid}, Int32),
                 Core.getptls(), on)
end
# Like `@lock`, but additionally disables finalizers on this thread while the
# lock is held, so a finalizer cannot re-enter lock-protected code and deadlock
# (see the comment above `enable_finalizers`).
#
# NOTE: statement order is significant — finalizers are disabled only after the
# lock is acquired, and the `finally` block releases the lock before re-enabling
# them; `temp` is hygienic, so `l` is evaluated exactly once in caller scope.
macro safe_lock(l, ex)
    quote
        temp = $(esc(l))
        lock(temp)
        enable_finalizers(false)
        try
            $(esc(ex))
        finally
            unlock(temp)
            enable_finalizers(true)
        end
    end
end

const MEMDEBUG = ccall(:jl_is_memdebug, Bool, ())


Expand Down Expand Up @@ -80,7 +98,7 @@ function actual_alloc(bytes)
ptr = convert(CuPtr{Nothing}, buf)

# record the buffer
@lock memory_lock begin
@safe_lock memory_lock begin
@assert !haskey(allocated, ptr)
allocated[ptr] = buf
end
Expand All @@ -94,7 +112,7 @@ end

function actual_free(ptr::CuPtr{Nothing})
# look up the buffer
buf = @lock memory_lock begin
buf = @safe_lock memory_lock begin
buf = allocated[ptr]
delete!(allocated, ptr)
buf
Expand Down
14 changes: 7 additions & 7 deletions src/memory/binned.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module BinnedPool
# TODO: move the management thread one level up, to be shared by all allocators

using ..CuArrays
using ..CuArrays: @pool_timeit
using ..CuArrays: @pool_timeit, @safe_lock

using CUDAdrv

Expand Down Expand Up @@ -197,7 +197,7 @@ end

# repopulate the "available" pools from the list of freed blocks
function repopulate()
blocks = @lock freed_lock begin
blocks = @safe_lock freed_lock begin
isempty(freed) && return
blocks = Set(freed)
empty!(freed)
Expand Down Expand Up @@ -323,7 +323,7 @@ end
function pool_free(block)
# we don't do any work here to reduce pressure on the GC (spending time in finalizers)
# and to simplify locking (preventing concurrent access during GC interventions)
@lock freed_lock begin
@safe_lock freed_lock begin
push!(freed, block)
end
end
Expand Down Expand Up @@ -380,7 +380,7 @@ function alloc(bytes)

if block !== nothing
ptr = pointer(block)
@lock allocated_lock begin
@safe_lock allocated_lock begin
allocated[ptr] = block
end
return ptr
Expand All @@ -390,7 +390,7 @@ function alloc(bytes)
end

function free(ptr)
block = @lock allocated_lock begin
block = @safe_lock allocated_lock begin
block = allocated[ptr]
delete!(allocated, ptr)
block
Expand All @@ -410,10 +410,10 @@ function free(ptr)
return
end

used_memory() = @lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)
used_memory() = @safe_lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)

function cached_memory()
sz = @lock freed_lock mapreduce(sizeof, +, freed; init=0)
sz = @safe_lock freed_lock mapreduce(sizeof, +, freed; init=0)
@lock pool_lock for (pid, pl) in enumerate(pools_avail)
bytes = poolsize(pid)
sz += bytes * length(pl)
Expand Down
13 changes: 7 additions & 6 deletions src/memory/dummy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ module DummyPool

# dummy allocator that passes through any requests, calling into the GC if that fails.

using ..CuArrays: @pool_timeit, actual_alloc, actual_free
using ..CuArrays
using ..CuArrays: @pool_timeit, @safe_lock

using CUDAdrv

Expand All @@ -24,13 +25,13 @@ function alloc(sz)
end

@pool_timeit "$phase.1 alloc" begin
ptr = actual_alloc(sz)
ptr = CuArrays.actual_alloc(sz)
end
ptr === nothing || break
end

if ptr !== nothing
@lock allocated_lock begin
@safe_lock allocated_lock begin
allocated[ptr] = sz
end
return ptr
Expand All @@ -40,17 +41,17 @@ function alloc(sz)
end

function free(ptr)
@lock allocated_lock begin
@safe_lock allocated_lock begin
sz = allocated[ptr]
delete!(allocated, ptr)
end
actual_free(ptr)
CuArrays.actual_free(ptr)
return
end

reclaim(target_bytes::Int=typemax(Int)) = return 0

used_memory() = @lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)
used_memory() = @safe_lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)

cached_memory() = 0

Expand Down
12 changes: 6 additions & 6 deletions src/memory/simple.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module SimplePool
# simple scan into a list of free buffers

using ..CuArrays
using ..CuArrays: @pool_timeit
using ..CuArrays: @pool_timeit, @safe_lock

using CUDAdrv

Expand Down Expand Up @@ -132,7 +132,7 @@ end
function pool_free(block)
# we don't do any work here to reduce pressure on the GC (spending time in finalizers)
# and to simplify locking (preventing concurrent access during GC interventions)
@lock freed_lock begin
@safe_lock freed_lock begin
push!(freed, block)
end
end
Expand All @@ -149,7 +149,7 @@ function alloc(sz)
block = pool_alloc(sz)
if block !== nothing
ptr = pointer(block)
@lock allocated_lock begin
@safe_lock allocated_lock begin
allocated[ptr] = block
end
return ptr
Expand All @@ -159,7 +159,7 @@ function alloc(sz)
end

function free(ptr)
block = @lock allocated_lock begin
block = @safe_lock allocated_lock begin
block = allocated[ptr]
delete!(allocated, ptr)
block
Expand All @@ -168,10 +168,10 @@ function free(ptr)
return
end

used_memory() = @lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)
used_memory() = @safe_lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)

function cached_memory()
sz = @lock freed_lock mapreduce(sizeof, +, freed; init=0)
sz = @safe_lock freed_lock mapreduce(sizeof, +, freed; init=0)
sz += @lock pool_lock mapreduce(sizeof, +, pool; init=0)
return sz
end
Expand Down
14 changes: 7 additions & 7 deletions src/memory/split.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module SplittingPool
# scan into a sorted list of free buffers, splitting buffers along the way

using ..CuArrays
using ..CuArrays: @pool_timeit
using ..CuArrays: @pool_timeit, @safe_lock

using DataStructures

Expand Down Expand Up @@ -223,7 +223,7 @@ end

# repopulate the pools from the list of freed blocks
function repopulate()
blocks = @lock freed_lock begin
blocks = @safe_lock freed_lock begin
isempty(freed) && return
blocks = Set(freed)
empty!(freed)
Expand Down Expand Up @@ -372,7 +372,7 @@ function pool_free(block)
# we don't do any work here to reduce pressure on the GC (spending time in finalizers)
# and to simplify locking (preventing concurrent access during GC interventions)
block.state = FREED
@lock freed_lock begin
@safe_lock freed_lock begin
push!(freed, block)
end
end
Expand All @@ -390,7 +390,7 @@ function alloc(sz)
if block !== nothing
block.state = ALLOCATED
ptr = pointer(block)
@lock allocated_lock begin
@safe_lock allocated_lock begin
@assert !haskey(allocated, ptr) "Newly-allocated block $block is already allocated"
allocated[ptr] = block
end
Expand All @@ -401,7 +401,7 @@ function alloc(sz)
end

function free(ptr)
block = @lock allocated_lock begin
block = @safe_lock allocated_lock begin
block = allocated[ptr]
delete!(allocated, ptr)
block
Expand All @@ -422,10 +422,10 @@ function reclaim(sz::Int=typemax(Int))
return freed_sz
end

used_memory() = @lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)
used_memory() = @safe_lock allocated_lock mapreduce(sizeof, +, values(allocated); init=0)

function cached_memory()
sz = @lock freed_lock mapreduce(sizeof, +, freed; init=0)
sz = @safe_lock freed_lock mapreduce(sizeof, +, freed; init=0)
@lock pool_lock for pool in (pool_small, pool_large, pool_huge)
sz += mapreduce(sizeof, +, pool; init=0)
end
Expand Down

0 comments on commit 2258a24

Please sign in to comment.