Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lock failing in multithreaded plan_fft() #1921

Closed
jpdoane opened this issue May 22, 2023 · 2 comments · Fixed by #2352
Closed

lock failing in multithreaded plan_fft() #1921

jpdoane opened this issue May 22, 2023 · 2 comments · Fixed by #2352
Labels
bug Something isn't working

Comments

@jpdoane
Copy link

jpdoane commented May 22, 2023

`CUDA.CUFFT.plan_fft!()` appears to have a thread-locking issue:

using CUDA, CUDA.CUFFT

# Worker body for one task: allocate a random 1024x1024 ComplexF32 array via
# `CUDA.randn`, build an in-place FFT plan along dimension 1, and apply it once.
# Any exception is re-raised through `error`. Always returns `nothing`.
function gpu_fft_thread()
    try
        data = CUDA.randn(ComplexF32, 1024, 1024)
        plan = plan_fft!(data, 1)
        plan * data
    catch err
        error(err)
    end
    return nothing
end

# Driver: ten rounds, each spawning ten `gpu_fft_thread` tasks and then
# yielding in a loop until every task of that round reports done.
# Any exception raised in this function is re-raised through `error`.
function run_fft_threads()
    try
        for _ in 1:10
            workers = [Threads.@spawn gpu_fft_thread() for _ in 1:10]
            # Poll rather than `wait`: keep handing control back to the scheduler.
            while !all(istaskdone.(workers))
                yield()
            end
        end
    catch err
        error(err)
    end
end

# Run the reproducer defined above.
run_fft_threads()

generates:

error in running finalizer: ErrorException("val already in a list")
error at ./error.jl:35
push! at ./linked_list.jl:53 [inlined]
_wait2 at ./condition.jl:87
#wait#621 at ./condition.jl:127
wait at ./condition.jl:125 [inlined]
slowlock at ./lock.jl:156
lock at ./lock.jl:147 [inlined]
lock at ./lock.jl:227
push! at /home/jon/.julia/packages/CUDA/p5OVK/lib/utils/cache.jl:72 [inlined]
cufftReleasePlan at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/wrappers.jl:158 [inlined]
#137 at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/fft.jl:30 [inlined]
#context!#59 at /home/jon/.julia/packages/CUDA/p5OVK/lib/cudadrv/state.jl:170 [inlined]
context! at /home/jon/.julia/packages/CUDA/p5OVK/lib/cudadrv/state.jl:165 [inlined]
unsafe_free! at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/fft.jl:29
unknown function (ip: 0x7fefe0126a62)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
run_finalizer at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gc.c:417
jl_gc_run_finalizers_in_list at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gc.c:507
run_finalizers at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gc.c:553
enable_finalizers at ./gcutils.jl:126 [inlined]
unlock at ./locks-mt.jl:68 [inlined]
push! at ./task.jl:703
enq_work at ./task.jl:783
yield at ./task.jl:862
run_fft_threads at /home/jon/processing_analysis/fftw_threads.jl:19
unknown function (ip: 0x7fefe015402f)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
do_call at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:126
eval_value at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:226
eval_stmt_value at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:177 [inlined]
eval_body at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:624
jl_interpret_toplevel_thunk at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:762
top-level scope at /home/jon/processing_analysis/fftw_threads.jl:27
jl_toplevel_eval_flex at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:912
jl_toplevel_eval_flex at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:856
ijl_toplevel_eval_in at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:971
eval at ./boot.jl:370 [inlined]
include_string at ./loading.jl:1864
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
_include at ./loading.jl:1924
include at ./client.jl:478
unknown function (ip: 0x7fefe00998a2)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
do_call at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:126
eval_value at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:226
eval_stmt_value at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:177 [inlined]
eval_body at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:624
jl_interpret_toplevel_thunk at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/interpreter.c:762
top-level scope at REPL[1]:1
jl_toplevel_eval_flex at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:912
jl_toplevel_eval_flex at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:856
jl_toplevel_eval_flex at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:856
ijl_toplevel_eval_in at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/toplevel.c:971
eval at ./boot.jl:370 [inlined]
eval at ./Base.jl:68 [inlined]
repleval at /home/jon/.vscode-server/extensions/julialang.language-julia-1.47.2/scripts/packages/VSCodeServer/src/repl.jl:222
#107 at /home/jon/.vscode-server/extensions/julialang.language-julia-1.47.2/scripts/packages/VSCodeServer/src/repl.jl:186
unknown function (ip: 0x7fefe009982f)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
with_logstate at ./logging.jl:514
with_logger at ./logging.jl:626 [inlined]
#106 at /home/jon/.vscode-server/extensions/julialang.language-julia-1.47.2/scripts/packages/VSCodeServer/src/repl.jl:187
unknown function (ip: 0x7fefe009960f)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
jl_f__call_latest at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/builtins.c:774
#invokelatest#2 at ./essentials.jl:816 [inlined]
invokelatest at ./essentials.jl:813
unknown function (ip: 0x7fefe0098772)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
do_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/builtins.c:730
macro expansion at /home/jon/.vscode-server/extensions/julialang.language-julia-1.47.2/scripts/packages/VSCodeServer/src/eval.jl:34 [inlined]
#61 at ./task.jl:514
unknown function (ip: 0x7fefe007bb9f)
_jl_invoke at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
start_task at /cache/build/default-amdci4-4/julialang/julia-release-1-dot-9/src/task.c:1092

Discourse thread:
https://discourse.julialang.org/t/is-cuda-jl-and-fftw-threadsafe/99219/4?u=jpdoane

version info:

CUDA runtime 12.1, artifact installation
CUDA driver 12.1
NVIDIA driver 525.78.1, originally for CUDA 12.0

Libraries: 
- CUBLAS: 12.1.3
- CURAND: 10.3.2
- CUFFT: 11.0.2
- CUSOLVER: 11.4.5
- CUSPARSE: 12.1.0
- CUPTI: 18.0.0
- NVML: 12.0.0+525.78.1

Toolchain:
- Julia: 1.9.0
- LLVM: 14.0.6
- PTX ISA support: 3.2, 4.0, 4.1, 4.2, 4.3, 5.0, 6.0, 6.1, 6.3, 6.4, 6.5, 7.0, 7.1, 7.2, 7.3, 7.4, 7.5
- Device capability support: sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86
@jpdoane jpdoane added the bug Something isn't working label May 22, 2023
@jpdoane
Copy link
Author

jpdoane commented May 22, 2023

I've also seen this error:

error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")
ijl_error at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/rtutils.c:41
ijl_switch at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/task.c:634
try_yieldto at ./task.jl:910
wait at ./task.jl:984
#wait#621 at ./condition.jl:130
wait at ./condition.jl:125 [inlined]
slowlock at ./lock.jl:156
lock at ./lock.jl:147 [inlined]
lock at ./lock.jl:227
push! at /home/jon/.julia/packages/CUDA/p5OVK/lib/utils/cache.jl:72 [inlined]
cufftReleasePlan at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/wrappers.jl:158 [inlined]
#137 at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/fft.jl:30 [inlined]
#context!#59 at /home/jon/.julia/packages/CUDA/p5OVK/lib/cudadrv/state.jl:170 [inlined]
context! at /home/jon/.julia/packages/CUDA/p5OVK/lib/cudadrv/state.jl:165 [inlined]
unsafe_free! at /home/jon/.julia/packages/CUDA/p5OVK/lib/cufft/fft.jl:29
unknown function (ip: 0x7f5d0013fee2)
_jl_invoke at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2940
run_finalizer at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gc.c:417
jl_gc_run_finalizers_in_list at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gc.c:507
run_finalizers at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gc.c:553
enable_finalizers at ./gcutils.jl:126 [inlined]
unlock at ./locks-mt.jl:68 [inlined]
multiq_deletemin at ./partr.jl:164
trypoptask at ./task.jl:966
jfptr_trypoptask_40607.clone_1 at /usr/local/julia/julia-1.9.0/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2940
get_next_task at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/partr.c:270 [inlined]
ijl_task_get_next at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/partr.c:302
poptask at ./task.jl:974
wait at ./task.jl:983
task_done_hook at ./task.jl:672
jfptr_task_done_hook_31083.clone_1 at /usr/local/julia/julia-1.9.0/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
jl_finish_task at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/task.c:320
start_task at /cache/build/default-amdci4-0/julialang/julia-release-1-dot-9/src/task.c:1103

@maleadt
Copy link
Member

maleadt commented May 26, 2023

MWE:

# A dictionary of handles bundled with the lock that is taken around accesses
# to it. `Cache{K,V}()` creates an empty cache with a fresh `ReentrantLock`.
struct Cache{K,V}
    handles::Dict{K,V}      # key => cached handle
    lock::ReentrantLock     # taken around accesses to `handles`

    Cache{K,V}() where {K,V} = new{K,V}(Dict{K,V}(), ReentrantLock())
end

# `@safe_lock l ex`: evaluate `ex` while holding lock `l`, with garbage
# collection disabled for the duration. The value of `ex` is the value of the
# macro call.
#
# - `l` is evaluated exactly once, so `lock` and `unlock` always act on the same
#   object even if the lock expression has side effects.
# - The GC state that was in effect on entry is restored on exit (`GC.enable`
#   returns the previous state), so nested uses and callers that already had GC
#   disabled are not switched back on prematurely.
# - Both the unlock and the GC restore run in `finally` blocks, so they happen
#   even when `lock` or `ex` throws.
macro safe_lock(l, ex)
    quote
        lk = $(esc(l))
        gc_was_enabled = GC.enable(false)
        try
            lock(lk)
            try
                $(esc(ex))
            finally
                unlock(lk)
            end
        finally
            GC.enable(gc_was_enabled)
        end
    end
end

# Get the value cached under `key`, or create one with `ctor()`.
#
# The cache is consulted under its lock. On a miss, an incremental collection
# (`GC.gc(false)`) is run and the cache is consulted once more -- presumably so
# that pending finalizers get a chance to put entries back first. Only if that
# also misses is `ctor()` called; its result is asserted to be a `V`.
# Despite the name, a cache hit does not remove the entry from `cache.handles`.
function Base.pop!(ctor::Function, cache::Cache{K,V}, key) where {K,V}
    # `nothing` signals a miss
    lookup() = @safe_lock cache.lock get(cache.handles, key, nothing)

    cached = lookup()
    if cached === nothing
        GC.gc(false)
        cached = lookup()
    end
    cached === nothing || return cached

    # still nothing cached: create a fresh value
    return ctor()::V
end

# Hand `handle` back to the cache under `key`, or destroy it.
#
# Under the cache lock: if `key` already has an entry the cache is left
# untouched, otherwise `handle` is stored. When the slot was already taken,
# `dtor(handle)` is called after the lock has been released.
# Returns `nothing`.
function Base.push!(dtor::Function, cache::Cache{K,V}, key::K, handle::V) where {K,V}
    slot_taken = @safe_lock cache.lock begin
        occupied = haskey(cache.handles, key)
        occupied || (cache.handles[key] = handle)
        occupied
    end

    # no room for this handle: dispose of it outside the lock
    slot_taken && dtor(handle)

    return
end

# Global cache instance used by every `Foo`; keys and values are both `Int`.
const handles = Cache{Int, Int}()
# Object that takes a handle from the global `handles` cache when constructed
# and hands it back from its finalizer.
mutable struct Foo
    handle

    function Foo()
        # a random key in 1:100 doubles as the handle value when the cache misses
        key = rand(1:100)
        handle = pop!(() -> key, handles, key)
        obj = new(handle)

        # The finalizer captures `handle` only (not `obj`) and pushes it back
        # under itself as key; the destructor passed to `push!` does nothing.
        release(_) = push!(_ -> nothing, handles, handle, handle)
        return finalizer(release, obj)
    end
end

# One stress pass: ten rounds, each spawning ten tasks that construct a `Foo`,
# then yielding in a loop until every task of the round reports done.
# Any exception raised in this function is re-raised through `error`.
function test()
    try
        for _ in 1:10
            workers = [Threads.@spawn Foo() for _ in 1:10]
            # Poll rather than `wait`: keep handing control back to the scheduler.
            while !all(istaskdone.(workers))
                yield()
            end
        end
    catch err
        error(err)
    end
end

# Call `test()` over and over; this loop never exits on its own.
while true
    test()
end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants