From 68f8e715db3d2d6badf2d624b536878469991550 Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Sun, 12 Apr 2026 22:22:27 +0200 Subject: [PATCH] fix: rare crash due to race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vectors PYJLVALUES and PYJLFREEVALUES are modified without any protection against GC-triggered re-entrancy. _pyjl_dealloc(o1) or PyJuliaValue_SetValue └→ push!(PYJLFREEVALUES, idx) # begins resize (sets internal flag) └→ vector must grow → allocates → triggers Julia GC └→ GC collects a Py object → runs py_finalizer └→ enqueue(ptr) → PyGILState_Check()==1 (same thread holds GIL) └→ Py_DecRef(ptr) → refcount hits 0 └→ _pyjl_dealloc(o2) └→ push!(PYJLFREEVALUES, idx2) ← BOOM: flag already set ConcurrencyViolationError! 1. push! needs to grow the vector (exceeds capacity), so it allocates 2. The allocation triggers Julia GC, which runs finalizers 3. A finalizer calls Py_DecRef (because enqueue sees the GIL is held on this same thread), dropping refcount to 0, which triggers _pyjl_dealloc re-entrantly on the same vector This is especially likely during shutdown (at_jl_exit → jl_atexit_hook), because jl_gc_run_all_finalizers runs finalizers on the calling thread (the main thread, which holds the GIL). Many Py objects are finalized at once, repeatedly pushing to PYJLFREEVALUES, and each push that triggers a reallocation can cause the re-entrant chain. Disabled Julia's GC during the critical vector modifications, so that push!/pop! cannot trigger allocation-based GC, which prevents the finalizer chain from re-entering. Added a SpinLock as defense-in-depth against true multi-thread races. --- src/JlWrap/C.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 19ff2904..dc1d84c2 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -25,6 +25,8 @@ const PyJuliaBase_Type = Ref(C.PyNULL) const PYJLVALUES = [] # unused indices in PYJLVALUES const PYJLFREEVALUES = Int[] +# lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification +const PYJL_LOCK = Threads.SpinLock() function _pyjl_new(t::C.PyPtr, ::C.PyPtr, ::C.PyPtr) alloc = C.PyType_GetSlot(t, C.Py_tp_alloc) @@ -39,8 +41,16 @@ end function _pyjl_dealloc(o::C.PyPtr) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx != 0 + # Disable GC to prevent push! from triggering a GC that runs finalizers + # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → + # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → + # ConcurrencyViolationError). The lock guards against true multi-thread races. + prev = Base.GC.enable(false) + lock(PYJL_LOCK) PYJLVALUES[idx] = nothing push!(PYJLFREEVALUES, idx) + unlock(PYJL_LOCK) + Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) freeptr = C.PyType_GetSlot(C.Py_Type(o), C.Py_tp_free) @@ -375,6 +385,10 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin o = C.asptr(_o) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx == 0 + # Disable GC to prevent push!/pop! from triggering a GC whose finalizers + # re-entrantly resize the same vectors (see _pyjl_dealloc comment). + prev = Base.GC.enable(false) + lock(PYJL_LOCK) if isempty(PYJLFREEVALUES) push!(PYJLVALUES, v) idx = length(PYJLVALUES) @@ -382,6 +396,8 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin idx = pop!(PYJLFREEVALUES) PYJLVALUES[idx] = v end + unlock(PYJL_LOCK) + Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else PYJLVALUES[idx] = v