From f74b527b6c5bfb3ddcbe855b06611d4e598ed3c6 Mon Sep 17 00:00:00 2001
From: David Hewitt
Date: Sat, 11 May 2024 07:59:05 +0100
Subject: [PATCH] add flag to skip reference pool mutex if the program doesn't
 use the pool

---
 src/gil.rs              | 19 +++++++++++++++++--
 src/impl_/trampoline.rs |  7 +++++--
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/gil.rs b/src/gil.rs
index 29c4ffbe389..4770bd905ba 100644
--- a/src/gil.rs
+++ b/src/gil.rs
@@ -7,6 +7,7 @@ use std::cell::Cell;
 use std::cell::RefCell;
 #[cfg(not(debug_assertions))]
 use std::cell::UnsafeCell;
+use std::sync::atomic::AtomicBool;
 use std::{mem, ptr::NonNull, sync};
 
 static START: sync::Once = sync::Once::new();
@@ -213,6 +214,7 @@ impl GILGuard {
         let gstate = unsafe { ffi::PyGILState_Ensure() }; // acquire GIL
         #[allow(deprecated)]
         let pool = unsafe { mem::ManuallyDrop::new(GILPool::new()) };
+        update_deferred_reference_counts(pool.python());
         Some(GILGuard { gstate, pool })
     }
 
@@ -235,6 +237,7 @@ type PyObjVec = Vec<NonNull<ffi::PyObject>>;
 
 /// Thread-safe storage for objects which were inc_ref / dec_ref while the GIL was not held.
 struct ReferencePool {
+    ever_used: AtomicBool,
     // .0 is INCREFs, .1 is DECREFs
     pointer_ops: sync::Mutex<(PyObjVec, PyObjVec)>,
 }
@@ -242,15 +245,18 @@ struct ReferencePool {
 impl ReferencePool {
     const fn new() -> Self {
         Self {
+            ever_used: AtomicBool::new(false),
             pointer_ops: sync::Mutex::new((Vec::new(), Vec::new())),
         }
     }
 
     fn register_incref(&self, obj: NonNull<ffi::PyObject>) {
+        self.ever_used.store(true, sync::atomic::Ordering::Relaxed);
         self.pointer_ops.lock().unwrap().0.push(obj);
     }
 
     fn register_decref(&self, obj: NonNull<ffi::PyObject>) {
+        self.ever_used.store(true, sync::atomic::Ordering::Relaxed);
         self.pointer_ops.lock().unwrap().1.push(obj);
     }
 
@@ -276,6 +282,17 @@ impl ReferencePool {
     }
 }
 
+#[inline]
+pub(crate) fn update_deferred_reference_counts(py: Python<'_>) {
+    // Justification for relaxed: worst case this causes already deferred drops to be
+    // delayed slightly later, and this is also a one-time flag, so if the program is
+    // using deferred drops it is highly likely that branch prediction will always
+    // assume this is true and we don't need the atomic overhead.
+    if POOL.ever_used.load(sync::atomic::Ordering::Relaxed) {
+        POOL.update_counts(py)
+    }
+}
+
 unsafe impl Sync for ReferencePool {}
 
 static POOL: ReferencePool = ReferencePool::new();
@@ -375,8 +392,6 @@ impl GILPool {
     #[inline]
     pub unsafe fn new() -> GILPool {
         increment_gil_count();
-        // Update counts of PyObjects / Py that have been cloned or dropped since last acquisition
-        POOL.update_counts(Python::assume_gil_acquired());
         GILPool {
             start: OWNED_OBJECTS
                 .try_with(|owned_objects| {
diff --git a/src/impl_/trampoline.rs b/src/impl_/trampoline.rs
index db493817cba..f4db0c92d83 100644
--- a/src/impl_/trampoline.rs
+++ b/src/impl_/trampoline.rs
@@ -12,8 +12,9 @@ use std::{
 #[allow(deprecated)]
 use crate::gil::GILPool;
 use crate::{
-    callback::PyCallbackOutput, ffi, ffi_ptr_ext::FfiPtrExt, impl_::panic::PanicTrap,
-    methods::IPowModulo, panic::PanicException, types::PyModule, Py, PyResult, Python,
+    callback::PyCallbackOutput, ffi, ffi_ptr_ext::FfiPtrExt, gil::update_deferred_reference_counts,
+    impl_::panic::PanicTrap, methods::IPowModulo, panic::PanicException, types::PyModule, Py,
+    PyResult, Python,
 };
 
 #[inline]
@@ -182,6 +183,7 @@ where
     #[allow(deprecated)]
     let pool = unsafe { GILPool::new() };
     let py = pool.python();
+    update_deferred_reference_counts(py);
     let out = panic_result_into_callback_output(
         py,
         panic::catch_unwind(move || -> PyResult<_> { body(py) }),
@@ -229,6 +231,7 @@ where
     #[allow(deprecated)]
     let pool = GILPool::new();
    let py = pool.python();
+    update_deferred_reference_counts(py);
     if let Err(py_err) = panic::catch_unwind(move || body(py))
         .unwrap_or_else(|payload| Err(PanicException::from_panic_payload(payload)))
     {
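
The pattern the patch introduces (a one-time AtomicBool consulted with a relaxed load before taking the mutex) can be shown in a standalone Rust sketch. Everything below is illustrative only, not PyO3's actual types: DeferredOps, register, drain_if_used, and the u64 payload are invented names standing in for the reference pool, register_incref/register_decref, and update_counts.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;

// Hypothetical stand-in for the reference pool.
struct DeferredOps {
    ever_used: AtomicBool,
    ops: Mutex<Vec<u64>>,
}

impl DeferredOps {
    const fn new() -> Self {
        Self {
            ever_used: AtomicBool::new(false),
            ops: Mutex::new(Vec::new()),
        }
    }

    fn register(&self, op: u64) {
        // Relaxed is sufficient: a reader that misses this store simply drains
        // the queue on a later call, mirroring the justification comment in the patch.
        self.ever_used.store(true, Ordering::Relaxed);
        self.ops.lock().unwrap().push(op);
    }

    fn drain_if_used(&self) {
        // Fast path: programs that never register deferred ops never touch the lock.
        if self.ever_used.load(Ordering::Relaxed) {
            let ops = std::mem::take(&mut *self.ops.lock().unwrap());
            for op in ops {
                let _ = op; // apply the deferred operation here
            }
        }
    }
}

static POOL: DeferredOps = DeferredOps::new();

fn main() {
    POOL.drain_if_used(); // flag still false: mutex untouched
    POOL.register(1);
    POOL.drain_if_used(); // flag set: lock taken and queue drained
}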