diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 5500c70a3b0aad..d3efac906aeb93 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -235,6 +235,12 @@ struct _gc_runtime_state { /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_stats *generation_stats; +#ifdef Py_GIL_DISABLED + /* Protects generation_stats between gc_get_stats_impl() (reader) and + gc_collect_main() (writer); both resolve the stats slot (buffer index) + under this lock so they agree on which slot to touch (gh-151646). */ + PyMutex stats_mutex; +#endif /* true if we are currently running the collector */ int collecting; // The frame that started the current collection. It might be NULL even when diff --git a/Lib/test/test_free_threading/test_gc_get_stats_race.py b/Lib/test/test_free_threading/test_gc_get_stats_race.py new file mode 100644 index 00000000000000..dc609a98fdffd5 --- /dev/null +++ b/Lib/test/test_free_threading/test_gc_get_stats_race.py @@ -0,0 +1,99 @@ +# Reproduction for the gc.get_stats() data race under free-threading. +# +# CPython issue gh-151646: in free-threading builds (``--disable-gil``) the +# concurrent collector and ``gc.get_stats()`` touch the same per-generation +# statistics struct (``gcstate->generation_stats``) without any synchronisation: +# +# * READER -- ``gc_get_stats_impl()`` in ``Modules/gcmodule.c`` copies the +# ``struct gc_generation_stats`` for each generation (``collections``, +# ``collected``, ``uncollectable``, ``candidates``, ``duration``) with a +# plain struct assignment and no lock. +# +# * WRITER -- at the end of every collection cycle ``gc_collect_main()`` in +# ``Python/gc_free_threading.c`` mutates that very struct in place +# (``stats->collections++``, ``stats->collected += m``, ...), again with no +# lock. +# +# When one thread is collecting while several others read the stats, the +# unsynchronised read/write to the same memory is a data race. The values are +# only statistics, so the race is benign at the Python level (no crash), which +# is exactly what lets this script double as a regression test: once the fix +# adds proper synchronisation, ThreadSanitizer should stay quiet while the +# script keeps exiting cleanly. +# +# Running this script under a free-threading CPython build compiled with +# ThreadSanitizer (``./configure --disable-gil --with-thread-sanitizer``) is +# expected to print a ``WARNING: ThreadSanitizer: data race`` report whose stack +# traces point at ``gc_get_stats_impl`` (gcmodule.c) and ``gc_collect_main`` +# (gc_free_threading.c). +# +# Standalone usage: +# +# ./python Lib/test/test_free_threading/test_gc_get_stats_race.py +# +# or as part of the test suite (only meaningful under a TSAN build): +# +# ./python -m test test_free_threading.test_gc_get_stats_race + +import gc +import threading +import unittest + +from test.support import threading_helper + + +# One thread hammers gc.collect() (the writer side); enough reader threads run +# gc.get_stats() concurrently to make the race easy to observe. Readers run +# until the writer is done so the test duration is controlled by the collection +# count rather than by reader loop counts. +NUM_READERS = 4 +ITERATIONS = 50 + + +def _stress_get_stats_race(num_readers=NUM_READERS, iterations=ITERATIONS): + """Race gc.collect() against gc.get_stats().""" + + # Synchronise the start so the writer and readers overlap for as long as + # possible, maximising the chance of the read and write landing on the + # statistics struct at the same time. + done = threading.Event() + + def collector(): + try: + for _ in range(iterations): + # Writer: each full collection updates gcstate->generation_stats. + gc.collect() + finally: + done.set() + + def reader(): + while not done.is_set(): + # Reader: copies the per-generation stats structs with no lock. + gc.get_stats() + + threading_helper.run_concurrently([collector] + [reader] * num_readers) + + +@threading_helper.requires_working_threading() +class TestGCGetStatsRace(unittest.TestCase): + def test_get_stats_collect_race(self): + _stress_get_stats_race() + + # The race is benign at the Python level: gc.get_stats() must still + # return well-formed data and the interpreter must not crash. + stats = gc.get_stats() + self.assertEqual(len(stats), 3) + for generation in stats: + self.assertIn("collections", generation) + self.assertIn("collected", generation) + self.assertIn("uncollectable", generation) + + +if __name__ == "__main__": + # Standalone reproduction: run the race and exit cleanly so the script can + # be reused as a regression check once the fix lands. + print(f"Racing 1 gc.collect() thread against " + f"{NUM_READERS} gc.get_stats() thread(s), {ITERATIONS} collections...") + _stress_get_stats_race() + print("Done (no Python-level crash). " + "Run under a free-threading + TSAN build to observe the data race.") diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst new file mode 100644 index 00000000000000..fde428c8a2839b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst @@ -0,0 +1,3 @@ +Fix a data race in free-threading builds between :func:`gc.get_stats` and a +concurrent garbage collection cycle. Access to the per-generation statistics +is now serialized with a mutex so the reader observes a consistent snapshot. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 0093995441e390..e2df31556f3c37 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -374,9 +374,15 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. */ GCState *gcstate = get_gc_state(); +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&gcstate->stats_mutex); +#endif stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index]; stats[1] = gcstate->generation_stats->old[0].items[gcstate->generation_stats->old[0].index]; stats[2] = gcstate->generation_stats->old[1].items[gcstate->generation_stats->old[1].index]; +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&gcstate->stats_mutex); +#endif PyObject *result = PyList_New(0); if (result == NULL) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 4e36189580bbf8..99f1a1eb47e3dd 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2281,7 +2281,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } } - /* Update stats */ + /* Update stats. */ + PyMutex_Lock(&gcstate->stats_mutex); struct gc_generation_stats *stats = get_stats(gcstate, generation); stats->ts_start = start; stats->ts_stop = stop; @@ -2290,6 +2291,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->uncollectable += n; stats->duration += duration; stats->candidates += state.candidates; + PyMutex_Unlock(&gcstate->stats_mutex); GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS