From 47cfd29848271a8d379959d879d9e03b7b3a1765 Mon Sep 17 00:00:00 2001 From: Jiucheng Zang Date: Fri, 19 Jun 2026 23:36:59 -0400 Subject: [PATCH 1/2] Fix data race in free-threading builds between gc.get_stats() and garbage collection --- Include/internal/pycore_interp_structs.h | 6 + .../test_gc_get_stats_race.py | 107 ++++++++++++++++++ ...-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst | 3 + Modules/gcmodule.c | 8 ++ Python/gc_free_threading.c | 7 +- 5 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_free_threading/test_gc_get_stats_race.py create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 5500c70a3b0aad..4095e6543abaa0 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -235,6 +235,12 @@ struct _gc_runtime_state { /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_stats *generation_stats; +#ifdef Py_GIL_DISABLED + /* Serializes access to generation_stats between gc_get_stats_impl() + (reader) and gc_collect_main() (writer) so they can run concurrently + under free-threading without a data race (gh-151646). */ + PyMutex stats_mutex; +#endif /* true if we are currently running the collector */ int collecting; // The frame that started the current collection. It might be NULL even when diff --git a/Lib/test/test_free_threading/test_gc_get_stats_race.py b/Lib/test/test_free_threading/test_gc_get_stats_race.py new file mode 100644 index 00000000000000..a91bdce0304b1d --- /dev/null +++ b/Lib/test/test_free_threading/test_gc_get_stats_race.py @@ -0,0 +1,107 @@ +# Reproduction for the gc.get_stats() data race under free-threading. +# +# CPython issue gh-151646: in free-threading builds (``--disable-gil``) the +# concurrent collector and ``gc.get_stats()`` touch the same per-generation +# statistics struct (``gcstate->generation_stats``) without any synchronisation: +# +# * READER -- ``gc_get_stats_impl()`` in ``Modules/gcmodule.c`` copies the +# ``struct gc_generation_stats`` for each generation (``collections``, +# ``collected``, ``uncollectable``, ``candidates``, ``duration``) with a +# plain struct assignment and no lock. +# +# * WRITER -- at the end of every collection cycle ``gc_collect_main()`` in +# ``Python/gc_free_threading.c`` mutates that very struct in place +# (``stats->collections++``, ``stats->collected += m``, ...), again with no +# lock. +# +# When one thread is collecting while several others read the stats, the +# unsynchronised read/write to the same memory is a data race. The values are +# only statistics, so the race is benign at the Python level (no crash), which +# is exactly what lets this script double as a regression test: once the fix +# adds proper synchronisation, ThreadSanitizer should stay quiet while the +# script keeps exiting cleanly. +# +# Running this script under a free-threading CPython build compiled with +# ThreadSanitizer (``./configure --disable-gil --with-thread-sanitizer``) is +# expected to print a ``WARNING: ThreadSanitizer: data race`` report whose stack +# traces point at ``gc_get_stats_impl`` (gcmodule.c) and ``gc_collect_main`` +# (gc_free_threading.c). +# +# Standalone usage: +# +# ./python Lib/test/test_free_threading/test_gc_get_stats_race.py +# +# or as part of the test suite (only meaningful under a TSAN build): +# +# ./python -m test test_free_threading.test_gc_get_stats_race + +import gc +import threading +import unittest + +from test.support import threading_helper + + +# One thread hammers gc.collect() (the writer side); enough reader threads run +# gc.get_stats() concurrently to make the race easy to observe. The iteration +# counts are deliberately fixed and finite so the script always terminates. +NUM_COLLECTORS = 1 +NUM_READERS = 8 +ITERATIONS = 50_000 + + +def _stress_get_stats_race(num_collectors=NUM_COLLECTORS, + num_readers=NUM_READERS, + iterations=ITERATIONS): + """Race gc.collect() against gc.get_stats() and return collected stats.""" + + # Synchronise the start so collectors and readers overlap for as long as + # possible, maximising the chance of the read and write landing on the + # statistics struct at the same time. + barrier = threading.Barrier(num_collectors + num_readers) + + def collector(): + barrier.wait() + for _ in range(iterations): + # Writer: each full collection updates gcstate->generation_stats. + gc.collect() + + def reader(): + barrier.wait() + for _ in range(iterations): + # Reader: copies the per-generation stats structs with no lock. + gc.get_stats() + + threads = [threading.Thread(target=collector) for _ in range(num_collectors)] + threads += [threading.Thread(target=reader) for _ in range(num_readers)] + + with threading_helper.start_threads(threads): + pass + + +@threading_helper.requires_working_threading() +class TestGCGetStatsRace(unittest.TestCase): + def test_get_stats_collect_race(self): + # Use a reduced iteration count under the regular test suite so the test + # stays reasonably quick while still exercising the race; the standalone + # __main__ path below uses the full ITERATIONS for a reliable repro. + _stress_get_stats_race(iterations=2_000) + + # The race is benign at the Python level: gc.get_stats() must still + # return well-formed data and the interpreter must not crash. + stats = gc.get_stats() + self.assertEqual(len(stats), 3) + for generation in stats: + self.assertIn("collections", generation) + self.assertIn("collected", generation) + self.assertIn("uncollectable", generation) + + +if __name__ == "__main__": + # Standalone reproduction: run the full-size race and exit cleanly so the + # script can be reused as a regression check once the fix lands. + print(f"Racing {NUM_COLLECTORS} gc.collect() thread(s) against " + f"{NUM_READERS} gc.get_stats() thread(s), {ITERATIONS} iterations each...") + _stress_get_stats_race() + print("Done (no Python-level crash). " + "Run under a free-threading + TSAN build to observe the data race.") diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst new file mode 100644 index 00000000000000..fde428c8a2839b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-19-12-00-00.gh-issue-151646.Gc5tAt.rst @@ -0,0 +1,3 @@ +Fix a data race in free-threading builds between :func:`gc.get_stats` and a +concurrent garbage collection cycle. Access to the per-generation statistics +is now serialized with a mutex so the reader observes a consistent snapshot. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 0093995441e390..93f8f997fc261a 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -374,9 +374,17 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. */ GCState *gcstate = get_gc_state(); +#ifdef Py_GIL_DISABLED + /* Hold stats_mutex so the snapshot is consistent with a concurrent + collector updating the same struct (gh-151646). */ + PyMutex_Lock(&gcstate->stats_mutex); +#endif stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index]; stats[1] = gcstate->generation_stats->old[0].items[gcstate->generation_stats->old[0].index]; stats[2] = gcstate->generation_stats->old[1].items[gcstate->generation_stats->old[1].index]; +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&gcstate->stats_mutex); +#endif PyObject *result = PyList_New(0); if (result == NULL) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 4e36189580bbf8..f760d6ecc093d4 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2281,7 +2281,11 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } } - /* Update stats */ + /* Update stats. Hold stats_mutex so a concurrent gc.get_stats() reader + sees a consistent snapshot rather than racing on these fields + (gh-151646). get_stats() reads buffer->index inside the lock so that + both writer and reader resolve the same slot under the same mutex. */ + PyMutex_Lock(&gcstate->stats_mutex); struct gc_generation_stats *stats = get_stats(gcstate, generation); stats->ts_start = start; stats->ts_stop = stop; @@ -2290,6 +2294,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->uncollectable += n; stats->duration += duration; stats->candidates += state.candidates; + PyMutex_Unlock(&gcstate->stats_mutex); GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS From 08a114aef1d12927ec80f4d57511bda6b0f84ad5 Mon Sep 17 00:00:00 2001 From: Jiucheng Zang Date: Fri, 3 Jul 2026 21:23:11 -0400 Subject: [PATCH 2/2] gh-151646: Lighten gc.get_stats race test and trim mutex comments Address review feedback (nascheme) on GH-151766: - The stress test looped 8 reader threads x 50,000 iterations each. Run one writer for a fixed number of gc.collect() calls and have the readers loop until the writer signals done, so the duration is driven by the collection count. Use threading_helper.run_concurrently() for start synchronisation. - Trim the stats_mutex comments: keep one comment on the struct member saying what it protects, and drop the now-redundant comments at the lock sites. Co-Authored-By: Claude Opus 4.8 --- Include/internal/pycore_interp_structs.h | 6 +-- .../test_gc_get_stats_race.py | 52 ++++++++----------- Modules/gcmodule.c | 2 - Python/gc_free_threading.c | 5 +- 4 files changed, 26 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 4095e6543abaa0..d3efac906aeb93 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -236,9 +236,9 @@ struct _gc_runtime_state { struct gc_generation permanent_generation; struct gc_stats *generation_stats; #ifdef Py_GIL_DISABLED - /* Serializes access to generation_stats between gc_get_stats_impl() - (reader) and gc_collect_main() (writer) so they can run concurrently - under free-threading without a data race (gh-151646). */ + /* Protects generation_stats between gc_get_stats_impl() (reader) and + gc_collect_main() (writer); both resolve the stats slot (buffer index) + under this lock so they agree on which slot to touch (gh-151646). */ PyMutex stats_mutex; #endif /* true if we are currently running the collector */ diff --git a/Lib/test/test_free_threading/test_gc_get_stats_race.py b/Lib/test/test_free_threading/test_gc_get_stats_race.py index a91bdce0304b1d..dc609a98fdffd5 100644 --- a/Lib/test/test_free_threading/test_gc_get_stats_race.py +++ b/Lib/test/test_free_threading/test_gc_get_stats_race.py @@ -43,49 +43,41 @@ # One thread hammers gc.collect() (the writer side); enough reader threads run -# gc.get_stats() concurrently to make the race easy to observe. The iteration -# counts are deliberately fixed and finite so the script always terminates. -NUM_COLLECTORS = 1 -NUM_READERS = 8 -ITERATIONS = 50_000 +# gc.get_stats() concurrently to make the race easy to observe. Readers run +# until the writer is done so the test duration is controlled by the collection +# count rather than by reader loop counts. +NUM_READERS = 4 +ITERATIONS = 50 -def _stress_get_stats_race(num_collectors=NUM_COLLECTORS, - num_readers=NUM_READERS, - iterations=ITERATIONS): - """Race gc.collect() against gc.get_stats() and return collected stats.""" +def _stress_get_stats_race(num_readers=NUM_READERS, iterations=ITERATIONS): + """Race gc.collect() against gc.get_stats().""" - # Synchronise the start so collectors and readers overlap for as long as + # Synchronise the start so the writer and readers overlap for as long as # possible, maximising the chance of the read and write landing on the # statistics struct at the same time. - barrier = threading.Barrier(num_collectors + num_readers) + done = threading.Event() def collector(): - barrier.wait() - for _ in range(iterations): - # Writer: each full collection updates gcstate->generation_stats. - gc.collect() + try: + for _ in range(iterations): + # Writer: each full collection updates gcstate->generation_stats. + gc.collect() + finally: + done.set() def reader(): - barrier.wait() - for _ in range(iterations): + while not done.is_set(): # Reader: copies the per-generation stats structs with no lock. gc.get_stats() - threads = [threading.Thread(target=collector) for _ in range(num_collectors)] - threads += [threading.Thread(target=reader) for _ in range(num_readers)] - - with threading_helper.start_threads(threads): - pass + threading_helper.run_concurrently([collector] + [reader] * num_readers) @threading_helper.requires_working_threading() class TestGCGetStatsRace(unittest.TestCase): def test_get_stats_collect_race(self): - # Use a reduced iteration count under the regular test suite so the test - # stays reasonably quick while still exercising the race; the standalone - # __main__ path below uses the full ITERATIONS for a reliable repro. - _stress_get_stats_race(iterations=2_000) + _stress_get_stats_race() # The race is benign at the Python level: gc.get_stats() must still # return well-formed data and the interpreter must not crash. @@ -98,10 +90,10 @@ def test_get_stats_collect_race(self): if __name__ == "__main__": - # Standalone reproduction: run the full-size race and exit cleanly so the - # script can be reused as a regression check once the fix lands. - print(f"Racing {NUM_COLLECTORS} gc.collect() thread(s) against " - f"{NUM_READERS} gc.get_stats() thread(s), {ITERATIONS} iterations each...") + # Standalone reproduction: run the race and exit cleanly so the script can + # be reused as a regression check once the fix lands. + print(f"Racing 1 gc.collect() thread against " + f"{NUM_READERS} gc.get_stats() thread(s), {ITERATIONS} collections...") _stress_get_stats_race() print("Done (no Python-level crash). " "Run under a free-threading + TSAN build to observe the data race.") diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 93f8f997fc261a..e2df31556f3c37 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -375,8 +375,6 @@ gc_get_stats_impl(PyObject *module) the result list, we use a snapshot of the running stats. */ GCState *gcstate = get_gc_state(); #ifdef Py_GIL_DISABLED - /* Hold stats_mutex so the snapshot is consistent with a concurrent - collector updating the same struct (gh-151646). */ PyMutex_Lock(&gcstate->stats_mutex); #endif stats[0] = gcstate->generation_stats->young.items[gcstate->generation_stats->young.index]; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index f760d6ecc093d4..99f1a1eb47e3dd 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2281,10 +2281,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } } - /* Update stats. Hold stats_mutex so a concurrent gc.get_stats() reader - sees a consistent snapshot rather than racing on these fields - (gh-151646). get_stats() reads buffer->index inside the lock so that - both writer and reader resolve the same slot under the same mutex. */ + /* Update stats. */ PyMutex_Lock(&gcstate->stats_mutex); struct gc_generation_stats *stats = get_stats(gcstate, generation); stats->ts_start = start;