diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h
index 64128a78136e563..9cbf5b5823f5d0e 100644
--- a/Include/internal/pycore_ceval_state.h
+++ b/Include/internal/pycore_ceval_state.h
@@ -35,6 +35,7 @@ extern "C" {
     { \
         .status = PERF_STATUS_NO_INIT, \
         .extra_code_index = -1, \
+        .code_watcher_id = -1, \
         .persist_after_fork = 0, \
     }
 #else
diff --git a/Lib/test/test_free_threading/test_perf_trampoline.py b/Lib/test/test_free_threading/test_perf_trampoline.py
new file mode 100644
index 000000000000000..f4f4edbfbd60c88
--- /dev/null
+++ b/Lib/test/test_free_threading/test_perf_trampoline.py
@@ -0,0 +1,82 @@
+"""Tests perf trampoline in a multi-threaded environment to verify thread safety in free-threaded builds."""
+
+import gc
+import sys
+import threading
+import unittest
+from test.support import threading_helper
+
+NTHREADS = 10
+ITERATIONS_PER_THREAD = 50
+
+
+@threading_helper.requires_working_threading()
+class TestPerfTrampolineThreadSafety(unittest.TestCase):
+
+    def test_concurrent_code_compilation(self):
+        """Stress test simultaneous allocation of code arenas and tracking hooks."""
+        if not hasattr(sys, "_perf_trampoline_init") or not hasattr(sys, "_perf_trampoline_fini"):
+            self.skipTest("perf trampoline APIs are not supported on this platform")
+
+        try:
+            sys._perf_trampoline_init(1)
+        except ValueError as exc:
+            self.skipTest(f"perf trampoline activation failed: {exc}")
+        self.addCleanup(sys._perf_trampoline_fini)
+
+        barrier = threading.Barrier(NTHREADS)
+
+        def worker():
+            barrier.wait()
+            for i in range(ITERATIONS_PER_THREAD):
+                ns = {}
+                tid = threading.get_ident()
+                exec(
+                    f"def func_{tid}_{i}(): return {i}\n"
+                    f"result = func_{tid}_{i}()",
+                    ns,
+                )
+                self.assertEqual(ns["result"], i)
+                del ns  # Immediate drop to prevent delaying cleanup, no internal gc.collect()
+
+        threading_helper.run_concurrently(
+            nthreads=NTHREADS, worker_func=worker
+        )
+        # Single final sweep to ensure code objects and extra tracking clear cleanly
+        gc.collect()
+
+    def test_concurrent_shared_code_execution(self):
+        """Verify multiple threads safely handle entering evaluation loops for the same code object."""
+        if not hasattr(sys, "_perf_trampoline_init") or not hasattr(sys, "_perf_trampoline_fini"):
+            self.skipTest("perf trampoline APIs are not supported on this platform")
+
+        try:
+            sys._perf_trampoline_init(1)
+        except ValueError as exc:
+            self.skipTest(f"perf trampoline activation failed: {exc}")
+        self.addCleanup(sys._perf_trampoline_fini)
+
+        barrier = threading.Barrier(NTHREADS)
+
+        # Pre-compile a single function instance to share across all threads
+        shared_ns = {}
+        exec("def shared_func(): return 42", shared_ns)
+        shared_func = shared_ns["shared_func"]
+
+        def worker():
+            barrier.wait()
+            for _ in range(ITERATIONS_PER_THREAD):
+                self.assertEqual(shared_func(), 42)
+
+        threading_helper.run_concurrently(
+            nthreads=NTHREADS, worker_func=worker
+        )
+
+        del shared_func
+        del shared_ns
+        # Single final sweep to clean up shared resources safely
+        gc.collect()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index d90b789c2b57126..1e5d55967049ca9 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -134,6 +134,7 @@ any DWARF information available for them).
 #include "pycore_interpframe.h"   // _PyFrame_GetCode()
 #include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
 #include "pycore_runtime.h"       // _PyRuntime
+#include "pycore_lock.h"
 
 
 #ifdef PY_HAVE_PERF_TRAMPOLINE
@@ -197,6 +198,10 @@ enum perf_trampoline_type {
 };
 
 #define perf_status _PyRuntime.ceval.perf.status
+
+#define ATOMIC_LOAD_STATUS() _Py_atomic_load_int_relaxed((const int *)&perf_status)
+#define ATOMIC_STORE_STATUS(val) _Py_atomic_store_int_relaxed((int *)&perf_status, val)
+
 #define extra_code_index _PyRuntime.ceval.perf.extra_code_index
 #define perf_code_arena _PyRuntime.ceval.perf.code_arena
 #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
@@ -207,7 +212,12 @@ enum perf_trampoline_type {
 #define trampoline_refcount _PyRuntime.ceval.perf.trampoline_refcount
 #define code_watcher_id _PyRuntime.ceval.perf.code_watcher_id
 
+#ifdef Py_GIL_DISABLED
+static PyMutex perf_trampoline_mutex = {0};
+#endif
+
 static void free_code_arenas(void);
+static int _PyPerfTrampoline_Fini_Locked(void);
 
 static void
 perf_trampoline_clear_code_watcher(void)
@@ -216,14 +226,19 @@ perf_trampoline_clear_code_watcher(void)
         PyCode_ClearWatcher(code_watcher_id);
         code_watcher_id = -1;
     }
-    extra_code_index = -1;
+    _Py_atomic_store_ssize_relaxed(&extra_code_index, -1);
 }
 
 static void
 perf_trampoline_reset_state(void)
 {
     free_code_arenas();
-    perf_trampoline_clear_code_watcher();
+    // We do NOT clear code_watcher_id here. PyCode_ClearWatcher is not
+    // thread-safe in free-threading mode against concurrent code deallocation.
+    // Leaving the watcher active is safe. We MUST reset extra_code_index to -1
+    // so that next activation gets a new index, because the old index's
+    // co_extra array still contains dangling pointers to the freed arenas.
+    _Py_atomic_store_ssize_relaxed(&extra_code_index, -1);
 }
 
 static int
@@ -232,18 +247,25 @@ perf_trampoline_code_watcher(PyCodeEvent event, PyCodeObject *co)
     if (event != PY_CODE_EVENT_DESTROY) {
         return 0;
     }
-    if (extra_code_index == -1) {
+    Py_ssize_t index = _Py_atomic_load_ssize_relaxed(&extra_code_index);
+    if (index == -1) {
         return 0;
     }
     py_trampoline f = NULL;
-    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+    int ret = _PyCode_GetExtra((PyObject *)co, index, (void **)&f);
     if (ret != 0 || f == NULL) {
         return 0;
     }
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&perf_trampoline_mutex);
+#endif
     trampoline_refcount--;
     if (trampoline_refcount == 0) {
         perf_trampoline_reset_state();
     }
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
     return 0;
 }
 
@@ -330,7 +352,7 @@ new_code_arena(void)
     if (memory == MAP_FAILED) {
         PyErr_SetFromErrno(PyExc_OSError);
         PyErr_FormatUnraisable("Failed to create new mmap for perf trampoline");
-        perf_status = PERF_STATUS_FAILED;
+        ATOMIC_STORE_STATUS(PERF_STATUS_FAILED);
         return -1;
     }
     (void)_PyAnnotateMemoryMap(memory, mem_size, "cpython:perf_trampoline");
@@ -430,34 +452,65 @@ static PyObject *
 py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
                         int throw)
 {
-    if (perf_status == PERF_STATUS_FAILED ||
-        perf_status == PERF_STATUS_NO_INIT) {
+    int status = ATOMIC_LOAD_STATUS();
+    if (status == PERF_STATUS_FAILED ||
+        status == PERF_STATUS_NO_INIT) {
         goto default_eval;
     }
     PyCodeObject *co = _PyFrame_GetCode(frame);
     py_trampoline f = NULL;
-    assert(extra_code_index != -1);
-    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+    py_trampoline new_trampoline = NULL;
+    size_t code_size = 0;
+    int ret;
+    Py_ssize_t index = _Py_atomic_load_ssize_relaxed(&extra_code_index);
+    if (index == -1) {
+        goto default_eval;
+    }
+    ret = _PyCode_GetExtra((PyObject *)co, index, (void **)&f);
     if (ret != 0 || f == NULL) {
         // This is the first time we see this code object so we need
         // to compile a trampoline for it.
-        py_trampoline new_trampoline = compile_trampoline();
-        if (new_trampoline == NULL) {
+#ifdef Py_GIL_DISABLED
+        PyMutex_Lock(&perf_trampoline_mutex);
+#endif
+        if (ATOMIC_LOAD_STATUS() == PERF_STATUS_OK) {
+            ret = _PyCode_GetExtra((PyObject *)co, index, (void **)&f);
+            if (ret != 0 || f == NULL) {
+                new_trampoline = compile_trampoline();
+                if (new_trampoline != NULL) {
+                    _PyCode_SetExtra((PyObject *)co, index,
+                                     (void *)new_trampoline);
+                    trampoline_refcount++;
+                    code_size = perf_code_arena->code_size;
+                }
+            }
+        }
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        if (f == NULL && new_trampoline != NULL) {
+            // Call write_state outside the global lock to avoid holding it
+            // during JIT I/O. The default implementation
+            // (PyUnstable_WritePerfMapEntry) is internally thread-safe.
+            trampoline_api.write_state(trampoline_api.state, new_trampoline,
+                                       code_size, co);
+            f = new_trampoline;
+        }
+        if (f != NULL) {
+            goto execute_trampoline;
+        } else {
             goto default_eval;
         }
-        trampoline_api.write_state(trampoline_api.state, new_trampoline,
-                                   perf_code_arena->code_size, co);
-        _PyCode_SetExtra((PyObject *)co, extra_code_index,
-                         (void *)new_trampoline);
-        trampoline_refcount++;
-        f = new_trampoline;
     }
+execute_trampoline:
     assert(f != NULL);
-    return f(ts, frame, throw, prev_eval_frame != NULL ? prev_eval_frame : _PyEval_EvalFrameDefault);
+    return f(
+        ts, frame, throw,
+        prev_eval_frame != NULL ? prev_eval_frame : _PyEval_EvalFrameDefault);
 default_eval:
     // Something failed, fall back to the default evaluator.
     if (prev_eval_frame) {
-        return prev_eval_frame(ts, frame, throw);
+      return prev_eval_frame(ts, frame, throw);
     }
     return _PyEval_EvalFrameDefault(ts, frame, throw);
 }
@@ -467,18 +520,50 @@ int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
     py_trampoline f = NULL;
-    assert(extra_code_index != -1);
-    int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+    py_trampoline new_trampoline = NULL;
+    size_t code_size = 0;
+    int ret;
+    int set_extra_ret = 0;
+    Py_ssize_t index = _Py_atomic_load_ssize_relaxed(&extra_code_index);
+    if (index == -1) {
+        return -1;
+    }
+    ret = _PyCode_GetExtra((PyObject *)co, index, (void **)&f);
     if (ret != 0 || f == NULL) {
-        py_trampoline new_trampoline = compile_trampoline();
+#ifdef Py_GIL_DISABLED
+        PyMutex_Lock(&perf_trampoline_mutex);
+#endif
+        if (ATOMIC_LOAD_STATUS() == PERF_STATUS_OK) {
+            ret = _PyCode_GetExtra((PyObject *)co, index, (void **)&f);
+            if (ret != 0 || f == NULL) {
+                new_trampoline = compile_trampoline();
+                if (new_trampoline != NULL) {
+                    set_extra_ret = _PyCode_SetExtra((PyObject *)co, index,
+                                                     (void *)new_trampoline);
+                    if (set_extra_ret == 0) {
+                        trampoline_refcount++;
+                        code_size = perf_code_arena->code_size;
+                    }
+                }
+            }
+        }
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        if (f != NULL) {
+            return 0; // Already compiled by another thread
+        }
         if (new_trampoline == NULL) {
             return 0;
         }
-        trampoline_api.write_state(trampoline_api.state, new_trampoline,
-                                   perf_code_arena->code_size, co);
-        trampoline_refcount++;
-        return _PyCode_SetExtra((PyObject *)co, extra_code_index,
-                         (void *)new_trampoline);
+        if (set_extra_ret == 0) {
+            // Call write_state outside the global lock to avoid holding it
+            // during JIT I/O. The default implementation
+            // (PyUnstable_WritePerfMapEntry) is internally thread-safe.
+            trampoline_api.write_state(trampoline_api.state, new_trampoline,
+                                       code_size, co);
+        }
+        return set_extra_ret;
     }
 #endif // PY_HAVE_PERF_TRAMPOLINE
     return 0;
@@ -515,74 +600,41 @@ _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks)
         return -1;
     }
 #ifdef PY_HAVE_PERF_TRAMPOLINE
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&perf_trampoline_mutex);
+#endif
     if (trampoline_api.state) {
-        _PyPerfTrampoline_Fini();
+        _PyPerfTrampoline_Fini_Locked();
     }
     trampoline_api.init_state = callbacks->init_state;
     trampoline_api.write_state = callbacks->write_state;
     trampoline_api.free_state = callbacks->free_state;
     trampoline_api.state = NULL;
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&perf_trampoline_mutex);
 #endif
-    return 0;
-}
-
-int
-_PyPerfTrampoline_Init(int activate)
-{
-#ifdef PY_HAVE_PERF_TRAMPOLINE
-    PyThreadState *tstate = _PyThreadState_GET();
-    if (code_watcher_id == 0) {
-        // Initialize to -1 since 0 is a valid watcher ID
-        code_watcher_id = -1;
-    }
-    if (!activate) {
-        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame);
-        perf_status = PERF_STATUS_NO_INIT;
-    }
-    else if (tstate->interp->eval_frame != py_trampoline_evaluator) {
-        prev_eval_frame = _PyInterpreterState_GetEvalFrameFunc(tstate->interp);
-        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
-        extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
-        if (extra_code_index == -1) {
-            return -1;
-        }
-        if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
-            trampoline_api.state = trampoline_api.init_state();
-        }
-        if (new_code_arena() < 0) {
-            return -1;
-        }
-        code_watcher_id = PyCode_AddWatcher(perf_trampoline_code_watcher);
-        if (code_watcher_id < 0) {
-            PyErr_FormatUnraisable("Failed to register code watcher for perf trampoline");
-            free_code_arenas();
-            return -1;
-        }
-        trampoline_refcount = 1;  // Base refcount held by the system
-        perf_status = PERF_STATUS_OK;
-    }
 #endif
     return 0;
 }
 
-int
-_PyPerfTrampoline_Fini(void)
-{
 #ifdef PY_HAVE_PERF_TRAMPOLINE
-    if (perf_status != PERF_STATUS_OK) {
+static int
+_PyPerfTrampoline_Fini_Locked(void)
+{
+    if (ATOMIC_LOAD_STATUS() != PERF_STATUS_OK) {
         return 0;
     }
     PyThreadState *tstate = _PyThreadState_GET();
     if (tstate->interp->eval_frame == py_trampoline_evaluator) {
         _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
     }
-    if (perf_status == PERF_STATUS_OK) {
+    if (trampoline_api.free_state != NULL) {
         trampoline_api.free_state(trampoline_api.state);
-        perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
     }
+    perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
 
     // Prevent new trampolines from being created
-    perf_status = PERF_STATUS_NO_INIT;
+    ATOMIC_STORE_STATUS(PERF_STATUS_NO_INIT);
 
     // Decrement base refcount. If refcount reaches 0, all code objects are already
     // dead so clean up now. Otherwise, watcher remains active to clean up when last
@@ -591,15 +643,101 @@ _PyPerfTrampoline_Fini(void)
     if (trampoline_refcount == 0) {
         perf_trampoline_reset_state();
     }
+    return 0;
+}
+#endif
+
+int
+_PyPerfTrampoline_Init(int activate)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&perf_trampoline_mutex);
+#endif
+    if (activate && ATOMIC_LOAD_STATUS() == PERF_STATUS_OK) {
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        return 0;
+    }
+    PyThreadState *tstate = _PyThreadState_GET();
+    if (code_watcher_id == 0) {
+        // Initialize to -1 since 0 is a valid watcher ID
+        code_watcher_id = -1;
+    }
+    if (!activate) {
+      _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame);
+      ATOMIC_STORE_STATUS(PERF_STATUS_NO_INIT);
+    } else if (tstate->interp->eval_frame != py_trampoline_evaluator) {
+      prev_eval_frame = _PyInterpreterState_GetEvalFrameFunc(tstate->interp);
+      _PyInterpreterState_SetEvalFrameFunc(tstate->interp,
+                                           py_trampoline_evaluator);
+      Py_ssize_t idx = _PyEval_RequestCodeExtraIndex(NULL);
+      _Py_atomic_store_ssize_relaxed(&extra_code_index, idx);
+      if (idx == -1) {
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame);
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        return -1;
+      }
+      if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
+        trampoline_api.state = trampoline_api.init_state();
+      }
+      if (new_code_arena() < 0) {
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame);
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        return -1;
+      }
+      code_watcher_id = PyCode_AddWatcher(perf_trampoline_code_watcher);
+      if (code_watcher_id < 0) {
+        PyErr_FormatUnraisable(
+            "Failed to register code watcher for perf trampoline");
+        free_code_arenas();
+        _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame);
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+        return -1;
+      }
+      trampoline_refcount = 1;  // Base refcount held by the system
+      ATOMIC_STORE_STATUS(PERF_STATUS_OK);
+    }
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
 #endif
     return 0;
 }
 
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+int
+_PyPerfTrampoline_Fini(void)
+{
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&perf_trampoline_mutex);
+#endif
+    int ret = _PyPerfTrampoline_Fini_Locked();
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&perf_trampoline_mutex);
+#endif
+    return ret;
+}
+#else
+int
+_PyPerfTrampoline_Fini(void)
+{
+    return 0;
+}
+#endif
+
 int
 PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){
 #ifdef PY_HAVE_PERF_TRAMPOLINE
-    persist_after_fork = enable;
-    return persist_after_fork;
+    _Py_atomic_store_ssize_relaxed(&persist_after_fork, (Py_ssize_t)enable);
+    return (int)_Py_atomic_load_ssize_relaxed(&persist_after_fork);
 #endif
     return 0;
 }
@@ -608,7 +746,10 @@ PyStatus
 _PyPerfTrampoline_AfterFork_Child(void)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
-    if (persist_after_fork) {
+#ifdef Py_GIL_DISABLED
+    _PyMutex_at_fork_reinit(&perf_trampoline_mutex);
+#endif
+    if (_Py_atomic_load_ssize_relaxed(&persist_after_fork)) {
         if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
             return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
         }
@@ -624,14 +765,19 @@ _PyPerfTrampoline_AfterFork_Child(void)
         int was_active = _PyIsPerfTrampolineActive();
         _PyPerfTrampoline_Fini();
         if (was_active) {
-            // After fork, Fini may leave the old code watcher registered
-            // if trampolined code objects from the parent still exist
-            // (trampoline_refcount > 0). Clear it unconditionally before
-            // Init registers a new one, but keep the old arenas mapped: the
-            // child may still need to return through trampoline frames that
-            // were on the C stack at fork().
-            perf_trampoline_clear_code_watcher();
-            _PyPerfTrampoline_Init(1);
+          // After fork, Fini may leave the old code watcher registered
+          // if trampolined code objects from the parent still exist
+          // (trampoline_refcount > 0). Clear it unconditionally before
+          // Init registers a new one, but keep the old arenas mapped: the
+          // child may still need to return through trampoline frames that
+          // were on the C stack at fork().
+          perf_trampoline_clear_code_watcher();
+          if (_PyPerfTrampoline_Init(1) < 0) {
+            PyErr_Clear();
+            PySys_WriteStderr("Python warning: Failed to restart perf trampoline after fork. "
+                              "Disabling perf trampoline.\n");
+            ATOMIC_STORE_STATUS(PERF_STATUS_FAILED);
+          }
         }
     }
 #endif