From 7209306f5787adcdfa0b14136be8cd1aca06d472 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 24 Nov 2025 16:06:02 +0000 Subject: [PATCH 1/8] gh-138122: Add incomplete sample detection and fix generator frame unwinding Add PyThreadState.entry_frame to track the bottommost frame in each thread. The profiler validates unwinding reached this frame, rejecting incomplete samples caused by race conditions or errors. Also fix unwinding to skip suspended generator frames which have NULL previous pointers. --- Include/cpython/pystate.h | 5 +++++ Include/internal/pycore_debug_offsets.h | 2 ++ ...025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst | 6 ++++++ Modules/_remote_debugging/_remote_debugging.h | 3 ++- Modules/_remote_debugging/frames.c | 16 +++++++++++++++- Modules/_remote_debugging/threads.c | 8 +++++++- Python/ceval.c | 8 ++++++++ Python/pystate.c | 1 + 8 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 1e1e46ea4c0bcd..c37a669eb738bd 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -135,6 +135,11 @@ struct _ts { /* Pointer to currently executing frame. */ struct _PyInterpreterFrame *current_frame; + /* Pointer to the entry/bottommost frame of the current call stack. + * This is the frame that was entered when starting execution. + * Used by profiling/sampling to detect incomplete stack traces. */ + struct _PyInterpreterFrame *entry_frame; + Py_tracefunc c_profilefunc; Py_tracefunc c_tracefunc; PyObject *c_profileobj; diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 0f17bf17f82656..8350aea8ade768 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -102,6 +102,7 @@ typedef struct _Py_DebugOffsets { uint64_t next; uint64_t interp; uint64_t current_frame; + uint64_t entry_frame; uint64_t thread_id; uint64_t native_thread_id; uint64_t datastack_chunk; @@ -272,6 +273,7 @@ typedef struct _Py_DebugOffsets { .next = offsetof(PyThreadState, next), \ .interp = offsetof(PyThreadState, interp), \ .current_frame = offsetof(PyThreadState, current_frame), \ + .entry_frame = offsetof(PyThreadState, entry_frame), \ .thread_id = offsetof(PyThreadState, thread_id), \ .native_thread_id = offsetof(PyThreadState, native_thread_id), \ .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst new file mode 100644 index 00000000000000..b515b7dbcfa661 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst @@ -0,0 +1,6 @@ +Add incomplete sample detection to prevent corrupted profiling data. The +interpreter now tracks the base frame (bottommost frame) in each thread's +``PyThreadState.base_frame``, which the profiler uses to validate that +stack unwinding reached the expected bottom. Samples that fail to unwind +completely (due to race conditions, memory corruption, or other errors) +are now rejected rather than being included as spurious single-frame stacks. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index c4547baf96746b..25f2f517037373 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -363,7 +363,8 @@ extern int process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, - uintptr_t gc_frame + uintptr_t gc_frame, + uintptr_t base_frame_addr ); /* ============================================================================ diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d60caadcb9a11e..e944a3953cfcaa 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -258,14 +258,17 @@ process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, - uintptr_t gc_frame) + uintptr_t gc_frame, + uintptr_t base_frame_addr) { uintptr_t frame_addr = initial_frame_addr; uintptr_t prev_frame_addr = 0; + uintptr_t last_frame_addr = 0; // Track the last frame we processed const size_t MAX_FRAMES = 1024; size_t frame_count = 0; while ((void*)frame_addr != NULL) { + last_frame_addr = frame_addr; // Remember this frame before moving to next PyObject *frame = NULL; uintptr_t next_frame_addr = 0; uintptr_t stackpointer = 0; @@ -347,5 +350,16 @@ process_frame_chain( frame_addr = next_frame_addr; } + // Validate we reached the base frame if it's set + if (base_frame_addr != 0 && last_frame_addr != 0) { + if (last_frame_addr != base_frame_addr) { + // We didn't reach the expected bottom frame - incomplete sample + PyErr_Format(PyExc_RuntimeError, + "Incomplete sample: reached frame 0x%lx but expected base frame 0x%lx", + last_frame_addr, base_frame_addr); + return -1; + } + } + return 0; } diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index 99147b01a1b9ed..a037b660fc2753 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -388,7 +388,13 @@ unwind_stack_for_thread( goto error; } - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame) < 0) { + // Read base_frame for validation + uintptr_t base_frame_addr = 0; + if (unwinder->debug_offsets.thread_state.base_frame != 0) { + base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); + } + + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame, base_frame_addr) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } diff --git a/Python/ceval.c b/Python/ceval.c index 5381cd826dfd19..2f120c4a37c64f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1234,6 +1234,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry.frame.previous = tstate->current_frame; frame->previous = &entry.frame; tstate->current_frame = frame; + /* Track entry frame for profiling/sampling */ + if (entry.frame.previous == NULL) { + tstate->entry_frame = &entry.frame; + } entry.frame.localsplus[0] = PyStackRef_NULL; #ifdef _Py_TIER2 if (tstate->current_executor != NULL) { @@ -1300,6 +1304,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(frame->owner == FRAME_OWNED_BY_INTERPRETER); /* Restore previous frame and exit */ tstate->current_frame = frame->previous; + /* Clear entry frame if we're returning to no frame */ + if (tstate->current_frame == NULL) { + tstate->entry_frame = NULL; + } return NULL; } #ifdef _Py_TIER2 diff --git a/Python/pystate.c b/Python/pystate.c index c12a1418e74309..10bff808a845f2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1483,6 +1483,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->gilstate_counter = 1; tstate->current_frame = NULL; + tstate->entry_frame = NULL; tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; From 111e70cb23643b6b917de4c83e125658a293f3ce Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 25 Nov 2025 15:03:09 +0000 Subject: [PATCH 2/8] Implement mark's idea --- Include/cpython/pystate.h | 10 +++-- Include/internal/pycore_debug_offsets.h | 4 +- Include/internal/pycore_interpframe_structs.h | 1 + Include/internal/pycore_tstate.h | 5 +++ ...-11-24-16-07-57.gh-issue-138122.m3EF9E.rst | 12 +++--- Modules/_remote_debugging/_remote_debugging.h | 4 +- Modules/_remote_debugging/frames.c | 37 +++++++++---------- Modules/_remote_debugging/threads.c | 9 +---- Python/ceval.c | 8 ---- Python/pystate.c | 27 +++++++++++++- Python/traceback.c | 5 ++- 11 files changed, 70 insertions(+), 52 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index c37a669eb738bd..440c367dc08319 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -135,10 +135,12 @@ struct _ts { /* Pointer to currently executing frame. */ struct _PyInterpreterFrame *current_frame; - /* Pointer to the entry/bottommost frame of the current call stack. - * This is the frame that was entered when starting execution. - * Used by profiling/sampling to detect incomplete stack traces. */ - struct _PyInterpreterFrame *entry_frame; + /* Pointer to the base frame (bottommost sentinel frame). + Used by profilers to validate complete stack unwinding. + Points to the embedded base_frame in _PyThreadStateImpl. + The frame is embedded there rather than here because _PyInterpreterFrame + is defined in internal headers that cannot be exposed in the public API. */ + struct _PyInterpreterFrame *base_frame; Py_tracefunc c_profilefunc; Py_tracefunc c_tracefunc; diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 8350aea8ade768..40ef9302f71391 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -102,7 +102,7 @@ typedef struct _Py_DebugOffsets { uint64_t next; uint64_t interp; uint64_t current_frame; - uint64_t entry_frame; + uint64_t base_frame; uint64_t thread_id; uint64_t native_thread_id; uint64_t datastack_chunk; @@ -273,7 +273,7 @@ typedef struct _Py_DebugOffsets { .next = offsetof(PyThreadState, next), \ .interp = offsetof(PyThreadState, interp), \ .current_frame = offsetof(PyThreadState, current_frame), \ - .entry_frame = offsetof(PyThreadState, entry_frame), \ + .base_frame = offsetof(PyThreadState, base_frame), \ .thread_id = offsetof(PyThreadState, thread_id), \ .native_thread_id = offsetof(PyThreadState, native_thread_id), \ .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ diff --git a/Include/internal/pycore_interpframe_structs.h b/Include/internal/pycore_interpframe_structs.h index 38510685f4093c..04effdf189de52 100644 --- a/Include/internal/pycore_interpframe_structs.h +++ b/Include/internal/pycore_interpframe_structs.h @@ -24,6 +24,7 @@ enum _frameowner { FRAME_OWNED_BY_GENERATOR = 1, FRAME_OWNED_BY_FRAME_OBJECT = 2, FRAME_OWNED_BY_INTERPRETER = 3, + FRAME_OWNED_BY_THREAD_STATE = 4, /* Sentinel base frame in thread state */ }; struct _PyInterpreterFrame { diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 50048801b2e4ee..c4f723ac8abbbe 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_brc.h" // struct _brc_thread_state #include "pycore_freelist_state.h" // struct _Py_freelists +#include "pycore_interpframe_structs.h" // _PyInterpreterFrame #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr #include "pycore_uop.h" // struct _PyUOpInstruction @@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl { // semi-public fields are in PyThreadState. PyThreadState base; + // Embedded base frame - sentinel at the bottom of the frame stack. + // Used by profiling/sampling to detect incomplete stack traces. + _PyInterpreterFrame base_frame; + // The reference count field is used to synchronize deallocation of the // thread state during runtime finalization. Py_ssize_t refcount; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst index b515b7dbcfa661..6ffd26e35dca8f 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst @@ -1,6 +1,6 @@ -Add incomplete sample detection to prevent corrupted profiling data. The -interpreter now tracks the base frame (bottommost frame) in each thread's -``PyThreadState.base_frame``, which the profiler uses to validate that -stack unwinding reached the expected bottom. Samples that fail to unwind -completely (due to race conditions, memory corruption, or other errors) -are now rejected rather than being included as spurious single-frame stacks. +Add incomplete sample detection to prevent corrupted profiling data. Each +thread state now contains an embedded base frame (sentinel at the bottom of +the frame stack) with owner type ``FRAME_OWNED_BY_THREAD_STATE``. The profiler +validates that stack unwinding terminates at this sentinel frame. Samples that +fail to reach the base frame (due to race conditions, memory corruption, or +other errors) are now rejected rather than being included as spurious data. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 25f2f517037373..a1f64d6218775e 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -363,8 +363,8 @@ extern int process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, - uintptr_t gc_frame, - uintptr_t base_frame_addr + uintptr_t base_frame_addr, + uintptr_t gc_frame ); /* ============================================================================ diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index e944a3953cfcaa..0b2375dde5a0f6 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -154,14 +154,17 @@ is_frame_valid( void* frame = (void*)frame_addr; - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner); + if (owner == FRAME_OWNED_BY_INTERPRETER) { return 0; // C frame } - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR - && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + if (owner == FRAME_OWNED_BY_THREAD_STATE) { + return 0; // Sentinel base frame - end of stack + } + + if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); return -1; } @@ -258,20 +261,20 @@ process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, - uintptr_t gc_frame, - uintptr_t base_frame_addr) + uintptr_t base_frame_addr, + uintptr_t gc_frame) { uintptr_t frame_addr = initial_frame_addr; uintptr_t prev_frame_addr = 0; - uintptr_t last_frame_addr = 0; // Track the last frame we processed + uintptr_t last_frame_addr = 0; // Track last frame visited for validation const size_t MAX_FRAMES = 1024; size_t frame_count = 0; while ((void*)frame_addr != NULL) { - last_frame_addr = frame_addr; // Remember this frame before moving to next PyObject *frame = NULL; uintptr_t next_frame_addr = 0; uintptr_t stackpointer = 0; + last_frame_addr = frame_addr; // Remember this frame address if (++frame_count > MAX_FRAMES) { PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); @@ -279,7 +282,6 @@ process_frame_chain( return -1; } - // Try chunks first, fallback to direct memory read if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) { PyErr_Clear(); uintptr_t address_of_code_object = 0; @@ -350,15 +352,12 @@ process_frame_chain( frame_addr = next_frame_addr; } - // Validate we reached the base frame if it's set - if (base_frame_addr != 0 && last_frame_addr != 0) { - if (last_frame_addr != base_frame_addr) { - // We didn't reach the expected bottom frame - incomplete sample - PyErr_Format(PyExc_RuntimeError, - "Incomplete sample: reached frame 0x%lx but expected base frame 0x%lx", - last_frame_addr, base_frame_addr); - return -1; - } + // Validate we reached the base frame (sentinel at bottom of stack) + if (last_frame_addr != base_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)", + base_frame_addr, last_frame_addr); + return -1; } return 0; diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index a037b660fc2753..43de9c1e87461f 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -376,6 +376,7 @@ unwind_stack_for_thread( } uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); + uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); frame_info = PyList_New(0); if (!frame_info) { @@ -388,13 +389,7 @@ unwind_stack_for_thread( goto error; } - // Read base_frame for validation - uintptr_t base_frame_addr = 0; - if (unwinder->debug_offsets.thread_state.base_frame != 0) { - base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); - } - - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame, base_frame_addr) < 0) { + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, base_frame_addr, gc_frame) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } diff --git a/Python/ceval.c b/Python/ceval.c index 2f120c4a37c64f..5381cd826dfd19 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1234,10 +1234,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry.frame.previous = tstate->current_frame; frame->previous = &entry.frame; tstate->current_frame = frame; - /* Track entry frame for profiling/sampling */ - if (entry.frame.previous == NULL) { - tstate->entry_frame = &entry.frame; - } entry.frame.localsplus[0] = PyStackRef_NULL; #ifdef _Py_TIER2 if (tstate->current_executor != NULL) { @@ -1304,10 +1300,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(frame->owner == FRAME_OWNED_BY_INTERPRETER); /* Restore previous frame and exit */ tstate->current_frame = frame->previous; - /* Clear entry frame if we're returning to no frame */ - if (tstate->current_frame == NULL) { - tstate->entry_frame = NULL; - } return NULL; } #ifdef _Py_TIER2 diff --git a/Python/pystate.c b/Python/pystate.c index 10bff808a845f2..bf4d1e72c88842 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1482,8 +1482,31 @@ init_threadstate(_PyThreadStateImpl *_tstate, // This is cleared when PyGILState_Ensure() creates the thread state. tstate->gilstate_counter = 1; - tstate->current_frame = NULL; - tstate->entry_frame = NULL; + // Initialize the embedded base frame - sentinel at the bottom of the frame stack + _tstate->base_frame.previous = NULL; + _tstate->base_frame.f_executable = PyStackRef_None; + _tstate->base_frame.f_funcobj = PyStackRef_NULL; + _tstate->base_frame.f_globals = NULL; + _tstate->base_frame.f_builtins = NULL; + _tstate->base_frame.f_locals = NULL; + _tstate->base_frame.frame_obj = NULL; + _tstate->base_frame.instr_ptr = NULL; + _tstate->base_frame.stackpointer = _tstate->base_frame.localsplus; + _tstate->base_frame.return_offset = 0; + _tstate->base_frame.owner = FRAME_OWNED_BY_THREAD_STATE; + _tstate->base_frame.visited = 0; +#ifdef Py_DEBUG + _tstate->base_frame.lltrace = 0; +#endif +#ifdef Py_GIL_DISABLED + _tstate->base_frame.tlbc_index = 0; +#endif + _tstate->base_frame.localsplus[0] = PyStackRef_NULL; + + // current_frame starts pointing to the base frame + tstate->current_frame = &_tstate->base_frame; + // base_frame pointer for profilers to validate stack unwinding + tstate->base_frame = &_tstate->base_frame; tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; diff --git a/Python/traceback.c b/Python/traceback.c index 521d6322a5c439..46e70fe6116118 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1035,8 +1035,9 @@ _Py_DumpWideString(int fd, wchar_t *str) static int dump_frame(int fd, _PyInterpreterFrame *frame) { - if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { - /* Ignore trampoline frame */ + if (frame->owner == FRAME_OWNED_BY_INTERPRETER || + frame->owner == FRAME_OWNED_BY_THREAD_STATE) { + /* Ignore trampoline frames and base frame sentinel */ return 0; } From 664d43943d80fb8b74baaee99bbcbac4ad2a78bf Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 25 Nov 2025 15:15:24 +0000 Subject: [PATCH 3/8] Add note to the docs --- InternalDocs/frames.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index 804d7436018a10..b823156c2b04f3 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -111,6 +111,35 @@ The shim frame points to a special code object containing the `INTERPRETER_EXIT` instruction which cleans up the shim frame and returns. +### Base frame + +Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame" +that serves as a sentinel at the bottom of the frame stack. This frame is allocated +in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized +when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_THREAD_STATE`. + +External profilers and sampling tools can validate that they have successfully unwound +the complete call stack by checking that the frame chain terminates at the base frame. +The `PyThreadState.base_frame` pointer provides the expected address to compare against. +If a stack walk doesn't reach this frame, the sample is incomplete (possibly due to a +race condition) and should be discarded. + +The base frame is embedded in `_PyThreadStateImpl` rather than `PyThreadState` because +`_PyInterpreterFrame` is defined in internal headers that cannot be exposed in the +public API. A pointer (`PyThreadState.base_frame`) is provided for profilers to access +the address without needing internal headers. + +See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/pystate.c). + +#### How profilers should use the base frame + +External profilers should read `tstate->base_frame` before walking the stack, then +walk from `tstate->current_frame` following `frame->previous` pointers until reaching +a frame with `owner == FRAME_OWNED_BY_THREAD_STATE`. After the walk, verify that the +last frame address matches `base_frame`. If not, discard the sample as incomplete +since the frame chain may have been in an inconsistent state due to concurrent updates. + + ### The Instruction Pointer `_PyInterpreterFrame` has two fields which are used to maintain the instruction From 29a04b78bdddb865ae32cbdf80a3312bf2a727c0 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 25 Nov 2025 15:20:01 +0000 Subject: [PATCH 4/8] Fix frame termination --- Python/pylifecycle.c | 2 +- Python/pystate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67368b5ce077aa..2527dca71d774e 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2571,7 +2571,7 @@ Py_EndInterpreter(PyThreadState *tstate) if (tstate != _PyThreadState_GET()) { Py_FatalError("thread is not current"); } - if (tstate->current_frame != NULL) { + if (tstate->current_frame != tstate->base_frame) { Py_FatalError("thread still has a frame"); } interp->finalizing = 1; diff --git a/Python/pystate.c b/Python/pystate.c index bf4d1e72c88842..66e4e517f9dd17 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1684,7 +1684,7 @@ PyThreadState_Clear(PyThreadState *tstate) int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; - if (verbose && tstate->current_frame != NULL) { + if (verbose && tstate->current_frame != tstate->base_frame) { /* bpo-20526: After the main thread calls _PyInterpreterState_SetFinalizing() in Py_FinalizeEx() (or in Py_EndInterpreter() for subinterpreters), From 3d9c294eaf1fe87017824562079d4b852e1b994a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 25 Nov 2025 16:01:13 +0000 Subject: [PATCH 5/8] Fix assignment --- Python/ceval.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 5381cd826dfd19..f12c33cf963901 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3064,6 +3064,9 @@ PyEval_MergeCompilerFlags(PyCompilerFlags *cf) { PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = tstate->current_frame; + if (current_frame == tstate->base_frame) { + current_frame = NULL; + } int result = cf->cf_flags != 0; if (current_frame != NULL) { From e14dc8ebb19d3c6c1bd60995266149fe3b2697cc Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 27 Nov 2025 20:49:40 +0000 Subject: [PATCH 6/8] ReuseFRAME_OWNED_BY_INTERPRETER --- Include/internal/pycore_interpframe_structs.h | 1 - InternalDocs/frames.md | 4 ++-- .../2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst | 2 +- Modules/_remote_debugging/frames.c | 6 +----- Python/pystate.c | 2 +- Python/traceback.c | 3 +-- 6 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_interpframe_structs.h b/Include/internal/pycore_interpframe_structs.h index 04effdf189de52..38510685f4093c 100644 --- a/Include/internal/pycore_interpframe_structs.h +++ b/Include/internal/pycore_interpframe_structs.h @@ -24,7 +24,6 @@ enum _frameowner { FRAME_OWNED_BY_GENERATOR = 1, FRAME_OWNED_BY_FRAME_OBJECT = 2, FRAME_OWNED_BY_INTERPRETER = 3, - FRAME_OWNED_BY_THREAD_STATE = 4, /* Sentinel base frame in thread state */ }; struct _PyInterpreterFrame { diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index b823156c2b04f3..7077726b8e3750 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -116,7 +116,7 @@ instruction which cleans up the shim frame and returns. Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame" that serves as a sentinel at the bottom of the frame stack. This frame is allocated in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized -when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_THREAD_STATE`. +when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_INTERPRETER`. External profilers and sampling tools can validate that they have successfully unwound the complete call stack by checking that the frame chain terminates at the base frame. @@ -135,7 +135,7 @@ See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/py External profilers should read `tstate->base_frame` before walking the stack, then walk from `tstate->current_frame` following `frame->previous` pointers until reaching -a frame with `owner == FRAME_OWNED_BY_THREAD_STATE`. After the walk, verify that the +a frame with `owner == FRAME_OWNED_BY_INTERPRETER`. After the walk, verify that the last frame address matches `base_frame`. If not, discard the sample as incomplete since the frame chain may have been in an inconsistent state due to concurrent updates. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst index 6ffd26e35dca8f..a4a29e400274cb 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst @@ -1,6 +1,6 @@ Add incomplete sample detection to prevent corrupted profiling data. Each thread state now contains an embedded base frame (sentinel at the bottom of -the frame stack) with owner type ``FRAME_OWNED_BY_THREAD_STATE``. The profiler +the frame stack) with owner type ``FRAME_OWNED_BY_INTERPRETER``. The profiler validates that stack unwinding terminates at this sentinel frame. Samples that fail to reach the base frame (due to race conditions, memory corruption, or other errors) are now rejected rather than being included as spurious data. diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index 0b2375dde5a0f6..c1e978c3bddf4f 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -156,11 +156,7 @@ is_frame_valid( char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner); if (owner == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } - - if (owner == FRAME_OWNED_BY_THREAD_STATE) { - return 0; // Sentinel base frame - end of stack + return 0; // C frame or sentinel base frame } if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) { diff --git a/Python/pystate.c b/Python/pystate.c index 66e4e517f9dd17..2956e785405a0e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1493,7 +1493,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, _tstate->base_frame.instr_ptr = NULL; _tstate->base_frame.stackpointer = _tstate->base_frame.localsplus; _tstate->base_frame.return_offset = 0; - _tstate->base_frame.owner = FRAME_OWNED_BY_THREAD_STATE; + _tstate->base_frame.owner = FRAME_OWNED_BY_INTERPRETER; _tstate->base_frame.visited = 0; #ifdef Py_DEBUG _tstate->base_frame.lltrace = 0; diff --git a/Python/traceback.c b/Python/traceback.c index 46e70fe6116118..1bb9b42f4d7e7b 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1035,8 +1035,7 @@ _Py_DumpWideString(int fd, wchar_t *str) static int dump_frame(int fd, _PyInterpreterFrame *frame) { - if (frame->owner == FRAME_OWNED_BY_INTERPRETER || - frame->owner == FRAME_OWNED_BY_THREAD_STATE) { + if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { /* Ignore trampoline frames and base frame sentinel */ return 0; } From 13bd7e45f86e87609b82fc529411fa919a307ea2 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 7 Dec 2025 00:15:32 +0000 Subject: [PATCH 7/8] Fix thread cache test --- Lib/test/test_external_inspection.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 2e6e6eaad06450..75365c787c96a3 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -2543,20 +2543,18 @@ def recv_msg(): def get_thread_frames(target_funcs): """Get frames for thread matching target functions.""" - retries = 0 - for _ in busy_retry(SHORT_TIMEOUT): - if retries >= 5: - break - retries += 1 - # On Windows, ReadProcessMemory can fail with OSError - # (WinError 299) when frame pointers are in flux - with contextlib.suppress(RuntimeError, OSError): + for _ in range(self.MAX_TRIES): + try: traces = unwinder.get_stack_trace() - for interp in traces: - for thread in interp.threads: - funcs = [f.funcname for f in thread.frame_info] - if any(f in funcs for f in target_funcs): - return funcs + except (RuntimeError, OSError): + time.sleep(0.1) + continue + all_threads = [t for i in traces for t in i.threads] + for thread in all_threads: + funcs = [f.funcname for f in thread.frame_info] + if any(f in funcs for f in target_funcs): + return funcs + time.sleep(0.1) return None # Track results for each sync point From c9ab431bfd0e7e48e228dbe9d14385eeecaf2b6a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 7 Dec 2025 04:01:35 +0000 Subject: [PATCH 8/8] Fix warnings --- Lib/test/test_external_inspection.py | 43 +++++----------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 082c215051c110..a97242483a8942 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1,6 +1,7 @@ import unittest import os import textwrap +import contextlib import importlib import sys import socket @@ -215,36 +216,13 @@ def requires_subinterpreters(meth): # Simple wrapper functions for RemoteUnwinder # ============================================================================ -# Errors that can occur transiently when reading process memory without synchronization -RETRIABLE_ERRORS = ( - "Bad address", - "Task list appears corrupted", - "Invalid linked list structure reading remote memory", - "Invalid string length", - "Unknown error reading memory", - "Unhandled frame owner", - "Failed to parse initial frame", - "Failed to process frame chain", - "Failed to unwind stack", - "process_vm_readv", -) - - -def _is_retriable_error(exc): - """Check if an exception is a transient error that should be retried.""" - msg = str(exc) - return any(msg.startswith(err) or err in msg for err in RETRIABLE_ERRORS) - - def get_stack_trace(pid): for _ in busy_retry(SHORT_TIMEOUT): try: unwinder = RemoteUnwinder(pid, all_threads=True, debug=True) return unwinder.get_stack_trace() except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get stack trace after retries") @@ -254,9 +232,7 @@ def get_async_stack_trace(pid): unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_async_stack_trace() except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get async stack trace after retries") @@ -266,9 +242,7 @@ def get_all_awaited_by(pid): unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_all_awaited_by() except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get all awaited_by after retries") @@ -2270,16 +2244,13 @@ def make_unwinder(cache_frames=True): def _get_frames_with_retry(self, unwinder, required_funcs): """Get frames containing required_funcs, with retry for transient errors.""" for _ in range(MAX_TRIES): - try: + with contextlib.suppress(OSError, RuntimeError): traces = unwinder.get_stack_trace() for interp in traces: for thread in interp.threads: funcs = {f.funcname for f in thread.frame_info} if required_funcs.issubset(funcs): return thread.frame_info - except (OSError, RuntimeError) as e: - if not _is_retriable_error(e): - raise time.sleep(0.1) return None @@ -2819,12 +2790,12 @@ def foo2(): for i in range(4): # Extract first message from buffer msg, sep, buffer = buffer.partition(b"\n") - self.assertIn(msg, dispatch, f"Unexpected message: {msg}") + self.assertIn(msg, dispatch, f"Unexpected message: {msg!r}") # Sample frames for the thread at this sync point required_funcs = dispatch[msg] frames = self._get_frames_with_retry(unwinder, required_funcs) - self.assertIsNotNone(frames, f"Thread not found for {msg}") + self.assertIsNotNone(frames, f"Thread not found for {msg!r}") results[msg] = [f.funcname for f in frames] # Release thread and wait for next message (if not last)