From dd27e5e67985adb47f9089a854b61f0d6b9934ff Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:00 +0000 Subject: [PATCH 01/24] Extend RemoteUnwinder to capture precise bytecode locations Introduces LocationInfo struct sequence with end_lineno, col_offset, and end_col_offset fields. Adds opcodes parameter to RemoteUnwinder that extracts the currently executing opcode alongside its source span. Refactors linetable parsing to correctly accumulate line numbers separately from output values, fixing edge cases in computed_line. --- Modules/_remote_debugging/_remote_debugging.h | 16 ++- Modules/_remote_debugging/clinic/module.c.h | 33 +++++-- Modules/_remote_debugging/code_objects.c | 99 +++++++++++++------ Modules/_remote_debugging/frames.c | 4 +- Modules/_remote_debugging/module.c | 40 +++++++- 5 files changed, 146 insertions(+), 46 deletions(-) diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index c4547baf96746b..6726576d04faea 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -157,6 +157,7 @@ typedef struct { typedef struct { PyTypeObject *RemoteDebugging_Type; PyTypeObject *TaskInfo_Type; + PyTypeObject *LocationInfo_Type; PyTypeObject *FrameInfo_Type; PyTypeObject *CoroInfo_Type; PyTypeObject *ThreadInfo_Type; @@ -195,6 +196,7 @@ typedef struct { int skip_non_matching_threads; int native; int gc; + int opcodes; RemoteDebuggingState *cached_state; #ifdef Py_GIL_DISABLED uint32_t tlbc_generation; @@ -248,6 +250,7 @@ typedef int (*set_entry_processor_func)( * ============================================================================ */ extern PyStructSequence_Desc TaskInfo_desc; +extern PyStructSequence_Desc LocationInfo_desc; extern PyStructSequence_Desc FrameInfo_desc; extern PyStructSequence_Desc CoroInfo_desc; extern PyStructSequence_Desc ThreadInfo_desc; @@ -298,11 +301,20 @@ extern int parse_code_object( int32_t tlbc_index ); +extern PyObject *make_location_info( + RemoteUnwinderObject *unwinder, + int lineno, + int end_lineno, + int col_offset, + int end_col_offset +); + extern PyObject *make_frame_info( RemoteUnwinderObject *unwinder, PyObject *file, - PyObject *line, - PyObject *func + PyObject *location, // LocationInfo structseq or None for synthetic frames + PyObject *func, + PyObject *opcode ); /* Line table parsing */ diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h index 60adb357e32e71..d781f7f49be856 100644 --- a/Modules/_remote_debugging/clinic/module.c.h +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -12,7 +12,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" " mode=0, debug=False, skip_non_matching_threads=True,\n" -" native=False, gc=False)\n" +" native=False, gc=False, opcodes=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -32,6 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " non-Python code.\n" " gc: If True, include artificial \"\" frames to denote active garbage\n" " collection.\n" +" opcodes: If True, gather bytecode opcode information for instruction-level\n" +" profiling.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -48,7 +50,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc); + int native, int gc, + int opcodes); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 8 + #define NUM_KEYWORDS 9 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -65,7 +68,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -74,14 +77,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[8]; + PyObject *argsbuf[9]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; @@ -93,6 +96,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int skip_non_matching_threads = 1; int native = 0; int gc = 0; + int opcodes = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -160,12 +164,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - gc = PyObject_IsTrue(fastargs[7]); - if (gc < 0) { + if (fastargs[7]) { + gc = PyObject_IsTrue(fastargs[7]); + if (gc < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + opcodes = PyObject_IsTrue(fastargs[8]); + if (opcodes < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes); exit: return return_value; @@ -347,4 +360,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=946a0838197bf141 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index ea3f00c802b110..255a4f374f66c3 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L { const uint8_t* ptr = (const uint8_t*)(linetable); uintptr_t addr = 0; - info->lineno = firstlineno; + int computed_line = firstlineno; // Running accumulator, separate from output while (*ptr != '\0') { - // See InternalDocs/code_objects.md for where these magic numbers are from - // and for the decoding algorithm. uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; size_t length = (first_byte & 7) + 1; uintptr_t end_addr = addr + length; + switch (code) { - case PY_CODE_LOCATION_INFO_NONE: { + case PY_CODE_LOCATION_INFO_NONE: + info->lineno = info->end_lineno = -1; + info->column = info->end_column = -1; break; - } - case PY_CODE_LOCATION_INFO_LONG: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; - info->end_lineno = info->lineno + scan_varint(&ptr); + case PY_CODE_LOCATION_INFO_LONG: + computed_line += scan_signed_varint(&ptr); + info->lineno = computed_line; + info->end_lineno = computed_line + scan_varint(&ptr); info->column = scan_varint(&ptr) - 1; info->end_column = scan_varint(&ptr) - 1; break; - } - case PY_CODE_LOCATION_INFO_NO_COLUMNS: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + computed_line += scan_signed_varint(&ptr); + info->lineno = info->end_lineno = computed_line; info->column = info->end_column = -1; break; - } case PY_CODE_LOCATION_INFO_ONE_LINE0: case PY_CODE_LOCATION_INFO_ONE_LINE1: - case PY_CODE_LOCATION_INFO_ONE_LINE2: { - int line_delta = code - 10; - info->lineno += line_delta; - info->end_lineno = info->lineno; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + computed_line += code - 10; + info->lineno = info->end_lineno = computed_line; info->column = *(ptr++); info->end_column = *(ptr++); break; - } default: { uint8_t second_byte = *(ptr++); if ((second_byte & 128) != 0) { return false; } + info->lineno = info->end_lineno = computed_line; info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -215,8 +212,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L * ============================================================================ */ PyObject * -make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, - PyObject *func) +make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno, + int col_offset, int end_col_offset) +{ + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *info = PyStructSequence_New(state->LocationInfo_Type); + if (info == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo"); + return NULL; + } + PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno)); + PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno)); + PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset)); + PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset)); + return info; +} + +PyObject * +make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location, + PyObject *func, PyObject *opcode) { RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); PyObject *info = PyStructSequence_New(state->FrameInfo_Type); @@ -225,11 +239,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, return NULL; } Py_INCREF(file); - Py_INCREF(line); + Py_INCREF(location); Py_INCREF(func); + Py_INCREF(opcode); PyStructSequence_SetItem(info, 0, file); - PyStructSequence_SetItem(info, 1, line); + PyStructSequence_SetItem(info, 1, location); PyStructSequence_SetItem(info, 2, func); + PyStructSequence_SetItem(info, 3, opcode); return info; } @@ -365,16 +381,43 @@ parse_code_object(RemoteUnwinderObject *unwinder, meta->first_lineno, &info); if (!ok) { info.lineno = -1; + info.end_lineno = -1; + info.column = -1; + info.end_column = -1; } - PyObject *lineno = PyLong_FromLong(info.lineno); - if (!lineno) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + // Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset) + PyObject *location = make_location_info(unwinder, + info.lineno, + info.end_lineno, + info.column, + info.end_column); + if (!location) { goto error; } - PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); - Py_DECREF(lineno); + // Read the instruction opcode from target process if opcodes flag is set + PyObject *opcode_obj = NULL; + if (unwinder->opcodes) { + uint16_t instruction_word = 0; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip, + sizeof(uint16_t), &instruction_word) == 0) { + opcode_obj = PyLong_FromLong(instruction_word & 0xFF); + if (!opcode_obj) { + Py_DECREF(location); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object"); + goto error; + } + } else { + // Opcode read failed - clear the exception since opcode is optional + PyErr_Clear(); + } + } + + PyObject *tuple = make_frame_info(unwinder, meta->file_name, location, + meta->func_name, opcode_obj ? opcode_obj : Py_None); + Py_DECREF(location); + Py_XDECREF(opcode_obj); if (!tuple) { goto error; } diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d60caadcb9a11e..51e9cc936118a2 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -310,9 +310,9 @@ process_frame_chain( extra_frame = &_Py_STR(native); } if (extra_frame) { - // Use "~" as file and 0 as line, since that's what pstats uses: + // Use "~" as file, None as location (synthetic frame), None as opcode PyObject *extra_frame_info = make_frame_info( - unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame); + unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None); if (extra_frame_info == NULL) { return -1; } diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 252291f916290c..221c17317332d2 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = { 4 }; +// LocationInfo structseq type +static PyStructSequence_Field LocationInfo_fields[] = { + {"lineno", "Line number"}, + {"end_lineno", "End line number"}, + {"col_offset", "Column offset"}, + {"end_col_offset", "End column offset"}, + {NULL} +}; + +PyStructSequence_Desc LocationInfo_desc = { + "_remote_debugging.LocationInfo", + "Source location information: (lineno, end_lineno, col_offset, end_col_offset)", + LocationInfo_fields, + 4 +}; + // FrameInfo structseq type static PyStructSequence_Field FrameInfo_fields[] = { {"filename", "Source code filename"}, - {"lineno", "Line number"}, + {"location", "LocationInfo structseq or None for synthetic frames"}, {"funcname", "Function name"}, + {"opcode", "Opcode being executed (None if not gathered)"}, {NULL} }; @@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = { "_remote_debugging.FrameInfo", "Information about a frame", FrameInfo_fields, - 3 + 4 }; // CoroInfo structseq type @@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__ skip_non_matching_threads: bool = True native: bool = False gc: bool = False + opcodes: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -253,6 +271,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. non-Python code. gc: If True, include artificial "" frames to denote active garbage collection. + opcodes: If True, gather bytecode opcode information for instruction-level + profiling. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. @@ -270,8 +290,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc) -/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ + int native, int gc, + int opcodes) +/*[clinic end generated code: output=e7f77865c7dd662f input=3dba9e3da913a1e0]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -290,6 +311,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->native = native; self->gc = gc; + self->opcodes = opcodes; self->debug = debug; self->only_active_thread = only_active_thread; self->mode = mode; @@ -844,6 +866,14 @@ _remote_debugging_exec(PyObject *m) return -1; } + st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc); + if (st->LocationInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->LocationInfo_Type) < 0) { + return -1; + } + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); if (st->FrameInfo_Type == NULL) { return -1; @@ -917,6 +947,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_VISIT(state->RemoteDebugging_Type); Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->LocationInfo_Type); Py_VISIT(state->FrameInfo_Type); Py_VISIT(state->CoroInfo_Type); Py_VISIT(state->ThreadInfo_Type); @@ -931,6 +962,7 @@ remote_debugging_clear(PyObject *mod) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_CLEAR(state->RemoteDebugging_Type); Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->LocationInfo_Type); Py_CLEAR(state->FrameInfo_Type); Py_CLEAR(state->CoroInfo_Type); Py_CLEAR(state->ThreadInfo_Type); From 70f2ae025f8c46a27f21d5abbb0dba1859280c3f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:10 +0000 Subject: [PATCH 02/24] Add opcode utilities and --opcodes CLI flag New opcode_utils.py maps opcode numbers to names and detects specialized variants using opcode module metadata. Adds normalize_location() and extract_lineno() helpers to collector base for uniform location handling. CLI gains --opcodes flag, validated against compatible formats (gecko, flamegraph, heatmap, live). --- Lib/profiling/sampling/cli.py | 31 +++++++-- Lib/profiling/sampling/collector.py | 29 ++++++++ Lib/profiling/sampling/constants.py | 4 ++ Lib/profiling/sampling/opcode_utils.py | 92 ++++++++++++++++++++++++++ Lib/profiling/sampling/sample.py | 12 +++- 5 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 Lib/profiling/sampling/opcode_utils.py diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 5c0e39d77371ef..2de132f3ec1a41 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -195,6 +195,12 @@ def _add_sampling_options(parser): dest="gc", help='Don\'t include artificial "" frames to denote active garbage collection', ) + sampling_group.add_argument( + "--opcodes", + action="store_true", + help="Gather bytecode opcode information for instruction-level profiling " + "(shows which bytecode instructions are executing, including specializations).", + ) def _add_mode_options(parser): @@ -304,13 +310,15 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) -def _create_collector(format_type, interval, skip_idle): +def _create_collector(format_type, interval, skip_idle, opcodes=False): """Create the appropriate collector based on format type. Args: - format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap') interval: Sampling interval in microseconds skip_idle: Whether to skip idle samples + opcodes: Whether to collect opcode information (only used by gecko format + for creating interval markers in Firefox Profiler) Returns: A collector instance of the appropriate type @@ -320,8 +328,10 @@ def _create_collector(format_type, interval, skip_idle): raise ValueError(f"Unknown format: {format_type}") # Gecko format never skips idle (it needs both GIL and CPU data) + # and is the only format that uses opcodes for interval markers if format_type == "gecko": skip_idle = False + return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes) return collector_class(interval, skip_idle=skip_idle) @@ -413,6 +423,13 @@ def _validate_args(args, parser): "Gecko format automatically includes both GIL-holding and CPU status analysis." ) + # Validate --opcodes is only used with compatible formats + opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap") + if args.opcodes and args.format not in opcodes_compatible_formats: + parser.error( + f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." + ) + # Validate pstats-specific options are only used with pstats format if args.format != "pstats": issues = [] @@ -560,7 +577,7 @@ def _handle_attach(args): ) # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) # Sample the process collector = sample( @@ -572,6 +589,7 @@ def _handle_attach(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) # Handle output @@ -607,7 +625,7 @@ def _handle_run(args): ) # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) # Profile the subprocess try: @@ -620,6 +638,7 @@ def _handle_run(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) # Handle output @@ -650,6 +669,7 @@ def _handle_live_attach(args, pid): limit=20, # Default limit pid=pid, mode=mode, + opcodes=args.opcodes, ) # Sample in live mode @@ -662,6 +682,7 @@ def _handle_live_attach(args, pid): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) @@ -689,6 +710,7 @@ def _handle_live_run(args): limit=20, # Default limit pid=process.pid, mode=mode, + opcodes=args.opcodes, ) # Profile the subprocess in live mode @@ -702,6 +724,7 @@ def _handle_live_run(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) finally: # Clean up the subprocess diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 6187f351cb596b..6dd3a2a288fdb3 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,11 +1,40 @@ from abc import ABC, abstractmethod from .constants import ( + DEFAULT_LOCATION, THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, ) + +def normalize_location(location): + """Normalize location to a 4-tuple format. + + Args: + location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None + + Returns: + tuple: (lineno, end_lineno, col_offset, end_col_offset) + """ + if location is None: + return DEFAULT_LOCATION + return location + + +def extract_lineno(location): + """Extract lineno from location. + + Args: + location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None + + Returns: + int: The line number (0 for synthetic frames) + """ + if location is None: + return 0 + return location[0] + class Collector(ABC): @abstractmethod def collect(self, stack_frames): diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py index be2ae60a88f114..b05f1703c8505f 100644 --- a/Lib/profiling/sampling/constants.py +++ b/Lib/profiling/sampling/constants.py @@ -14,6 +14,10 @@ SORT_MODE_CUMUL_PCT = 4 SORT_MODE_NSAMPLES_CUMUL = 5 +# Default location for synthetic frames (native, GC) that have no source location +# Format: (lineno, end_lineno, col_offset, end_col_offset) +DEFAULT_LOCATION = (0, 0, -1, -1) + # Thread status flags try: from _remote_debugging import ( diff --git a/Lib/profiling/sampling/opcode_utils.py b/Lib/profiling/sampling/opcode_utils.py new file mode 100644 index 00000000000000..3d6a263f0b1773 --- /dev/null +++ b/Lib/profiling/sampling/opcode_utils.py @@ -0,0 +1,92 @@ +"""Opcode utilities for bytecode-level profiler visualization. + +This module provides utilities to get opcode names and detect specialization +status using the opcode module's metadata. Used by heatmap and flamegraph +collectors to display which bytecode instructions are executing at each +source line, including Python's adaptive specialization optimizations. +""" + +import opcode + +# Build opcode name mapping: opcode number -> opcode name +# This includes both standard opcodes and specialized variants (Python 3.11+) +_OPCODE_NAMES = dict(enumerate(opcode.opname)) +if hasattr(opcode, '_specialized_opmap'): + for name, op in opcode._specialized_opmap.items(): + _OPCODE_NAMES[op] = name + +# Build deopt mapping: specialized opcode number -> base opcode number +# Python 3.11+ uses adaptive specialization where generic opcodes like +# LOAD_ATTR can be replaced at runtime with specialized variants like +# LOAD_ATTR_INSTANCE_VALUE. This mapping lets us show both forms. +_DEOPT_MAP = {} +if hasattr(opcode, '_specializations') and hasattr(opcode, '_specialized_opmap'): + for base_name, variant_names in opcode._specializations.items(): + base_opcode = opcode.opmap.get(base_name) + if base_opcode is not None: + for variant_name in variant_names: + variant_opcode = opcode._specialized_opmap.get(variant_name) + if variant_opcode is not None: + _DEOPT_MAP[variant_opcode] = base_opcode + + +def get_opcode_info(opcode_num): + """Get opcode name and specialization info from an opcode number. + + Args: + opcode_num: The opcode number (0-255 or higher for specialized) + + Returns: + A dict with keys: + - 'opname': The opcode name (e.g., 'LOAD_ATTR_INSTANCE_VALUE') + - 'base_opname': The base opcode name (e.g., 'LOAD_ATTR') + - 'is_specialized': True if this is a specialized instruction + """ + opname = _OPCODE_NAMES.get(opcode_num) + if opname is None: + return { + 'opname': f'<{opcode_num}>', + 'base_opname': f'<{opcode_num}>', + 'is_specialized': False, + } + + base_opcode = _DEOPT_MAP.get(opcode_num) + if base_opcode is not None: + base_opname = _OPCODE_NAMES.get(base_opcode, f'<{base_opcode}>') + return { + 'opname': opname, + 'base_opname': base_opname, + 'is_specialized': True, + } + + return { + 'opname': opname, + 'base_opname': opname, + 'is_specialized': False, + } + + +def format_opcode(opcode_num): + """Format an opcode for display, showing base opcode for specialized ones. + + Args: + opcode_num: The opcode number (0-255 or higher for specialized) + + Returns: + A formatted string like 'LOAD_ATTR' or 'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)' + """ + info = get_opcode_info(opcode_num) + if info['is_specialized']: + return f"{info['opname']} ({info['base_opname']})" + return info['opname'] + + +def get_opcode_mapping(): + """Get opcode name and deopt mappings for JavaScript consumption. + + Returns: + A dict with keys: + - 'names': Dict mapping opcode numbers to opcode names + - 'deopt': Dict mapping specialized opcode numbers to base opcode numbers + """ + return {"names": _OPCODE_NAMES, "deopt": _DEOPT_MAP} diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 46fc1a05afaa74..685da1afd0bd10 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -27,7 +27,7 @@ class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, opcodes=False, skip_non_matching_threads=True): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads @@ -35,13 +35,13 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, - skip_non_matching_threads=skip_non_matching_threads + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc, - skip_non_matching_threads=skip_non_matching_threads + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -193,6 +193,7 @@ def sample( mode=PROFILING_MODE_WALL, native=False, gc=True, + opcodes=False, ): """Sample a process using the provided collector. @@ -206,6 +207,7 @@ def sample( GIL (only when holding GIL), ALL (includes GIL and CPU status) native: Whether to include native frames gc: Whether to include GC frames + opcodes: Whether to include opcode information Returns: The collector with collected samples @@ -228,6 +230,7 @@ def sample( mode=mode, native=native, gc=gc, + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats @@ -248,6 +251,7 @@ def sample_live( mode=PROFILING_MODE_WALL, native=False, gc=True, + opcodes=False, ): """Sample a process in live/interactive mode with curses TUI. @@ -261,6 +265,7 @@ def sample_live( GIL (only when holding GIL), ALL (includes GIL and CPU status) native: Whether to include native frames gc: Whether to include GC frames + opcodes: Whether to include opcode information Returns: The collector with collected samples @@ -283,6 +288,7 @@ def sample_live( mode=mode, native=native, gc=gc, + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats From aedc000a1573b77df73e473cd6d93a2089085d98 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:18 +0000 Subject: [PATCH 03/24] Track opcode sample counts in flamegraph collector Stores per-node opcode counts in the tree structure. Exports opcode mapping (names and deopt relationships) in JSON so the JS renderer can show instruction names and distinguish specialized variants. --- .../sampling/_flamegraph_assets/flamegraph.js | 79 +++++++++++++++++++ Lib/profiling/sampling/stack_collector.py | 48 ++++++++--- 2 files changed, 117 insertions(+), 10 deletions(-) diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index 494d156a8dddfc..617ce21b73a45e 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -8,6 +8,32 @@ let currentThreadFilter = 'all'; // Heat colors are now defined in CSS variables (--heat-1 through --heat-8) // and automatically switch with theme changes - no JS color arrays needed! +// Opcode mappings - loaded from embedded data (generated by Python) +let OPCODE_NAMES = {}; +let DEOPT_MAP = {}; + +// Initialize opcode mappings from embedded data +function initOpcodeMapping(data) { + if (data && data.opcode_mapping) { + OPCODE_NAMES = data.opcode_mapping.names || {}; + DEOPT_MAP = data.opcode_mapping.deopt || {}; + } +} + +// Get opcode info from opcode number +function getOpcodeInfo(opcode) { + const opname = OPCODE_NAMES[opcode] || `<${opcode}>`; + const baseOpcode = DEOPT_MAP[opcode]; + const isSpecialized = baseOpcode !== undefined; + const baseOpname = isSpecialized ? (OPCODE_NAMES[baseOpcode] || `<${baseOpcode}>`) : opname; + + return { + opname: opname, + baseOpname: baseOpname, + isSpecialized: isSpecialized + }; +} + // ============================================================================ // String Resolution // ============================================================================ @@ -249,6 +275,55 @@ function createPythonTooltip(data) { `; } + // Create bytecode/opcode section if available + let opcodeSection = ""; + const opcodes = d.data.opcodes; + if (opcodes && typeof opcodes === 'object' && Object.keys(opcodes).length > 0) { + // Sort opcodes by sample count (descending) + const sortedOpcodes = Object.entries(opcodes) + .sort((a, b) => b[1] - a[1]) + .slice(0, 8); // Limit to top 8 + + const totalOpcodeSamples = sortedOpcodes.reduce((sum, [, count]) => sum + count, 0); + const maxCount = sortedOpcodes[0][1] || 1; + + const opcodeLines = sortedOpcodes.map(([opcode, count]) => { + const opcodeInfo = getOpcodeInfo(parseInt(opcode, 10)); + const pct = ((count / totalOpcodeSamples) * 100).toFixed(1); + const barWidth = (count / maxCount) * 100; + const specializedBadge = opcodeInfo.isSpecialized + ? 'SPECIALIZED' + : ''; + const baseOpHint = opcodeInfo.isSpecialized + ? `(${opcodeInfo.baseOpname})` + : ''; + + return ` +
+
+ ${opcodeInfo.opname}${baseOpHint}${specializedBadge} +
+
${count.toLocaleString()}
+
+
+
+
`; + }).join(''); + + opcodeSection = ` +
+
+ Bytecode Instructions: +
+
+ ${opcodeLines} +
+
`; + } + const fileLocationHTML = isSpecialFrame ? "" : `
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
`; @@ -275,6 +350,7 @@ function createPythonTooltip(data) { ` : ''} ${sourceSection} + ${opcodeSection}
${childCount > 0 ? "Click to zoom into this function" : "Leaf function - no children"}
@@ -994,6 +1070,9 @@ function initFlamegraph() { processedData = resolveStringIndices(EMBEDDED_DATA); } + // Initialize opcode mapping from embedded data + initOpcodeMapping(EMBEDDED_DATA); + originalData = processedData; initThreadFilter(processedData); diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index e26536093130d1..8e190ebf37d514 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -7,7 +7,8 @@ import os from ._css_utils import get_combined_css -from .collector import Collector +from .collector import Collector, extract_lineno +from .opcode_utils import get_opcode_mapping from .string_table import StringTable @@ -32,7 +33,11 @@ def __init__(self, *args, **kwargs): self.stack_counter = collections.Counter() def process_frames(self, frames, thread_id): - call_tree = tuple(reversed(frames)) + # Extract only (filename, lineno, funcname) - opcode not needed for collapsed stacks + # frame is (filename, location, funcname, opcode) + call_tree = tuple( + (f[0], extract_lineno(f[1]), f[2]) for f in reversed(frames) + ) self.stack_counter[(call_tree, thread_id)] += 1 def export(self, filename): @@ -205,6 +210,11 @@ def convert_children(children, min_samples): source_indices = [self._string_table.intern(line) for line in source] child_entry["source"] = source_indices + # Include opcode data if available + opcodes = node.get("opcodes", {}) + if opcodes: + child_entry["opcodes"] = dict(opcodes) + # Recurse child_entry["children"] = convert_children( node["children"], min_samples @@ -251,6 +261,9 @@ def convert_children(children, min_samples): **stats } + # Build opcode mapping for JS + opcode_mapping = get_opcode_mapping() + # If we only have one root child, make it the root to avoid redundant level if len(root_children) == 1: main_child = root_children[0] @@ -265,6 +278,7 @@ def convert_children(children, min_samples): } main_child["threads"] = sorted(list(self._all_threads)) main_child["strings"] = self._string_table.get_strings() + main_child["opcode_mapping"] = opcode_mapping return main_child return { @@ -277,27 +291,41 @@ def convert_children(children, min_samples): "per_thread_stats": per_thread_stats_with_pct }, "threads": sorted(list(self._all_threads)), - "strings": self._string_table.get_strings() + "strings": self._string_table.get_strings(), + "opcode_mapping": opcode_mapping } def process_frames(self, frames, thread_id): - # Reverse to root->leaf - call_tree = reversed(frames) + """Process stack frames into flamegraph tree structure. + + Args: + frames: List of (filename, location, funcname, opcode) tuples in + leaf-to-root order. location is (lineno, end_lineno, col_offset, end_col_offset). + opcode is None if not gathered. + thread_id: Thread ID for this stack trace + """ + # Reverse to root->leaf order for tree building self._root["samples"] += 1 self._total_samples += 1 self._root["threads"].add(thread_id) self._all_threads.add(thread_id) current = self._root - for func in call_tree: + for filename, location, funcname, opcode in reversed(frames): + lineno = extract_lineno(location) + func = (filename, lineno, funcname) func = self._func_intern.setdefault(func, func) - children = current["children"] - node = children.get(func) + + node = current["children"].get(func) if node is None: - node = {"samples": 0, "children": {}, "threads": set()} - children[func] = node + node = {"samples": 0, "children": {}, "threads": set(), "opcodes": collections.Counter()} + current["children"][func] = node node["samples"] += 1 node["threads"].add(thread_id) + + if opcode is not None: + node["opcodes"][opcode] += 1 + current = node def _get_source_lines(self, func): From 19ff11b55f2406236258a0b1900833c2ea0493f3 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:25 +0000 Subject: [PATCH 04/24] Emit opcode interval markers in Gecko collector Tracks opcode state transitions per thread and emits interval markers when the executing opcode changes. Markers include opcode name, line, column, and duration. Adds Opcodes category to marker schema. --- Lib/profiling/sampling/gecko_collector.py | 120 ++++++++++++++++++++-- 1 file changed, 111 insertions(+), 9 deletions(-) diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 921cd625f04e3f..b25ee079dd6ce9 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -7,6 +7,7 @@ import time from .collector import Collector +from .opcode_utils import get_opcode_info, format_opcode try: from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED except ImportError: @@ -26,6 +27,7 @@ {"name": "GIL", "color": "green", "subcategories": ["Other"]}, {"name": "CPU", "color": "purple", "subcategories": ["Other"]}, {"name": "Code Type", "color": "red", "subcategories": ["Other"]}, + {"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]}, ] # Category indices @@ -36,6 +38,7 @@ CATEGORY_GIL = 4 CATEGORY_CPU = 5 CATEGORY_CODE_TYPE = 6 +CATEGORY_OPCODES = 7 # Subcategory indices DEFAULT_SUBCATEGORY = 0 @@ -56,9 +59,10 @@ class GeckoCollector(Collector): - def __init__(self, sample_interval_usec, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False): self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle + self.opcodes_enabled = opcodes self.start_time = time.time() * 1000 # milliseconds since epoch # Global string table (shared across all threads) @@ -91,6 +95,9 @@ def __init__(self, sample_interval_usec, *, skip_idle=False): # Track which threads have been initialized for state tracking self.initialized_threads = set() + # Opcode state tracking per thread: tid -> (opcode, lineno, col_offset, funcname, filename, start_time) + self.opcode_state = {} + def _track_state_transition(self, tid, condition, active_dict, inactive_dict, active_name, inactive_name, category, current_time): """Track binary state transitions and emit markers. @@ -232,6 +239,30 @@ def collect(self, stack_frames): samples["time"].append(current_time) samples["eventDelay"].append(None) + # Track opcode state changes for interval markers (leaf frame only) + if self.opcodes_enabled: + leaf_frame = frames[0] + filename, location, funcname, opcode = leaf_frame + if isinstance(location, tuple): + lineno, _, col_offset, _ = location + else: + lineno = location + col_offset = -1 + + current_state = (opcode, lineno, col_offset, funcname, filename) + + if tid not in self.opcode_state: + # First observation - start tracking + self.opcode_state[tid] = (*current_state, current_time) + elif self.opcode_state[tid][:5] != current_state: + # State changed - emit marker for previous state + prev_opcode, prev_lineno, prev_col, prev_funcname, prev_filename, prev_start = self.opcode_state[tid] + self._add_opcode_interval_marker( + tid, prev_opcode, prev_lineno, prev_col, prev_funcname, prev_start, current_time + ) + # Start tracking new state + self.opcode_state[tid] = (*current_state, current_time) + self.sample_count += 1 def _create_thread(self, tid): @@ -369,6 +400,36 @@ def _add_marker(self, tid, name, start_time, end_time, category): "tid": tid }) + def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, start_time, end_time): + """Add an interval marker for opcode execution span.""" + if tid not in self.threads or opcode is None: + return + + thread_data = self.threads[tid] + opcode_info = get_opcode_info(opcode) + # Use formatted opcode name (with base opcode for specialized ones) + formatted_opname = format_opcode(opcode) + + name_idx = self._intern_string(formatted_opname) + + markers = thread_data["markers"] + markers["name"].append(name_idx) + markers["startTime"].append(start_time) + markers["endTime"].append(end_time) + markers["phase"].append(1) # 1 = interval marker + markers["category"].append(CATEGORY_OPCODES) + markers["data"].append({ + "type": "Opcode", + "opcode": opcode, + "opname": formatted_opname, + "base_opname": opcode_info["base_opname"], + "is_specialized": opcode_info["is_specialized"], + "line": lineno, + "column": col_offset if col_offset >= 0 else None, + "function": funcname, + "duration": end_time - start_time, + }) + def _process_stack(self, thread_data, frames): """Process a stack and return the stack index.""" if not frames: @@ -386,17 +447,25 @@ def _process_stack(self, thread_data, frames): prefix_stack_idx = None for frame_tuple in reversed(frames): - # frame_tuple is (filename, lineno, funcname) - filename, lineno, funcname = frame_tuple + # frame_tuple is (filename, location, funcname, opcode) + # location is (lineno, end_lineno, col_offset, end_col_offset) or just lineno + filename, location, funcname, opcode = frame_tuple + if isinstance(location, tuple): + lineno, end_lineno, col_offset, end_col_offset = location + else: + # Legacy format: location is just lineno + lineno = location + col_offset = -1 + end_col_offset = -1 # Get or create function func_idx = self._get_or_create_func( thread_data, filename, funcname, lineno ) - # Get or create frame + # Get or create frame (include column for precise source location) frame_idx = self._get_or_create_frame( - thread_data, func_idx, lineno + thread_data, func_idx, lineno, col_offset ) # Check stack cache @@ -494,10 +563,11 @@ def _get_or_create_resource(self, thread_data, filename): resource_cache[filename] = resource_idx return resource_idx - def _get_or_create_frame(self, thread_data, func_idx, lineno): + def _get_or_create_frame(self, thread_data, func_idx, lineno, col_offset=-1): """Get or create a frame entry.""" frame_cache = thread_data["_frameCache"] - frame_key = (func_idx, lineno) + # Include column in cache key for precise frame identification + frame_key = (func_idx, lineno, col_offset if col_offset >= 0 else None) if frame_key in frame_cache: return frame_cache[frame_key] @@ -531,7 +601,8 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno): frame_inner_window_ids.append(None) frame_implementations.append(None) frame_lines.append(lineno if lineno else None) - frame_columns.append(None) + # Store column offset if available (>= 0), otherwise None + frame_columns.append(col_offset if col_offset >= 0 else None) frame_optimizations.append(None) frame_cache[frame_key] = frame_idx @@ -558,6 +629,12 @@ def _finalize_markers(self): self._add_marker(tid, marker_name, state_dict[tid], end_time, category) del state_dict[tid] + # Close any open opcode markers + for tid, state in list(self.opcode_state.items()): + opcode, lineno, col_offset, funcname, filename, start_time = state + self._add_opcode_interval_marker(tid, opcode, lineno, col_offset, funcname, start_time, end_time) + self.opcode_state.clear() + def export(self, filename): """Export the profile to a Gecko JSON file.""" @@ -600,6 +677,31 @@ def spin(): f"Open in Firefox Profiler: https://profiler.firefox.com/" ) + def _build_marker_schema(self): + """Build marker schema definitions for Firefox Profiler.""" + schema = [] + + # Opcode marker schema (only if opcodes enabled) + if self.opcodes_enabled: + schema.append({ + "name": "Opcode", + "display": ["marker-table", "marker-chart"], + "tooltipLabel": "{marker.data.opname}", + "tableLabel": "{marker.data.opname} at line {marker.data.line}", + "chartLabel": "{marker.data.opname}", + "fields": [ + {"key": "opname", "label": "Opcode", "format": "string", "searchable": True}, + {"key": "base_opname", "label": "Base Opcode", "format": "string"}, + {"key": "is_specialized", "label": "Specialized", "format": "string"}, + {"key": "line", "label": "Line", "format": "integer"}, + {"key": "column", "label": "Column", "format": "integer"}, + {"key": "function", "label": "Function", "format": "string"}, + {"key": "duration", "label": "Duration", "format": "duration"}, + ], + }) + + return schema + def _build_profile(self): """Build the complete profile structure in processed format.""" # Convert thread data to final format @@ -649,7 +751,7 @@ def _build_profile(self): "CPUName": "", "product": "Python", "symbolicated": True, - "markerSchema": [], + "markerSchema": self._build_marker_schema(), "importedFrom": "Tachyon Sampling Profiler", "extensions": { "id": [], From af27d23339f3928e9665bd0eb7b6bcbd56589dcc Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:33 +0000 Subject: [PATCH 05/24] Add bytecode panel to heatmap visualization Expandable panel per hot line shows instruction-level sample breakdown with opcode names and specialization percentage. Converts call graph data structures from lists to sets for O(1) deduplication. --- .../sampling/_heatmap_assets/heatmap.css | 382 +++++++++++++++- .../sampling/_heatmap_assets/heatmap.js | 431 +++++++++++++++++- .../heatmap_pyfile_template.html | 13 +- .../sampling/_shared_assets/base.css | 27 ++ Lib/profiling/sampling/heatmap_collector.py | 286 +++++++++++- 5 files changed, 1093 insertions(+), 46 deletions(-) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css index 44915b2a2da7b8..09ab1b321459e2 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.css +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -645,13 +645,18 @@ } .legend-content { - width: 94%; - max-width: 100%; - margin: 0 auto; + width: 100%; display: flex; align-items: center; gap: 20px; - flex-wrap: wrap; + flex-wrap: nowrap; +} + +.legend-separator { + width: 1px; + height: 24px; + background: var(--border); + flex-shrink: 0; } .legend-title { @@ -659,12 +664,13 @@ color: var(--text-primary); font-size: 13px; font-family: var(--font-sans); + flex-shrink: 0; } .legend-gradient { - flex: 1; - max-width: 300px; - height: 24px; + width: 150px; + flex-shrink: 0; + height: 20px; background: linear-gradient(90deg, var(--bg-tertiary) 0%, var(--heat-2) 25%, @@ -682,6 +688,7 @@ font-size: 11px; color: var(--text-muted); font-family: var(--font-sans); + flex-shrink: 0; } /* Toggle Switch Styles */ @@ -693,6 +700,7 @@ user-select: none; font-family: var(--font-sans); transition: opacity var(--transition-fast); + flex-shrink: 0; } .toggle-switch:hover { @@ -703,13 +711,8 @@ font-size: 11px; font-weight: 500; color: var(--text-muted); - min-width: 55px; - text-align: right; transition: color var(--transition-fast); -} - -.toggle-switch .toggle-label:last-child { - text-align: left; + white-space: nowrap; } .toggle-switch .toggle-label.active { @@ -717,6 +720,17 @@ font-weight: 600; } +.toggle-switch.disabled { + opacity: 0.4; + pointer-events: none; + cursor: not-allowed; +} + +/* Push toggles to the right */ +#toggle-color-mode { + margin-left: auto; +} + .toggle-track { position: relative; width: 36px; @@ -1144,3 +1158,345 @@ max-width: none; } } + +.bytecode-toggle { + flex-shrink: 0; + width: 20px; + height: 20px; + padding: 0; + margin: 0 4px; + border: none; + background: transparent; + color: var(--code-accent); + cursor: pointer; + font-size: 10px; + transition: transform var(--transition-fast), color var(--transition-fast); + display: inline-flex; + align-items: center; + justify-content: center; +} + +.bytecode-toggle:hover { + color: var(--accent); +} + +.bytecode-toggle.expanded { + transform: rotate(90deg); +} + +.bytecode-panel { + margin-left: 90px; + padding: 8px 15px; + background: var(--bg-secondary); + border-left: 3px solid var(--accent); + font-family: var(--font-mono); + font-size: 12px; + margin-bottom: 4px; +} + +/* Specialization summary bar */ +.bytecode-spec-summary { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 12px; + margin-bottom: 10px; + border-radius: var(--radius-sm); + background: rgba(100, 100, 100, 0.1); +} + +.bytecode-spec-summary .spec-pct { + font-size: 1.4em; + font-weight: 700; +} + +.bytecode-spec-summary .spec-label { + font-weight: 500; + text-transform: uppercase; + font-size: 0.85em; + letter-spacing: 0.5px; +} + +.bytecode-spec-summary .spec-detail { + color: var(--text-secondary); + font-size: 0.9em; + margin-left: auto; +} + +.bytecode-spec-summary.high { + background: var(--spec-high-bg); + border-left: 3px solid var(--spec-high); +} +.bytecode-spec-summary.high .spec-pct, +.bytecode-spec-summary.high .spec-label { + color: var(--spec-high-text); +} + +.bytecode-spec-summary.medium { + background: var(--spec-medium-bg); + border-left: 3px solid var(--spec-medium); +} +.bytecode-spec-summary.medium .spec-pct, +.bytecode-spec-summary.medium .spec-label { + color: var(--spec-medium-text); +} + +.bytecode-spec-summary.low { + background: var(--spec-low-bg); + border-left: 3px solid var(--spec-low); +} +.bytecode-spec-summary.low .spec-pct, +.bytecode-spec-summary.low .spec-label { + color: var(--spec-low-text); +} + +.bytecode-header { + display: grid; + grid-template-columns: 1fr 80px 80px; + gap: 12px; + padding: 4px 8px; + font-weight: 600; + color: var(--text-secondary); + border-bottom: 1px solid var(--code-border); + margin-bottom: 4px; +} + +.bytecode-expand-all { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 6px 12px; + background: var(--bg-secondary); + border: 1px solid var(--code-border); + border-radius: var(--radius-sm); + color: var(--text-secondary); + font-size: 12px; + font-weight: 500; + cursor: pointer; + transition: all var(--transition-fast); + flex-shrink: 0; +} + +.bytecode-expand-all:hover, +.bytecode-expand-all.expanded { + background: var(--accent); + color: white; + border-color: var(--accent); +} + +.bytecode-expand-all .expand-icon { + font-size: 10px; + transition: transform var(--transition-fast); +} + +.bytecode-expand-all.expanded .expand-icon { + transform: rotate(90deg); +} + +/* ======================================== + INSTRUCTION SPAN HIGHLIGHTING + (triggered only from bytecode panel hover) + ======================================== */ + +/* Highlight from bytecode panel hover */ +.instr-span.highlight-from-bytecode { + outline: 3px solid #ff6b6b !important; + background-color: rgba(255, 107, 107, 0.4) !important; + border-radius: 2px; +} + +/* Bytecode instruction row */ +.bytecode-instruction { + display: grid; + grid-template-columns: 1fr 80px 80px; + gap: 12px; + align-items: center; + padding: 4px 8px; + margin: 2px 0; + border-radius: var(--radius-sm); + cursor: pointer; + transition: background-color var(--transition-fast); +} + +.bytecode-instruction:hover, +.bytecode-instruction.highlight { + background-color: rgba(55, 118, 171, 0.15); +} + +.bytecode-instruction[data-locations] { + cursor: pointer; +} + +.bytecode-instruction[data-locations]:hover { + background-color: rgba(255, 107, 107, 0.2); +} + +.bytecode-opname { + font-weight: 600; + font-family: var(--font-mono); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.bytecode-opname.specialized { + color: #2e7d32; +} + +[data-theme="dark"] .bytecode-opname.specialized { + color: #81c784; +} + +.bytecode-opname .base-op { + color: var(--code-text-muted); + font-weight: normal; + font-size: 0.9em; + margin-left: 4px; +} + +.bytecode-samples { + text-align: right; + font-weight: 600; + color: var(--accent); + font-family: var(--font-mono); +} + +.bytecode-samples.hot { + color: #ff6b6b; +} + +.bytecode-heatbar { + width: 60px; + height: 12px; + background: var(--bg-secondary); + border-radius: 2px; + overflow: hidden; + border: 1px solid var(--code-border); +} + +.bytecode-heatbar-fill { + height: 100%; + background: linear-gradient(90deg, #00d4ff 0%, #ff6b00 100%); +} + +.specialization-badge { + display: inline-block; + padding: 1px 6px; + font-size: 0.75em; + background: #e8f5e9; + color: #2e7d32; + border-radius: 3px; + margin-left: 6px; + font-weight: 600; +} + +[data-theme="dark"] .specialization-badge { + background: rgba(129, 199, 132, 0.2); + color: #81c784; +} + +.bytecode-empty { + color: var(--code-text-muted); + font-style: italic; + padding: 8px; +} + +.bytecode-error { + color: #d32f2f; + font-style: italic; + padding: 8px; +} + +/* ======================================== + SPAN TOOLTIPS + ======================================== */ + +.span-tooltip { + position: absolute; + z-index: 10000; + background: var(--bg-primary); + color: var(--text-primary); + padding: 10px 14px; + border-radius: var(--radius-md); + border: 1px solid var(--border); + font-family: var(--font-sans); + font-size: 12px; + box-shadow: var(--shadow-lg); + pointer-events: none; + min-width: 160px; + max-width: 300px; +} + +.span-tooltip::after { + content: ''; + position: absolute; + bottom: -7px; + left: 50%; + transform: translateX(-50%); + border-width: 7px 7px 0; + border-style: solid; + border-color: var(--bg-primary) transparent transparent; + filter: drop-shadow(0 1px 1px rgba(0, 0, 0, 0.1)); +} + +.span-tooltip-header { + font-weight: 600; + margin-bottom: 8px; + padding-bottom: 6px; + border-bottom: 1px solid var(--border); + color: var(--text-primary); +} + +.span-tooltip-header.hot { + color: #e65100; +} + +.span-tooltip-header.warm { + color: #f59e0b; +} + +.span-tooltip-header.cold { + color: var(--text-muted); +} + +.span-tooltip-row { + display: flex; + justify-content: space-between; + margin: 4px 0; + gap: 16px; +} + +.span-tooltip-label { + color: var(--text-secondary); +} + +.span-tooltip-value { + font-weight: 600; + text-align: right; + color: var(--text-primary); +} + +.span-tooltip-value.highlight { + color: var(--accent); +} + +.span-tooltip-section { + font-weight: 600; + color: var(--text-secondary); + font-size: 11px; + margin-top: 8px; + margin-bottom: 4px; + padding-top: 6px; + border-top: 1px solid var(--border); +} + +.span-tooltip-opcode { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-primary); + background: var(--bg-secondary); + padding: 3px 8px; + margin: 2px 0; + border-radius: var(--radius-sm); + border-left: 2px solid var(--accent); +} diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.js b/Lib/profiling/sampling/_heatmap_assets/heatmap.js index ccf823863638dd..20c3ac4f091e0c 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.js +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.js @@ -293,7 +293,6 @@ function toggleColorMode() { // ============================================================================ document.addEventListener('DOMContentLoaded', function() { - // Restore UI state (theme, etc.) restoreUIState(); // Apply background colors @@ -319,19 +318,38 @@ document.addEventListener('DOMContentLoaded', function() { // Initialize toggle buttons const toggleColdBtn = document.getElementById('toggle-cold'); - if (toggleColdBtn) { - toggleColdBtn.addEventListener('click', toggleColdCode); - } + if (toggleColdBtn) toggleColdBtn.addEventListener('click', toggleColdCode); const colorModeBtn = document.getElementById('toggle-color-mode'); - if (colorModeBtn) { - colorModeBtn.addEventListener('click', toggleColorMode); + if (colorModeBtn) colorModeBtn.addEventListener('click', toggleColorMode); + + // Initialize specialization view toggle (hide if no bytecode data) + const hasBytecode = document.querySelectorAll('.bytecode-toggle').length > 0; + + const specViewBtn = document.getElementById('toggle-spec-view'); + if (specViewBtn) { + if (hasBytecode) { + specViewBtn.addEventListener('click', toggleSpecView); + } else { + specViewBtn.style.display = 'none'; + } } - // Build scroll marker - setTimeout(buildScrollMarker, 200); + // Initialize expand-all bytecode button + const expandAllBtn = document.getElementById('toggle-all-bytecode'); + if (expandAllBtn) { + if (hasBytecode) { + expandAllBtn.addEventListener('click', toggleAllBytecode); + } else { + expandAllBtn.style.display = 'none'; + } + } - // Setup scroll-to-line behavior + // Initialize span tooltips + initSpanTooltips(); + + // Build scroll marker and scroll to target + setTimeout(buildScrollMarker, 200); setTimeout(scrollToTargetLine, 100); }); @@ -342,6 +360,401 @@ document.addEventListener('click', e => { } }); +// ======================================== +// SPECIALIZATION VIEW TOGGLE +// ======================================== + +let specViewEnabled = false; + +/** + * Calculate heat color for given intensity (0-1) + * Hot spans (>30%) get warm orange, cold spans get dimmed gray + * @param {number} intensity - Value between 0 and 1 + * @returns {string} rgba color string + */ +function calculateHeatColor(intensity) { + // Hot threshold: only spans with >30% of max samples get color + if (intensity > 0.3) { + // Normalize intensity above threshold to 0-1 + const normalizedIntensity = (intensity - 0.3) / 0.7; + // Warm orange-red with increasing opacity for hotter spans + const alpha = 0.25 + normalizedIntensity * 0.35; // 0.25 to 0.6 + return `rgba(255, 100, 50, ${alpha})`; + } else if (intensity > 0) { + // Cold spans: very subtle gray, almost invisible + return `rgba(150, 150, 150, 0.1)`; + } + return 'transparent'; +} + +/** + * Apply intensity-based heat colors to source spans + * Hot spans get orange highlight, cold spans get dimmed + * @param {boolean} enable - Whether to enable or disable span coloring + */ +function applySpanHeatColors(enable) { + document.querySelectorAll('.instr-span').forEach(span => { + const samples = enable ? (parseInt(span.dataset.samples) || 0) : 0; + if (samples > 0) { + const intensity = samples / (parseInt(span.dataset.maxSamples) || 1); + span.style.backgroundColor = calculateHeatColor(intensity); + span.style.borderRadius = '2px'; + span.style.padding = '0 1px'; + span.style.cursor = 'pointer'; + } else { + span.style.cssText = ''; + } + }); +} + +// ======================================== +// SPAN TOOLTIPS +// ======================================== + +let activeTooltip = null; + +/** + * Create and show tooltip for a span + */ +function showSpanTooltip(span) { + hideSpanTooltip(); + + const samples = parseInt(span.dataset.samples) || 0; + const maxSamples = parseInt(span.dataset.maxSamples) || 1; + const pct = span.dataset.pct || '0'; + const opcodes = span.dataset.opcodes || ''; + + if (samples === 0) return; + + const intensity = samples / maxSamples; + const isHot = intensity > 0.7; + const isWarm = intensity > 0.3; + const hotnessText = isHot ? 'Hot' : isWarm ? 'Warm' : 'Cold'; + const hotnessClass = isHot ? 'hot' : isWarm ? 'warm' : 'cold'; + + // Build opcodes rows - each opcode on its own row + let opcodesHtml = ''; + if (opcodes) { + const opcodeList = opcodes.split(',').map(op => op.trim()).filter(op => op); + if (opcodeList.length > 0) { + opcodesHtml = ` +
Opcodes:
+ ${opcodeList.map(op => `
${op}
`).join('')} + `; + } + } + + const tooltip = document.createElement('div'); + tooltip.className = 'span-tooltip'; + tooltip.innerHTML = ` +
${hotnessText}
+
+ Samples: + ${samples.toLocaleString()} +
+
+ % of line: + ${pct}% +
+ ${opcodesHtml} + `; + + document.body.appendChild(tooltip); + activeTooltip = tooltip; + + // Position tooltip above the span + const rect = span.getBoundingClientRect(); + const tooltipRect = tooltip.getBoundingClientRect(); + + let left = rect.left + (rect.width / 2) - (tooltipRect.width / 2); + let top = rect.top - tooltipRect.height - 8; + + // Keep tooltip in viewport + if (left < 5) left = 5; + if (left + tooltipRect.width > window.innerWidth - 5) { + left = window.innerWidth - tooltipRect.width - 5; + } + if (top < 5) { + top = rect.bottom + 8; // Show below if no room above + } + + tooltip.style.left = `${left + window.scrollX}px`; + tooltip.style.top = `${top + window.scrollY}px`; +} + +/** + * Hide active tooltip + */ +function hideSpanTooltip() { + if (activeTooltip) { + activeTooltip.remove(); + activeTooltip = null; + } +} + +/** + * Initialize span tooltip handlers + */ +function initSpanTooltips() { + document.addEventListener('mouseover', (e) => { + const span = e.target.closest('.instr-span'); + if (span && specViewEnabled) { + showSpanTooltip(span); + } + }); + + document.addEventListener('mouseout', (e) => { + const span = e.target.closest('.instr-span'); + if (span) { + hideSpanTooltip(); + } + }); +} + +function toggleSpecView() { + specViewEnabled = !specViewEnabled; + const lines = document.querySelectorAll('.code-line'); + + lines.forEach(line => { + if (specViewEnabled) { + const specColor = line.getAttribute('data-spec-color'); + line.style.background = specColor || 'transparent'; + } else { + const bgColor = colorMode === 'self' + ? line.getAttribute('data-self-color') || line.getAttribute('data-bg-color') + : line.getAttribute('data-cumulative-color') || line.getAttribute('data-bg-color'); + line.style.background = bgColor || 'transparent'; + } + }); + + applySpanHeatColors(specViewEnabled); + updateToggleUI('toggle-spec-view', specViewEnabled); + + // Disable/enable color mode toggle based on spec view state + const colorModeToggle = document.getElementById('toggle-color-mode'); + if (colorModeToggle) { + colorModeToggle.classList.toggle('disabled', specViewEnabled); + } + + buildScrollMarker(); +} + +// ======================================== +// BYTECODE PANEL TOGGLE +// ======================================== + +/** + * Toggle bytecode panel visibility for a source line + * @param {HTMLElement} button - The toggle button that was clicked + */ +function toggleBytecode(button) { + const lineDiv = button.closest('.code-line'); + const lineId = lineDiv.id; + const lineNum = lineId.replace('line-', ''); + const panel = document.getElementById(`bytecode-${lineNum}`); + + if (!panel) return; + + const isExpanded = panel.style.display !== 'none'; + + if (isExpanded) { + panel.style.display = 'none'; + button.classList.remove('expanded'); + button.innerHTML = '▶'; // Right arrow + } else { + if (!panel.dataset.populated) { + populateBytecodePanel(panel, button); + } + panel.style.display = 'block'; + button.classList.add('expanded'); + button.innerHTML = '▼'; // Down arrow + } +} + +/** + * Populate bytecode panel with instruction data + * @param {HTMLElement} panel - The panel element to populate + * @param {HTMLElement} button - The button containing the bytecode data + */ +function populateBytecodePanel(panel, button) { + const bytecodeJson = button.getAttribute('data-bytecode'); + if (!bytecodeJson) return; + + // Get line number from parent + const lineDiv = button.closest('.code-line'); + const lineNum = lineDiv ? lineDiv.id.replace('line-', '') : null; + + try { + const instructions = JSON.parse(bytecodeJson); + if (!instructions.length) { + panel.innerHTML = '
No bytecode data
'; + panel.dataset.populated = 'true'; + return; + } + + const maxSamples = Math.max(...instructions.map(i => i.samples), 1); + + // Calculate specialization stats + const totalSamples = instructions.reduce((sum, i) => sum + i.samples, 0); + const specializedSamples = instructions + .filter(i => i.is_specialized) + .reduce((sum, i) => sum + i.samples, 0); + const specPct = totalSamples > 0 ? Math.round(100 * specializedSamples / totalSamples) : 0; + const specializedCount = instructions.filter(i => i.is_specialized).length; + + // Determine specialization level class + let specClass = 'low'; + if (specPct >= 67) specClass = 'high'; + else if (specPct >= 33) specClass = 'medium'; + + // Build specialization summary + let html = `
+ ${specPct}% + specialized + (${specializedCount}/${instructions.length} instructions, ${specializedSamples.toLocaleString()}/${totalSamples.toLocaleString()} samples) +
`; + + html += '
' + + 'Instruction' + + 'Samples' + + 'Heat
'; + + for (const instr of instructions) { + const heatPct = (instr.samples / maxSamples) * 100; + const isHot = heatPct > 50; + const specializedClass = instr.is_specialized ? ' specialized' : ''; + const baseOpHtml = instr.is_specialized + ? `(${escapeHtml(instr.base_opname)})` : ''; + const badge = instr.is_specialized + ? 'SPECIALIZED' : ''; + + // Build location data attributes for cross-referencing with source spans + const hasLocations = instr.locations && instr.locations.length > 0; + const locationData = hasLocations + ? `data-locations='${JSON.stringify(instr.locations)}' data-line="${lineNum}" data-opcode="${instr.opcode}"` + : ''; + + html += `
+ ${escapeHtml(instr.opname)}${baseOpHtml}${badge} + ${instr.samples.toLocaleString()} +
+
`; + } + + panel.innerHTML = html; + panel.dataset.populated = 'true'; + + // Add hover handlers for bytecode instructions to highlight source spans + panel.querySelectorAll('.bytecode-instruction[data-locations]').forEach(instrEl => { + instrEl.addEventListener('mouseenter', highlightSourceFromBytecode); + instrEl.addEventListener('mouseleave', unhighlightSourceFromBytecode); + }); + } catch (e) { + panel.innerHTML = '
Error loading bytecode
'; + console.error('Error parsing bytecode data:', e); + } +} + +/** + * Highlight source spans when hovering over bytecode instruction + */ +function highlightSourceFromBytecode(e) { + const instrEl = e.currentTarget; + const lineNum = instrEl.dataset.line; + const locationsStr = instrEl.dataset.locations; + + if (!lineNum) return; + + const lineDiv = document.getElementById(`line-${lineNum}`); + if (!lineDiv) return; + + // Parse locations and highlight matching spans by column range + try { + const locations = JSON.parse(locationsStr || '[]'); + const spans = lineDiv.querySelectorAll('.instr-span'); + spans.forEach(span => { + const spanStart = parseInt(span.dataset.colStart); + const spanEnd = parseInt(span.dataset.colEnd); + for (const loc of locations) { + // Match if span's range matches instruction's location + if (spanStart === loc.col_offset && spanEnd === loc.end_col_offset) { + span.classList.add('highlight-from-bytecode'); + break; + } + } + }); + } catch (err) { + console.error('Error parsing locations:', err); + } + + // Also highlight the instruction row itself + instrEl.classList.add('highlight'); +} + +/** + * Remove highlighting from source spans + */ +function unhighlightSourceFromBytecode(e) { + const instrEl = e.currentTarget; + const lineNum = instrEl.dataset.line; + + if (!lineNum) return; + + const lineDiv = document.getElementById(`line-${lineNum}`); + if (!lineDiv) return; + + const spans = lineDiv.querySelectorAll('.instr-span.highlight-from-bytecode'); + spans.forEach(span => { + span.classList.remove('highlight-from-bytecode'); + }); + + instrEl.classList.remove('highlight'); +} + +/** + * Escape HTML special characters + * @param {string} text - Text to escape + * @returns {string} Escaped HTML + */ +function escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; +} + +/** + * Toggle all bytecode panels at once + */ +function toggleAllBytecode() { + const buttons = document.querySelectorAll('.bytecode-toggle'); + if (buttons.length === 0) return; + + const someExpanded = Array.from(buttons).some(b => b.classList.contains('expanded')); + const expandAllBtn = document.getElementById('toggle-all-bytecode'); + + buttons.forEach(button => { + const isExpanded = button.classList.contains('expanded'); + if (someExpanded ? isExpanded : !isExpanded) { + toggleBytecode(button); + } + }); + + // Update the expand-all button state + if (expandAllBtn) { + expandAllBtn.classList.toggle('expanded', !someExpanded); + } +} + +// Keyboard shortcut: 'b' toggles all bytecode panels +document.addEventListener('keydown', function(e) { + if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') { + return; + } + if (e.key === 'b' && !e.ctrlKey && !e.altKey && !e.metaKey) { + toggleAllBytecode(); + } +}); + // Handle hash changes window.addEventListener('hashchange', () => setTimeout(scrollToTargetLine, 50)); diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html index d8b26adfb0243f..0c4bcbe22ca0c2 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html @@ -21,6 +21,7 @@ class="toolbar-btn theme-toggle" onclick="toggleTheme()" title="Toggle theme" + aria-label="Toggle theme" id="theme-btn" >☾ @@ -63,9 +64,10 @@
Cold - + Hot
+
Self Time
@@ -76,6 +78,15 @@
Hot Only
+
+ Heat +
+ Specialization +
+ + diff --git a/Lib/profiling/sampling/_shared_assets/base.css b/Lib/profiling/sampling/_shared_assets/base.css index 20516913496cbe..71d566f9126d62 100644 --- a/Lib/profiling/sampling/_shared_assets/base.css +++ b/Lib/profiling/sampling/_shared_assets/base.css @@ -29,6 +29,11 @@ --topbar-height: 56px; --statusbar-height: 32px; + /* Border radius */ + --radius-sm: 4px; + --radius-md: 8px; + --radius-lg: 12px; + /* Transitions */ --transition-fast: 0.15s ease; --transition-normal: 0.25s ease; @@ -79,6 +84,17 @@ --nav-caller-hover: #1d4ed8; --nav-callee: #dc2626; --nav-callee-hover: #b91c1c; + + /* Specialization status colors */ + --spec-high: #4caf50; + --spec-high-text: #2e7d32; + --spec-high-bg: rgba(76, 175, 80, 0.15); + --spec-medium: #ff9800; + --spec-medium-text: #e65100; + --spec-medium-bg: rgba(255, 152, 0, 0.15); + --spec-low: #9e9e9e; + --spec-low-text: #616161; + --spec-low-bg: rgba(158, 158, 158, 0.15); } /* Dark theme */ @@ -126,6 +142,17 @@ --nav-caller-hover: #4184e4; --nav-callee: #f87171; --nav-callee-hover: #e53e3e; + + /* Specialization status colors - dark theme */ + --spec-high: #81c784; + --spec-high-text: #81c784; + --spec-high-bg: rgba(129, 199, 132, 0.2); + --spec-medium: #ffb74d; + --spec-medium-text: #ffb74d; + --spec-medium-bg: rgba(255, 183, 77, 0.2); + --spec-low: #bdbdbd; + --spec-low-text: #9e9e9e; + --spec-low-bg: rgba(189, 189, 189, 0.15); } /* -------------------------------------------------------------------------- diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py index eb51ce33b28a52..f05462051e9ea0 100644 --- a/Lib/profiling/sampling/heatmap_collector.py +++ b/Lib/profiling/sampling/heatmap_collector.py @@ -14,6 +14,7 @@ from typing import Dict, List, Tuple, Optional, Any from ._css_utils import get_combined_css +from .collector import normalize_location, extract_lineno from .stack_collector import StackTraceCollector @@ -488,14 +489,19 @@ def __init__(self, *args, **kwargs): self.line_self_samples = collections.Counter() self.file_self_samples = collections.defaultdict(collections.Counter) - # Call graph data structures for navigation - self.call_graph = collections.defaultdict(list) - self.callers_graph = collections.defaultdict(list) + # Call graph data structures for navigation (sets for O(1) deduplication) + self.call_graph = collections.defaultdict(set) + self.callers_graph = collections.defaultdict(set) self.function_definitions = {} # Edge counting for call path analysis self.edge_samples = collections.Counter() + # Bytecode-level tracking data structures + # Track samples per (file, lineno) -> {opcode: {'count': N, 'locations': set()}} + # Locations are deduplicated via set to minimize memory usage + self.line_opcodes = collections.defaultdict(dict) + # Statistics and metadata self._total_samples = 0 self._path_info = get_python_path_info() @@ -542,26 +548,35 @@ def process_frames(self, frames, thread_id): """Process stack frames and count samples per line. Args: - frames: List of frame tuples (filename, lineno, funcname) - frames[0] is the leaf (top of stack, where execution is) + frames: List of (filename, location, funcname, opcode) tuples in + leaf-to-root order. location is (lineno, end_lineno, col_offset, end_col_offset). + opcode is None if not gathered. thread_id: Thread ID for this stack trace """ self._total_samples += 1 - # Count each line in the stack and build call graph - for i, frame_info in enumerate(frames): - filename, lineno, funcname = frame_info + for i, (filename, location, funcname, opcode) in enumerate(frames): + # Normalize location to 4-tuple format + lineno, end_lineno, col_offset, end_col_offset = normalize_location(location) if not self._is_valid_frame(filename, lineno): continue # frames[0] is the leaf - where execution is actually happening - is_leaf = (i == 0) - self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf) + self._record_line_sample(filename, lineno, funcname, is_leaf=(i == 0)) + + if opcode is not None: + self._record_bytecode_sample(filename, lineno, opcode, + end_lineno, col_offset, end_col_offset) # Build call graph for adjacent frames if i + 1 < len(frames): - self._record_call_relationship(frames[i], frames[i + 1]) + next_frame = frames[i + 1] + next_lineno = extract_lineno(next_frame[1]) + self._record_call_relationship( + (filename, lineno, funcname), + (next_frame[0], next_lineno, next_frame[2]) + ) def _is_valid_frame(self, filename, lineno): """Check if a frame should be included in the heatmap.""" @@ -590,6 +605,79 @@ def _record_line_sample(self, filename, lineno, funcname, is_leaf=False): if funcname and (filename, funcname) not in self.function_definitions: self.function_definitions[(filename, funcname)] = lineno + def _record_bytecode_sample(self, filename, lineno, opcode, + end_lineno=None, col_offset=None, end_col_offset=None): + """Record a sample for a specific bytecode instruction. + + Args: + filename: Source filename + lineno: Line number + opcode: Opcode number being executed + end_lineno: End line number (may be -1 if not available) + col_offset: Column offset in UTF-8 bytes (may be -1 if not available) + end_col_offset: End column offset in UTF-8 bytes (may be -1 if not available) + """ + key = (filename, lineno) + + # Initialize opcode entry if needed - use set for location deduplication + if opcode not in self.line_opcodes[key]: + self.line_opcodes[key][opcode] = {'count': 0, 'locations': set()} + + self.line_opcodes[key][opcode]['count'] += 1 + + # Store unique location info if column offset is available (not -1) + if col_offset is not None and col_offset >= 0: + # Use tuple as set key for deduplication + loc_key = (end_lineno, col_offset, end_col_offset) + self.line_opcodes[key][opcode]['locations'].add(loc_key) + + def _get_bytecode_data_for_line(self, filename, lineno): + """Get bytecode disassembly data for instructions on a specific line. + + Args: + filename: Source filename + lineno: Line number + + Returns: + List of dicts with instruction info, sorted by samples descending + """ + from .opcode_utils import get_opcode_info, format_opcode + + key = (filename, lineno) + opcode_data = self.line_opcodes.get(key, {}) + + result = [] + for opcode, data in opcode_data.items(): + info = get_opcode_info(opcode) + # Handle both old format (int count) and new format (dict with count/locations) + if isinstance(data, dict): + count = data.get('count', 0) + raw_locations = data.get('locations', set()) + # Convert set of tuples to list of dicts for JSON serialization + if isinstance(raw_locations, set): + locations = [ + {'end_lineno': loc[0], 'col_offset': loc[1], 'end_col_offset': loc[2]} + for loc in raw_locations + ] + else: + locations = raw_locations + else: + count = data + locations = [] + + result.append({ + 'opcode': opcode, + 'opname': format_opcode(opcode), + 'base_opname': info['base_opname'], + 'is_specialized': info['is_specialized'], + 'samples': count, + 'locations': locations, + }) + + # Sort by samples descending, then by opcode number + result.sort(key=lambda x: (-x['samples'], x['opcode'])) + return result + def _record_call_relationship(self, callee_frame, caller_frame): """Record caller/callee relationship between adjacent frames.""" callee_filename, callee_lineno, callee_funcname = callee_frame @@ -604,17 +692,15 @@ def _record_call_relationship(self, callee_frame, caller_frame): (callee_filename, callee_funcname), callee_lineno ) - # Record caller -> callee relationship + # Record caller -> callee relationship (set handles deduplication) caller_key = (caller_filename, caller_lineno) callee_info = (callee_filename, callee_def_line, callee_funcname) - if callee_info not in self.call_graph[caller_key]: - self.call_graph[caller_key].append(callee_info) + self.call_graph[caller_key].add(callee_info) - # Record callee <- caller relationship + # Record callee <- caller relationship (set handles deduplication) callee_key = (callee_filename, callee_def_line) caller_info = (caller_filename, caller_lineno, caller_funcname) - if caller_info not in self.callers_graph[callee_key]: - self.callers_graph[callee_key].append(caller_info) + self.callers_graph[callee_key].add(caller_info) # Count this call edge for path analysis edge_key = (caller_key, callee_key) @@ -936,35 +1022,185 @@ def _build_line_html(self, line_num: int, line_content: str, cumulative_display = "" tooltip = "" + # Get bytecode data for this line (if any) + bytecode_data = self._get_bytecode_data_for_line(filename, line_num) + has_bytecode = len(bytecode_data) > 0 and cumulative_samples > 0 + + # Build bytecode toggle button if data is available + bytecode_btn_html = '' + bytecode_panel_html = '' + if has_bytecode: + bytecode_json = html.escape(json.dumps(bytecode_data)) + + # Calculate specialization percentage + total_samples = sum(d['samples'] for d in bytecode_data) + specialized_samples = sum(d['samples'] for d in bytecode_data if d['is_specialized']) + spec_pct = int(100 * specialized_samples / total_samples) if total_samples > 0 else 0 + + bytecode_btn_html = ( + f'' + ) + bytecode_panel_html = f' \n' + # Get navigation buttons nav_buttons_html = self._build_navigation_buttons(filename, line_num) - # Build line HTML - line_html = html.escape(line_content.rstrip('\n')) + # Build line HTML with instruction highlights if available + line_html = self._render_source_with_highlights(line_content, line_num, + filename, bytecode_data) title_attr = f' title="{html.escape(tooltip)}"' if tooltip else "" + # Specialization color for toggle mode (green gradient based on spec %) + spec_color_attr = '' + if has_bytecode: + spec_color = self._format_specialization_color(spec_pct) + spec_color_attr = f'data-spec-color="{spec_color}" ' + return ( f'
\n' f'
{line_num}
\n' f'
{self_display}
\n' f'
{cumulative_display}
\n' + f' {bytecode_btn_html}\n' f'
{line_html}
\n' f' {nav_buttons_html}\n' f'
\n' + f'{bytecode_panel_html}' ) + def _render_source_with_highlights(self, line_content: str, line_num: int, + filename: str, bytecode_data: list) -> str: + """Render source line with instruction highlight spans. + + Simple: collect ranges with sample counts, assign each byte position to + smallest covering range, then emit spans for contiguous runs with sample data. + """ + import html as html_module + + content = line_content.rstrip('\n') + if not content: + return '' + + # Collect all (start, end) -> {samples, opcodes} mapping from instructions + # Multiple instructions may share the same range, so we sum samples and collect opcodes + range_data = {} + for instr in bytecode_data: + samples = instr.get('samples', 0) + opname = instr.get('opname', '') + for loc in instr.get('locations', []): + if loc.get('end_lineno', line_num) == line_num: + start, end = loc.get('col_offset', -1), loc.get('end_col_offset', -1) + if start >= 0 and end >= 0: + key = (start, end) + if key not in range_data: + range_data[key] = {'samples': 0, 'opcodes': []} + range_data[key]['samples'] += samples + if opname and opname not in range_data[key]['opcodes']: + range_data[key]['opcodes'].append(opname) + + if not range_data: + return html_module.escape(content) + + # For each byte position, find the smallest covering range + byte_to_range = {} + for (start, end) in range_data.keys(): + for pos in range(start, end): + if pos not in byte_to_range: + byte_to_range[pos] = (start, end) + else: + # Keep smaller range + old_start, old_end = byte_to_range[pos] + if (end - start) < (old_end - old_start): + byte_to_range[pos] = (start, end) + + # Calculate totals for percentage and intensity + total_line_samples = sum(d['samples'] for d in range_data.values()) + max_range_samples = max(d['samples'] for d in range_data.values()) if range_data else 1 + + # Render character by character + result = [] + byte_offset = 0 + char_idx = 0 + current_range = None + span_chars = [] + + def flush_span(): + nonlocal span_chars, current_range + if span_chars: + text = html_module.escape(''.join(span_chars)) + if current_range: + data = range_data.get(current_range, {'samples': 0, 'opcodes': []}) + samples = data['samples'] + opcodes = ', '.join(data['opcodes'][:3]) # Top 3 opcodes + if len(data['opcodes']) > 3: + opcodes += f" +{len(data['opcodes']) - 3} more" + pct = int(100 * samples / total_line_samples) if total_line_samples > 0 else 0 + result.append(f'{text}') + else: + result.append(text) + span_chars = [] + + while char_idx < len(content): + char = content[char_idx] + char_bytes = len(char.encode('utf-8')) + char_range = byte_to_range.get(byte_offset) + + if char_range != current_range: + flush_span() + current_range = char_range + + span_chars.append(char) + byte_offset += char_bytes + char_idx += 1 + + flush_span() + return ''.join(result) + def _format_color_for_intensity(self, intensity: float) -> str: """Format color as rgba() string for given intensity.""" r, g, b, alpha = self._calculate_intensity_color(intensity) return f"rgba({r}, {g}, {b}, {alpha})" + def _format_specialization_color(self, spec_pct: int) -> str: + """Format specialization color based on percentage. + + Uses a gradient from gray (0%) through orange (50%) to green (100%). + """ + # Normalize to 0-1 + ratio = spec_pct / 100.0 + + if ratio >= 0.5: + # Orange to green (50-100%) + t = (ratio - 0.5) * 2 # 0 to 1 + r = int(255 * (1 - t)) # 255 -> 0 + g = int(180 + 75 * t) # 180 -> 255 + b = int(50 * (1 - t)) # 50 -> 0 + else: + # Gray to orange (0-50%) + t = ratio * 2 # 0 to 1 + r = int(158 + 97 * t) # 158 -> 255 + g = int(158 + 22 * t) # 158 -> 180 + b = int(158 - 108 * t) # 158 -> 50 + + alpha = 0.15 + 0.25 * ratio # 0.15 to 0.4 + return f"rgba({r}, {g}, {b}, {alpha})" + def _build_navigation_buttons(self, filename: str, line_num: int) -> str: """Build navigation buttons for callers/callees.""" line_key = (filename, line_num) - caller_list = self._deduplicate_by_function(self.callers_graph.get(line_key, [])) - callee_list = self._deduplicate_by_function(self.call_graph.get(line_key, [])) + caller_list = self._deduplicate_by_function(self.callers_graph.get(line_key, set())) + callee_list = self._deduplicate_by_function(self.call_graph.get(line_key, set())) # Get edge counts for each caller/callee callers_with_counts = self._get_edge_counts(line_key, caller_list, is_caller=True) @@ -996,8 +1232,12 @@ def _get_edge_counts(self, line_key: Tuple[str, int], result.sort(key=lambda x: x[3], reverse=True) return result - def _deduplicate_by_function(self, items: List[Tuple[str, int, str]]) -> List[Tuple[str, int, str]]: - """Remove duplicate entries based on (file, function) key.""" + def _deduplicate_by_function(self, items) -> List[Tuple[str, int, str]]: + """Remove duplicate entries based on (file, function) key. + + Args: + items: Iterable of (file, line, func) tuples (set or list) + """ seen = {} result = [] for file, line, func in items: From 7ffe4cb39e7db5813b1b29556d32011326a40841 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:40 +0000 Subject: [PATCH 06/24] Add opcode panel to live profiler TUI New widget displays instruction-level stats for selected function when --opcodes is enabled. Navigation via j/k keys with scroll support. Adds per-thread opcode tracking. Updates pstats collector for new frame format. --- .../sampling/live_collector/collector.py | 179 +++++++++++++++++- .../sampling/live_collector/constants.py | 3 + .../sampling/live_collector/widgets.py | 150 ++++++++++++++- Lib/profiling/sampling/pstats_collector.py | 17 +- 4 files changed, 324 insertions(+), 25 deletions(-) diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index 4b69275a2f077f..044fe9949b7ca4 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -11,7 +11,7 @@ import time import _colorize -from ..collector import Collector +from ..collector import Collector, extract_lineno from ..constants import ( THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, @@ -41,7 +41,7 @@ COLOR_PAIR_SORTED_HEADER, ) from .display import CursesDisplay -from .widgets import HeaderWidget, TableWidget, FooterWidget, HelpWidget +from .widgets import HeaderWidget, TableWidget, FooterWidget, HelpWidget, OpcodePanel from .trend_tracker import TrendTracker @@ -67,6 +67,11 @@ class ThreadData: sample_count: int = 0 gc_frame_samples: int = 0 + # Opcode statistics: {location: {opcode: count}} + opcode_stats: dict = field(default_factory=lambda: collections.defaultdict( + lambda: collections.defaultdict(int) + )) + def increment_status_flag(self, status_flags): """Update status counts based on status bit flags.""" if status_flags & THREAD_STATUS_HAS_GIL: @@ -103,6 +108,7 @@ def __init__( pid=None, display=None, mode=None, + opcodes=False, ): """ Initialize the live stats collector. @@ -115,6 +121,7 @@ def __init__( pid: Process ID being profiled display: DisplayInterface implementation (None means curses will be used) mode: Profiling mode ('cpu', 'gil', etc.) - affects what stats are shown + opcodes: Whether to show opcode panel (requires --opcodes flag) """ self.result = collections.defaultdict( lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) @@ -152,6 +159,12 @@ def __init__( } self.gc_frame_samples = 0 # Track samples with GC frames + # Opcode statistics: {location: {opcode: count}} + self.opcode_stats = collections.defaultdict(lambda: collections.defaultdict(int)) + self.show_opcodes = opcodes # Show opcode panel when --opcodes flag is passed + self.selected_row = 0 # Currently selected row in table for opcode view + self.scroll_offset = 0 # Scroll offset for table when in opcode mode + # Interactive controls state self.paused = False # Pause UI updates (profiling continues) self.show_help = False # Show help screen @@ -178,6 +191,7 @@ def __init__( self.table_widget = None self.footer_widget = None self.help_widget = None + self.opcode_panel = None # Color mode self._can_colorize = _colorize.can_colorize() @@ -282,18 +296,29 @@ def process_frames(self, frames, thread_id=None): thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None # Process each frame in the stack to track cumulative calls + # frame.location is (lineno, end_lineno, col_offset, end_col_offset), int, or None for frame in frames: - location = (frame.filename, frame.lineno, frame.funcname) + lineno = extract_lineno(frame.location) + location = (frame.filename, lineno, frame.funcname) self.result[location]["cumulative_calls"] += 1 if thread_data: thread_data.result[location]["cumulative_calls"] += 1 # The top frame gets counted as an inline call (directly executing) - top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname) + top_frame = frames[0] + top_lineno = extract_lineno(top_frame.location) + top_location = (top_frame.filename, top_lineno, top_frame.funcname) self.result[top_location]["direct_calls"] += 1 if thread_data: thread_data.result[top_location]["direct_calls"] += 1 + # Track opcode for top frame (the actively executing instruction) + opcode = getattr(top_frame, 'opcode', None) + if opcode is not None: + self.opcode_stats[top_location][opcode] += 1 + if thread_data: + thread_data.opcode_stats[top_location][opcode] += 1 + def collect_failed_sample(self): self.failed_samples += 1 self.total_samples += 1 @@ -431,6 +456,7 @@ def _initialize_widgets(self, colors): self.table_widget = TableWidget(self.display, colors, self) self.footer_widget = FooterWidget(self.display, colors, self) self.help_widget = HelpWidget(self.display, colors) + self.opcode_panel = OpcodePanel(self.display, colors, self) def _render_display_sections( self, height, width, elapsed, stats_list, colors @@ -451,6 +477,12 @@ def _render_display_sections( line, width, height=height, stats_list=stats_list ) + # Render opcode panel if enabled + if self.show_opcodes: + line = self.opcode_panel.render( + line, width, height=height, stats_list=stats_list + ) + except curses.error: pass @@ -918,19 +950,148 @@ def _handle_input(self): if self._trend_tracker is not None: self._trend_tracker.toggle() - elif ch == curses.KEY_LEFT or ch == curses.KEY_UP: - # Navigate to previous thread in PER_THREAD mode, or switch from ALL to PER_THREAD + elif ch == ord("j") or ch == ord("J"): + # Move selection down in opcode mode (with scrolling) + if self.show_opcodes: + # Use the actual displayed stats_list count, not raw result_source + # This matches what _prepare_display_data() produces + stats_list = self.build_stats_list() + if self.display: + height, _ = self.display.get_dimensions() + # Same calculation as _prepare_display_data + extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) + stats_list = stats_list[:max_stats] + visible_rows = max(1, height - 8 - 2 - 12) + else: + visible_rows = self.limit + total_rows = len(stats_list) + if total_rows == 0: + return + # Max scroll is when last item is at bottom + max_scroll = max(0, total_rows - visible_rows) + # Current absolute position + abs_pos = self.scroll_offset + self.selected_row + # Only move if not at the last item + if abs_pos < total_rows - 1: + # Try to move selection within visible area first + if self.selected_row < visible_rows - 1: + self.selected_row += 1 + elif self.scroll_offset < max_scroll: + # Scroll down + self.scroll_offset += 1 + # Clamp to valid range + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + + elif ch == ord("k") or ch == ord("K"): + # Move selection up in opcode mode (with scrolling) + if self.show_opcodes: + if self.selected_row > 0: + self.selected_row -= 1 + elif self.scroll_offset > 0: + self.scroll_offset -= 1 + # Clamp to valid range based on actual stats_list + stats_list = self.build_stats_list() + if self.display: + height, _ = self.display.get_dimensions() + extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) + stats_list = stats_list[:max_stats] + visible_rows = max(1, height - 8 - 2 - 12) + else: + visible_rows = self.limit + total_rows = len(stats_list) + if total_rows > 0: + max_scroll = max(0, total_rows - visible_rows) + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + + elif ch == curses.KEY_UP: + # Move selection up (same as 'k') when in opcode mode + if self.show_opcodes: + if self.selected_row > 0: + self.selected_row -= 1 + elif self.scroll_offset > 0: + self.scroll_offset -= 1 + # Clamp to valid range based on actual stats_list + stats_list = self.build_stats_list() + if self.display: + height, _ = self.display.get_dimensions() + extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) + stats_list = stats_list[:max_stats] + visible_rows = max(1, height - 8 - 2 - 12) + else: + visible_rows = self.limit + total_rows = len(stats_list) + if total_rows > 0: + max_scroll = max(0, total_rows - visible_rows) + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + else: + # Navigate to previous thread (same as KEY_LEFT) + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = len(self.thread_ids) - 1 + else: + self.current_thread_index = ( + self.current_thread_index - 1 + ) % len(self.thread_ids) + + elif ch == curses.KEY_DOWN: + # Move selection down (same as 'j') when in opcode mode + if self.show_opcodes: + stats_list = self.build_stats_list() + if self.display: + height, _ = self.display.get_dimensions() + extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) + stats_list = stats_list[:max_stats] + visible_rows = max(1, height - 8 - 2 - 12) + else: + visible_rows = self.limit + total_rows = len(stats_list) + if total_rows == 0: + return + max_scroll = max(0, total_rows - visible_rows) + abs_pos = self.scroll_offset + self.selected_row + if abs_pos < total_rows - 1: + if self.selected_row < visible_rows - 1: + self.selected_row += 1 + elif self.scroll_offset < max_scroll: + self.scroll_offset += 1 + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + else: + # Navigate to next thread (same as KEY_RIGHT) + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index + 1 + ) % len(self.thread_ids) + + elif ch == curses.KEY_LEFT: + # Navigate to previous thread if len(self.thread_ids) > 0: if self.view_mode == "ALL": self.view_mode = "PER_THREAD" - self.current_thread_index = 0 + self.current_thread_index = len(self.thread_ids) - 1 else: self.current_thread_index = ( self.current_thread_index - 1 ) % len(self.thread_ids) - elif ch == curses.KEY_RIGHT or ch == curses.KEY_DOWN: - # Navigate to next thread in PER_THREAD mode, or switch from ALL to PER_THREAD + elif ch == curses.KEY_RIGHT: + # Navigate to next thread if len(self.thread_ids) > 0: if self.view_mode == "ALL": self.view_mode = "PER_THREAD" diff --git a/Lib/profiling/sampling/live_collector/constants.py b/Lib/profiling/sampling/live_collector/constants.py index e4690c90bafb7f..8462c0de3fd680 100644 --- a/Lib/profiling/sampling/live_collector/constants.py +++ b/Lib/profiling/sampling/live_collector/constants.py @@ -45,6 +45,9 @@ # Finished banner display FINISHED_BANNER_EXTRA_LINES = 3 # Blank line + banner + blank line +# Opcode panel display +OPCODE_PANEL_HEIGHT = 12 # Height reserved for opcode statistics panel + # Color pair IDs COLOR_PAIR_HEADER_BG = 4 COLOR_PAIR_CYAN = 5 diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py index 2af8caa2c2f6d9..869405671ffeed 100644 --- a/Lib/profiling/sampling/live_collector/widgets.py +++ b/Lib/profiling/sampling/live_collector/widgets.py @@ -20,6 +20,7 @@ MIN_SAMPLE_RATE_FOR_SCALING, FOOTER_LINES, FINISHED_BANNER_EXTRA_LINES, + OPCODE_PANEL_HEIGHT, ) from ..constants import ( THREAD_STATUS_HAS_GIL, @@ -730,8 +731,21 @@ def draw_stats_rows(self, line, height, width, stats_list, column_flags): # Get trend tracker for color decisions trend_tracker = self.collector._trend_tracker - for stat in stats_list: - if line >= height - FOOTER_LINES: + # Check if opcode mode is enabled for row selection highlighting + show_opcodes = getattr(self.collector, 'show_opcodes', False) + selected_row = getattr(self.collector, 'selected_row', 0) + scroll_offset = getattr(self.collector, 'scroll_offset', 0) if show_opcodes else 0 + A_REVERSE = self.display.get_attr("A_REVERSE") + A_BOLD = self.display.get_attr("A_BOLD") + + # Reserve space for opcode panel when enabled + opcode_panel_height = OPCODE_PANEL_HEIGHT if show_opcodes else 0 + + # Apply scroll offset when in opcode mode + display_stats = stats_list[scroll_offset:] if show_opcodes else stats_list + + for row_idx, stat in enumerate(display_stats): + if line >= height - FOOTER_LINES - opcode_panel_height: break func = stat["func"] @@ -752,8 +766,13 @@ def draw_stats_rows(self, line, height, width, stats_list, column_flags): else 0 ) + # Check if this row is selected + is_selected = show_opcodes and row_idx == selected_row + # Helper function to get trend color for a specific column def get_trend_color(column_name): + if is_selected: + return A_REVERSE | A_BOLD trend = trends.get(column_name, "stable") if trend_tracker is not None: return trend_tracker.get_color(trend) @@ -763,33 +782,45 @@ def get_trend_color(column_name): samples_str = f"{direct_calls}/{cumulative_calls}" col = 0 + # Fill entire row with reverse video background for selected row + if is_selected: + self.add_str(line, 0, " " * (width - 1), A_REVERSE | A_BOLD) + + # Show selection indicator when opcode panel is enabled + if show_opcodes: + if is_selected: + self.add_str(line, col, "►", A_REVERSE | A_BOLD) + else: + self.add_str(line, col, " ", curses.A_NORMAL) + col += 2 + # Samples column - apply trend color based on nsamples trend nsamples_color = get_trend_color("nsamples") - self.add_str(line, col, f"{samples_str:>13}", nsamples_color) + self.add_str(line, col, f"{samples_str:>13} ", nsamples_color) col += 15 # Sample % column if show_sample_pct: sample_pct_color = get_trend_color("sample_pct") - self.add_str(line, col, f"{sample_pct:>5.1f}", sample_pct_color) + self.add_str(line, col, f"{sample_pct:>5.1f} ", sample_pct_color) col += 7 # Total time column if show_tottime: tottime_color = get_trend_color("tottime") - self.add_str(line, col, f"{total_time:>10.3f}", tottime_color) + self.add_str(line, col, f"{total_time:>10.3f} ", tottime_color) col += 12 # Cumul % column if show_cumul_pct: cumul_pct_color = get_trend_color("cumul_pct") - self.add_str(line, col, f"{cum_pct:>5.1f}", cumul_pct_color) + self.add_str(line, col, f"{cum_pct:>5.1f} ", cumul_pct_color) col += 7 # Cumul time column if show_cumtime: cumtime_color = get_trend_color("cumtime") - self.add_str(line, col, f"{cumulative_time:>10.3f}", cumtime_color) + self.add_str(line, col, f"{cumulative_time:>10.3f} ", cumtime_color) col += 12 # Function name column @@ -804,7 +835,8 @@ def get_trend_color(column_name): if len(funcname) > func_width: func_display = funcname[: func_width - 3] + "..." func_display = f"{func_display:<{func_width}}" - self.add_str(line, col, func_display, color_func) + func_color = A_REVERSE | A_BOLD if is_selected else color_func + self.add_str(line, col, func_display, func_color) col += func_width + 2 # File:line column @@ -812,8 +844,9 @@ def get_trend_color(column_name): simplified_path = self.collector.simplify_path(filename) file_line = f"{simplified_path}:{lineno}" remaining_width = width - col - 1 + file_color = A_REVERSE | A_BOLD if is_selected else color_file self.add_str( - line, col, file_line[:remaining_width], color_file + line, col, file_line[:remaining_width], file_color ) line += 1 @@ -934,7 +967,8 @@ def render(self, line, width, **kwargs): (" S - Cycle through sort modes (backward)", A_NORMAL), (" t - Toggle view mode (ALL / per-thread)", A_NORMAL), (" x - Toggle trend colors (on/off)", A_NORMAL), - (" ← → ↑ ↓ - Navigate threads (in per-thread mode)", A_NORMAL), + (" j/k or ↑/↓ - Select next/previous function (--opcodes)", A_NORMAL), + (" ← / → - Cycle through threads", A_NORMAL), (" + - Faster display refresh rate", A_NORMAL), (" - - Slower display refresh rate", A_NORMAL), ("", A_NORMAL), @@ -961,3 +995,99 @@ def render(self, line, width, **kwargs): self.add_str(start_line + i, col, text[: width - 3], attr) return line # Not used for overlays + + +class OpcodePanel(Widget): + """Widget for displaying opcode statistics for a selected function.""" + + def __init__(self, display, colors, collector): + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """Render opcode statistics panel. + + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'stats_list', 'height' + + Returns: + Next available line number + """ + from ..opcode_utils import get_opcode_info, format_opcode + + stats_list = kwargs.get("stats_list", []) + height = kwargs.get("height", 24) + selected_row = self.collector.selected_row + scroll_offset = getattr(self.collector, 'scroll_offset', 0) + + A_BOLD = self.display.get_attr("A_BOLD") + A_NORMAL = self.display.get_attr("A_NORMAL") + color_cyan = self.colors.get("color_cyan", A_NORMAL) + color_yellow = self.colors.get("color_yellow", A_NORMAL) + color_magenta = self.colors.get("color_magenta", A_NORMAL) + + # Get the selected function from stats_list (accounting for scroll) + actual_index = scroll_offset + selected_row + if not stats_list or actual_index >= len(stats_list): + self.add_str(line, 0, "No function selected (use j/k to select)", A_NORMAL) + return line + 1 + + selected_stat = stats_list[actual_index] + func = selected_stat["func"] + filename, lineno, funcname = func + + # Get opcode stats for this function + opcode_stats = self.collector.opcode_stats.get(func, {}) + + if not opcode_stats: + self.add_str(line, 0, f"No opcode data for {funcname}() (requires --opcodes)", A_NORMAL) + return line + 1 + + # Sort opcodes by count + sorted_opcodes = sorted(opcode_stats.items(), key=lambda x: -x[1]) + total_opcode_samples = sum(opcode_stats.values()) + + # Draw header + header = f"─── Opcodes for {funcname}() " + header += "─" * max(0, width - len(header) - 1) + self.add_str(line, 0, header[:width-1], color_cyan | A_BOLD) + line += 1 + + # Calculate max samples for bar scaling + max_count = sorted_opcodes[0][1] if sorted_opcodes else 1 + + # Draw opcode rows (limit to available space) + max_rows = min(8, height - line - 3) # Leave room for footer + bar_width = 20 + + for i, (opcode_num, count) in enumerate(sorted_opcodes[:max_rows]): + if line >= height - 3: + break + + opcode_info = get_opcode_info(opcode_num) + is_specialized = opcode_info["is_specialized"] + name_display = format_opcode(opcode_num) + + pct = (count / total_opcode_samples * 100) if total_opcode_samples > 0 else 0 + + # Draw bar + bar_fill = int((count / max_count) * bar_width) if max_count > 0 else 0 + bar = "█" * bar_fill + "░" * (bar_width - bar_fill) + + # Format: [████████░░░░] LOAD_ATTR 45.2% (1234) + # Specialized opcodes shown in magenta, base opcodes in yellow + name_color = color_magenta if is_specialized else color_yellow + + row_text = f"[{bar}] {name_display:<35} {pct:>5.1f}% ({count:>6})" + self.add_str(line, 2, row_text[:width-3], name_color) + line += 1 + + # Show "..." if more opcodes exist + if len(sorted_opcodes) > max_rows: + remaining = len(sorted_opcodes) - max_rows + self.add_str(line, 2, f"... and {remaining} more opcodes", A_NORMAL) + line += 1 + + return line diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index b8b37a10c43ad3..eeff33b37779ea 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -2,7 +2,7 @@ import marshal from _colorize import ANSIColors -from .collector import Collector +from .collector import Collector, extract_lineno class PstatsCollector(Collector): @@ -23,12 +23,15 @@ def _process_frames(self, frames): return # Process each frame in the stack to track cumulative calls + # frame.location is int, tuple (lineno, end_lineno, col_offset, end_col_offset), or None for frame in frames: - location = (frame.filename, frame.lineno, frame.funcname) - self.result[location]["cumulative_calls"] += 1 + lineno = extract_lineno(frame.location) + loc = (frame.filename, lineno, frame.funcname) + self.result[loc]["cumulative_calls"] += 1 # The top frame gets counted as an inline call (directly executing) - top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname) + top_lineno = extract_lineno(frames[0].location) + top_location = (frames[0].filename, top_lineno, frames[0].funcname) self.result[top_location]["direct_calls"] += 1 # Track caller-callee relationships for call graph @@ -36,8 +39,10 @@ def _process_frames(self, frames): callee_frame = frames[i - 1] caller_frame = frames[i] - callee = (callee_frame.filename, callee_frame.lineno, callee_frame.funcname) - caller = (caller_frame.filename, caller_frame.lineno, caller_frame.funcname) + callee_lineno = extract_lineno(callee_frame.location) + caller_lineno = extract_lineno(caller_frame.location) + callee = (callee_frame.filename, callee_lineno, callee_frame.funcname) + caller = (caller_frame.filename, caller_lineno, caller_frame.funcname) self.callers[callee][caller] += 1 From 8b423df632c6c20cfb2960c495cad92a50c157b6 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:47 +0000 Subject: [PATCH 07/24] Update tests for location tuple and opcode field Frame location is now a 4-tuple (lineno, end_lineno, col_offset, end_col_offset). MockFrameInfo wraps locations in LocationInfo struct. Updates assertions throughout and adds opcode_utils coverage. --- Lib/test/test_external_inspection.py | 313 +++++++++-- Lib/test/test_profiling/test_heatmap.py | 190 +++++-- .../_live_collector_helpers.py | 12 +- .../test_sampling_profiler/mocks.py | 28 +- .../test_sampling_profiler/test_collectors.py | 519 ++++++++++++++++-- .../test_integration.py | 22 +- 6 files changed, 924 insertions(+), 160 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 7decd8f32d5a2b..5fa97d69699f20 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -155,12 +155,12 @@ def foo(): p.wait(timeout=SHORT_TIMEOUT) thread_expected_stack_trace = [ - FrameInfo([script_name, 15, "foo"]), - FrameInfo([script_name, 12, "baz"]), - FrameInfo([script_name, 9, "bar"]), - FrameInfo([threading.__file__, ANY, "Thread.run"]), - FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]), - FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]), + FrameInfo([script_name, (15, ANY, ANY, ANY), "foo", ANY]), + FrameInfo([script_name, (12, ANY, ANY, ANY), "baz", ANY]), + FrameInfo([script_name, (9, ANY, ANY, ANY), "bar", ANY]), + FrameInfo([threading.__file__, ANY, "Thread.run", ANY]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner", ANY]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap", ANY]), ] # Is possible that there are more threads, so we check that the # expected stack traces are in the result (looking at you Windows!) @@ -175,7 +175,7 @@ def foo(): self.assertTrue(found_expected_stack, "Expected thread stack trace not found") # Check that the main thread stack trace is in the result - frame = FrameInfo([script_name, 19, ""]) + frame = FrameInfo([script_name, (19, ANY, ANY, ANY), "", ANY]) main_thread_found = False for interpreter_info in stack_trace: for thread_info in interpreter_info.threads: @@ -323,6 +323,7 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup._aexit", + None, ] ), tuple( @@ -330,21 +331,22 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + None, ] ), - tuple([script_name, 26, "main"]), + tuple([script_name, (26, ANY, ANY, ANY), "main", None]), ) ], "c2_root": [ ( - tuple([script_name, 10, "c5"]), - tuple([script_name, 14, "c4"]), - tuple([script_name, 17, "c3"]), - tuple([script_name, 20, "c2"]), + tuple([script_name, (10, ANY, ANY, ANY), "c5", None]), + tuple([script_name, (14, ANY, ANY, ANY), "c4", None]), + tuple([script_name, (17, ANY, ANY, ANY), "c3", None]), + tuple([script_name, (20, ANY, ANY, ANY), "c2", None]), ) ], - "sub_main_1": [(tuple([script_name, 23, "c1"]),)], - "sub_main_2": [(tuple([script_name, 23, "c1"]),)], + "sub_main_1": [(tuple([script_name, (23, ANY, ANY, ANY), "c1", None]),)], + "sub_main_2": [(tuple([script_name, (23, ANY, ANY, ANY), "c1", None]),)], }, ) @@ -372,6 +374,7 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup._aexit", + None, ] ), tuple( @@ -379,13 +382,14 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + None, ] ), - tuple([script_name, 26, "main"]), + tuple([script_name, (26, ANY, ANY, ANY), "main", None]), ), ), - ("sub_main_1", (tuple([script_name, 23, "c1"]),)), - ("sub_main_2", (tuple([script_name, 23, "c1"]),)), + ("sub_main_1", (tuple([script_name, (23, ANY, ANY, ANY), "c1", None]),)), + ("sub_main_2", (tuple([script_name, (23, ANY, ANY, ANY), "c1", None]),)), ], "sub_main_1": [ ( @@ -396,6 +400,7 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup._aexit", + None, ] ), tuple( @@ -403,9 +408,10 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + None, ] ), - tuple([script_name, 26, "main"]), + tuple([script_name, (26, ANY, ANY, ANY), "main", None]), ), ) ], @@ -418,6 +424,7 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup._aexit", + None, ] ), tuple( @@ -425,9 +432,10 @@ def new_eager_loop(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + None, ] ), - tuple([script_name, 26, "main"]), + tuple([script_name, (26, ANY, ANY, ANY), "main", None]), ), ) ], @@ -512,9 +520,9 @@ async def main(): coroutine_stack, [ ( - tuple([script_name, 10, "gen_nested_call"]), - tuple([script_name, 16, "gen"]), - tuple([script_name, 19, "main"]), + tuple([script_name, (10, ANY, ANY, ANY), "gen_nested_call", None]), + tuple([script_name, (16, ANY, ANY, ANY), "gen", None]), + tuple([script_name, (19, ANY, ANY, ANY), "main", None]), ) ], ) @@ -624,11 +632,11 @@ async def main(): self.assertEqual( coroutine_stacks, { - "Task-1": [(tuple([script_name, 21, "main"]),)], + "Task-1": [(tuple([script_name, (21, ANY, ANY, ANY), "main", None]),)], "Task-2": [ ( - tuple([script_name, 11, "deep"]), - tuple([script_name, 15, "c1"]), + tuple([script_name, (11, ANY, ANY, ANY), "deep", None]), + tuple([script_name, (15, ANY, ANY, ANY), "c1", None]), ) ], }, @@ -650,7 +658,7 @@ async def main(): { "Task-1": [], "Task-2": [ - ("Task-1", (tuple([script_name, 21, "main"]),)) + ("Task-1", (tuple([script_name, (21, ANY, ANY, ANY), "main", None]),)) ], }, ) @@ -762,19 +770,20 @@ async def main(): { "Task-1": [ ( - tuple([staggered.__file__, ANY, "staggered_race"]), - tuple([script_name, 21, "main"]), + tuple([staggered.__file__, ANY, "staggered_race", None]), + tuple([script_name, (21, ANY, ANY, ANY), "main", None]), ) ], "Task-2": [ ( - tuple([script_name, 11, "deep"]), - tuple([script_name, 15, "c1"]), + tuple([script_name, (11, ANY, ANY, ANY), "deep", None]), + tuple([script_name, (15, ANY, ANY, ANY), "c1", None]), tuple( [ staggered.__file__, ANY, "staggered_race..run_one_coro", + None, ] ), ) @@ -802,9 +811,9 @@ async def main(): "Task-1", ( tuple( - [staggered.__file__, ANY, "staggered_race"] + [staggered.__file__, ANY, "staggered_race", None] ), - tuple([script_name, 21, "main"]), + tuple([script_name, (21, ANY, ANY, ANY), "main", None]), ), ) ], @@ -938,11 +947,11 @@ async def main(): self.assertGreaterEqual(len(entries), 1000) # the first three tasks stem from the code structure main_stack = [ - FrameInfo([taskgroups.__file__, ANY, "TaskGroup._aexit"]), + FrameInfo([taskgroups.__file__, ANY, "TaskGroup._aexit", ANY]), FrameInfo( - [taskgroups.__file__, ANY, "TaskGroup.__aexit__"] + [taskgroups.__file__, ANY, "TaskGroup.__aexit__", ANY] ), - FrameInfo([script_name, 60, "main"]), + FrameInfo([script_name, (60, ANY, ANY, ANY), "main", ANY]), ] self.assertIn( TaskInfo( @@ -964,6 +973,7 @@ async def main(): base_events.__file__, ANY, "Server.serve_forever", + ANY, ] ) ], @@ -980,6 +990,7 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup._aexit", + ANY, ] ), FrameInfo( @@ -987,11 +998,10 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + ANY, ] ), - FrameInfo( - [script_name, ANY, "main"] - ), + FrameInfo([script_name, ANY, "main", ANY]), ], ANY, ] @@ -1010,14 +1020,13 @@ async def main(): CoroInfo( [ [ - FrameInfo( - [tasks.__file__, ANY, "sleep"] - ), + FrameInfo([tasks.__file__, ANY, "sleep", ANY]), FrameInfo( [ script_name, - 38, + (38, ANY, ANY, ANY), "echo_client", + ANY, ] ), ], @@ -1034,6 +1043,7 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup._aexit", + ANY, ] ), FrameInfo( @@ -1041,13 +1051,15 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + ANY, ] ), FrameInfo( [ script_name, - 41, + (41, ANY, ANY, ANY), "echo_client_spam", + ANY, ] ), ], @@ -1069,6 +1081,7 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup._aexit", + ANY, ] ), FrameInfo( @@ -1076,11 +1089,10 @@ async def main(): taskgroups.__file__, ANY, "TaskGroup.__aexit__", + ANY, ] ), - FrameInfo( - [script_name, 41, "echo_client_spam"] - ), + FrameInfo([script_name, (41, ANY, ANY, ANY), "echo_client_spam", ANY]), ], ANY, ] @@ -1137,20 +1149,223 @@ def test_self_trace(self): FrameInfo( [ __file__, - get_stack_trace.__code__.co_firstlineno + 2, + (get_stack_trace.__code__.co_firstlineno + 2, ANY, ANY, ANY), "get_stack_trace", + ANY, ] ), FrameInfo( [ __file__, - self.test_self_trace.__code__.co_firstlineno + 6, + (self.test_self_trace.__code__.co_firstlineno + 6, ANY, ANY, ANY), "TestGetStackTrace.test_self_trace", + ANY, ] ), ], ) + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_opcodes_collection(self): + """Test that opcodes are collected when the opcodes flag is set.""" + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import time + import sys + import socket + + def compute(): + # Do some work that involves bytecode execution + total = 0 + for i in range(1000): + total += i + return total + + def bar(): + compute() + + def foo(): + bar() + + # Signal that we're ready + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + sock.sendall(b"ready") + sock.close() + + # Keep computing in a loop + while True: + foo() + """ + ) + + with os.fdopen(os.dup(1), "w") as stdout: + with subprocess.Popen( + [sys.executable, "-c", script], + stdout=stdout, + stderr=stdout, + text=True, + ) as p: + client_socket = None + try: + # Accept the ready signal + server_socket = socket.socket( + socket.AF_INET, socket.SOCK_STREAM + ) + server_socket.setsockopt( + socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 + ) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + client_socket, _ = server_socket.accept() + client_socket.settimeout(SHORT_TIMEOUT) + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") + server_socket.close() + + # Get stack trace with opcodes=True + unwinder = RemoteUnwinder(p.pid, opcodes=True) + stack_trace = unwinder.get_stack_trace() + + # Find the thread with our compute/bar/foo stack + found_opcodes = False + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + for frame in thread_info.frame_info: + # Check that frames have opcodes (not None) + # when opcodes=True is set + if frame.funcname in ("compute", "bar", "foo"): + # Opcode should be an integer, not None + self.assertIsInstance( + frame.opcode, + int, + f"Expected opcode to be int for {frame.funcname}, got {type(frame.opcode)}" + ) + self.assertGreaterEqual(frame.opcode, 0) + found_opcodes = True + + self.assertTrue( + found_opcodes, + "Did not find any frames with opcodes from compute/bar/foo" + ) + + finally: + if client_socket is not None: + client_socket.close() + p.kill() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_location_extraction(self): + """Test that location tuples (lineno, end_lineno, col_offset, end_col_offset) are correctly extracted.""" + port = find_unused_port() + # Script with predictable column positions + # Line 1: import time, sys, socket + # Line 2: (empty or comment) + # ... + # The key is foo() function where we can predict column offsets + script = textwrap.dedent( + f"""\ + import time, sys, socket + + def foo(): + x = 1 + 2 + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + sock.sendall(b"ready") + time.sleep(10_000) + + foo() + """ + ) + + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + + script_name = _make_test_script(script_dir, "script", script) + client_socket = None + try: + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") + + # Get stack trace with opcodes to ensure we get full location info + unwinder = RemoteUnwinder(p.pid, opcodes=True) + stack_trace = unwinder.get_stack_trace() + + # Find the foo function frame + foo_frame = None + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + for frame in thread_info.frame_info: + if frame.funcname == "foo": + foo_frame = frame + break + if foo_frame: + break + if foo_frame: + break + + self.assertIsNotNone(foo_frame, "Should find 'foo' function in stack trace") + + # Verify location is a tuple with 4 elements + location = foo_frame.location + self.assertIsInstance(location, tuple, "location should be a tuple") + self.assertEqual(len(location), 4, "location should have 4 elements") + + lineno, end_lineno, col_offset, end_col_offset = location + + # Verify lineno is reasonable (should be line 8 where time.sleep is) + self.assertIsInstance(lineno, int, "lineno should be an integer") + self.assertEqual(lineno, 8, "lineno should be 8 (time.sleep line)") + + # Verify end_lineno + self.assertIsInstance(end_lineno, int, "end_lineno should be an integer") + self.assertGreaterEqual(end_lineno, lineno, "end_lineno should be >= lineno") + + # Verify col_offset and end_col_offset are integers + # They may be -1 if not available, or valid column offsets + self.assertIsInstance(col_offset, int, "col_offset should be an integer") + self.assertIsInstance(end_col_offset, int, "end_col_offset should be an integer") + + # If column info is available (not -1), verify it's reasonable + if col_offset >= 0: + self.assertLess(col_offset, 100, "col_offset should be reasonable") + if end_col_offset >= 0: + self.assertLess(end_col_offset, 100, "end_col_offset should be reasonable") + if col_offset >= 0: + self.assertGreaterEqual(end_col_offset, col_offset, + "end_col_offset should be >= col_offset") + + except PermissionError: + self.skipTest("Insufficient permissions to read the stack trace") + finally: + if client_socket is not None: + client_socket.close() + p.kill() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) + @skip_if_not_supported @unittest.skipIf( sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, @@ -1605,7 +1820,7 @@ def main_work(): current_frame = thread_info.frame_info[0] if ( current_frame.funcname == "main_work" - and current_frame.lineno > 15 + and current_frame.location[0] > 15 ): found = True break diff --git a/Lib/test/test_profiling/test_heatmap.py b/Lib/test/test_profiling/test_heatmap.py index a6ff3b83ea1e0b..0f5f8d4b2f6292 100644 --- a/Lib/test/test_profiling/test_heatmap.py +++ b/Lib/test/test_profiling/test_heatmap.py @@ -4,8 +4,12 @@ import shutil import tempfile import unittest +from collections import namedtuple from pathlib import Path +# Matches the C structseq LocationInfo from _remote_debugging +LocationInfo = namedtuple('LocationInfo', ['lineno', 'end_lineno', 'col_offset', 'end_col_offset']) + from profiling.sampling.heatmap_collector import ( HeatmapCollector, get_python_path_info, @@ -220,7 +224,7 @@ def test_process_frames_increments_total_samples(self): collector = HeatmapCollector(sample_interval_usec=100) initial_count = collector._total_samples - frames = [('file.py', 10, 'func')] + frames = [('file.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) self.assertEqual(collector._total_samples, initial_count + 1) @@ -229,7 +233,7 @@ def test_process_frames_records_line_samples(self): """Test that process_frames records line samples.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('test.py', 5, 'test_func')] + frames = [('test.py', (5, 5, -1, -1), 'test_func', None)] collector.process_frames(frames, thread_id=1) # Check that line was recorded @@ -241,9 +245,9 @@ def test_process_frames_records_multiple_lines_in_stack(self): collector = HeatmapCollector(sample_interval_usec=100) frames = [ - ('file1.py', 10, 'func1'), - ('file2.py', 20, 'func2'), - ('file3.py', 30, 'func3') + ('file1.py', (10, 10, -1, -1), 'func1', None), + ('file2.py', (20, 20, -1, -1), 'func2', None), + ('file3.py', (30, 30, -1, -1), 'func3', None) ] collector.process_frames(frames, thread_id=1) @@ -257,8 +261,8 @@ def test_process_frames_distinguishes_self_samples(self): collector = HeatmapCollector(sample_interval_usec=100) frames = [ - ('leaf.py', 5, 'leaf_func'), # This is the leaf (top of stack) - ('caller.py', 10, 'caller_func') + ('leaf.py', (5, 5, -1, -1), 'leaf_func', None), # This is the leaf (top of stack) + ('caller.py', (10, 10, -1, -1), 'caller_func', None) ] collector.process_frames(frames, thread_id=1) @@ -273,7 +277,7 @@ def test_process_frames_accumulates_samples(self): """Test that multiple calls accumulate samples.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('file.py', 10, 'func')] + frames = [('file.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) collector.process_frames(frames, thread_id=1) @@ -288,11 +292,11 @@ def test_process_frames_ignores_invalid_frames(self): # These should be ignored invalid_frames = [ - ('', 1, 'test'), - ('[eval]', 1, 'test'), - ('', 1, 'test'), - (None, 1, 'test'), - ('__init__', 0, 'test'), # Special invalid frame + ('', (1, 1, -1, -1), 'test', None), + ('[eval]', (1, 1, -1, -1), 'test', None), + ('', (1, 1, -1, -1), 'test', None), + (None, (1, 1, -1, -1), 'test', None), + ('__init__', (0, 0, -1, -1), 'test', None), # Special invalid frame ] for frame in invalid_frames: @@ -301,15 +305,15 @@ def test_process_frames_ignores_invalid_frames(self): # Should not record these invalid frames for frame in invalid_frames: if frame[0]: - self.assertNotIn((frame[0], frame[1]), collector.line_samples) + self.assertNotIn((frame[0], frame[1][0]), collector.line_samples) def test_process_frames_builds_call_graph(self): """Test that process_frames builds call graph relationships.""" collector = HeatmapCollector(sample_interval_usec=100) frames = [ - ('callee.py', 5, 'callee_func'), - ('caller.py', 10, 'caller_func') + ('callee.py', (5, 5, -1, -1), 'callee_func', None), + ('caller.py', (10, 10, -1, -1), 'caller_func', None) ] collector.process_frames(frames, thread_id=1) @@ -325,7 +329,7 @@ def test_process_frames_records_function_definitions(self): """Test that process_frames records function definition locations.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('module.py', 42, 'my_function')] + frames = [('module.py', (42, 42, -1, -1), 'my_function', None)] collector.process_frames(frames, thread_id=1) self.assertIn(('module.py', 'my_function'), collector.function_definitions) @@ -336,8 +340,8 @@ def test_process_frames_tracks_edge_samples(self): collector = HeatmapCollector(sample_interval_usec=100) frames = [ - ('callee.py', 5, 'callee'), - ('caller.py', 10, 'caller') + ('callee.py', (5, 5, -1, -1), 'callee', None), + ('caller.py', (10, 10, -1, -1), 'caller', None) ] # Process same call stack multiple times @@ -361,7 +365,7 @@ def test_process_frames_with_file_samples_dict(self): """Test that file_samples dict is properly populated.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('test.py', 10, 'func')] + frames = [('test.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) self.assertIn('test.py', collector.file_samples) @@ -382,7 +386,7 @@ def test_export_creates_output_directory(self): collector = HeatmapCollector(sample_interval_usec=100) # Add some data - frames = [('test.py', 10, 'func')] + frames = [('test.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) output_path = os.path.join(self.test_dir, 'heatmap_output') @@ -397,7 +401,7 @@ def test_export_creates_index_html(self): """Test that export creates index.html.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('test.py', 10, 'func')] + frames = [('test.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) output_path = os.path.join(self.test_dir, 'heatmap_output') @@ -412,7 +416,7 @@ def test_export_creates_file_htmls(self): """Test that export creates individual file HTMLs.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('test.py', 10, 'func')] + frames = [('test.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) output_path = os.path.join(self.test_dir, 'heatmap_output') @@ -439,7 +443,7 @@ def test_export_handles_html_suffix(self): """Test that export handles .html suffix in output path.""" collector = HeatmapCollector(sample_interval_usec=100) - frames = [('test.py', 10, 'func')] + frames = [('test.py', (10, 10, -1, -1), 'func', None)] collector.process_frames(frames, thread_id=1) # Path with .html suffix should be stripped @@ -457,9 +461,9 @@ def test_export_with_multiple_files(self): collector = HeatmapCollector(sample_interval_usec=100) # Add samples for multiple files - collector.process_frames([('file1.py', 10, 'func1')], thread_id=1) - collector.process_frames([('file2.py', 20, 'func2')], thread_id=1) - collector.process_frames([('file3.py', 30, 'func3')], thread_id=1) + collector.process_frames([('file1.py', (10, 10, -1, -1), 'func1', None)], thread_id=1) + collector.process_frames([('file2.py', (20, 20, -1, -1), 'func2', None)], thread_id=1) + collector.process_frames([('file3.py', (30, 30, -1, -1), 'func3', None)], thread_id=1) output_path = os.path.join(self.test_dir, 'multi_file') @@ -476,7 +480,7 @@ def test_export_index_contains_file_references(self): collector = HeatmapCollector(sample_interval_usec=100) collector.set_stats(sample_interval_usec=100, duration_sec=1.0, sample_rate=100.0) - frames = [('mytest.py', 10, 'my_func')] + frames = [('mytest.py', (10, 10, -1, -1), 'my_func', None)] collector.process_frames(frames, thread_id=1) output_path = os.path.join(self.test_dir, 'test_output') @@ -500,7 +504,7 @@ def test_export_file_html_has_line_numbers(self): with open(temp_file, 'w') as f: f.write('def test():\n pass\n') - frames = [(temp_file, 1, 'test')] + frames = [(temp_file, (1, 1, -1, -1), 'test', None)] collector.process_frames(frames, thread_id=1) output_path = os.path.join(self.test_dir, 'line_test') @@ -521,23 +525,39 @@ def test_export_file_html_has_line_numbers(self): class MockFrameInfo: - """Mock FrameInfo for testing since the real one isn't accessible.""" + """Mock FrameInfo for testing. + + Frame format: (filename, location, funcname, opcode) where: + - location is a tuple (lineno, end_lineno, col_offset, end_col_offset) + - opcode is an int or None + """ - def __init__(self, filename, lineno, funcname): + def __init__(self, filename, lineno, funcname, opcode=None): self.filename = filename - self.lineno = lineno self.funcname = funcname + self.opcode = opcode + self.location = (lineno, lineno, -1, -1) + + def __iter__(self): + return iter((self.filename, self.location, self.funcname, self.opcode)) + + def __getitem__(self, index): + return (self.filename, self.location, self.funcname, self.opcode)[index] + + def __len__(self): + return 4 def __repr__(self): - return f"MockFrameInfo(filename='{self.filename}', lineno={self.lineno}, funcname='{self.funcname}')" + return f"MockFrameInfo('{self.filename}', {self.location}, '{self.funcname}', {self.opcode})" class MockThreadInfo: """Mock ThreadInfo for testing since the real one isn't accessible.""" - def __init__(self, thread_id, frame_info): + def __init__(self, thread_id, frame_info, status=0): self.thread_id = thread_id self.frame_info = frame_info + self.status = status # Thread status flags def __repr__(self): return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})" @@ -565,13 +585,13 @@ def test_heatmap_collector_basic(self): self.assertEqual(len(collector.file_samples), 0) self.assertEqual(len(collector.line_samples), 0) - # Test collecting sample data + # Test collecting sample data - frames are 4-tuples: (filename, location, funcname, opcode) test_frames = [ MockInterpreterInfo( 0, [MockThreadInfo( 1, - [("file.py", 10, "func1"), ("file.py", 20, "func2")], + [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")], )] ) ] @@ -592,21 +612,21 @@ def test_heatmap_collector_export(self): collector = HeatmapCollector(sample_interval_usec=100) - # Create test data with multiple files + # Create test data with multiple files using MockFrameInfo test_frames1 = [ MockInterpreterInfo( 0, - [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])], + [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")])], ) ] test_frames2 = [ MockInterpreterInfo( 0, - [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])], + [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")])], ) ] # Same stack test_frames3 = [ - MockInterpreterInfo(0, [MockThreadInfo(1, [("other.py", 5, "other_func")])]) + MockInterpreterInfo(0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])]) ] collector.collect(test_frames1) @@ -649,5 +669,95 @@ def test_heatmap_collector_export(self): self.assertIn("nav-btn", file_content) +class TestHeatmapCollectorLocation(unittest.TestCase): + """Tests for HeatmapCollector location handling.""" + + def test_heatmap_with_full_location_info(self): + """Test HeatmapCollector uses full location tuple.""" + collector = HeatmapCollector(sample_interval_usec=1000) + + # Frame with full location: (lineno, end_lineno, col_offset, end_col_offset) + frame = MockFrameInfo("test.py", 10, "func") + # Override with full location info + frame.location = LocationInfo(10, 15, 4, 20) + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame])] + ) + ] + collector.collect(frames) + + # Verify data was collected with location info + # HeatmapCollector uses file_samples dict with filename -> Counter of linenos + self.assertIn("test.py", collector.file_samples) + # Line 10 should have samples + self.assertIn(10, collector.file_samples["test.py"]) + + def test_heatmap_with_none_location(self): + """Test HeatmapCollector handles None location gracefully.""" + collector = HeatmapCollector(sample_interval_usec=1000) + + # Synthetic frame with None location + frame = MockFrameInfo("~", 0, "") + frame.location = None + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame])] + ) + ] + # Should not raise + collector.collect(frames) + + def test_heatmap_export_with_location_data(self): + """Test HeatmapCollector export includes location info.""" + tmp_dir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, tmp_dir) + + collector = HeatmapCollector(sample_interval_usec=1000) + + frame = MockFrameInfo("test.py", 10, "process") + frame.location = LocationInfo(10, 12, 0, 30) + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame])] + ) + ] + collector.collect(frames) + + # Export should work + with (captured_stdout(), captured_stderr()): + collector.export(tmp_dir) + self.assertTrue(os.path.exists(os.path.join(tmp_dir, "index.html"))) + + def test_heatmap_collector_frame_format(self): + """Test HeatmapCollector with 4-element frame format.""" + collector = HeatmapCollector(sample_interval_usec=1000) + + frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("app.py", 100, "main", opcode=90), + MockFrameInfo("utils.py", 50, "helper", opcode=100), + MockFrameInfo("lib.py", 25, "process", opcode=None), + ], + ) + ], + ) + ] + collector.collect(frames) + + # Should have recorded data for the files + self.assertIn("app.py", collector.file_samples) + self.assertIn("utils.py", collector.file_samples) + self.assertIn("lib.py", collector.file_samples) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py index 4bb6877f16fda2..2e989968100957 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py +++ b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py @@ -5,17 +5,7 @@ THREAD_STATUS_ON_CPU, ) - -class MockFrameInfo: - """Mock FrameInfo for testing.""" - - def __init__(self, filename, lineno, funcname): - self.filename = filename - self.lineno = lineno - self.funcname = funcname - - def __repr__(self): - return f"MockFrameInfo(filename='{self.filename}', lineno={self.lineno}, funcname='{self.funcname}')" +from .mocks import LocationInfo, MockFrameInfo class MockThreadInfo: diff --git a/Lib/test/test_profiling/test_sampling_profiler/mocks.py b/Lib/test/test_profiling/test_sampling_profiler/mocks.py index 9f1cd5b83e0856..698bc3edb255e6 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/mocks.py +++ b/Lib/test/test_profiling/test_sampling_profiler/mocks.py @@ -1,16 +1,36 @@ """Mock classes for sampling profiler tests.""" +from collections import namedtuple + +# Matches the C structseq LocationInfo from _remote_debugging +LocationInfo = namedtuple('LocationInfo', ['lineno', 'end_lineno', 'col_offset', 'end_col_offset']) + class MockFrameInfo: - """Mock FrameInfo for testing since the real one isn't accessible.""" + """Mock FrameInfo for testing. + + Frame format: (filename, location, funcname, opcode) where: + - location is a tuple (lineno, end_lineno, col_offset, end_col_offset) + - opcode is an int or None + """ - def __init__(self, filename, lineno, funcname): + def __init__(self, filename, lineno, funcname, opcode=None): self.filename = filename - self.lineno = lineno self.funcname = funcname + self.opcode = opcode + self.location = LocationInfo(lineno, lineno, -1, -1) + + def __iter__(self): + return iter((self.filename, self.location, self.funcname, self.opcode)) + + def __getitem__(self, index): + return (self.filename, self.location, self.funcname, self.opcode)[index] + + def __len__(self): + return 4 def __repr__(self): - return f"MockFrameInfo(filename='{self.filename}', lineno={self.lineno}, funcname='{self.funcname}')" + return f"MockFrameInfo('{self.filename}', {self.location}, '{self.funcname}', {self.opcode})" class MockThreadInfo: diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index e8c12c2221549a..75c4e79591000b 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -14,9 +14,12 @@ FlamegraphCollector, ) from profiling.sampling.gecko_collector import GeckoCollector + from profiling.sampling.collector import extract_lineno, normalize_location + from profiling.sampling.opcode_utils import get_opcode_info, format_opcode from profiling.sampling.constants import ( PROFILING_MODE_WALL, PROFILING_MODE_CPU, + DEFAULT_LOCATION, ) from _remote_debugging import ( THREAD_STATUS_HAS_GIL, @@ -30,7 +33,7 @@ from test.support import captured_stdout, captured_stderr -from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo from .helpers import close_and_unlink @@ -42,9 +45,8 @@ def test_mock_frame_info_with_empty_and_unicode_values(self): # Test with empty strings frame = MockFrameInfo("", 0, "") self.assertEqual(frame.filename, "") - self.assertEqual(frame.lineno, 0) + self.assertEqual(frame.location.lineno, 0) self.assertEqual(frame.funcname, "") - self.assertIn("filename=''", repr(frame)) # Test with unicode characters frame = MockFrameInfo("文件.py", 42, "函数名") @@ -56,7 +58,7 @@ def test_mock_frame_info_with_empty_and_unicode_values(self): long_funcname = "func_" + "x" * 1000 frame = MockFrameInfo(long_filename, 999999, long_funcname) self.assertEqual(frame.filename, long_filename) - self.assertEqual(frame.lineno, 999999) + self.assertEqual(frame.location.lineno, 999999) self.assertEqual(frame.funcname, long_funcname) def test_pstats_collector_with_extreme_intervals_and_empty_data(self): @@ -78,7 +80,7 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self): test_frames = [ MockInterpreterInfo( 0, - [MockThreadInfo(None, [MockFrameInfo("file.py", 10, "func")])], + [MockThreadInfo(None, [MockFrameInfo("file.py", 10, "func", None)])], ) ] collector.collect(test_frames) @@ -193,7 +195,7 @@ def test_collapsed_stack_collector_with_empty_and_deep_stacks(self): # Test with single frame stack test_frames = [ MockInterpreterInfo( - 0, [MockThreadInfo(1, [("file.py", 10, "func")])] + 0, [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func")])] ) ] collector.collect(test_frames) @@ -204,7 +206,7 @@ def test_collapsed_stack_collector_with_empty_and_deep_stacks(self): self.assertEqual(count, 1) # Test with very deep stack - deep_stack = [(f"file{i}.py", i, f"func{i}") for i in range(100)] + deep_stack = [MockFrameInfo(f"file{i}.py", i, f"func{i}") for i in range(100)] test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, deep_stack)])] collector = CollapsedStackCollector(1000) collector.collect(test_frames) @@ -317,7 +319,7 @@ def test_collapsed_stack_collector_basic(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) @@ -343,7 +345,7 @@ def test_collapsed_stack_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) @@ -353,14 +355,14 @@ def test_collapsed_stack_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) ] # Same stack test_frames3 = [ MockInterpreterInfo( - 0, [MockThreadInfo(1, [("other.py", 5, "other_func")])] + 0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])] ) ] @@ -406,7 +408,7 @@ def test_flamegraph_collector_basic(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) @@ -454,7 +456,7 @@ def test_flamegraph_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) @@ -464,14 +466,14 @@ def test_flamegraph_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) ] # Same stack test_frames3 = [ MockInterpreterInfo( - 0, [MockThreadInfo(1, [("other.py", 5, "other_func")])] + 0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])] ) ] @@ -518,7 +520,7 @@ def test_gecko_collector_basic(self): [ MockThreadInfo( 1, - [("file.py", 10, "func1"), ("file.py", 20, "func2")], + [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")], ) ], ) @@ -608,7 +610,7 @@ def test_gecko_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) @@ -618,14 +620,14 @@ def test_gecko_collector_export(self): 0, [ MockThreadInfo( - 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + 1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")] ) ], ) ] # Same stack test_frames3 = [ MockInterpreterInfo( - 0, [MockThreadInfo(1, [("other.py", 5, "other_func")])] + 0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])] ) ] @@ -683,7 +685,7 @@ def test_gecko_collector_markers(self): [ MockThreadInfo( 1, - [("test.py", 10, "python_func")], + [MockFrameInfo("test.py", 10, "python_func")], status=HAS_GIL_ON_CPU, ) ], @@ -698,7 +700,7 @@ def test_gecko_collector_markers(self): [ MockThreadInfo( 1, - [("test.py", 15, "wait_func")], + [MockFrameInfo("test.py", 15, "wait_func")], status=WAITING_FOR_GIL, ) ], @@ -713,7 +715,7 @@ def test_gecko_collector_markers(self): [ MockThreadInfo( 1, - [("test.py", 20, "python_func2")], + [MockFrameInfo("test.py", 20, "python_func2")], status=HAS_GIL_ON_CPU, ) ], @@ -728,7 +730,7 @@ def test_gecko_collector_markers(self): [ MockThreadInfo( 1, - [("native.c", 100, "native_func")], + [MockFrameInfo("native.c", 100, "native_func")], status=NO_GIL_ON_CPU, ) ], @@ -902,8 +904,8 @@ def test_flamegraph_collector_stats_accumulation(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [MockFrameInfo("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -917,9 +919,9 @@ def test_flamegraph_collector_stats_accumulation(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_GIL_REQUESTED), - MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(3, [("c.py", 3, "func_c")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_GIL_REQUESTED), + MockThreadInfo(2, [MockFrameInfo("b.py", 2, "func_b")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(3, [MockFrameInfo("c.py", 3, "func_c")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -936,7 +938,7 @@ def test_flamegraph_collector_stats_accumulation(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("~", 0, "")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(1, [MockFrameInfo("~", 0, "")], status=THREAD_STATUS_HAS_GIL), ], ) ] @@ -960,9 +962,9 @@ def test_flamegraph_collector_per_thread_stats(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), - MockThreadInfo(3, [("c.py", 3, "func_c")], status=THREAD_STATUS_GIL_REQUESTED), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [MockFrameInfo("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(3, [MockFrameInfo("c.py", 3, "func_c")], status=THREAD_STATUS_GIL_REQUESTED), ], ) ] @@ -992,7 +994,7 @@ def test_flamegraph_collector_per_thread_stats(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -1012,7 +1014,7 @@ def test_flamegraph_collector_percentage_calculations(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), ], ) ] @@ -1023,7 +1025,7 @@ def test_flamegraph_collector_percentage_calculations(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -1046,7 +1048,7 @@ def test_flamegraph_collector_mode_handling(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), ], ) ] @@ -1085,8 +1087,8 @@ def test_flamegraph_collector_json_structure_includes_stats(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [MockFrameInfo("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -1142,13 +1144,13 @@ def test_flamegraph_collector_per_thread_gc_percentage(self): # First 5 samples: both threads, thread 1 has GC in 2 for i in range(5): has_gc = i < 2 # First 2 samples have GC for thread 1 - frames_1 = [("~", 0, "")] if has_gc else [("a.py", 1, "func_a")] + frames_1 = [MockFrameInfo("~", 0, "")] if has_gc else [MockFrameInfo("a.py", 1, "func_a")] stack_frames = [ MockInterpreterInfo( 0, [ MockThreadInfo(1, frames_1, status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(2, [MockFrameInfo("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -1162,8 +1164,8 @@ def test_flamegraph_collector_per_thread_gc_percentage(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), - MockThreadInfo(2, [("~", 0, "")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [MockFrameInfo("~", 0, "")], status=THREAD_STATUS_ON_CPU), ], ) ] @@ -1173,7 +1175,7 @@ def test_flamegraph_collector_per_thread_gc_percentage(self): MockInterpreterInfo( 0, [ - MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(1, [MockFrameInfo("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), ], ) ] @@ -1201,3 +1203,434 @@ def test_flamegraph_collector_per_thread_gc_percentage(self): self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1) self.assertEqual(collector.per_thread_stats[2]["total"], 6) self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1) + + +class TestLocationHelpers(unittest.TestCase): + """Tests for location handling helper functions.""" + + def test_extract_lineno_from_location_info(self): + """Test extracting lineno from LocationInfo namedtuple.""" + loc = LocationInfo(42, 45, 0, 10) + self.assertEqual(extract_lineno(loc), 42) + + def test_extract_lineno_from_tuple(self): + """Test extracting lineno from plain tuple.""" + loc = (100, 105, 5, 20) + self.assertEqual(extract_lineno(loc), 100) + + def test_extract_lineno_from_none(self): + """Test extracting lineno from None (synthetic frames).""" + self.assertEqual(extract_lineno(None), 0) + + def test_normalize_location_with_location_info(self): + """Test normalize_location passes through LocationInfo.""" + loc = LocationInfo(10, 15, 0, 5) + result = normalize_location(loc) + self.assertEqual(result, loc) + + def test_normalize_location_with_tuple(self): + """Test normalize_location passes through tuple.""" + loc = (10, 15, 0, 5) + result = normalize_location(loc) + self.assertEqual(result, loc) + + def test_normalize_location_with_none(self): + """Test normalize_location returns DEFAULT_LOCATION for None.""" + result = normalize_location(None) + self.assertEqual(result, DEFAULT_LOCATION) + self.assertEqual(result, (0, 0, -1, -1)) + + +class TestOpcodeFormatting(unittest.TestCase): + """Tests for opcode formatting utilities.""" + + def test_get_opcode_info_standard_opcode(self): + """Test get_opcode_info for a standard opcode.""" + import opcode + # LOAD_CONST is a standard opcode + load_const = opcode.opmap.get('LOAD_CONST') + if load_const is not None: + info = get_opcode_info(load_const) + self.assertEqual(info['opname'], 'LOAD_CONST') + self.assertEqual(info['base_opname'], 'LOAD_CONST') + self.assertFalse(info['is_specialized']) + + def test_get_opcode_info_unknown_opcode(self): + """Test get_opcode_info for an unknown opcode.""" + info = get_opcode_info(999) + self.assertEqual(info['opname'], '<999>') + self.assertEqual(info['base_opname'], '<999>') + self.assertFalse(info['is_specialized']) + + def test_format_opcode_standard(self): + """Test format_opcode for a standard opcode.""" + import opcode + load_const = opcode.opmap.get('LOAD_CONST') + if load_const is not None: + formatted = format_opcode(load_const) + self.assertEqual(formatted, 'LOAD_CONST') + + def test_format_opcode_specialized(self): + """Test format_opcode for a specialized opcode shows base in parens.""" + import opcode + if not hasattr(opcode, '_specialized_opmap'): + self.skipTest("No specialized opcodes in this Python version") + if not hasattr(opcode, '_specializations'): + self.skipTest("No specialization info in this Python version") + + # Find any specialized opcode to test + for base_name, variants in opcode._specializations.items(): + if not variants: + continue + variant_name = variants[0] + variant_opcode = opcode._specialized_opmap.get(variant_name) + if variant_opcode is None: + continue + formatted = format_opcode(variant_opcode) + # Should show: VARIANT_NAME (BASE_NAME) + self.assertIn(variant_name, formatted) + self.assertIn(f'({base_name})', formatted) + return + + self.skipTest("No specialized opcodes found") + + def test_format_opcode_unknown(self): + """Test format_opcode for an unknown opcode.""" + formatted = format_opcode(999) + self.assertEqual(formatted, '<999>') + + +class TestLocationInCollectors(unittest.TestCase): + """Tests for location tuple handling in each collector.""" + + def _make_frames_with_location(self, location, opcode=None): + """Create test frames with a specific location.""" + frame = MockFrameInfo("test.py", 0, "test_func", opcode) + # Override the location + frame.location = location + return [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + + def test_pstats_collector_with_location_info(self): + """Test PstatsCollector handles LocationInfo properly.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Frame with LocationInfo + frame = MockFrameInfo("test.py", 42, "my_function") + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + # Should extract lineno from location + key = ("test.py", 42, "my_function") + self.assertIn(key, collector.result) + self.assertEqual(collector.result[key]["direct_calls"], 1) + + def test_pstats_collector_with_none_location(self): + """Test PstatsCollector handles None location (synthetic frames).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Create frame with None location (like GC frame) + frame = MockFrameInfo("~", 0, "") + frame.location = None # Synthetic frame has no location + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + # Should use lineno=0 for None location + key = ("~", 0, "") + self.assertIn(key, collector.result) + + def test_collapsed_stack_with_location_info(self): + """Test CollapsedStackCollector handles LocationInfo properly.""" + collector = CollapsedStackCollector(1000) + + frame1 = MockFrameInfo("main.py", 10, "main") + frame2 = MockFrameInfo("utils.py", 25, "helper") + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame1, frame2], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + # Check that linenos were extracted correctly + self.assertEqual(len(collector.stack_counter), 1) + (path, _), count = list(collector.stack_counter.items())[0] + # Reversed order: helper at top, main at bottom + self.assertEqual(path[0], ("utils.py", 25, "helper")) + self.assertEqual(path[1], ("main.py", 10, "main")) + + def test_flamegraph_collector_with_location_info(self): + """Test FlamegraphCollector handles LocationInfo properly.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + frame = MockFrameInfo("app.py", 100, "process_data") + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + data = collector._convert_to_flamegraph_format() + # Verify the function name includes lineno from location + strings = data.get("strings", []) + name_found = any("process_data" in s and "100" in s for s in strings if isinstance(s, str)) + self.assertTrue(name_found, f"Expected to find 'process_data' with line 100 in {strings}") + + def test_gecko_collector_with_location_info(self): + """Test GeckoCollector handles LocationInfo properly.""" + collector = GeckoCollector(sample_interval_usec=1000) + + frame = MockFrameInfo("server.py", 50, "handle_request") + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + profile = collector._build_profile() + # Check that the function was recorded + self.assertEqual(len(profile["threads"]), 1) + thread_data = profile["threads"][0] + string_array = profile["shared"]["stringArray"] + + # Verify function name is in string table + self.assertIn("handle_request", string_array) + + +class TestOpcodeHandling(unittest.TestCase): + """Tests for opcode field handling in collectors.""" + + def test_frame_with_opcode(self): + """Test MockFrameInfo properly stores opcode.""" + frame = MockFrameInfo("test.py", 10, "my_func", opcode=90) + self.assertEqual(frame.opcode, 90) + # Verify tuple representation includes opcode + self.assertEqual(frame[3], 90) + self.assertEqual(len(frame), 4) + + def test_frame_without_opcode(self): + """Test MockFrameInfo with no opcode defaults to None.""" + frame = MockFrameInfo("test.py", 10, "my_func") + self.assertIsNone(frame.opcode) + self.assertIsNone(frame[3]) + + def test_collectors_ignore_opcode_for_key_generation(self): + """Test that collectors use (filename, lineno, funcname) as key, not opcode.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Same function, different opcodes + frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) + frame2 = MockFrameInfo("test.py", 10, "func", opcode=100) + + frames1 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame1], status=THREAD_STATUS_HAS_GIL)] + ) + ] + frames2 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame2], status=THREAD_STATUS_HAS_GIL)] + ) + ] + + collector.collect(frames1) + collector.collect(frames2) + + # Should be counted as same function (opcode not in key) + key = ("test.py", 10, "func") + self.assertIn(key, collector.result) + self.assertEqual(collector.result[key]["direct_calls"], 2) + + +class TestGeckoOpcodeMarkers(unittest.TestCase): + """Tests for GeckoCollector opcode interval markers.""" + + def test_gecko_collector_opcodes_disabled_by_default(self): + """Test that opcode tracking is disabled by default.""" + collector = GeckoCollector(sample_interval_usec=1000) + self.assertFalse(collector.opcodes_enabled) + + def test_gecko_collector_opcodes_enabled(self): + """Test that opcode tracking can be enabled.""" + collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + self.assertTrue(collector.opcodes_enabled) + + def test_gecko_opcode_state_tracking(self): + """Test that GeckoCollector tracks opcode state changes.""" + collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + + # First sample with opcode 90 (RAISE_VARARGS) + frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) + frames1 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame1], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames1) + + # Should start tracking this opcode state + self.assertIn(1, collector.opcode_state) + state = collector.opcode_state[1] + self.assertEqual(state[0], 90) # opcode + self.assertEqual(state[1], 10) # lineno + self.assertEqual(state[3], "func") # funcname + + def test_gecko_opcode_state_change_emits_marker(self): + """Test that opcode state change emits an interval marker.""" + collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + + # First sample: opcode 90 + frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) + frames1 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame1], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames1) + + # Second sample: different opcode 100 + frame2 = MockFrameInfo("test.py", 10, "func", opcode=100) + frames2 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame2], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames2) + + # Should have emitted a marker for the first opcode + thread_data = collector.threads[1] + markers = thread_data["markers"] + # At least one marker should have been added + self.assertGreater(len(markers["name"]), 0) + + def test_gecko_opcode_markers_not_emitted_when_disabled(self): + """Test that no opcode markers when opcodes=False.""" + collector = GeckoCollector(sample_interval_usec=1000, opcodes=False) + + frame1 = MockFrameInfo("test.py", 10, "func", opcode=90) + frames1 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame1], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames1) + + frame2 = MockFrameInfo("test.py", 10, "func", opcode=100) + frames2 = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame2], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames2) + + # opcode_state should not be tracked + self.assertEqual(len(collector.opcode_state), 0) + + def test_gecko_opcode_with_none_opcode(self): + """Test that None opcode doesn't cause issues.""" + collector = GeckoCollector(sample_interval_usec=1000, opcodes=True) + + # Frame with no opcode (None) + frame = MockFrameInfo("test.py", 10, "func", opcode=None) + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + # Should track the state but opcode is None + self.assertIn(1, collector.opcode_state) + self.assertIsNone(collector.opcode_state[1][0]) + + +class TestCollectorFrameFormat(unittest.TestCase): + """Tests verifying all collectors handle the 4-element frame format.""" + + def _make_sample_frames(self): + """Create sample frames with full format: (filename, location, funcname, opcode).""" + return [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("app.py", 100, "main", opcode=90), + MockFrameInfo("utils.py", 50, "helper", opcode=100), + MockFrameInfo("lib.py", 25, "process", opcode=None), + ], + status=THREAD_STATUS_HAS_GIL, + ) + ], + ) + ] + + def test_pstats_collector_frame_format(self): + """Test PstatsCollector with 4-element frame format.""" + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect(self._make_sample_frames()) + + # All three functions should be recorded + self.assertEqual(len(collector.result), 3) + self.assertIn(("app.py", 100, "main"), collector.result) + self.assertIn(("utils.py", 50, "helper"), collector.result) + self.assertIn(("lib.py", 25, "process"), collector.result) + + def test_collapsed_stack_frame_format(self): + """Test CollapsedStackCollector with 4-element frame format.""" + collector = CollapsedStackCollector(sample_interval_usec=1000) + collector.collect(self._make_sample_frames()) + + self.assertEqual(len(collector.stack_counter), 1) + (path, _), _ = list(collector.stack_counter.items())[0] + # 3 frames in the path (reversed order) + self.assertEqual(len(path), 3) + + def test_flamegraph_collector_frame_format(self): + """Test FlamegraphCollector with 4-element frame format.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + collector.collect(self._make_sample_frames()) + + data = collector._convert_to_flamegraph_format() + # Should have processed the frames + self.assertIn("children", data) + + def test_gecko_collector_frame_format(self): + """Test GeckoCollector with 4-element frame format.""" + collector = GeckoCollector(sample_interval_usec=1000) + collector.collect(self._make_sample_frames()) + + profile = collector._build_profile() + # Should have one thread with the frames + self.assertEqual(len(profile["threads"]), 1) + thread = profile["threads"][0] + # Should have recorded 3 functions + self.assertEqual(thread["funcTable"]["length"], 3) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index e4c5032425ddcd..9d8f55345635ac 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -34,7 +34,7 @@ skip_if_not_supported, PROCESS_VM_READV_SUPPORTED, ) -from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo # Duration for profiling tests - long enough for process to complete naturally PROFILING_TIMEOUT = str(int(SHORT_TIMEOUT)) @@ -301,10 +301,10 @@ def test_collapsed_stack_with_recursion(self): MockThreadInfo( 1, [ - ("factorial.py", 10, "factorial"), - ("factorial.py", 10, "factorial"), # recursive - ("factorial.py", 10, "factorial"), # deeper - ("main.py", 5, "main"), + MockFrameInfo("factorial.py", 10, "factorial"), + MockFrameInfo("factorial.py", 10, "factorial"), # recursive + MockFrameInfo("factorial.py", 10, "factorial"), # deeper + MockFrameInfo("main.py", 5, "main"), ], ) ], @@ -315,13 +315,9 @@ def test_collapsed_stack_with_recursion(self): MockThreadInfo( 1, [ - ("factorial.py", 10, "factorial"), - ( - "factorial.py", - 10, - "factorial", - ), # different depth - ("main.py", 5, "main"), + MockFrameInfo("factorial.py", 10, "factorial"), + MockFrameInfo("factorial.py", 10, "factorial"), # different depth + MockFrameInfo("main.py", 5, "main"), ], ) ], @@ -385,7 +381,7 @@ def cpu_intensive_work(): def main_loop(): """Main test loop.""" - max_iterations = 200 + max_iterations = 1000 for iteration in range(max_iterations): if iteration % 2 == 0: From 965f5210f4d2f902f0793ae96ffb3fde5e553a86 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 7 Dec 2025 04:45:52 +0000 Subject: [PATCH 08/24] Better test --- Lib/test/test_external_inspection.py | 94 +++++++--------------------- 1 file changed, 24 insertions(+), 70 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 61441d20c1a7b2..971708b3e09291 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1878,66 +1878,33 @@ def test_opcodes_collection(self): """Test that opcodes are collected when the opcodes flag is set.""" script = textwrap.dedent( """\ - import time - import sys - import socket - - def compute(): - # Do some work that involves bytecode execution - total = 0 - for i in range(1000): - total += i - return total - - def bar(): - compute() - - def foo(): - bar() + import time, sys, socket - # Signal that we're ready sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) - sock.sendall(b"ready") - sock.close() - # Keep computing in a loop - while True: - foo() + def foo(): + sock.sendall(b"ready") + time.sleep(10_000) + + foo() """ ) def get_trace_with_opcodes(pid): - unwinder = RemoteUnwinder(pid, opcodes=True) - return unwinder.get_stack_trace() + return RemoteUnwinder(pid, opcodes=True).get_stack_trace() stack_trace, _ = self._run_script_and_get_trace( - script, - get_trace_with_opcodes, - wait_for_signals=b"ready", + script, get_trace_with_opcodes, wait_for_signals=b"ready" ) - # Find the thread with our compute/bar/foo stack - found_opcodes = False - for interpreter_info in stack_trace: - for thread_info in interpreter_info.threads: - for frame in thread_info.frame_info: - # Check that frames have opcodes (not None) - # when opcodes=True is set - if frame.funcname in ("compute", "bar", "foo"): - # Opcode should be an integer, not None - self.assertIsInstance( - frame.opcode, - int, - f"Expected opcode to be int for {frame.funcname}, got {type(frame.opcode)}" - ) - self.assertGreaterEqual(frame.opcode, 0) - found_opcodes = True - - self.assertTrue( - found_opcodes, - "Did not find any frames with opcodes from compute/bar/foo" + # Find our foo frame and verify it has an opcode + foo_frame = self._find_frame_in_trace( + stack_trace, lambda f: f.funcname == "foo" ) + self.assertIsNotNone(foo_frame, "Could not find foo frame") + self.assertIsInstance(foo_frame.opcode, int) + self.assertGreaterEqual(foo_frame.opcode, 0) @skip_if_not_supported @unittest.skipIf( @@ -1950,10 +1917,10 @@ def test_location_tuple_format(self): """\ import time, sys, socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + def foo(): - x = 1 + 2 - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect(('localhost', {port})) sock.sendall(b"ready") time.sleep(10_000) @@ -1962,13 +1929,10 @@ def foo(): ) def get_trace_with_opcodes(pid): - unwinder = RemoteUnwinder(pid, opcodes=True) - return unwinder.get_stack_trace() + return RemoteUnwinder(pid, opcodes=True).get_stack_trace() - stack_trace, script_name = self._run_script_and_get_trace( - script, - get_trace_with_opcodes, - wait_for_signals=b"ready", + stack_trace, _ = self._run_script_and_get_trace( + script, get_trace_with_opcodes, wait_for_signals=b"ready" ) # Find our foo frame @@ -1977,25 +1941,15 @@ def get_trace_with_opcodes(pid): ) self.assertIsNotNone(foo_frame, "Could not find foo frame") - # Check location is a tuple with 4 elements + # Check location is a 4-tuple with valid values location = foo_frame.location - self.assertIsInstance(location, tuple, "Location should be a tuple") - self.assertEqual( - len(location), 4, - f"Location should have 4 elements (lineno, end_lineno, col_offset, end_col_offset), got {len(location)}" - ) - + self.assertIsInstance(location, tuple) + self.assertEqual(len(location), 4) lineno, end_lineno, col_offset, end_col_offset = location - - # Lineno should be positive self.assertIsInstance(lineno, int) - self.assertGreater(lineno, 0, "lineno should be positive") - - # end_lineno should be >= lineno + self.assertGreater(lineno, 0) self.assertIsInstance(end_lineno, int) self.assertGreaterEqual(end_lineno, lineno) - - # col_offset and end_col_offset should be non-negative self.assertIsInstance(col_offset, int) self.assertGreaterEqual(col_offset, 0) self.assertIsInstance(end_col_offset, int) From 12c02f66124354c4a88f498b1bfd819793356f87 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 7 Dec 2025 23:22:12 +0000 Subject: [PATCH 09/24] Add news entry --- Lib/profiling/sampling/opcode_utils.py | 30 ++++++++++--------- .../_live_collector_helpers.py | 1 - .../test_integration.py | 2 +- ...-12-07-23-21-13.gh-issue-138122.m3EF9E.rst | 5 ++++ 4 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-07-23-21-13.gh-issue-138122.m3EF9E.rst diff --git a/Lib/profiling/sampling/opcode_utils.py b/Lib/profiling/sampling/opcode_utils.py index 3d6a263f0b1773..71b35383da153d 100644 --- a/Lib/profiling/sampling/opcode_utils.py +++ b/Lib/profiling/sampling/opcode_utils.py @@ -11,7 +11,7 @@ # Build opcode name mapping: opcode number -> opcode name # This includes both standard opcodes and specialized variants (Python 3.11+) _OPCODE_NAMES = dict(enumerate(opcode.opname)) -if hasattr(opcode, '_specialized_opmap'): +if hasattr(opcode, "_specialized_opmap"): for name, op in opcode._specialized_opmap.items(): _OPCODE_NAMES[op] = name @@ -20,7 +20,9 @@ # LOAD_ATTR can be replaced at runtime with specialized variants like # LOAD_ATTR_INSTANCE_VALUE. This mapping lets us show both forms. _DEOPT_MAP = {} -if hasattr(opcode, '_specializations') and hasattr(opcode, '_specialized_opmap'): +if hasattr(opcode, "_specializations") and hasattr( + opcode, "_specialized_opmap" +): for base_name, variant_names in opcode._specializations.items(): base_opcode = opcode.opmap.get(base_name) if base_opcode is not None: @@ -45,24 +47,24 @@ def get_opcode_info(opcode_num): opname = _OPCODE_NAMES.get(opcode_num) if opname is None: return { - 'opname': f'<{opcode_num}>', - 'base_opname': f'<{opcode_num}>', - 'is_specialized': False, + "opname": f"<{opcode_num}>", + "base_opname": f"<{opcode_num}>", + "is_specialized": False, } base_opcode = _DEOPT_MAP.get(opcode_num) if base_opcode is not None: - base_opname = _OPCODE_NAMES.get(base_opcode, f'<{base_opcode}>') + base_opname = _OPCODE_NAMES.get(base_opcode, f"<{base_opcode}>") return { - 'opname': opname, - 'base_opname': base_opname, - 'is_specialized': True, + "opname": opname, + "base_opname": base_opname, + "is_specialized": True, } return { - 'opname': opname, - 'base_opname': opname, - 'is_specialized': False, + "opname": opname, + "base_opname": opname, + "is_specialized": False, } @@ -76,9 +78,9 @@ def format_opcode(opcode_num): A formatted string like 'LOAD_ATTR' or 'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)' """ info = get_opcode_info(opcode_num) - if info['is_specialized']: + if info["is_specialized"]: return f"{info['opname']} ({info['base_opname']})" - return info['opname'] + return info["opname"] def get_opcode_mapping(): diff --git a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py index 2e989968100957..05e6cc33edc5f8 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py +++ b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py @@ -5,7 +5,6 @@ THREAD_STATUS_ON_CPU, ) -from .mocks import LocationInfo, MockFrameInfo class MockThreadInfo: diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index cc8cd4ff095ada..029952da697751 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -34,7 +34,7 @@ skip_if_not_supported, PROCESS_VM_READV_SUPPORTED, ) -from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo # Duration for profiling tests - long enough for process to complete naturally PROFILING_TIMEOUT = str(int(SHORT_TIMEOUT)) diff --git a/Misc/NEWS.d/next/Library/2025-12-07-23-21-13.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Library/2025-12-07-23-21-13.gh-issue-138122.m3EF9E.rst new file mode 100644 index 00000000000000..5cc54e68f24848 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-07-23-21-13.gh-issue-138122.m3EF9E.rst @@ -0,0 +1,5 @@ +Add bytecode-level instruction profiling to the sampling profiler via the +new ``--opcodes`` flag. When enabled, the profiler captures which bytecode +opcode is executing at each sample, including Python 3.11+ adaptive +specializations, and visualizes this data in the heatmap, flamegraph, gecko, +and live output formats. Patch by Pablo Galindo From c10628acb7cd0dee9c1545e81d23c887086f66f8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 7 Dec 2025 23:40:04 +0000 Subject: [PATCH 10/24] CI fixes --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 +++ .../_live_collector_helpers.py | 32 +++++++++++++++++++ 5 files changed, 39 insertions(+) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index d23d6d4f91bc28..6473a3c64a6c23 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1937,6 +1937,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_keys)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(oparg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcode)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcodes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(open)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opener)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(operation)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 5c3ea474ad09b7..ec720de2524e6e 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -660,6 +660,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(only_keys) STRUCT_FOR_ID(oparg) STRUCT_FOR_ID(opcode) + STRUCT_FOR_ID(opcodes) STRUCT_FOR_ID(open) STRUCT_FOR_ID(opener) STRUCT_FOR_ID(operation) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 31d88339a13425..b32083db98e29e 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1935,6 +1935,7 @@ extern "C" { INIT_ID(only_keys), \ INIT_ID(oparg), \ INIT_ID(opcode), \ + INIT_ID(opcodes), \ INIT_ID(open), \ INIT_ID(opener), \ INIT_ID(operation), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c5b01ff9876643..f3756fde2c4073 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2420,6 +2420,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(opcodes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(open); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py index 05e6cc33edc5f8..2c672895099140 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py +++ b/Lib/test/test_profiling/test_sampling_profiler/_live_collector_helpers.py @@ -1,11 +1,43 @@ """Common test helpers and mocks for live collector tests.""" +from collections import namedtuple + from profiling.sampling.constants import ( THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, ) +# Matches the C structseq LocationInfo from _remote_debugging +LocationInfo = namedtuple('LocationInfo', ['lineno', 'end_lineno', 'col_offset', 'end_col_offset']) + + +class MockFrameInfo: + """Mock FrameInfo for testing. + + Frame format: (filename, location, funcname, opcode) where: + - location is a tuple (lineno, end_lineno, col_offset, end_col_offset) + - opcode is an int or None + """ + + def __init__(self, filename, lineno, funcname, opcode=None): + self.filename = filename + self.funcname = funcname + self.opcode = opcode + self.location = LocationInfo(lineno, lineno, -1, -1) + + def __iter__(self): + return iter((self.filename, self.location, self.funcname, self.opcode)) + + def __getitem__(self, index): + return (self.filename, self.location, self.funcname, self.opcode)[index] + + def __len__(self): + return 4 + + def __repr__(self): + return f"MockFrameInfo('{self.filename}', {self.location}, '{self.funcname}', {self.opcode})" + class MockThreadInfo: """Mock ThreadInfo for testing.""" From 04563f02765bc6c54fb7e55fad4ab9af1659c9fc Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 8 Dec 2025 00:10:56 +0000 Subject: [PATCH 11/24] CI fixes --- Lib/test/test_external_inspection.py | 250 +++++++++++++-------------- 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 53fdf166e25dcf..05da8deb31c1a3 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -379,6 +379,31 @@ def _extract_coroutine_stacks(self, stack_trace): for task in stack_trace[0].awaited_by } + @staticmethod + def _frame_to_lineno_tuple(frame): + """Convert frame to (filename, lineno, funcname, opcode) tuple. + + This extracts just the line number from the location, ignoring column + offsets which can vary due to sampling timing (e.g., when two statements + are on the same line, the sample might catch either one). + """ + filename, location, funcname, opcode = frame + return (filename, location.lineno, funcname, opcode) + + def _extract_coroutine_stacks_lineno_only(self, stack_trace): + """Extract coroutine stacks with line numbers only (no column offsets). + + Use this for tests where sampling timing can cause column offset + variations (e.g., 'expr1; expr2' on the same line). + """ + return { + task.task_name: sorted( + tuple(self._frame_to_lineno_tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + for task in stack_trace[0].awaited_by + } + # ============================================================================ # Test classes @@ -582,8 +607,10 @@ def new_eager_loop(): }, ) - # Check coroutine stacks - coroutine_stacks = self._extract_coroutine_stacks( + # Check coroutine stacks (using line numbers only to avoid + # flakiness from column offset variations when sampling + # catches different statements on the same line) + coroutine_stacks = self._extract_coroutine_stacks_lineno_only( stack_trace ) self.assertEqual( @@ -591,50 +618,36 @@ def new_eager_loop(): { "Task-1": [ ( - tuple( - [ - taskgroups.__file__, - (121, 121, 16, 44), - "TaskGroup._aexit", - None, - ] - ), - tuple( - [ - taskgroups.__file__, - (72, 72, 19, 45), - "TaskGroup.__aexit__", - None, - ] - ), - tuple([script_name, (26, 26, 15, 34), "main", None]), + (taskgroups.__file__, 121, "TaskGroup._aexit", None), + (taskgroups.__file__, 72, "TaskGroup.__aexit__", None), + (script_name, 26, "main", None), ) ], "c2_root": [ ( - tuple([script_name, (10, 10, 28, 46), "c5", None]), - tuple([script_name, (14, 14, 4, 8), "c4", None]), - tuple([script_name, (17, 17, 4, 14), "c3", None]), - tuple([script_name, (20, 20, 4, 14), "c2", None]), + (script_name, 10, "c5", None), + (script_name, 14, "c4", None), + (script_name, 17, "c3", None), + (script_name, 20, "c2", None), ) ], "sub_main_1": [ - (tuple([script_name, (23, 23, 4, 14), "c1", None]),) + ((script_name, 23, "c1", None),) ], "sub_main_2": [ - (tuple([script_name, (23, 23, 4, 14), "c1", None]),) + ((script_name, 23, "c1", None),) ], }, ) - # Check awaited_by coroutine stacks + # Check awaited_by coroutine stacks (line numbers only) id_to_task = self._get_task_id_map(stack_trace) awaited_by_coroutine_stacks = { task.task_name: sorted( ( id_to_task[coro.task_name].task_name, tuple( - tuple(frame) + self._frame_to_lineno_tuple(frame) for frame in coro.call_stack ), ) @@ -650,55 +663,27 @@ def new_eager_loop(): ( "Task-1", ( - tuple( - [ - taskgroups.__file__, - (121, 121, 16, 44), - "TaskGroup._aexit", - None, - ] - ), - tuple( - [ - taskgroups.__file__, - (72, 72, 19, 45), - "TaskGroup.__aexit__", - None, - ] - ), - tuple([script_name, (26, 26, 15, 34), "main", None]), + (taskgroups.__file__, 121, "TaskGroup._aexit", None), + (taskgroups.__file__, 72, "TaskGroup.__aexit__", None), + (script_name, 26, "main", None), ), ), ( "sub_main_1", - (tuple([script_name, (23, 23, 4, 14), "c1", None]),), + ((script_name, 23, "c1", None),), ), ( "sub_main_2", - (tuple([script_name, (23, 23, 4, 14), "c1", None]),), + ((script_name, 23, "c1", None),), ), ], "sub_main_1": [ ( "Task-1", ( - tuple( - [ - taskgroups.__file__, - (121, 121, 16, 44), - "TaskGroup._aexit", - None, - ] - ), - tuple( - [ - taskgroups.__file__, - (72, 72, 19, 45), - "TaskGroup.__aexit__", - None, - ] - ), - tuple([script_name, (26, 26, 15, 34), "main", None]), + (taskgroups.__file__, 121, "TaskGroup._aexit", None), + (taskgroups.__file__, 72, "TaskGroup.__aexit__", None), + (script_name, 26, "main", None), ), ) ], @@ -706,23 +691,9 @@ def new_eager_loop(): ( "Task-1", ( - tuple( - [ - taskgroups.__file__, - (121, 121, 16, 44), - "TaskGroup._aexit", - None, - ] - ), - tuple( - [ - taskgroups.__file__, - (72, 72, 19, 45), - "TaskGroup.__aexit__", - None, - ] - ), - tuple([script_name, (26, 26, 15, 34), "main", None]), + (taskgroups.__file__, 121, "TaskGroup._aexit", None), + (taskgroups.__file__, 72, "TaskGroup.__aexit__", None), + (script_name, 26, "main", None), ), ) ], @@ -794,18 +765,20 @@ async def main(): task = stack_trace[0].awaited_by[0] self.assertEqual(task.task_name, "Task-1") - # Check the coroutine stack + # Check the coroutine stack (using line numbers only to avoid + # flakiness from column offset variations when sampling + # catches different statements on the same line) coroutine_stack = sorted( - tuple(tuple(frame) for frame in coro.call_stack) + tuple(self._frame_to_lineno_tuple(frame) for frame in coro.call_stack) for coro in task.coroutine_stack ) self.assertEqual( coroutine_stack, [ ( - tuple([script_name, (10, 10, 28, 46), "gen_nested_call", None]), - tuple([script_name, (16, 16, 12, 35), "gen", None]), - tuple([script_name, (19, 20, 4, 12), "main", None]), + (script_name, 10, "gen_nested_call", None), + (script_name, 16, "gen", None), + (script_name, 19, "main", None), ) ], ) @@ -893,31 +866,33 @@ async def main(): }, ) - # Check coroutine stacks - coroutine_stacks = self._extract_coroutine_stacks( + # Check coroutine stacks (using line numbers only to avoid + # flakiness from column offset variations when sampling + # catches different statements on the same line) + coroutine_stacks = self._extract_coroutine_stacks_lineno_only( stack_trace ) self.assertEqual( coroutine_stacks, { - "Task-1": [(tuple([script_name, (21, 21, 4, 36), "main", None]),)], + "Task-1": [((script_name, 21, "main", None),)], "Task-2": [ ( - tuple([script_name, (11, 11, 28, 46), "deep", None]), - tuple([script_name, (15, 15, 4, 16), "c1", None]), + (script_name, 11, "deep", None), + (script_name, 15, "c1", None), ) ], }, ) - # Check awaited_by coroutine stacks + # Check awaited_by coroutine stacks (line numbers only) id_to_task = self._get_task_id_map(stack_trace) awaited_by_coroutine_stacks = { task.task_name: sorted( ( id_to_task[coro.task_name].task_name, tuple( - tuple(frame) for frame in coro.call_stack + self._frame_to_lineno_tuple(frame) for frame in coro.call_stack ), ) for coro in task.awaited_by @@ -929,7 +904,7 @@ async def main(): { "Task-1": [], "Task-2": [ - ("Task-1", (tuple([script_name, (21, 21, 4, 36), "main", None]),)) + ("Task-1", ((script_name, 21, "main", None),)) ], }, ) @@ -1017,8 +992,10 @@ async def main(): }, ) - # Check coroutine stacks - coroutine_stacks = self._extract_coroutine_stacks( + # Check coroutine stacks (using line numbers only to avoid + # flakiness from column offset variations when sampling + # catches different statements on the same line) + coroutine_stacks = self._extract_coroutine_stacks_lineno_only( stack_trace ) self.assertEqual( @@ -1026,42 +1003,28 @@ async def main(): { "Task-1": [ ( - tuple( - [ - staggered.__file__, - (164, 164, 16, 38), - "staggered_race", - None, - ] - ), - tuple([script_name, (21, 24, 4, 5), "main", None]), + (staggered.__file__, 164, "staggered_race", None), + (script_name, 21, "main", None), ) ], "Task-2": [ ( - tuple([script_name, (11, 11, 28, 46), "deep", None]), - tuple([script_name, (15, 15, 4, 16), "c1", None]), - tuple( - [ - staggered.__file__, - (126, 126, 21, 36), - "staggered_race..run_one_coro", - None, - ] - ), + (script_name, 11, "deep", None), + (script_name, 15, "c1", None), + (staggered.__file__, 126, "staggered_race..run_one_coro", None), ) ], }, ) - # Check awaited_by coroutine stacks + # Check awaited_by coroutine stacks (line numbers only) id_to_task = self._get_task_id_map(stack_trace) awaited_by_coroutine_stacks = { task.task_name: sorted( ( id_to_task[coro.task_name].task_name, tuple( - tuple(frame) for frame in coro.call_stack + self._frame_to_lineno_tuple(frame) for frame in coro.call_stack ), ) for coro in task.awaited_by @@ -1076,15 +1039,8 @@ async def main(): ( "Task-1", ( - tuple( - [ - staggered.__file__, - (164, 164, 16, 38), - "staggered_race", - None, - ] - ), - tuple([script_name, (21, 24, 4, 5), "main", None]), + (staggered.__file__, 164, "staggered_race", None), + (script_name, 21, "main", None), ), ) ], @@ -1931,6 +1887,50 @@ def get_trace_with_opcodes(pid): self.assertIsInstance(end_col_offset, int) self.assertGreaterEqual(end_col_offset, col_offset) + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_location_tuple_exact_values(self): + """Test exact values of location tuple including column offsets.""" + script = textwrap.dedent( + """\ + import time, sys, socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + + def foo(): + sock.sendall(b"ready") + time.sleep(10_000) + + foo() + """ + ) + + def get_trace_with_opcodes(pid): + return RemoteUnwinder(pid, opcodes=True).get_stack_trace() + + stack_trace, _ = self._run_script_and_get_trace( + script, get_trace_with_opcodes, wait_for_signals=b"ready" + ) + + foo_frame = self._find_frame_in_trace( + stack_trace, lambda f: f.funcname == "foo" + ) + self.assertIsNotNone(foo_frame, "Could not find foo frame") + + # Can catch either sock.sendall (line 7) or time.sleep (line 8) + location = foo_frame.location + valid_locations = [ + (7, 7, 4, 26), # sock.sendall(b"ready") + (8, 8, 4, 22), # time.sleep(10_000) + ] + actual = (location.lineno, location.end_lineno, + location.col_offset, location.end_col_offset) + self.assertIn(actual, valid_locations) + class TestUnsupportedPlatformHandling(unittest.TestCase): @unittest.skipIf( From f3688901f26a320f46618bb4833958b07da91826 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 8 Dec 2025 15:33:59 +0000 Subject: [PATCH 12/24] Fix C-API calls --- Modules/_remote_debugging/code_objects.c | 33 +++++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index 091248c639c0ae..98fe74e8cb6331 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -221,10 +221,35 @@ make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno, set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo"); return NULL; } - PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno)); - PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno)); - PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset)); - PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset)); + + PyObject *py_lineno = PyLong_FromLong(lineno); + if (py_lineno == NULL) { + Py_DECREF(info); + return NULL; + } + PyStructSequence_SetItem(info, 0, py_lineno); // steals reference + + PyObject *py_end_lineno = PyLong_FromLong(end_lineno); + if (py_end_lineno == NULL) { + Py_DECREF(info); + return NULL; + } + PyStructSequence_SetItem(info, 1, py_end_lineno); // steals reference + + PyObject *py_col_offset = PyLong_FromLong(col_offset); + if (py_col_offset == NULL) { + Py_DECREF(info); + return NULL; + } + PyStructSequence_SetItem(info, 2, py_col_offset); // steals reference + + PyObject *py_end_col_offset = PyLong_FromLong(end_col_offset); + if (py_end_col_offset == NULL) { + Py_DECREF(info); + return NULL; + } + PyStructSequence_SetItem(info, 3, py_end_col_offset); // steals reference + return info; } From 93f7abd9785cae652649b28ab088c529b7727bc0 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Mon, 8 Dec 2025 21:12:06 -0800 Subject: [PATCH 13/24] CSS fixes for classes and dark mode --- .../_flamegraph_assets/flamegraph.css | 78 +++++++++++++++++++ .../sampling/_flamegraph_assets/flamegraph.js | 28 ++++--- .../sampling/_shared_assets/base.css | 18 ++--- 3 files changed, 100 insertions(+), 24 deletions(-) diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css index c75f2324b6d499..c3dcfce8432441 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css @@ -863,6 +863,84 @@ body.resizing-sidebar { text-align: center; } +/* -------------------------------------------------------------------------- + Tooltip Bytecode/Opcode Section + -------------------------------------------------------------------------- */ + +.tooltip-opcodes { + margin-top: 16px; + padding-top: 12px; + border-top: 1px solid var(--border); +} + +.tooltip-opcodes-title { + color: var(--accent); + font-size: 13px; + margin-bottom: 8px; + font-weight: 600; +} + +.tooltip-opcodes-list { + background: var(--bg-tertiary); + border: 1px solid var(--border); + border-radius: 6px; + padding: 10px; +} + +.tooltip-opcode-row { + display: grid; + grid-template-columns: 1fr 60px 60px; + gap: 8px; + align-items: center; + padding: 3px 0; +} + +.tooltip-opcode-name { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-primary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.tooltip-opcode-name.specialized { + color: var(--spec-high-text); +} + +.tooltip-opcode-base-hint { + color: var(--text-muted); + font-size: 11px; + margin-left: 4px; +} + +.tooltip-opcode-badge { + background: var(--spec-high); + color: white; + font-size: 9px; + padding: 1px 4px; + border-radius: 3px; + margin-left: 4px; +} + +.tooltip-opcode-count { + text-align: right; + font-size: 11px; + color: var(--text-secondary); +} + +.tooltip-opcode-bar { + background: var(--bg-secondary); + border-radius: 2px; + height: 8px; + overflow: hidden; +} + +.tooltip-opcode-bar-fill { + background: linear-gradient(90deg, var(--python-blue), var(--python-blue-light)); + height: 100%; +} + /* -------------------------------------------------------------------------- Responsive (Flamegraph-specific) -------------------------------------------------------------------------- */ diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index 617ce21b73a45e..8f98b422e74521 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -292,33 +292,31 @@ function createPythonTooltip(data) { const pct = ((count / totalOpcodeSamples) * 100).toFixed(1); const barWidth = (count / maxCount) * 100; const specializedBadge = opcodeInfo.isSpecialized - ? 'SPECIALIZED' + ? 'SPECIALIZED' : ''; const baseOpHint = opcodeInfo.isSpecialized - ? `(${opcodeInfo.baseOpname})` + ? `(${opcodeInfo.baseOpname})` : ''; + const nameClass = opcodeInfo.isSpecialized + ? 'tooltip-opcode-name specialized' + : 'tooltip-opcode-name'; return ` -
-
+
+
${opcodeInfo.opname}${baseOpHint}${specializedBadge}
-
${count.toLocaleString()}
-
-
+
${count.toLocaleString()}
+
+
`; }).join(''); opcodeSection = ` -
-
- Bytecode Instructions: -
-
+
+
Bytecode Instructions:
+
${opcodeLines}
`; diff --git a/Lib/profiling/sampling/_shared_assets/base.css b/Lib/profiling/sampling/_shared_assets/base.css index 0db4c41d00ceab..8a381e2d9e67db 100644 --- a/Lib/profiling/sampling/_shared_assets/base.css +++ b/Lib/profiling/sampling/_shared_assets/base.css @@ -119,15 +119,15 @@ --header-gradient: linear-gradient(135deg, #21262d 0%, #30363d 100%); - /* Dark mode heat palette - dark blue to teal to yellow to orange (cold to hot) */ - --heat-1: #4a7ba7; - --heat-2: #5a9fa8; - --heat-3: #6ab5b5; - --heat-4: #7ec488; - --heat-5: #a0d878; - --heat-6: #c4de6a; - --heat-7: #f4d44d; - --heat-8: #ff6b35; + /* Dark mode heat palette - muted colors that provide sufficient contrast with light text */ + --heat-1: rgba(74, 123, 167, 0.35); + --heat-2: rgba(90, 159, 168, 0.38); + --heat-3: rgba(106, 181, 181, 0.40); + --heat-4: rgba(126, 196, 136, 0.42); + --heat-5: rgba(160, 216, 120, 0.45); + --heat-6: rgba(196, 222, 106, 0.48); + --heat-7: rgba(244, 212, 77, 0.50); + --heat-8: rgba(255, 107, 53, 0.55); /* Code view specific - dark mode */ --code-bg: #0d1117; From 43a298b512226570ca02329214a3fd7ce51dcd5d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 9 Dec 2025 13:29:00 +0000 Subject: [PATCH 14/24] address review --- .../sampling/_flamegraph_assets/flamegraph.js | 2 +- .../sampling/_heatmap_assets/heatmap.js | 6 +- .../sampling/_shared_assets/base.css | 8 + .../sampling/live_collector/collector.py | 217 ++++++++---------- 4 files changed, 103 insertions(+), 130 deletions(-) diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index 8f98b422e74521..3076edd1d68cba 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -306,7 +306,7 @@ function createPythonTooltip(data) {
${opcodeInfo.opname}${baseOpHint}${specializedBadge}
-
${count.toLocaleString()}
+
${count.toLocaleString()} (${pct}%)
diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.js b/Lib/profiling/sampling/_heatmap_assets/heatmap.js index 7d5b5cb4922113..626faea5303eb6 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.js +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.js @@ -368,10 +368,12 @@ function calculateHeatColor(intensity) { const normalizedIntensity = (intensity - 0.3) / 0.7; // Warm orange-red with increasing opacity for hotter spans const alpha = 0.25 + normalizedIntensity * 0.35; // 0.25 to 0.6 - return `rgba(255, 100, 50, ${alpha})`; + const hotColor = getComputedStyle(document.documentElement).getPropertyValue('--span-hot-base').trim(); + return `rgba(${hotColor}, ${alpha})`; } else if (intensity > 0) { // Cold spans: very subtle gray, almost invisible - return `rgba(150, 150, 150, 0.1)`; + const coldColor = getComputedStyle(document.documentElement).getPropertyValue('--span-cold-base').trim(); + return `rgba(${coldColor}, 0.1)`; } return 'transparent'; } diff --git a/Lib/profiling/sampling/_shared_assets/base.css b/Lib/profiling/sampling/_shared_assets/base.css index 8a381e2d9e67db..bcd063c9409e31 100644 --- a/Lib/profiling/sampling/_shared_assets/base.css +++ b/Lib/profiling/sampling/_shared_assets/base.css @@ -95,6 +95,10 @@ --spec-low: #9e9e9e; --spec-low-text: #616161; --spec-low-bg: rgba(158, 158, 158, 0.15); + + /* Heatmap span highlighting colors */ + --span-hot-base: 255, 100, 50; + --span-cold-base: 150, 150, 150; } /* Dark theme */ @@ -153,6 +157,10 @@ --spec-low: #bdbdbd; --spec-low-text: #9e9e9e; --spec-low-bg: rgba(189, 189, 189, 0.15); + + /* Heatmap span highlighting colors - dark theme */ + --span-hot-base: 255, 107, 53; + --span-cold-base: 189, 189, 189; } /* -------------------------------------------------------------------------- diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index 080ac38e191e97..3d25b5969835c0 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -751,6 +751,88 @@ def _handle_finished_input_update(self, had_input): if self.finished and had_input and self.display is not None: self._update_display() + def _get_visible_rows_info(self): + """Calculate visible rows and stats list for opcode navigation.""" + stats_list = self.build_stats_list() + if self.display: + height, _ = self.display.get_dimensions() + extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) + stats_list = stats_list[:max_stats] + visible_rows = max(1, height - 8 - 2 - 12) + else: + visible_rows = self.limit + total_rows = len(stats_list) + return stats_list, visible_rows, total_rows + + def _move_selection_down(self): + """Move selection down in opcode mode with scrolling.""" + if not self.show_opcodes: + return + + stats_list, visible_rows, total_rows = self._get_visible_rows_info() + if total_rows == 0: + return + + # Max scroll is when last item is at bottom + max_scroll = max(0, total_rows - visible_rows) + # Current absolute position + abs_pos = self.scroll_offset + self.selected_row + + # Only move if not at the last item + if abs_pos < total_rows - 1: + # Try to move selection within visible area first + if self.selected_row < visible_rows - 1: + self.selected_row += 1 + elif self.scroll_offset < max_scroll: + # Scroll down + self.scroll_offset += 1 + + # Clamp to valid range + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + + def _move_selection_up(self): + """Move selection up in opcode mode with scrolling.""" + if not self.show_opcodes: + return + + if self.selected_row > 0: + self.selected_row -= 1 + elif self.scroll_offset > 0: + self.scroll_offset -= 1 + + # Clamp to valid range based on actual stats_list + stats_list, visible_rows, total_rows = self._get_visible_rows_info() + if total_rows > 0: + max_scroll = max(0, total_rows - visible_rows) + self.scroll_offset = min(self.scroll_offset, max_scroll) + max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) + self.selected_row = min(self.selected_row, max(0, max_selected)) + + def _navigate_to_previous_thread(self): + """Navigate to previous thread in PER_THREAD mode, or switch from ALL to PER_THREAD.""" + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = len(self.thread_ids) - 1 + else: + self.current_thread_index = ( + self.current_thread_index - 1 + ) % len(self.thread_ids) + + def _navigate_to_next_thread(self): + """Navigate to next thread in PER_THREAD mode, or switch from ALL to PER_THREAD.""" + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index + 1 + ) % len(self.thread_ids) + def _show_terminal_too_small(self, height, width): """Display a message when terminal is too small.""" A_BOLD = self.display.get_attr("A_BOLD") @@ -930,154 +1012,35 @@ def _handle_input(self): elif ch == ord("j") or ch == ord("J"): # Move selection down in opcode mode (with scrolling) - if self.show_opcodes: - # Use the actual displayed stats_list count, not raw result_source - # This matches what _prepare_display_data() produces - stats_list = self.build_stats_list() - if self.display: - height, _ = self.display.get_dimensions() - # Same calculation as _prepare_display_data - extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 - max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) - stats_list = stats_list[:max_stats] - visible_rows = max(1, height - 8 - 2 - 12) - else: - visible_rows = self.limit - total_rows = len(stats_list) - if total_rows == 0: - return - # Max scroll is when last item is at bottom - max_scroll = max(0, total_rows - visible_rows) - # Current absolute position - abs_pos = self.scroll_offset + self.selected_row - # Only move if not at the last item - if abs_pos < total_rows - 1: - # Try to move selection within visible area first - if self.selected_row < visible_rows - 1: - self.selected_row += 1 - elif self.scroll_offset < max_scroll: - # Scroll down - self.scroll_offset += 1 - # Clamp to valid range - self.scroll_offset = min(self.scroll_offset, max_scroll) - max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) - self.selected_row = min(self.selected_row, max(0, max_selected)) + self._move_selection_down() elif ch == ord("k") or ch == ord("K"): # Move selection up in opcode mode (with scrolling) - if self.show_opcodes: - if self.selected_row > 0: - self.selected_row -= 1 - elif self.scroll_offset > 0: - self.scroll_offset -= 1 - # Clamp to valid range based on actual stats_list - stats_list = self.build_stats_list() - if self.display: - height, _ = self.display.get_dimensions() - extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 - max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) - stats_list = stats_list[:max_stats] - visible_rows = max(1, height - 8 - 2 - 12) - else: - visible_rows = self.limit - total_rows = len(stats_list) - if total_rows > 0: - max_scroll = max(0, total_rows - visible_rows) - self.scroll_offset = min(self.scroll_offset, max_scroll) - max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) - self.selected_row = min(self.selected_row, max(0, max_selected)) + self._move_selection_up() elif ch == curses.KEY_UP: # Move selection up (same as 'k') when in opcode mode if self.show_opcodes: - if self.selected_row > 0: - self.selected_row -= 1 - elif self.scroll_offset > 0: - self.scroll_offset -= 1 - # Clamp to valid range based on actual stats_list - stats_list = self.build_stats_list() - if self.display: - height, _ = self.display.get_dimensions() - extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 - max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) - stats_list = stats_list[:max_stats] - visible_rows = max(1, height - 8 - 2 - 12) - else: - visible_rows = self.limit - total_rows = len(stats_list) - if total_rows > 0: - max_scroll = max(0, total_rows - visible_rows) - self.scroll_offset = min(self.scroll_offset, max_scroll) - max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) - self.selected_row = min(self.selected_row, max(0, max_selected)) + self._move_selection_up() else: # Navigate to previous thread (same as KEY_LEFT) - if len(self.thread_ids) > 0: - if self.view_mode == "ALL": - self.view_mode = "PER_THREAD" - self.current_thread_index = len(self.thread_ids) - 1 - else: - self.current_thread_index = ( - self.current_thread_index - 1 - ) % len(self.thread_ids) + self._navigate_to_previous_thread() elif ch == curses.KEY_DOWN: # Move selection down (same as 'j') when in opcode mode if self.show_opcodes: - stats_list = self.build_stats_list() - if self.display: - height, _ = self.display.get_dimensions() - extra_header = FINISHED_BANNER_EXTRA_LINES if self.finished else 0 - max_stats = max(0, height - HEADER_LINES - extra_header - FOOTER_LINES - SAFETY_MARGIN) - stats_list = stats_list[:max_stats] - visible_rows = max(1, height - 8 - 2 - 12) - else: - visible_rows = self.limit - total_rows = len(stats_list) - if total_rows == 0: - return - max_scroll = max(0, total_rows - visible_rows) - abs_pos = self.scroll_offset + self.selected_row - if abs_pos < total_rows - 1: - if self.selected_row < visible_rows - 1: - self.selected_row += 1 - elif self.scroll_offset < max_scroll: - self.scroll_offset += 1 - self.scroll_offset = min(self.scroll_offset, max_scroll) - max_selected = min(visible_rows - 1, total_rows - self.scroll_offset - 1) - self.selected_row = min(self.selected_row, max(0, max_selected)) + self._move_selection_down() else: # Navigate to next thread (same as KEY_RIGHT) - if len(self.thread_ids) > 0: - if self.view_mode == "ALL": - self.view_mode = "PER_THREAD" - self.current_thread_index = 0 - else: - self.current_thread_index = ( - self.current_thread_index + 1 - ) % len(self.thread_ids) + self._navigate_to_next_thread() elif ch == curses.KEY_LEFT: # Navigate to previous thread - if len(self.thread_ids) > 0: - if self.view_mode == "ALL": - self.view_mode = "PER_THREAD" - self.current_thread_index = len(self.thread_ids) - 1 - else: - self.current_thread_index = ( - self.current_thread_index - 1 - ) % len(self.thread_ids) + self._navigate_to_previous_thread() elif ch == curses.KEY_RIGHT: # Navigate to next thread - if len(self.thread_ids) > 0: - if self.view_mode == "ALL": - self.view_mode = "PER_THREAD" - self.current_thread_index = 0 - else: - self.current_thread_index = ( - self.current_thread_index + 1 - ) % len(self.thread_ids) + self._navigate_to_next_thread() # Update display if input was processed while finished self._handle_finished_input_update(ch != -1) From b13b6f09c115d8157d62ccb692746ae22abbd775 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 9 Dec 2025 17:16:03 +0000 Subject: [PATCH 15/24] Docs --- Doc/library/profiling.sampling.rst | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst index e2e354c3d134fe..3bcd576184eb34 100644 --- a/Doc/library/profiling.sampling.rst +++ b/Doc/library/profiling.sampling.rst @@ -146,6 +146,10 @@ Generate a line-by-line heatmap:: python -m profiling.sampling run --heatmap script.py +Enable opcode-level profiling to see which bytecode instructions are executing:: + + python -m profiling.sampling run --opcodes --flamegraph script.py + Commands ======== @@ -379,6 +383,44 @@ see substantial time in ```` frames, consider investigating object allocation rates or using object pooling. +Opcode-aware profiling +---------------------- + +The ``--opcodes`` option enables instruction-level profiling that captures +which Python bytecode instructions are executing at each sample:: + + python -m profiling.sampling run --opcodes --flamegraph script.py + +This feature provides visibility into Python's bytecode execution, including +adaptive specialization optimizations. When a generic instruction like +``LOAD_ATTR`` is specialized at runtime into a more efficient variant like +``LOAD_ATTR_INSTANCE_VALUE``, the profiler shows both the specialized name +and the base instruction. + +Opcode information appears in several output formats: + +- **Live mode**: An opcode panel shows instruction-level statistics for the + selected function, accessible via keyboard navigation +- **Flame graphs**: Nodes display opcode information when available, helping + identify which instructions consume the most time +- **Heatmap**: Expandable bytecode panels per source line show instruction + breakdown with specialization percentages +- **Gecko format**: Opcode transitions are emitted as interval markers in the + Firefox Profiler timeline + +This level of detail is particularly useful for: + +- Understanding the performance impact of Python's adaptive specialization +- Identifying hot bytecode instructions that might benefit from optimization +- Analyzing the effectiveness of different code patterns at the instruction level +- Debugging performance issues that occur at the bytecode level + +The ``--opcodes`` option is compatible with ``--live``, ``--flamegraph``, +``--heatmap``, and ``--gecko`` formats. It requires additional memory to store +opcode information and may slightly reduce sampling performance, but provides +unprecedented visibility into Python's execution model. + + Real-time statistics -------------------- @@ -760,6 +802,11 @@ and thread status statistics (GIL held percentage, CPU usage, GC time). The main table shows function statistics with the currently sorted column indicated by an arrow (▼). +When ``--opcodes`` is enabled, an additional opcode panel appears below the +main table, showing instruction-level statistics for the currently selected +function. This panel displays which bytecode instructions are executing most +frequently, including specialized variants and their base opcodes. + Keyboard commands ----------------- @@ -813,6 +860,11 @@ Within live mode, keyboard commands control the display: :kbd:`h` or :kbd:`?` Show the help screen with all available commands. +:kbd:`j` / :kbd:`k` + Navigate through opcode entries in the opcode panel (when ``--opcodes`` is + enabled). These keys scroll through the instruction-level statistics for the + currently selected function. + When profiling finishes (duration expires or target process exits), the display shows a "PROFILING COMPLETE" banner and freezes the final results. You can still navigate, sort, and filter the results before pressing :kbd:`q` to exit. @@ -939,6 +991,13 @@ Sampling options Enable async-aware profiling for asyncio programs. +.. option:: --opcodes + + Gather bytecode opcode information for instruction-level profiling. Shows + which bytecode instructions are executing, including specializations. + Compatible with ``--live``, ``--flamegraph``, ``--heatmap``, and ``--gecko`` + formats only. + Mode options ------------ From 50f63d09aaf98bb119036d70efff378ff3b7a273 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 9 Dec 2025 17:44:40 +0000 Subject: [PATCH 16/24] Make bytecode spacer Signed-off-by: Pablo Galindo --- Lib/profiling/sampling/_heatmap_assets/heatmap.css | 7 +++++++ Lib/profiling/sampling/heatmap_collector.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css index 8b09db5c4764d8..052df3857f6520 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.css +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -1200,6 +1200,13 @@ transform: rotate(90deg); } +.bytecode-spacer { + flex-shrink: 0; + width: 20px; + height: 20px; + margin: 0 4px; +} + .bytecode-panel { margin-left: 90px; padding: 8px 15px; diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py index 64fcbb32602b81..45ccdf42b35603 100644 --- a/Lib/profiling/sampling/heatmap_collector.py +++ b/Lib/profiling/sampling/heatmap_collector.py @@ -477,6 +477,9 @@ def __init__(self, *args, **kwargs): self._path_info = get_python_path_info() self.stats = {} + # Opcode collection flag + self.opcodes_enabled = False + # Template loader (loads all templates once) self._template_loader = _TemplateLoader() @@ -528,6 +531,8 @@ def process_frames(self, frames, thread_id): self._record_line_sample(filename, lineno, funcname, is_leaf=(i == 0)) if opcode is not None: + # Set opcodes_enabled flag when we first encounter opcode data + self.opcodes_enabled = True self._record_bytecode_sample(filename, lineno, opcode, end_lineno, col_offset, end_col_offset) @@ -953,6 +958,9 @@ def _build_line_html(self, line_num: int, line_content: str, f'onclick="toggleBytecode(this)" title="Show bytecode">▶' ) bytecode_panel_html = f' \n' + elif self.opcodes_enabled: + # Add invisible spacer to maintain consistent indentation when opcodes are enabled + bytecode_btn_html = '
' # Get navigation buttons nav_buttons_html = self._build_navigation_buttons(filename, line_num) From 6eed927be824a5617f282d4b54702f7f207953d1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 9 Dec 2025 18:21:43 +0000 Subject: [PATCH 17/24] Update Lib/profiling/sampling/_heatmap_assets/heatmap.js Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Lib/profiling/sampling/_heatmap_assets/heatmap.js | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.js b/Lib/profiling/sampling/_heatmap_assets/heatmap.js index 626faea5303eb6..9cedb2d84698b6 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.js +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.js @@ -506,17 +506,14 @@ function toggleSpecView() { specViewEnabled = !specViewEnabled; const lines = document.querySelectorAll('.code-line'); - lines.forEach(line => { - if (specViewEnabled) { + if (specViewEnabled) { + lines.forEach(line => { const specColor = line.getAttribute('data-spec-color'); line.style.background = specColor || 'transparent'; - } else { - const bgColor = colorMode === 'self' - ? line.getAttribute('data-self-color') || line.getAttribute('data-bg-color') - : line.getAttribute('data-cumulative-color') || line.getAttribute('data-bg-color'); - line.style.background = bgColor || 'transparent'; - } - }); + }); + } else { + applyLineColors(); + } applySpanHeatColors(specViewEnabled); updateToggleUI('toggle-spec-view', specViewEnabled); From 1c630ce82833f4f7d07b3171389f5901853c65fa Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 9 Dec 2025 19:13:14 +0000 Subject: [PATCH 18/24] Update heatmap.css Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Lib/profiling/sampling/_heatmap_assets/heatmap.css | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css index 052df3857f6520..ac1c2d460f997a 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.css +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -1196,10 +1196,6 @@ color: var(--accent); } -.bytecode-toggle.expanded { - transform: rotate(90deg); -} - .bytecode-spacer { flex-shrink: 0; width: 20px; From 56e68c1f809dcff9192ce15cd74ef36081ee8964 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 9 Dec 2025 20:08:20 -0800 Subject: [PATCH 19/24] Tachyon Heatmap responsive styles --- .../sampling/_heatmap_assets/heatmap.css | 75 +++++++++++++++++-- .../heatmap_pyfile_template.html | 38 +++++----- 2 files changed, 90 insertions(+), 23 deletions(-) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css index ac1c2d460f997a..f90792e0a0eea2 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.css +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -691,6 +691,15 @@ flex-shrink: 0; } +/* Legend Controls Group - wraps toggles and bytecode button together */ +.legend-controls { + display: flex; + align-items: center; + gap: 20px; + flex-shrink: 0; + margin-left: auto; +} + /* Toggle Switch Styles */ .toggle-switch { display: inline-flex; @@ -726,11 +735,6 @@ cursor: not-allowed; } -/* Push toggles to the right */ -#toggle-color-mode { - margin-left: auto; -} - .toggle-track { position: relative; width: 36px; @@ -1147,6 +1151,15 @@ .stats-summary { grid-template-columns: repeat(2, 1fr); } + + .legend-content { + flex-wrap: wrap; + justify-content: center; + } + + .legend-controls { + margin-left: 0; + } } @media (max-width: 900px) { @@ -1166,6 +1179,7 @@ .legend-content { flex-direction: column; + align-items: center; gap: 12px; } @@ -1173,6 +1187,57 @@ width: 100%; max-width: none; } + + .legend-separator { + width: 80%; + height: 1px; + } + + .legend-controls { + flex-direction: column; + gap: 12px; + } + + .legend-controls .toggle-switch { + justify-content: center; + } + + .legend-controls .toggle-switch .toggle-label:first-child { + width: 70px; + text-align: right; + } + + .legend-controls .toggle-switch .toggle-label:last-child { + width: 90px; + text-align: left; + } + + /* Compact code columns on small screens */ + .header-line-number, + .line-number { + width: 40px; + } + + .header-samples-self, + .header-samples-cumulative, + .line-samples-self, + .line-samples-cumulative { + width: 55px; + font-size: 10px; + } + + /* Adjust padding - headers need vertical, data rows don't */ + .header-line-number, + .header-samples-self, + .header-samples-cumulative { + padding: 8px 4px; + } + + .line-number, + .line-samples-self, + .line-samples-cumulative { + padding: 0 4px; + } } .bytecode-toggle { diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html index 888bb795c8a1cb..cad0d16ed47685 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html @@ -69,25 +69,27 @@ Hot
-
- Self Time -
- Total Time +
+
+ Self Time +
+ Total Time +
+
+ Show All +
+ Hot Only +
+
+ Heat +
+ Specialization +
+ +
-
- Show All -
- Hot Only -
-
- Heat -
- Specialization -
- -
From ede0f79c4560b26f5a2f51dfe27e2b0687dc7182 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 10 Dec 2025 13:45:47 +0000 Subject: [PATCH 20/24] Update Doc/library/profiling.sampling.rst Co-authored-by: Savannah Ostrowski --- Doc/library/profiling.sampling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst index 3bcd576184eb34..66eca5fd2064b3 100644 --- a/Doc/library/profiling.sampling.rst +++ b/Doc/library/profiling.sampling.rst @@ -860,7 +860,7 @@ Within live mode, keyboard commands control the display: :kbd:`h` or :kbd:`?` Show the help screen with all available commands. -:kbd:`j` / :kbd:`k` +:kbd:`j` / :kbd:`k` (or :kbd:`Up` / :kbd:`Down`) Navigate through opcode entries in the opcode panel (when ``--opcodes`` is enabled). These keys scroll through the instruction-level statistics for the currently selected function. From f838c5d6855b677004fe36272ca09b44e396fc39 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 10 Dec 2025 14:29:37 +0000 Subject: [PATCH 21/24] Fix shift when selecting --- Lib/profiling/sampling/_heatmap_assets/heatmap.css | 10 ++++++++++ .../_heatmap_assets/heatmap_pyfile_template.html | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css index f90792e0a0eea2..9088e801a9e665 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap.css +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -722,6 +722,8 @@ color: var(--text-muted); transition: color var(--transition-fast); white-space: nowrap; + display: inline-flex; + flex-direction: column; } .toggle-switch .toggle-label.active { @@ -729,6 +731,14 @@ font-weight: 600; } +/* Reserve space for bold text to prevent layout shift on toggle */ +.toggle-switch .toggle-label::after { + content: attr(data-text); + font-weight: 600; + height: 0; + visibility: hidden; +} + .toggle-switch.disabled { opacity: 0.4; pointer-events: none; diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html index cad0d16ed47685..e2eb8cd45e40b1 100644 --- a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html @@ -71,19 +71,19 @@
- Self Time + Self Time
- Total Time + Total Time
- Show All + Show All
- Hot Only + Hot Only
- Heat + Heat
- Specialization + Specialization