From 9af7a20caeb2912a05dd0fa07bbb4bfe7fb874e4 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 24 Dec 2025 12:38:17 +0000 Subject: [PATCH 1/7] gh-136186: Fix flaky tests in test_external_inspection (#143110) --- Lib/test/test_external_inspection.py | 233 ++++++++++++++++----------- 1 file changed, 135 insertions(+), 98 deletions(-) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 4c502cd1de7418..b1a3a8e65a9802 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -253,6 +253,31 @@ def get_all_awaited_by(pid): raise RuntimeError("Failed to get all awaited_by after retries") +def _get_stack_trace_with_retry(unwinder, timeout=SHORT_TIMEOUT, condition=None): + """Get stack trace from an existing unwinder with retry for transient errors. + + This handles the case where we want to reuse an existing RemoteUnwinder + instance but still handle transient failures like "Failed to parse initial + frame in chain" that can occur when sampling at an inopportune moment. + If condition is provided, keeps retrying until condition(traces) is True. + """ + last_error = None + for _ in busy_retry(timeout): + try: + traces = unwinder.get_stack_trace() + if condition is None or condition(traces): + return traces + # Condition not met yet, keep retrying + except TRANSIENT_ERRORS as e: + last_error = e + continue + if last_error: + raise RuntimeError( + f"Failed to get stack trace after retries: {last_error}" + ) + raise RuntimeError("Condition never satisfied within timeout") + + # ============================================================================ # Base test class with shared infrastructure # ============================================================================ @@ -1708,16 +1733,16 @@ def main_work(): # Get stack trace with all threads unwinder_all = RemoteUnwinder(p.pid, all_threads=True) - for _ in range(MAX_TRIES): - all_traces = unwinder_all.get_stack_trace() - found = self._find_frame_in_trace( - all_traces, - lambda f: f.funcname == "main_work" - and f.location.lineno > 12, - ) - if found: - break - time.sleep(RETRY_DELAY) + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): + all_traces = unwinder_all.get_stack_trace() + found = self._find_frame_in_trace( + all_traces, + lambda f: f.funcname == "main_work" + and f.location.lineno > 12, + ) + if found: + break else: self.fail( "Main thread did not start its busy work on time" @@ -1727,7 +1752,7 @@ def main_work(): unwinder_gil = RemoteUnwinder( p.pid, only_active_thread=True ) - gil_traces = unwinder_gil.get_stack_trace() + gil_traces = _get_stack_trace_with_retry(unwinder_gil) # Count threads total_threads = sum( @@ -2002,15 +2027,15 @@ def busy(): mode=mode, skip_non_matching_threads=False, ) - for _ in range(MAX_TRIES): - traces = unwinder.get_stack_trace() - statuses = self._get_thread_statuses(traces) + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) - if check_condition( - statuses, sleeper_tid, busy_tid - ): - break - time.sleep(0.5) + if check_condition( + statuses, sleeper_tid, busy_tid + ): + break return statuses, sleeper_tid, busy_tid finally: @@ -2154,29 +2179,29 @@ def busy_thread(): mode=PROFILING_MODE_ALL, skip_non_matching_threads=False, ) - for _ in range(MAX_TRIES): - traces = unwinder.get_stack_trace() - statuses = self._get_thread_statuses(traces) - - # Check ALL mode 
provides both GIL and CPU info - if ( - sleeper_tid in statuses - and busy_tid in statuses - and not ( - statuses[sleeper_tid] - & THREAD_STATUS_ON_CPU - ) - and not ( - statuses[sleeper_tid] - & THREAD_STATUS_HAS_GIL - ) - and (statuses[busy_tid] & THREAD_STATUS_ON_CPU) - and ( - statuses[busy_tid] & THREAD_STATUS_HAS_GIL - ) - ): - break - time.sleep(0.5) + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + # Check ALL mode provides both GIL and CPU info + if ( + sleeper_tid in statuses + and busy_tid in statuses + and not ( + statuses[sleeper_tid] + & THREAD_STATUS_ON_CPU + ) + and not ( + statuses[sleeper_tid] + & THREAD_STATUS_HAS_GIL + ) + and (statuses[busy_tid] & THREAD_STATUS_ON_CPU) + and ( + statuses[busy_tid] & THREAD_STATUS_HAS_GIL + ) + ): + break self.assertIsNotNone( sleeper_tid, "Sleeper thread id not received" @@ -2300,18 +2325,18 @@ def test_thread_status_exception_detection(self): mode=PROFILING_MODE_ALL, skip_non_matching_threads=False, ) - for _ in range(MAX_TRIES): - traces = unwinder.get_stack_trace() - statuses = self._get_thread_statuses(traces) - - if ( - exception_tid in statuses - and normal_tid in statuses - and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION) - and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION) - ): - break - time.sleep(0.5) + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + if ( + exception_tid in statuses + and normal_tid in statuses + and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION) + and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION) + ): + break self.assertIn(exception_tid, statuses) self.assertIn(normal_tid, statuses) @@ -2343,18 +2368,18 @@ def test_thread_status_exception_mode_filtering(self): mode=PROFILING_MODE_EXCEPTION, skip_non_matching_threads=True, ) - for _ in range(MAX_TRIES): - traces = unwinder.get_stack_trace() - statuses = self._get_thread_statuses(traces) - - if exception_tid in statuses: - self.assertNotIn( - normal_tid, - statuses, - "Normal thread should be filtered out in exception mode", - ) - return - time.sleep(0.5) + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + + if exception_tid in statuses: + self.assertNotIn( + normal_tid, + statuses, + "Normal thread should be filtered out in exception mode", + ) + return self.fail("Never found exception thread in exception mode") @@ -2497,8 +2522,23 @@ def _run_scenario_process(self, scenario): finally: _cleanup_sockets(client_socket, server_socket) - def _check_exception_status(self, p, thread_tid, expect_exception): - """Helper to check if thread has expected exception status.""" + def _check_thread_status( + self, p, thread_tid, condition, condition_name="condition" + ): + """Helper to check thread status with a custom condition. + + This waits until we see 3 consecutive samples where the condition + returns True, which confirms the thread has reached and is stable + in the expected state. Samples that don't match are ignored (the + thread may not have reached the expected state yet). 
+ + Args: + p: Process object with pid attribute + thread_tid: Thread ID to check + condition: Callable(statuses, thread_tid) -> bool that returns + True when the thread is in the expected state + condition_name: Description of condition for error messages + """ unwinder = RemoteUnwinder( p.pid, all_threads=True, @@ -2506,40 +2546,37 @@ def _check_exception_status(self, p, thread_tid, expect_exception): skip_non_matching_threads=False, ) - # Collect multiple samples for reliability - results = [] - for _ in range(MAX_TRIES): - try: + # Wait for 3 consecutive samples matching expected state + matching_samples = 0 + for _ in busy_retry(SHORT_TIMEOUT): + with contextlib.suppress(*TRANSIENT_ERRORS): traces = unwinder.get_stack_trace() - except TRANSIENT_ERRORS: - time.sleep(RETRY_DELAY) - continue - statuses = self._get_thread_statuses(traces) - - if thread_tid in statuses: - has_exc = bool(statuses[thread_tid] & THREAD_STATUS_HAS_EXCEPTION) - results.append(has_exc) + statuses = self._get_thread_statuses(traces) - if len(results) >= 3: - break + if thread_tid in statuses: + if condition(statuses, thread_tid): + matching_samples += 1 + if matching_samples >= 3: + return # Success - confirmed stable in expected state + else: + # Thread not yet in expected state, reset counter + matching_samples = 0 - time.sleep(RETRY_DELAY) + self.fail( + f"Thread did not stabilize in expected state " + f"({condition_name}) within timeout" + ) - # Check majority of samples match expected - if not results: - self.fail("Never found target thread in stack traces") + def _check_exception_status(self, p, thread_tid, expect_exception): + """Helper to check if thread has expected exception status.""" + def condition(statuses, tid): + has_exc = bool(statuses[tid] & THREAD_STATUS_HAS_EXCEPTION) + return has_exc == expect_exception - majority = sum(results) > len(results) // 2 - if expect_exception: - self.assertTrue( - majority, - f"Thread should have HAS_EXCEPTION flag, got {results}" - ) - else: - self.assertFalse( - majority, - f"Thread should NOT have HAS_EXCEPTION flag, got {results}" - ) + self._check_thread_status( + p, thread_tid, condition, + condition_name=f"expect_exception={expect_exception}" + ) @unittest.skipIf( sys.platform not in ("linux", "darwin", "win32"), @@ -3445,7 +3482,7 @@ def test_get_stats(self): _wait_for_signal(client_socket, b"ready") # Take a sample - unwinder.get_stack_trace() + _get_stack_trace_with_retry(unwinder) stats = unwinder.get_stats() client_socket.sendall(b"done") From 57937a8e5e293f0dcba5115f7b7a11b1e0c9a273 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 24 Dec 2025 08:01:45 -0500 Subject: [PATCH 2/7] gh-142145: Avoid timing measurements in quadratic behavior test (gh-143105) Count the number of Element attribute accesses as a proxy for work done. With double the amount of work, a ratio of 2.0 indicates linear scaling and 4.0 quadratic scaling. Use 3.2 as an intermediate threshold. 
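To make the check concrete: doubling the input size should roughly double the
counted accesses when behavior is linear (ratio ~2.0) and roughly quadruple
them when quadratic (ratio ~4.0). A minimal standalone sketch of the counting
technique (illustrative only, not the test itself; the `Node` class is a
hypothetical stand-in for `Element`):

```
# Count __getattribute__ calls as a machine-independent proxy for work,
# then compare how the count grows as the input size doubles.
class Node:
    def __init__(self):
        self.children = []

calls = 0

def counting_getattribute(self, attr):
    global calls
    calls += 1
    return object.__getattribute__(self, attr)

def work(n):
    global calls
    calls = 0
    Node.__getattribute__ = counting_getattribute
    try:
        root = Node()
        for _ in range(n):
            root.children.append(Node())  # amortized O(1) per append
    finally:
        del Node.__getattribute__
    return calls

w1, w2 = work(1024), work(2048)
print(w2 / w1)  # ~2.0 here; ~4.0 would indicate quadratic behavior
```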
---
 Lib/test/test_minidom.py | 48 +++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index 69fae957ec7fc9..46249e5138aed5 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -2,7 +2,6 @@
 
 import copy
 import pickle
-import time
 import io
 from test import support
 import unittest
@@ -178,23 +177,36 @@ def testAppendChild(self):
 
     def testAppendChildNoQuadraticComplexity(self):
         impl = getDOMImplementation()
-        newdoc = impl.createDocument(None, "some_tag", None)
-        top_element = newdoc.documentElement
-        children = [newdoc.createElement(f"child-{i}") for i in range(1, 2 ** 15 + 1)]
-        element = top_element
-
-        start = time.monotonic()
-        for child in children:
-            element.appendChild(child)
-            element = child
-        end = time.monotonic()
-
-        # This example used to take at least 30 seconds.
-        # Conservative assertion due to the wide variety of systems and
-        # build configs timing based tests wind up run under.
-        # A --with-address-sanitizer --with-pydebug build on a rpi5 still
-        # completes this loop in <0.5 seconds.
-        self.assertLess(end - start, 4)
+        def work(n):
+            doc = impl.createDocument(None, "some_tag", None)
+            element = doc.documentElement
+            total_calls = 0
+
+            # Count attribute accesses as a proxy for work done
+            def getattribute_counter(self, attr):
+                nonlocal total_calls
+                total_calls += 1
+                return object.__getattribute__(self, attr)
+
+            with support.swap_attr(Element, "__getattribute__", getattribute_counter):
+                for _ in range(n):
+                    child = doc.createElement("child")
+                    element.appendChild(child)
+                    element = child
+            return total_calls
+
+        # Doubling N should not ~quadruple the work.
+        w1 = work(1024)
+        w2 = work(2048)
+        w3 = work(4096)
+
+        self.assertGreater(w1, 0)
+        r1 = w2 / w1
+        r2 = w3 / w2
+        self.assertLess(
+            max(r1, r2), 3.2,
+            msg=f"Possible quadratic behavior: work={w1,w2,w3} ratios={r1,r2}"
+        )
 
     def testSetAttributeNodeWithoutOwnerDocument(self):
         # regression test for gh-142754

From 4ee6929d606fa7b976eba229de24219f0edac3d7 Mon Sep 17 00:00:00 2001
From: Sam Gross
Date: Wed, 24 Dec 2025 08:02:02 -0500
Subject: [PATCH 3/7] gh-143121: Skip tests that leak threads under TSan (gh-143125)

---
 Lib/test/_test_multiprocessing.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py
index c8c386101a0669..844539104e3a3e 100644
--- a/Lib/test/_test_multiprocessing.py
+++ b/Lib/test/_test_multiprocessing.py
@@ -3392,6 +3392,7 @@ class _TestMyManager(BaseTestCase):
     ALLOWED_TYPES = ('manager',)
 
     @warnings_helper.ignore_fork_in_thread_deprecation_warnings()
+    @support.skip_if_sanitizer('TSan: leaks threads', thread=True)
    def test_mymanager(self):
         manager = MyManager(shutdown_timeout=SHUTDOWN_TIMEOUT)
         manager.start()
@@ -3404,6 +3405,7 @@ def test_mymanager(self):
         self.assertIn(manager._process.exitcode, (0, -signal.SIGTERM))
 
     @warnings_helper.ignore_fork_in_thread_deprecation_warnings()
+    @support.skip_if_sanitizer('TSan: leaks threads', thread=True)
     def test_mymanager_context(self):
         manager = MyManager(shutdown_timeout=SHUTDOWN_TIMEOUT)
         with manager:
@@ -3414,6 +3416,7 @@ def test_mymanager_context(self):
         self.assertIn(manager._process.exitcode, (0, -signal.SIGTERM))
 
     @warnings_helper.ignore_fork_in_thread_deprecation_warnings()
+    @support.skip_if_sanitizer('TSan: leaks threads', thread=True)
     def test_mymanager_context_prestarted(self):
         manager = MyManager(shutdown_timeout=SHUTDOWN_TIMEOUT)
         manager.start()
@@ 
-3485,6 +3488,7 @@ def _putter(cls, address, authkey): queue.put(tuple(cls.values)) @warnings_helper.ignore_fork_in_thread_deprecation_warnings() + @support.skip_if_sanitizer('TSan: leaks threads', thread=True) def test_remote(self): authkey = os.urandom(32) @@ -3527,6 +3531,7 @@ def _putter(cls, address, authkey): queue.put('hello world') @warnings_helper.ignore_fork_in_thread_deprecation_warnings() + @support.skip_if_sanitizer("TSan: leaks threads", thread=True) def test_rapid_restart(self): authkey = os.urandom(32) manager = QueueManager( From e8e044eda343b4b3dd7a7e532c88c2c97242000d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 24 Dec 2025 08:02:19 -0500 Subject: [PATCH 4/7] gh-143100: Fix memcpy data race in setobject.c (gh-143127) --- Objects/setobject.c | 16 ++++++++++++++++ Tools/tsan/suppressions_free_threading.txt | 3 --- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 5f868c858273ee..55e30fe2cdd8f7 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1420,6 +1420,17 @@ set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return make_new_set(type, NULL); } +#ifdef Py_GIL_DISABLED +static void +copy_small_table(setentry *dest, setentry *src) +{ + for (Py_ssize_t i = 0; i < PySet_MINSIZE; i++) { + _Py_atomic_store_ptr_release(&dest[i].key, src[i].key); + _Py_atomic_store_ssize_relaxed(&dest[i].hash, src[i].hash); + } +} +#endif + /* set_swap_bodies() switches the contents of any two sets by moving their internal data pointers and, if needed, copying the internal smalltables. Semantically equivalent to: @@ -1462,8 +1473,13 @@ set_swap_bodies(PySetObject *a, PySetObject *b) if (a_table == a->smalltable || b_table == b->smalltable) { memcpy(tab, a->smalltable, sizeof(tab)); +#ifndef Py_GIL_DISABLED memcpy(a->smalltable, b->smalltable, sizeof(tab)); memcpy(b->smalltable, tab, sizeof(tab)); +#else + copy_small_table(a->smalltable, b->smalltable); + copy_small_table(b->smalltable, tab); +#endif } if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) && diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index f05e0ded9865f8..a3e1e54284f0ae 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -20,6 +20,3 @@ thread:pthread_create # PyObject_Realloc internally does memcpy which isn't atomic so can race # with non-locking reads. 
See #132070 race:PyObject_Realloc - -# gh-143100: set_swap_bodies in setobject.c calls memcpy, which isn't atomic -race:set_swap_bodies From d4dc3dd9aab6e860ea29e3bd133147a3f795cf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Kiss=20Koll=C3=A1r?= Date: Wed, 24 Dec 2025 13:46:33 +0000 Subject: [PATCH 5/7] gh-138122: Replace --interval with --sampling-rate (#143085) --- Doc/library/profiling.sampling.rst | 37 ++++--- Lib/profiling/sampling/_child_monitor.py | 4 +- Lib/profiling/sampling/_sync_coordinator.py | 8 +- Lib/profiling/sampling/cli.py | 98 ++++++++++++++----- Lib/profiling/sampling/constants.py | 4 + .../sampling/live_collector/__init__.py | 4 +- .../sampling/live_collector/collector.py | 14 +-- .../sampling/live_collector/constants.py | 2 +- .../sampling/live_collector/widgets.py | 8 +- Lib/profiling/sampling/pstats_collector.py | 5 +- .../test_sampling_profiler/test_advanced.py | 4 +- .../test_sampling_profiler/test_children.py | 26 ++--- .../test_sampling_profiler/test_cli.py | 6 +- .../test_live_collector_interaction.py | 26 ++--- .../test_sampling_profiler/test_modes.py | 8 +- 15 files changed, 154 insertions(+), 100 deletions(-) diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst index b5e6a2c7a0ed8e..370bbcd3242526 100644 --- a/Doc/library/profiling.sampling.rst +++ b/Doc/library/profiling.sampling.rst @@ -53,7 +53,7 @@ counts**, not direct measurements. Tachyon counts how many times each function appears in the collected samples, then multiplies by the sampling interval to estimate time. -For example, with a 100 microsecond sampling interval over a 10-second profile, +For example, with a 10 kHz sampling rate over a 10-second profile, Tachyon collects approximately 100,000 samples. If a function appears in 5,000 samples (5% of total), Tachyon estimates it consumed 5% of the 10-second duration, or about 500 milliseconds. This is a statistical estimate, not a @@ -142,7 +142,7 @@ Use live mode for real-time monitoring (press ``q`` to quit):: Profile for 60 seconds with a faster sampling rate:: - python -m profiling.sampling run -d 60 -i 50 script.py + python -m profiling.sampling run -d 60 -r 20khz script.py Generate a line-by-line heatmap:: @@ -326,8 +326,8 @@ The default configuration works well for most use cases: * - Option - Default - * - Default for ``--interval`` / ``-i`` - - 100 µs between samples (~10,000 samples/sec) + * - Default for ``--sampling-rate`` / ``-r`` + - 1 kHz * - Default for ``--duration`` / ``-d`` - 10 seconds * - Default for ``--all-threads`` / ``-a`` @@ -346,23 +346,22 @@ The default configuration works well for most use cases: - Disabled (non-blocking sampling) -Sampling interval and duration ------------------------------- +Sampling rate and duration +-------------------------- -The two most fundamental parameters are the sampling interval and duration. +The two most fundamental parameters are the sampling rate and duration. Together, these determine how many samples will be collected during a profiling session. -The :option:`--interval` option (:option:`-i`) sets the time between samples in -microseconds. The default is 100 microseconds, which produces approximately -10,000 samples per second:: +The :option:`--sampling-rate` option (:option:`-r`) sets how frequently samples +are collected. 
The default is 1 kHz (1,000 samples per second)::

    python -m profiling.sampling run -r 20khz script.py

-Lower intervals capture more samples and provide finer-grained data at the
-cost of slightly higher profiler CPU usage. Higher intervals reduce profiler
+Higher rates capture more samples and provide finer-grained data at the
+cost of slightly higher profiler CPU usage. Lower rates reduce profiler
 overhead but may miss short-lived functions. For most applications, the
-default interval provides a good balance between accuracy and overhead.
+default rate provides a good balance between accuracy and overhead.
 
 The :option:`--duration` option (:option:`-d`) sets how long to profile in
 seconds. The default is 10 seconds::
@@ -573,9 +572,9 @@ appended:
 - For pstats format (which defaults to stdout), subprocesses produce
   files like ``profile_12345.pstats``
 
-The subprocess profilers inherit most sampling options from the parent (interval,
-duration, thread selection, native frames, GC frames, async-aware mode, and
-output format). All Python descendant processes are profiled recursively,
+The subprocess profilers inherit most sampling options from the parent (sampling
+rate, duration, thread selection, native frames, GC frames, async-aware mode,
+and output format). All Python descendant processes are profiled recursively,
 including grandchildren and further descendants.
 
 Subprocess detection works by periodically scanning for new descendants of
@@ -1389,9 +1388,9 @@ Global options
 Sampling options
 ----------------
 
-.. option:: -i , --interval
+.. option:: -r , --sampling-rate
 
-   Sampling interval in microseconds. Default: 100.
+   Sampling rate (for example, ``10000``, ``10khz``, ``10k``). Default: ``1khz``.
 
 .. option:: -d , --duration
 
diff --git a/Lib/profiling/sampling/_child_monitor.py b/Lib/profiling/sampling/_child_monitor.py
index e06c550d938b13..ec56f75719f9d1 100644
--- a/Lib/profiling/sampling/_child_monitor.py
+++ b/Lib/profiling/sampling/_child_monitor.py
@@ -16,7 +16,7 @@
 _CHILD_POLL_INTERVAL_SEC = 0.1
 
 # Default timeout for waiting on child profilers
-_DEFAULT_WAIT_TIMEOUT = 30.0
+_DEFAULT_WAIT_TIMEOUT_SEC = 30.0
 
 # Maximum number of child profilers to spawn (prevents resource exhaustion)
 _MAX_CHILD_PROFILERS = 100
@@ -138,7 +138,7 @@ def spawned_profilers(self):
         with self._lock:
             return list(self._spawned_profilers)
 
-    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT):
+    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT_SEC):
         """
         Wait for all spawned child profilers to complete.
diff --git a/Lib/profiling/sampling/_sync_coordinator.py b/Lib/profiling/sampling/_sync_coordinator.py index 1a4af42588a3f5..63d057043f0416 100644 --- a/Lib/profiling/sampling/_sync_coordinator.py +++ b/Lib/profiling/sampling/_sync_coordinator.py @@ -73,8 +73,8 @@ def _validate_arguments(args: List[str]) -> tuple[int, str, List[str]]: # Constants for socket communication _MAX_RETRIES = 3 -_INITIAL_RETRY_DELAY = 0.1 -_SOCKET_TIMEOUT = 2.0 +_INITIAL_RETRY_DELAY_SEC = 0.1 +_SOCKET_TIMEOUT_SEC = 2.0 _READY_MESSAGE = b"ready" @@ -93,14 +93,14 @@ def _signal_readiness(sync_port: int) -> None: for attempt in range(_MAX_RETRIES): try: # Use context manager for automatic cleanup - with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT) as sock: + with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT_SEC) as sock: sock.send(_READY_MESSAGE) return except (socket.error, OSError) as e: last_error = e if attempt < _MAX_RETRIES - 1: # Exponential backoff before retry - time.sleep(_INITIAL_RETRY_DELAY * (2 ** attempt)) + time.sleep(_INITIAL_RETRY_DELAY_SEC * (2 ** attempt)) # If we get here, all retries failed raise SyncError(f"Failed to signal readiness after {_MAX_RETRIES} attempts: {last_error}") from last_error diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index ccd6e954d79698..9e60961943a8d0 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -4,6 +4,7 @@ import importlib.util import locale import os +import re import selectors import socket import subprocess @@ -20,6 +21,7 @@ from .binary_collector import BinaryCollector from .binary_reader import BinaryReader from .constants import ( + MICROSECONDS_PER_SECOND, PROFILING_MODE_ALL, PROFILING_MODE_WALL, PROFILING_MODE_CPU, @@ -66,8 +68,8 @@ class CustomFormatter( # Constants for socket synchronization -_SYNC_TIMEOUT = 5.0 -_PROCESS_KILL_TIMEOUT = 2.0 +_SYNC_TIMEOUT_SEC = 5.0 +_PROCESS_KILL_TIMEOUT_SEC = 2.0 _READY_MESSAGE = b"ready" _RECV_BUFFER_SIZE = 1024 @@ -116,7 +118,8 @@ def _build_child_profiler_args(args): child_args = [] # Sampling options - child_args.extend(["-i", str(args.interval)]) + hz = MICROSECONDS_PER_SECOND // args.sample_interval_usec + child_args.extend(["-r", str(hz)]) child_args.extend(["-d", str(args.duration)]) if args.all_threads: @@ -239,7 +242,7 @@ def _run_with_sync(original_cmd, suppress_output=False): sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port sync_port = sync_sock.getsockname()[1] sync_sock.listen(1) - sync_sock.settimeout(_SYNC_TIMEOUT) + sync_sock.settimeout(_SYNC_TIMEOUT_SEC) # Get current working directory to preserve it cwd = os.getcwd() @@ -268,7 +271,7 @@ def _run_with_sync(original_cmd, suppress_output=False): process = subprocess.Popen(cmd, **popen_kwargs) try: - _wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT) + _wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT_SEC) # Close stderr pipe if we were capturing it if process.stderr: @@ -279,7 +282,7 @@ def _run_with_sync(original_cmd, suppress_output=False): if process.poll() is None: process.terminate() try: - process.wait(timeout=_PROCESS_KILL_TIMEOUT) + process.wait(timeout=_PROCESS_KILL_TIMEOUT_SEC) except subprocess.TimeoutExpired: process.kill() process.wait() @@ -290,16 +293,64 @@ def _run_with_sync(original_cmd, suppress_output=False): return process +_RATE_PATTERN = re.compile(r''' + ^ # Start of string + ( # Group 1: The numeric value + \d+ # One or more digits (integer part) + (?:\.\d+)? 
# Optional: decimal point followed by digits + ) # Examples: "10", "0.5", "100.25" + ( # Group 2: Optional unit suffix + hz # "hz" - hertz + | khz # "khz" - kilohertz + | k # "k" - shorthand for kilohertz + )? # Suffix is optional (bare number = Hz) + $ # End of string + ''', re.VERBOSE | re.IGNORECASE) + + +def _parse_sampling_rate(rate_str: str) -> int: + """Parse sampling rate string to microseconds.""" + rate_str = rate_str.strip().lower() + + match = _RATE_PATTERN.match(rate_str) + if not match: + raise argparse.ArgumentTypeError( + f"Invalid sampling rate format: {rate_str}. " + "Expected: number followed by optional suffix (hz, khz, k) with no spaces (e.g., 10khz)" + ) + + number_part = match.group(1) + suffix = match.group(2) or '' + + # Determine multiplier based on suffix + suffix_map = { + 'hz': 1, + 'khz': 1000, + 'k': 1000, + } + multiplier = suffix_map.get(suffix, 1) + hz = float(number_part) * multiplier + if hz <= 0: + raise argparse.ArgumentTypeError(f"Sampling rate must be positive: {rate_str}") + + interval_usec = int(MICROSECONDS_PER_SECOND / hz) + if interval_usec < 1: + raise argparse.ArgumentTypeError(f"Sampling rate too high: {rate_str}") + + return interval_usec + + def _add_sampling_options(parser): """Add sampling configuration options to a parser.""" sampling_group = parser.add_argument_group("Sampling configuration") sampling_group.add_argument( - "-i", - "--interval", - type=int, - default=100, - metavar="MICROSECONDS", - help="sampling interval", + "-r", + "--sampling-rate", + type=_parse_sampling_rate, + default="1khz", + metavar="RATE", + dest="sample_interval_usec", + help="sampling rate (e.g., 10000, 10khz, 10k)", ) sampling_group.add_argument( "-d", @@ -487,14 +538,13 @@ def _sort_to_mode(sort_choice): } return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) - -def _create_collector(format_type, interval, skip_idle, opcodes=False, +def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False, output_file=None, compression='auto'): """Create the appropriate collector based on format type. 
Args: format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary') - interval: Sampling interval in microseconds + sample_interval_usec: Sampling interval in microseconds skip_idle: Whether to skip idle samples opcodes: Whether to collect opcode information (only used by gecko format for creating interval markers in Firefox Profiler) @@ -519,9 +569,9 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False, # and is the only format that uses opcodes for interval markers if format_type == "gecko": skip_idle = False - return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes) + return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes) - return collector_class(interval, skip_idle=skip_idle) + return collector_class(sample_interval_usec, skip_idle=skip_idle) def _generate_output_filename(format_type, pid): @@ -725,8 +775,8 @@ def _main(): # Generate flamegraph from a script `python -m profiling.sampling run --flamegraph -o output.html script.py` - # Profile with custom interval and duration - `python -m profiling.sampling run -i 50 -d 30 script.py` + # Profile with custom rate and duration + `python -m profiling.sampling run -r 5khz -d 30 script.py` # Save collapsed stacks to file `python -m profiling.sampling run --collapsed -o stacks.txt script.py` @@ -860,7 +910,7 @@ def _handle_attach(args): # Create the appropriate collector collector = _create_collector( - args.format, args.interval, skip_idle, args.opcodes, + args.format, args.sample_interval_usec, skip_idle, args.opcodes, output_file=output_file, compression=getattr(args, 'compression', 'auto') ) @@ -938,7 +988,7 @@ def _handle_run(args): # Create the appropriate collector collector = _create_collector( - args.format, args.interval, skip_idle, args.opcodes, + args.format, args.sample_interval_usec, skip_idle, args.opcodes, output_file=output_file, compression=getattr(args, 'compression', 'auto') ) @@ -965,7 +1015,7 @@ def _handle_run(args): if process.poll() is None: process.terminate() try: - process.wait(timeout=_PROCESS_KILL_TIMEOUT) + process.wait(timeout=_PROCESS_KILL_TIMEOUT_SEC) except subprocess.TimeoutExpired: process.kill() process.wait() @@ -980,7 +1030,7 @@ def _handle_live_attach(args, pid): # Create live collector with default settings collector = LiveStatsCollector( - args.interval, + args.sample_interval_usec, skip_idle=skip_idle, sort_by="tottime", # Default initial sort limit=20, # Default limit @@ -1027,7 +1077,7 @@ def _handle_live_run(args): # Create live collector with default settings collector = LiveStatsCollector( - args.interval, + args.sample_interval_usec, skip_idle=skip_idle, sort_by="tottime", # Default initial sort limit=20, # Default limit diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py index 34b85ba4b3c61d..366cbb38365c9f 100644 --- a/Lib/profiling/sampling/constants.py +++ b/Lib/profiling/sampling/constants.py @@ -1,5 +1,9 @@ """Constants for the sampling profiler.""" +# Time unit conversion constants +MICROSECONDS_PER_SECOND = 1_000_000 +MILLISECONDS_PER_SECOND = 1_000 + # Profiling mode constants PROFILING_MODE_WALL = 0 PROFILING_MODE_CPU = 1 diff --git a/Lib/profiling/sampling/live_collector/__init__.py b/Lib/profiling/sampling/live_collector/__init__.py index 175e4610d232c5..59d50955e52959 100644 --- a/Lib/profiling/sampling/live_collector/__init__.py +++ b/Lib/profiling/sampling/live_collector/__init__.py @@ -114,7 +114,7 @@ from .constants import ( 
MICROSECONDS_PER_SECOND, DISPLAY_UPDATE_HZ, - DISPLAY_UPDATE_INTERVAL, + DISPLAY_UPDATE_INTERVAL_SEC, MIN_TERMINAL_WIDTH, MIN_TERMINAL_HEIGHT, WIDTH_THRESHOLD_SAMPLE_PCT, @@ -165,7 +165,7 @@ # Constants "MICROSECONDS_PER_SECOND", "DISPLAY_UPDATE_HZ", - "DISPLAY_UPDATE_INTERVAL", + "DISPLAY_UPDATE_INTERVAL_SEC", "MIN_TERMINAL_WIDTH", "MIN_TERMINAL_HEIGHT", "WIDTH_THRESHOLD_SAMPLE_PCT", diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index dcb9fcabe32779..cdf95a77eeccd8 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -24,7 +24,7 @@ ) from .constants import ( MICROSECONDS_PER_SECOND, - DISPLAY_UPDATE_INTERVAL, + DISPLAY_UPDATE_INTERVAL_SEC, MIN_TERMINAL_WIDTH, MIN_TERMINAL_HEIGHT, HEADER_LINES, @@ -157,7 +157,7 @@ def __init__( self.max_sample_rate = 0 # Track maximum sample rate seen self.successful_samples = 0 # Track samples that captured frames self.failed_samples = 0 # Track samples that failed to capture frames - self.display_update_interval = DISPLAY_UPDATE_INTERVAL # Instance variable for display refresh rate + self.display_update_interval_sec = DISPLAY_UPDATE_INTERVAL_SEC # Instance variable for display refresh rate # Thread status statistics (bit flags) self.thread_status_counts = { @@ -410,7 +410,7 @@ def collect(self, stack_frames, timestamp_us=None): if ( self._last_display_update is None or (current_time - self._last_display_update) - >= self.display_update_interval + >= self.display_update_interval_sec ): self._update_display() self._last_display_update = current_time @@ -987,14 +987,14 @@ def _handle_input(self): elif ch == ord("+") or ch == ord("="): # Decrease update interval (faster refresh) - self.display_update_interval = max( - 0.05, self.display_update_interval - 0.05 + self.display_update_interval_sec = max( + 0.05, self.display_update_interval_sec - 0.05 ) # Min 20Hz elif ch == ord("-") or ch == ord("_"): # Increase update interval (slower refresh) - self.display_update_interval = min( - 1.0, self.display_update_interval + 0.05 + self.display_update_interval_sec = min( + 1.0, self.display_update_interval_sec + 0.05 ) # Max 1Hz elif ch == ord("c") or ch == ord("C"): diff --git a/Lib/profiling/sampling/live_collector/constants.py b/Lib/profiling/sampling/live_collector/constants.py index 8462c0de3fd680..4f4575f7b7aae2 100644 --- a/Lib/profiling/sampling/live_collector/constants.py +++ b/Lib/profiling/sampling/live_collector/constants.py @@ -5,7 +5,7 @@ # Display update constants DISPLAY_UPDATE_HZ = 10 -DISPLAY_UPDATE_INTERVAL = 1.0 / DISPLAY_UPDATE_HZ # 0.1 seconds +DISPLAY_UPDATE_INTERVAL_SEC = 1.0 / DISPLAY_UPDATE_HZ # 0.1 seconds # Terminal size constraints MIN_TERMINAL_WIDTH = 60 diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py index 314f3796a093ad..cf04f3aa3254ef 100644 --- a/Lib/profiling/sampling/live_collector/widgets.py +++ b/Lib/profiling/sampling/live_collector/widgets.py @@ -13,7 +13,7 @@ WIDTH_THRESHOLD_CUMUL_PCT, WIDTH_THRESHOLD_CUMTIME, MICROSECONDS_PER_SECOND, - DISPLAY_UPDATE_INTERVAL, + DISPLAY_UPDATE_INTERVAL_SEC, MIN_BAR_WIDTH, MAX_SAMPLE_RATE_BAR_WIDTH, MAX_EFFICIENCY_BAR_WIDTH, @@ -181,7 +181,7 @@ def draw_header_info(self, line, width, elapsed): # Calculate display refresh rate refresh_hz = ( - 1.0 / self.collector.display_update_interval if self.collector.display_update_interval > 0 else 0 + 1.0 / self.collector.display_update_interval_sec 
if self.collector.display_update_interval_sec > 0 else 0 ) # Get current view mode and thread display @@ -235,8 +235,8 @@ def draw_header_info(self, line, width, elapsed): def format_rate_with_units(self, rate_hz): """Format a rate in Hz with appropriate units (Hz, KHz, MHz).""" - if rate_hz >= 1_000_000: - return f"{rate_hz / 1_000_000:.1f}MHz" + if rate_hz >= MICROSECONDS_PER_SECOND: + return f"{rate_hz / MICROSECONDS_PER_SECOND:.1f}MHz" elif rate_hz >= 1_000: return f"{rate_hz / 1_000:.1f}KHz" else: diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index 1b2fe6a77278ee..e0dc9ab6bb7edb 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -3,6 +3,7 @@ from _colorize import ANSIColors from .collector import Collector, extract_lineno +from .constants import MICROSECONDS_PER_SECOND class PstatsCollector(Collector): @@ -68,7 +69,7 @@ def _dump_stats(self, file): # Needed for compatibility with pstats.Stats def create_stats(self): - sample_interval_sec = self.sample_interval_usec / 1_000_000 + sample_interval_sec = self.sample_interval_usec / MICROSECONDS_PER_SECOND callers = {} for fname, call_counts in self.result.items(): total = call_counts["direct_calls"] * sample_interval_sec @@ -263,7 +264,7 @@ def _determine_best_unit(max_value): elif max_value >= 0.001: return "ms", 1000.0 else: - return "μs", 1000000.0 + return "μs", float(MICROSECONDS_PER_SECOND) def _print_summary(self, stats_list, total_samples): """Print summary of interesting functions.""" diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py b/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py index ef9ea64b67af61..bcd4de7f5d7ebe 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py @@ -219,8 +219,8 @@ def worker(x): "run", "-d", "5", - "-i", - "100000", + "-r", + "10", script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_children.py b/Lib/test/test_profiling/test_sampling_profiler/test_children.py index 4007b3e8d7a41f..b7dc878a238f8d 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_children.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_children.py @@ -279,11 +279,11 @@ def test_monitor_creation(self): monitor = ChildProcessMonitor( pid=os.getpid(), - cli_args=["-i", "100", "-d", "5"], + cli_args=["-r", "10khz", "-d", "5"], output_pattern="test_{pid}.pstats", ) self.assertEqual(monitor.parent_pid, os.getpid()) - self.assertEqual(monitor.cli_args, ["-i", "100", "-d", "5"]) + self.assertEqual(monitor.cli_args, ["-r", "10khz", "-d", "5"]) self.assertEqual(monitor.output_pattern, "test_{pid}.pstats") def test_monitor_lifecycle(self): @@ -386,7 +386,7 @@ def test_build_child_profiler_args(self): from profiling.sampling.cli import _build_child_profiler_args args = argparse.Namespace( - interval=200, + sample_interval_usec=200, duration=15, all_threads=True, realtime_stats=False, @@ -420,7 +420,7 @@ def assert_flag_value_pair(flag, value): f"'{child_args[flag_index + 1]}' in args: {child_args}", ) - assert_flag_value_pair("-i", 200) + assert_flag_value_pair("-r", 5000) assert_flag_value_pair("-d", 15) assert_flag_value_pair("--mode", "cpu") @@ -444,7 +444,7 @@ def test_build_child_profiler_args_no_gc(self): from profiling.sampling.cli import _build_child_profiler_args args = argparse.Namespace( - 
interval=100, + sample_interval_usec=100, duration=5, all_threads=False, realtime_stats=False, @@ -510,7 +510,7 @@ def test_setup_child_monitor(self): from profiling.sampling.cli import _setup_child_monitor args = argparse.Namespace( - interval=100, + sample_interval_usec=100, duration=5, all_threads=False, realtime_stats=False, @@ -690,7 +690,7 @@ def test_monitor_respects_max_limit(self): # Create a monitor monitor = ChildProcessMonitor( pid=os.getpid(), - cli_args=["-i", "100", "-d", "5"], + cli_args=["-r", "10khz", "-d", "5"], output_pattern="test_{pid}.pstats", ) @@ -927,8 +927,8 @@ def test_subprocesses_flag_spawns_child_and_creates_output(self): "--subprocesses", "-d", "3", - "-i", - "10000", + "-r", + "100", "-o", output_file, script_file, @@ -989,8 +989,8 @@ def test_subprocesses_flag_with_flamegraph_output(self): "--subprocesses", "-d", "2", - "-i", - "10000", + "-r", + "100", "--flamegraph", "-o", output_file, @@ -1043,8 +1043,8 @@ def test_subprocesses_flag_no_crash_on_quick_child(self): "--subprocesses", "-d", "2", - "-i", - "10000", + "-r", + "100", "-o", output_file, script_file, diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 9b2b16d6e1965b..fb4816a0b6085a 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -232,7 +232,7 @@ def test_cli_module_with_profiler_options(self): test_args = [ "profiling.sampling.cli", "run", - "-i", + "-r", "1000", "-d", "30", @@ -265,8 +265,8 @@ def test_cli_script_with_profiler_options(self): test_args = [ "profiling.sampling.cli", "run", - "-i", - "2000", + "-r", + "500", "-d", "60", "--collapsed", diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py index a5870366552854..38f1d03e4939f1 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py @@ -35,7 +35,7 @@ def setUp(self): ) self.collector.start_time = time.perf_counter() # Set a consistent display update interval for tests - self.collector.display_update_interval = 0.1 + self.collector.display_update_interval_sec = 0.1 def tearDown(self): """Clean up after test.""" @@ -110,45 +110,45 @@ def test_reset_stats(self): def test_increase_refresh_rate(self): """Test increasing refresh rate (faster updates).""" - initial_interval = self.collector.display_update_interval + initial_interval = self.collector.display_update_interval_sec # Simulate '+' key press (faster = smaller interval) self.display.simulate_input(ord("+")) self.collector._handle_input() - self.assertLess(self.collector.display_update_interval, initial_interval) + self.assertLess(self.collector.display_update_interval_sec, initial_interval) def test_decrease_refresh_rate(self): """Test decreasing refresh rate (slower updates).""" - initial_interval = self.collector.display_update_interval + initial_interval = self.collector.display_update_interval_sec # Simulate '-' key press (slower = larger interval) self.display.simulate_input(ord("-")) self.collector._handle_input() - self.assertGreater(self.collector.display_update_interval, initial_interval) + self.assertGreater(self.collector.display_update_interval_sec, initial_interval) def test_refresh_rate_minimum(self): """Test that refresh rate has a minimum (max 
speed).""" - self.collector.display_update_interval = 0.05 # Set to minimum + self.collector.display_update_interval_sec = 0.05 # Set to minimum # Try to go faster self.display.simulate_input(ord("+")) self.collector._handle_input() # Should stay at minimum - self.assertEqual(self.collector.display_update_interval, 0.05) + self.assertEqual(self.collector.display_update_interval_sec, 0.05) def test_refresh_rate_maximum(self): """Test that refresh rate has a maximum (min speed).""" - self.collector.display_update_interval = 1.0 # Set to maximum + self.collector.display_update_interval_sec = 1.0 # Set to maximum # Try to go slower self.display.simulate_input(ord("-")) self.collector._handle_input() # Should stay at maximum - self.assertEqual(self.collector.display_update_interval, 1.0) + self.assertEqual(self.collector.display_update_interval_sec, 1.0) def test_help_toggle(self): """Test help screen toggle.""" @@ -289,23 +289,23 @@ def test_filter_clear_uppercase(self): def test_increase_refresh_rate_with_equals(self): """Test increasing refresh rate with '=' key.""" - initial_interval = self.collector.display_update_interval + initial_interval = self.collector.display_update_interval_sec # Simulate '=' key press (alternative to '+') self.display.simulate_input(ord("=")) self.collector._handle_input() - self.assertLess(self.collector.display_update_interval, initial_interval) + self.assertLess(self.collector.display_update_interval_sec, initial_interval) def test_decrease_refresh_rate_with_underscore(self): """Test decreasing refresh rate with '_' key.""" - initial_interval = self.collector.display_update_interval + initial_interval = self.collector.display_update_interval_sec # Simulate '_' key press (alternative to '-') self.display.simulate_input(ord("_")) self.collector._handle_input() - self.assertGreater(self.collector.display_update_interval, initial_interval) + self.assertGreater(self.collector.display_update_interval_sec, initial_interval) def test_finished_state_displays_banner(self): """Test that finished state shows prominent banner.""" diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py index 247416389daa07..877237866b1e65 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py @@ -306,8 +306,8 @@ def test_gil_mode_cli_argument_parsing(self): "12345", "--mode", "gil", - "-i", - "500", + "-r", + "2000", "-d", "5", ] @@ -488,8 +488,8 @@ def test_exception_mode_cli_argument_parsing(self): "12345", "--mode", "exception", - "-i", - "500", + "-r", + "2000", "-d", "5", ] From 1e17ccd030a2285ad53db5952360fffa33a8a877 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Wed, 24 Dec 2025 09:14:39 -0500 Subject: [PATCH 6/7] Correctly fold unknown-8bit originating from encoded words. (#142517) The unknown-8bit trick was designed to deal with unknown bytes in an ASCII message, and it works fine for that. However, I also tried to extend it to handle bytes that can't be decoded using the charset specified in an encoded word, and there it fails because there can be other non-ASCII characters that were *successfully* decoded. The fix is simple: do the unknown-8bit encoding using the utf-8 codec. This is especially appropriate since anyone trying to do recovery on an unknown byte string will probably attempt utf-8 first. 
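A minimal illustration of the failure mode (hypothetical snippet, not part of
the patch): a string recovered from a malformed encoded word can mix
successfully decoded non-ASCII characters with surrogate escapes for the
undecodable bytes, and only the utf-8 codec can re-encode both.

```
# '客' was decoded successfully; the trailing 0xE7 byte could not be
# decoded and is carried as the surrogate escape '\udce7'.
s = '客' + b'\xe7'.decode('utf-8', 'surrogateescape')

s.encode('utf-8', 'surrogateescape')   # b'\xe5\xae\xa2\xe7' -- bytes recovered
s.encode('ascii', 'surrogateescape')   # UnicodeEncodeError: '客' is not ASCII
```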
---
 Lib/email/_encoded_words.py | 2 +-
 Lib/test/test_email/test__header_value_parser.py | 8 ++++++++
 .../2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst | 4 ++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst

diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index 6795a606de037e..05a34a4c105233 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''):
     """
     if charset == 'unknown-8bit':
-        bstring = string.encode('ascii', 'surrogateescape')
+        bstring = string.encode('utf-8', 'surrogateescape')
     else:
         bstring = string.encode(charset)
     if encoding is None:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index f33844910beee4..426ec4644e3096 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3340,5 +3340,13 @@ def test_fold_unfoldable_element_stealing_whitespace(self):
         token = parser.get_address_list(text)[0]
         self._test(token, expected, policy=policy)
 
+    def test_encoded_word_with_undecodable_bytes(self):
+        self._test(parser.get_address_list(
+            ' =?utf-8?Q?=E5=AE=A2=E6=88=B6=E6=AD=A3=E8=A6=8F=E4=BA=A4=E7?='
+        )[0],
+            ' =?unknown-8bit?b?5a6i5oi25q2j6KaP5Lqk5w==?=\n',
+        )
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst b/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst
new file mode 100644
index 00000000000000..388fff0e2acb96
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-10-10-00-06.gh-issue-142517.fG4hbe.rst
@@ -0,0 +1,4 @@
+The non-``compat32`` :mod:`email` policies now correctly handle refolding
+encoded words that contain bytes that cannot be decoded in their specified
+character set. Previously this resulted in an encoding exception during
+folding.

From 7c44f37170cf87a898a8b3ff008c845b8e780c3d Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Wed, 24 Dec 2025 16:15:11 +0000
Subject: [PATCH 7/7] gh-138122: Extend binary profiling format with full source location and opcode (#143088)

Co-authored-by: Stan Ulbrych
---
 InternalDocs/profiling_binary_format.md | 86 ++++++--
 Lib/profiling/sampling/cli.py | 2 +-
 .../test_binary_format.py | 184 ++++++++++++++----
 Modules/_remote_debugging/binary_io.h | 24 ++-
 Modules/_remote_debugging/binary_io_reader.c | 99 +++++++---
 Modules/_remote_debugging/binary_io_writer.c | 120 ++++++++++--
 6 files changed, 411 insertions(+), 104 deletions(-)

diff --git a/InternalDocs/profiling_binary_format.md b/InternalDocs/profiling_binary_format.md
index b3ebdfd22edf8c..7e4592a0d89705 100644
--- a/InternalDocs/profiling_binary_format.md
+++ b/InternalDocs/profiling_binary_format.md
@@ -272,33 +272,85 @@ byte.
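For reference, a minimal Python sketch of the varint and zigzag primitives the
format builds on (illustrative only, following the usual LEB128-style
convention; the canonical implementations are the C reader and writer in
`Modules/_remote_debugging/`):

```
def encode_varint(value):
    # 7 payload bits per byte; the high bit marks a continuation byte
    out = bytearray()
    while value > 0x7F:
        out.append((value & 0x7F) | 0x80)
        value >>= 7
    out.append(value)
    return bytes(out)

def zigzag(n):
    # Maps small negatives to small positives: 0->0, -1->1, 1->2, -2->3
    return (n << 1) ^ (n >> 63)

encode_varint(zigzag(-1))  # b'\x01': a sentinel of -1 costs one byte
```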
## Frame Table -The frame table stores deduplicated frame entries: +The frame table stores deduplicated frame entries with full source position +information and bytecode opcode: ``` -+----------------------+ -| filename_idx: varint | -| funcname_idx: varint | -| lineno: svarint | -+----------------------+ (repeated for each frame) ++----------------------------+ +| filename_idx: varint | +| funcname_idx: varint | +| lineno: svarint | +| end_lineno_delta: svarint | +| column: svarint | +| end_column_delta: svarint | +| opcode: u8 | ++----------------------------+ (repeated for each frame) ``` -Each unique (filename, funcname, lineno) combination gets one entry. Two -calls to the same function at different line numbers produce different -frame entries; two calls at the same line number share one entry. +### Field Definitions + +| Field | Type | Description | +|------------------|---------------|----------------------------------------------------------| +| filename_idx | varint | Index into string table for file name | +| funcname_idx | varint | Index into string table for function name | +| lineno | zigzag varint | Start line number (-1 for synthetic frames) | +| end_lineno_delta | zigzag varint | Delta from lineno (end_lineno = lineno + delta) | +| column | zigzag varint | Start column offset in UTF-8 bytes (-1 if not available) | +| end_column_delta | zigzag varint | Delta from column (end_column = column + delta) | +| opcode | u8 | Python bytecode opcode (0-254) or 255 for None | + +### Delta Encoding + +Position end values use delta encoding for efficiency: + +- `end_lineno = lineno + end_lineno_delta` +- `end_column = column + end_column_delta` + +Typical values: +- `end_lineno_delta`: Usually 0 (single-line expressions) → encodes to 1 byte +- `end_column_delta`: Usually 5-20 (expression width) → encodes to 1 byte + +This saves ~1-2 bytes per frame compared to absolute encoding. When the base +value (lineno or column) is -1 (not available), the delta is stored as 0 and +the reconstructed value is -1. + +### Sentinel Values + +- `opcode = 255`: No opcode captured +- `lineno = -1`: Synthetic frame (no source location) +- `column = -1`: Column offset not available + +### Deduplication + +Each unique (filename, funcname, lineno, end_lineno, column, end_column, +opcode) combination gets one entry. This enables instruction-level profiling +where multiple bytecode instructions on the same line can be distinguished. Strings and frames are deduplicated separately because they have different cardinalities and reference patterns. A codebase might have hundreds of unique source files but thousands of unique functions. Many functions share the same filename, so storing the filename index in each frame entry (rather than the full string) provides an additional layer of deduplication. A frame -entry is just three varints (typically 3-6 bytes) rather than two full -strings plus a line number. - -Line numbers use signed varint (zigzag encoding) rather than unsigned to -handle edge cases. Synthetic frames—generated frames that don't correspond -directly to Python source code, such as C extension boundaries or internal -interpreter frames—use line number 0 or -1 to indicate the absence of a -source location. Zigzag encoding ensures these small negative values encode +entry is typically 7-9 bytes rather than two full strings plus location data. 
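Putting the field order, delta encoding, and sentinels together, decoding a
single frame entry looks roughly like this (a sketch; the `read_*` helpers are
assumed, and the authoritative decoder is `binary_io_reader.c`):

```
def read_frame_entry(read_varint, read_svarint, read_u8):
    filename_idx = read_varint()
    funcname_idx = read_varint()
    lineno = read_svarint()
    end_lineno_delta = read_svarint()
    column = read_svarint()
    end_column_delta = read_svarint()
    opcode = read_u8()
    # Reconstruct absolute end positions; a base of -1 means "not available"
    end_lineno = -1 if lineno == -1 else lineno + end_lineno_delta
    end_column = -1 if column == -1 else column + end_column_delta
    return (filename_idx, funcname_idx, lineno, end_lineno,
            column, end_column, None if opcode == 255 else opcode)
```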
+ +### Size Analysis + +Typical frame size with delta encoding: +- file_idx: 1-2 bytes +- func_idx: 1-2 bytes +- lineno: 1-2 bytes +- end_lineno_delta: 1 byte (usually 0) +- column: 1 byte (usually < 64) +- end_column_delta: 1 byte (usually < 64) +- opcode: 1 byte + +**Total: ~7-9 bytes per frame** + +Line numbers and columns use signed varint (zigzag encoding) to handle +sentinel values efficiently. Synthetic frames—generated frames that don't +correspond directly to Python source code, such as C extension boundaries or +internal interpreter frames—use -1 to indicate the absence of a source +location. Zigzag encoding ensures these small negative values encode efficiently (−1 becomes 1, which is one byte) rather than requiring the maximum varint length. diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 9e60961943a8d0..10341c1570ceca 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -715,7 +715,7 @@ def _validate_args(args, parser): ) # Validate --opcodes is only used with compatible formats - opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap") + opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap", "binary") if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats: parser.error( f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 2bc005901e321f..033a533fe5444e 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -29,10 +29,17 @@ ) -def make_frame(filename, lineno, funcname): - """Create a FrameInfo struct sequence.""" - location = LocationInfo((lineno, lineno, -1, -1)) - return FrameInfo((filename, location, funcname, None)) +def make_frame(filename, lineno, funcname, end_lineno=None, column=None, + end_column=None, opcode=None): + """Create a FrameInfo struct sequence with full location info and opcode.""" + if end_lineno is None: + end_lineno = lineno + if column is None: + column = 0 + if end_column is None: + end_column = 0 + location = LocationInfo((lineno, end_lineno, column, end_column)) + return FrameInfo((filename, location, funcname, opcode)) def make_thread(thread_id, frames, status=0): @@ -54,6 +61,36 @@ def extract_lineno(location): return location +def extract_location(location): + """Extract full location info as dict from location tuple or None.""" + if location is None: + return {"lineno": 0, "end_lineno": 0, "column": 0, "end_column": 0} + if isinstance(location, tuple) and len(location) >= 4: + return { + "lineno": location[0] if location[0] is not None else 0, + "end_lineno": location[1] if location[1] is not None else 0, + "column": location[2] if location[2] is not None else 0, + "end_column": location[3] if location[3] is not None else 0, + } + # Fallback for old-style location + lineno = location[0] if isinstance(location, tuple) else location + return {"lineno": lineno or 0, "end_lineno": lineno or 0, "column": 0, "end_column": 0} + + +def frame_to_dict(frame): + """Convert a FrameInfo to a dict.""" + loc = extract_location(frame.location) + return { + "filename": frame.filename, + "funcname": frame.funcname, + "lineno": loc["lineno"], + "end_lineno": loc["end_lineno"], + "column": loc["column"], + "end_column": loc["end_column"], + 
"opcode": frame.opcode, + } + + class RawCollector: """Collector that captures all raw data grouped by thread.""" @@ -68,15 +105,7 @@ def collect(self, stack_frames, timestamps_us): count = len(timestamps_us) for interp in stack_frames: for thread in interp.threads: - frames = [] - for frame in thread.frame_info: - frames.append( - { - "filename": frame.filename, - "funcname": frame.funcname, - "lineno": extract_lineno(frame.location), - } - ) + frames = [frame_to_dict(f) for f in thread.frame_info] key = (interp.interpreter_id, thread.thread_id) sample = {"status": thread.status, "frames": frames} for _ in range(count): @@ -93,15 +122,7 @@ def samples_to_by_thread(samples): for sample in samples: for interp in sample: for thread in interp.threads: - frames = [] - for frame in thread.frame_info: - frames.append( - { - "filename": frame.filename, - "funcname": frame.funcname, - "lineno": extract_lineno(frame.location), - } - ) + frames = [frame_to_dict(f) for f in thread.frame_info] key = (interp.interpreter_id, thread.thread_id) by_thread[key].append( { @@ -187,25 +208,15 @@ def assert_samples_equal(self, expected_samples, collector): for j, (exp_frame, act_frame) in enumerate( zip(exp["frames"], act["frames"]) ): - self.assertEqual( - exp_frame["filename"], - act_frame["filename"], - f"Thread ({interp_id}, {thread_id}), sample {i}, " - f"frame {j}: filename mismatch", - ) - self.assertEqual( - exp_frame["funcname"], - act_frame["funcname"], - f"Thread ({interp_id}, {thread_id}), sample {i}, " - f"frame {j}: funcname mismatch", - ) - self.assertEqual( - exp_frame["lineno"], - act_frame["lineno"], - f"Thread ({interp_id}, {thread_id}), sample {i}, " - f"frame {j}: lineno mismatch " - f"(expected {exp_frame['lineno']}, got {act_frame['lineno']})", - ) + for field in ("filename", "funcname", "lineno", "end_lineno", + "column", "end_column", "opcode"): + self.assertEqual( + exp_frame[field], + act_frame[field], + f"Thread ({interp_id}, {thread_id}), sample {i}, " + f"frame {j}: {field} mismatch " + f"(expected {exp_frame[field]!r}, got {act_frame[field]!r})", + ) class TestBinaryRoundTrip(BinaryFormatTestBase): @@ -484,6 +495,97 @@ def test_threads_interleaved_samples(self): self.assertEqual(count, 60) self.assert_samples_equal(samples, collector) + def test_full_location_roundtrip(self): + """Full source location (end_lineno, column, end_column) roundtrips.""" + frames = [ + make_frame("test.py", 10, "func1", end_lineno=12, column=4, end_column=20), + make_frame("test.py", 20, "func2", end_lineno=20, column=8, end_column=45), + make_frame("test.py", 30, "func3", end_lineno=35, column=0, end_column=100), + ] + samples = [[make_interpreter(0, [make_thread(1, frames)])]] + collector, count = self.roundtrip(samples) + self.assertEqual(count, 1) + self.assert_samples_equal(samples, collector) + + def test_opcode_roundtrip(self): + """Opcode values roundtrip exactly.""" + opcodes = [0, 1, 50, 100, 150, 200, 254] # Valid Python opcodes + samples = [] + for opcode in opcodes: + frame = make_frame("test.py", 10, "func", opcode=opcode) + samples.append([make_interpreter(0, [make_thread(1, [frame])])]) + collector, count = self.roundtrip(samples) + self.assertEqual(count, len(opcodes)) + self.assert_samples_equal(samples, collector) + + def test_opcode_none_roundtrip(self): + """Opcode=None (sentinel 255) roundtrips as None.""" + frame = make_frame("test.py", 10, "func", opcode=None) + samples = [[make_interpreter(0, [make_thread(1, [frame])])]] + collector, count = self.roundtrip(samples) + 
self.assertEqual(count, 1) + self.assert_samples_equal(samples, collector) + + def test_mixed_location_and_opcode(self): + """Mixed full location and opcode data roundtrips.""" + frames = [ + make_frame("a.py", 10, "a", end_lineno=15, column=4, end_column=30, opcode=100), + make_frame("b.py", 20, "b", end_lineno=20, column=0, end_column=50, opcode=None), + make_frame("c.py", 30, "c", end_lineno=32, column=8, end_column=25, opcode=50), + ] + samples = [[make_interpreter(0, [make_thread(1, frames)])]] + collector, count = self.roundtrip(samples) + self.assertEqual(count, 1) + self.assert_samples_equal(samples, collector) + + def test_delta_encoding_multiline(self): + """Multi-line spans (large end_lineno delta) roundtrip correctly.""" + # This tests the delta encoding: end_lineno = lineno + delta + frames = [ + make_frame("test.py", 1, "small", end_lineno=1, column=0, end_column=10), + make_frame("test.py", 100, "medium", end_lineno=110, column=0, end_column=50), + make_frame("test.py", 1000, "large", end_lineno=1500, column=0, end_column=200), + ] + samples = [[make_interpreter(0, [make_thread(1, frames)])]] + collector, count = self.roundtrip(samples) + self.assertEqual(count, 1) + self.assert_samples_equal(samples, collector) + + def test_column_positions_preserved(self): + """Various column positions are preserved exactly.""" + columns = [(0, 10), (4, 50), (8, 100), (100, 200)] + samples = [] + for col, end_col in columns: + frame = make_frame("test.py", 10, "func", column=col, end_column=end_col) + samples.append([make_interpreter(0, [make_thread(1, [frame])])]) + collector, count = self.roundtrip(samples) + self.assertEqual(count, len(columns)) + self.assert_samples_equal(samples, collector) + + def test_same_line_different_opcodes(self): + """Same line with different opcodes creates distinct frames.""" + # This tests that opcode is part of the frame key + frames = [ + make_frame("test.py", 10, "func", opcode=100), + make_frame("test.py", 10, "func", opcode=101), + make_frame("test.py", 10, "func", opcode=102), + ] + samples = [[make_interpreter(0, [make_thread(1, [f])]) for f in frames]] + collector, count = self.roundtrip(samples) + # Verify all three opcodes are preserved distinctly + self.assertEqual(count, 3) + + def test_same_line_different_columns(self): + """Same line with different columns creates distinct frames.""" + frames = [ + make_frame("test.py", 10, "func", column=0, end_column=10), + make_frame("test.py", 10, "func", column=15, end_column=25), + make_frame("test.py", 10, "func", column=30, end_column=40), + ] + samples = [[make_interpreter(0, [make_thread(1, [f])]) for f in frames]] + collector, count = self.roundtrip(samples) + self.assertEqual(count, 3) + class TestBinaryEdgeCases(BinaryFormatTestBase): """Tests for edge cases in binary format.""" diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index bdfe35f5f2ce04..f8399f4aebe74b 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -25,6 +25,10 @@ extern "C" { #define BINARY_FORMAT_MAGIC_SWAPPED 0x48434154 /* Byte-swapped magic for endianness detection */ #define BINARY_FORMAT_VERSION 1 +/* Sentinel values for optional frame fields */ +#define OPCODE_NONE 255 /* No opcode captured (u8 sentinel) */ +#define LOCATION_NOT_AVAILABLE (-1) /* lineno/column not available (zigzag sentinel) */ + /* Conditional byte-swap macros for cross-endian file reading. 
* Uses Python's optimized byte-swap functions from pycore_bitutils.h */ #define SWAP16_IF(swap, x) ((swap) ? _Py_bswap16(x) : (x)) @@ -172,18 +176,28 @@ typedef struct { size_t compressed_buffer_size; } ZstdCompressor; -/* Frame entry - combines all frame data for better cache locality */ +/* Frame entry - combines all frame data for better cache locality. + * Stores full source position (line, end_line, column, end_column) and opcode. + * Delta values are computed during serialization for efficiency. */ typedef struct { uint32_t filename_idx; uint32_t funcname_idx; - int32_t lineno; + int32_t lineno; /* Start line number (-1 for synthetic frames) */ + int32_t end_lineno; /* End line number (-1 if not available) */ + int32_t column; /* Start column in UTF-8 bytes (-1 if not available) */ + int32_t end_column; /* End column in UTF-8 bytes (-1 if not available) */ + uint8_t opcode; /* Python opcode (0-254) or OPCODE_NONE (255) */ } FrameEntry; -/* Frame key for hash table lookup */ +/* Frame key for hash table lookup - includes all fields for proper deduplication */ typedef struct { uint32_t filename_idx; uint32_t funcname_idx; int32_t lineno; + int32_t end_lineno; + int32_t column; + int32_t end_column; + uint8_t opcode; } FrameKey; /* Pending RLE sample - buffered for run-length encoding */ @@ -305,8 +319,8 @@ typedef struct { PyObject **strings; uint32_t strings_count; - /* Parsed frame table: packed as [filename_idx, funcname_idx, lineno] */ - uint32_t *frame_data; + /* Parsed frame table: array of FrameEntry structures */ + FrameEntry *frames; uint32_t frames_count; /* Sample data region */ diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index f47e3a1767f622..cb58a0ed199d4a 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -276,47 +276,86 @@ reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file static inline int reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_size) { - /* Check for integer overflow in allocation size calculation. - Only needed on 32-bit where SIZE_MAX can be exceeded by uint32_t * 12. */ + /* Check for integer overflow in allocation size calculation. 
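+       Only needed on 32-bit builds, where frames_count * sizeof(FrameEntry)
+       can exceed SIZE_MAX.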
*/ #if SIZEOF_SIZE_T < 8 - if (reader->frames_count > SIZE_MAX / (3 * sizeof(uint32_t))) { + if (reader->frames_count > SIZE_MAX / sizeof(FrameEntry)) { PyErr_SetString(PyExc_OverflowError, "Frame count too large for allocation"); return -1; } #endif - size_t alloc_size = (size_t)reader->frames_count * 3 * sizeof(uint32_t); - reader->frame_data = PyMem_Malloc(alloc_size); - if (!reader->frame_data && reader->frames_count > 0) { + size_t alloc_size = (size_t)reader->frames_count * sizeof(FrameEntry); + reader->frames = PyMem_Malloc(alloc_size); + if (!reader->frames && reader->frames_count > 0) { PyErr_NoMemory(); return -1; } size_t offset = reader->frame_table_offset; for (uint32_t i = 0; i < reader->frames_count; i++) { - size_t base = (size_t)i * 3; + FrameEntry *frame = &reader->frames[i]; size_t prev_offset; prev_offset = offset; - reader->frame_data[base] = decode_varint_u32(data, &offset, file_size); + frame->filename_idx = decode_varint_u32(data, &offset, file_size); if (offset == prev_offset) { PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (filename)"); return -1; } prev_offset = offset; - reader->frame_data[base + 1] = decode_varint_u32(data, &offset, file_size); + frame->funcname_idx = decode_varint_u32(data, &offset, file_size); if (offset == prev_offset) { PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (funcname)"); return -1; } prev_offset = offset; - reader->frame_data[base + 2] = (uint32_t)decode_varint_i32(data, &offset, file_size); + frame->lineno = decode_varint_i32(data, &offset, file_size); if (offset == prev_offset) { PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (lineno)"); return -1; } + + prev_offset = offset; + int32_t end_lineno_delta = decode_varint_i32(data, &offset, file_size); + if (offset == prev_offset) { + PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (end_lineno_delta)"); + return -1; + } + /* Reconstruct end_lineno from delta. If lineno is -1, result is -1. */ + if (frame->lineno == LOCATION_NOT_AVAILABLE) { + frame->end_lineno = LOCATION_NOT_AVAILABLE; + } else { + frame->end_lineno = frame->lineno + end_lineno_delta; + } + + prev_offset = offset; + frame->column = decode_varint_i32(data, &offset, file_size); + if (offset == prev_offset) { + PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (column)"); + return -1; + } + + prev_offset = offset; + int32_t end_column_delta = decode_varint_i32(data, &offset, file_size); + if (offset == prev_offset) { + PyErr_SetString(PyExc_ValueError, "Malformed varint in frame table (end_column_delta)"); + return -1; + } + /* Reconstruct end_column from delta. If column is -1, result is -1. 
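+           The delta is a signed (zigzag) varint because end_column can be
+           smaller than column when a source span covers multiple lines.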
*/ + if (frame->column == LOCATION_NOT_AVAILABLE) { + frame->end_column = LOCATION_NOT_AVAILABLE; + } else { + frame->end_column = frame->column + end_column_delta; + } + + /* Read opcode byte */ + if (offset >= file_size) { + PyErr_SetString(PyExc_ValueError, "Unexpected end of frame table (opcode)"); + return -1; + } + frame->opcode = data[offset++]; } return 0; @@ -683,13 +722,10 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader, goto error; } - size_t base = frame_idx * 3; - uint32_t filename_idx = reader->frame_data[base]; - uint32_t funcname_idx = reader->frame_data[base + 1]; - int32_t lineno = (int32_t)reader->frame_data[base + 2]; + FrameEntry *frame = &reader->frames[frame_idx]; - if (filename_idx >= reader->strings_count || - funcname_idx >= reader->strings_count) { + if (frame->filename_idx >= reader->strings_count || + frame->funcname_idx >= reader->strings_count) { PyErr_SetString(PyExc_ValueError, "Invalid string index in frame"); goto error; } @@ -699,9 +735,14 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader, goto error; } + /* Build location tuple with full position info */ PyObject *location; - if (lineno > 0) { - location = Py_BuildValue("(iiii)", lineno, lineno, 0, 0); + if (frame->lineno != LOCATION_NOT_AVAILABLE) { + location = Py_BuildValue("(iiii)", + frame->lineno, + frame->end_lineno != LOCATION_NOT_AVAILABLE ? frame->end_lineno : frame->lineno, + frame->column != LOCATION_NOT_AVAILABLE ? frame->column : 0, + frame->end_column != LOCATION_NOT_AVAILABLE ? frame->end_column : 0); if (!location) { Py_DECREF(frame_info); goto error; @@ -711,10 +752,24 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader *reader, location = Py_NewRef(Py_None); } - PyStructSequence_SetItem(frame_info, 0, Py_NewRef(reader->strings[filename_idx])); + /* Build opcode object */ + PyObject *opcode_obj; + if (frame->opcode != OPCODE_NONE) { + opcode_obj = PyLong_FromLong(frame->opcode); + if (!opcode_obj) { + Py_DECREF(location); + Py_DECREF(frame_info); + goto error; + } + } + else { + opcode_obj = Py_NewRef(Py_None); + } + + PyStructSequence_SetItem(frame_info, 0, Py_NewRef(reader->strings[frame->filename_idx])); PyStructSequence_SetItem(frame_info, 1, location); - PyStructSequence_SetItem(frame_info, 2, Py_NewRef(reader->strings[funcname_idx])); - PyStructSequence_SetItem(frame_info, 3, Py_NewRef(Py_None)); + PyStructSequence_SetItem(frame_info, 2, Py_NewRef(reader->strings[frame->funcname_idx])); + PyStructSequence_SetItem(frame_info, 3, opcode_obj); PyList_SET_ITEM(frame_list, k, frame_info); } @@ -1192,7 +1247,7 @@ binary_reader_close(BinaryReader *reader) PyMem_Free(reader->strings); } - PyMem_Free(reader->frame_data); + PyMem_Free(reader->frames); if (reader->thread_states) { for (size_t i = 0; i < reader->thread_state_count; i++) { diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c index 3a20f3463b0384..c8857cec6218be 100644 --- a/Modules/_remote_debugging/binary_io_writer.c +++ b/Modules/_remote_debugging/binary_io_writer.c @@ -11,6 +11,7 @@ #include "binary_io.h" #include "_remote_debugging.h" +#include "pycore_opcode_utils.h" // MAX_REAL_OPCODE #include #ifdef HAVE_ZSTD @@ -32,6 +33,16 @@ /* File structure sizes */ #define FILE_FOOTER_SIZE 32 +/* Helper macro: convert PyLong to int32, using default_val if conversion fails */ +#define PYLONG_TO_INT32_OR_DEFAULT(obj, var, default_val) \ + do { \ + (var) = (int32_t)PyLong_AsLong(obj); \ + if (UNLIKELY(PyErr_Occurred() != NULL)) { 
\ + PyErr_Clear(); \ + (var) = (default_val); \ + } \ + } while (0) + /* ============================================================================ * WRITER-SPECIFIC UTILITY HELPERS * ============================================================================ */ @@ -311,7 +322,7 @@ static Py_uhash_t frame_key_hash_func(const void *key) { const FrameKey *fk = (const FrameKey *)key; - /* FNV-1a style hash combining all three values */ + /* FNV-1a style hash combining all fields */ Py_uhash_t hash = 2166136261u; hash ^= fk->filename_idx; hash *= 16777619u; @@ -319,6 +330,14 @@ frame_key_hash_func(const void *key) hash *= 16777619u; hash ^= (uint32_t)fk->lineno; hash *= 16777619u; + hash ^= (uint32_t)fk->end_lineno; + hash *= 16777619u; + hash ^= (uint32_t)fk->column; + hash *= 16777619u; + hash ^= (uint32_t)fk->end_column; + hash *= 16777619u; + hash ^= fk->opcode; + hash *= 16777619u; return hash; } @@ -329,7 +348,11 @@ frame_key_compare_func(const void *key1, const void *key2) const FrameKey *fk2 = (const FrameKey *)key2; return (fk1->filename_idx == fk2->filename_idx && fk1->funcname_idx == fk2->funcname_idx && - fk1->lineno == fk2->lineno); + fk1->lineno == fk2->lineno && + fk1->end_lineno == fk2->end_lineno && + fk1->column == fk2->column && + fk1->end_column == fk2->end_column && + fk1->opcode == fk2->opcode); } static void @@ -388,10 +411,14 @@ writer_intern_string(BinaryWriter *writer, PyObject *string, uint32_t *index) } static inline int -writer_intern_frame(BinaryWriter *writer, uint32_t filename_idx, uint32_t funcname_idx, - int32_t lineno, uint32_t *index) +writer_intern_frame(BinaryWriter *writer, const FrameEntry *entry, uint32_t *index) { - FrameKey lookup_key = {filename_idx, funcname_idx, lineno}; + FrameKey lookup_key = { + entry->filename_idx, entry->funcname_idx, + entry->lineno, entry->end_lineno, + entry->column, entry->end_column, + entry->opcode + }; void *existing = _Py_hashtable_get(writer->frame_hash, &lookup_key); if (existing != NULL) { @@ -412,10 +439,7 @@ writer_intern_frame(BinaryWriter *writer, uint32_t filename_idx, uint32_t funcna *key = lookup_key; *index = (uint32_t)writer->frame_count; - FrameEntry *fe = &writer->frame_entries[writer->frame_count]; - fe->filename_idx = filename_idx; - fe->funcname_idx = funcname_idx; - fe->lineno = lineno; + writer->frame_entries[writer->frame_count] = *entry; if (_Py_hashtable_set(writer->frame_hash, key, (void *)(uintptr_t)(*index + 1)) < 0) { PyMem_Free(key); @@ -810,22 +834,49 @@ build_frame_stack(BinaryWriter *writer, PyObject *frame_list, /* Use unchecked accessors since we control the data structures */ PyObject *frame_info = PyList_GET_ITEM(frame_list, k); - /* Get filename, location, funcname from FrameInfo using unchecked access */ + /* Get filename, location, funcname, opcode from FrameInfo using unchecked access */ PyObject *filename = PyStructSequence_GET_ITEM(frame_info, 0); PyObject *location = PyStructSequence_GET_ITEM(frame_info, 1); PyObject *funcname = PyStructSequence_GET_ITEM(frame_info, 2); + PyObject *opcode_obj = PyStructSequence_GET_ITEM(frame_info, 3); + + /* Extract location fields (can be None for synthetic frames) */ + int32_t lineno = LOCATION_NOT_AVAILABLE; + int32_t end_lineno = LOCATION_NOT_AVAILABLE; + int32_t column = LOCATION_NOT_AVAILABLE; + int32_t end_column = LOCATION_NOT_AVAILABLE; - /* Extract lineno from location (can be None for synthetic frames) */ - int32_t lineno = 0; if (location != Py_None) { - /* Use unchecked access - first element is lineno */ + /* LocationInfo is a 
struct sequence or tuple with: + * (lineno, end_lineno, column, end_column) */ PyObject *lineno_obj = PyTuple_Check(location) ? PyTuple_GET_ITEM(location, 0) : PyStructSequence_GET_ITEM(location, 0); - lineno = (int32_t)PyLong_AsLong(lineno_obj); + PyObject *end_lineno_obj = PyTuple_Check(location) ? + PyTuple_GET_ITEM(location, 1) : + PyStructSequence_GET_ITEM(location, 1); + PyObject *column_obj = PyTuple_Check(location) ? + PyTuple_GET_ITEM(location, 2) : + PyStructSequence_GET_ITEM(location, 2); + PyObject *end_column_obj = PyTuple_Check(location) ? + PyTuple_GET_ITEM(location, 3) : + PyStructSequence_GET_ITEM(location, 3); + + PYLONG_TO_INT32_OR_DEFAULT(lineno_obj, lineno, LOCATION_NOT_AVAILABLE); + PYLONG_TO_INT32_OR_DEFAULT(end_lineno_obj, end_lineno, LOCATION_NOT_AVAILABLE); + PYLONG_TO_INT32_OR_DEFAULT(column_obj, column, LOCATION_NOT_AVAILABLE); + PYLONG_TO_INT32_OR_DEFAULT(end_column_obj, end_column, LOCATION_NOT_AVAILABLE); + } + + /* Extract opcode (can be None) */ + uint8_t opcode = OPCODE_NONE; + if (opcode_obj != Py_None) { + long opcode_long = PyLong_AsLong(opcode_obj); if (UNLIKELY(PyErr_Occurred() != NULL)) { PyErr_Clear(); - lineno = 0; + opcode = OPCODE_NONE; + } else if (opcode_long >= 0 && opcode_long <= MAX_REAL_OPCODE) { + opcode = (uint8_t)opcode_long; } } @@ -841,9 +892,18 @@ build_frame_stack(BinaryWriter *writer, PyObject *frame_list, return -1; } - /* Intern frame */ + /* Intern frame with full location info */ + FrameEntry frame_entry = { + .filename_idx = filename_idx, + .funcname_idx = funcname_idx, + .lineno = lineno, + .end_lineno = end_lineno, + .column = column, + .end_column = end_column, + .opcode = opcode + }; uint32_t frame_idx; - if (writer_intern_frame(writer, filename_idx, funcname_idx, lineno, &frame_idx) < 0) { + if (writer_intern_frame(writer, &frame_entry, &frame_idx) < 0) { return -1; } @@ -1038,10 +1098,33 @@ binary_writer_finalize(BinaryWriter *writer) for (size_t i = 0; i < writer->frame_count; i++) { FrameEntry *entry = &writer->frame_entries[i]; - uint8_t buf[30]; + uint8_t buf[64]; /* Increased buffer for additional fields */ size_t pos = encode_varint_u32(buf, entry->filename_idx); pos += encode_varint_u32(buf + pos, entry->funcname_idx); pos += encode_varint_i32(buf + pos, entry->lineno); + + /* Delta encode end_lineno: store (end_lineno - lineno) as zigzag. + * When lineno is -1, store delta as 0 (result will be -1). */ + int32_t end_lineno_delta = 0; + if (entry->lineno != LOCATION_NOT_AVAILABLE && + entry->end_lineno != LOCATION_NOT_AVAILABLE) { + end_lineno_delta = entry->end_lineno - entry->lineno; + } + pos += encode_varint_i32(buf + pos, end_lineno_delta); + + pos += encode_varint_i32(buf + pos, entry->column); + + /* Delta encode end_column: store (end_column - column) as zigzag. + * When column is -1, store delta as 0 (result will be -1). */ + int32_t end_column_delta = 0; + if (entry->column != LOCATION_NOT_AVAILABLE && + entry->end_column != LOCATION_NOT_AVAILABLE) { + end_column_delta = entry->end_column - entry->column; + } + pos += encode_varint_i32(buf + pos, end_column_delta); + + buf[pos++] = entry->opcode; + if (fwrite_checked_allow_threads(buf, pos, writer->fp) < 0) { return -1; } @@ -1156,3 +1239,4 @@ binary_writer_destroy(BinaryWriter *writer) PyMem_Free(writer); } +#undef PYLONG_TO_INT32_OR_DEFAULT
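
Note for reviewers: the frame-table wire layout implemented above (unsigned
varints for the string-table indices, zigzag-encoded signed varints for
lineno/column and for the end_lineno/end_column deltas, then a trailing opcode
byte with 255 as the None sentinel) can be summarized with a short Python
sketch. This is an illustration only, not part of the module's API; the helper
names here (zigzag_encode, encode_varint, encode_frame) are hypothetical, and
the real implementation is the C code in binary_io_writer.c.

    OPCODE_NONE = 255        # u8 sentinel: no opcode captured (mirrors binary_io.h)
    NOT_AVAILABLE = -1       # location sentinel (mirrors LOCATION_NOT_AVAILABLE)

    def zigzag_encode(n):
        """Map signed to unsigned so small negatives stay small: -1 -> 1, 1 -> 2.
        Assumes values fit in 32 bits, like the int32_t fields in FrameEntry."""
        return (n << 1) ^ (n >> 31)

    def encode_varint(value):
        """LEB128-style unsigned varint: 7 payload bits per byte, MSB = continue."""
        out = bytearray()
        while True:
            byte = value & 0x7F
            value >>= 7
            out.append(byte | 0x80 if value else byte)
            if not value:
                return bytes(out)

    def encode_svarint(value):
        return encode_varint(zigzag_encode(value))

    def encode_frame(filename_idx, funcname_idx, lineno, end_lineno,
                     column, end_column, opcode):
        """Serialize one frame entry in the order the reader expects."""
        buf = bytearray()
        buf += encode_varint(filename_idx)
        buf += encode_varint(funcname_idx)
        buf += encode_svarint(lineno)
        # end_lineno travels as a delta; 0 when either side is unavailable.
        delta = 0
        if lineno != NOT_AVAILABLE and end_lineno != NOT_AVAILABLE:
            delta = end_lineno - lineno
        buf += encode_svarint(delta)
        buf += encode_svarint(column)
        # end_column delta can be negative for multi-line spans, hence zigzag.
        delta = 0
        if column != NOT_AVAILABLE and end_column != NOT_AVAILABLE:
            delta = end_column - column
        buf += encode_svarint(delta)
        buf.append(OPCODE_NONE if opcode is None else opcode)
        return bytes(buf)

    # A synthetic frame: every location field is -1, each encoding to one byte.
    assert len(encode_frame(0, 1, -1, -1, -1, -1, None)) == 7
    # A typical frame lands in the 7-9 byte range from the size analysis.
    assert len(encode_frame(3, 7, 120, 120, 4, 20, 100)) == 8

Decoding reverses these steps: read two unsigned varints and three signed
varints, reconstruct end_lineno and end_column by adding the deltas (keeping
-1 when the base value is -1), then read one opcode byte, mapping 255 back to
None; this mirrors reader_parse_frame_table above.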