From c7dcb2652083a702a023881bc328ecaa5f1f2ac5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 17 Dec 2025 12:07:07 +0000 Subject: [PATCH 01/10] GH-142621: JIT: Avoid memory load for symbols within 4GB on AArch64 (GH-142820) --- Python/jit.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Python/jit.c b/Python/jit.c index 602d7a519bd3fc..ccafe0ce497f43 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -432,6 +432,15 @@ patch_aarch64_33rx(unsigned char *location, uint64_t value) loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg; return; } + int64_t page_delta = (relaxed >> 12) - ((uintptr_t)location >> 12); + if (page_delta >= -(1L << 20) && + page_delta < (1L << 20)) + { + // adrp reg, AAA; ldr reg, [reg + BBB] -> adrp reg, AAA; add reg, reg, BBB + patch_aarch64_21rx(location, relaxed); + loc32[1] = 0x91000000 | get_bits(relaxed, 0, 12) << 10 | reg << 5 | reg; + return; + } relaxed = value - (uintptr_t)location; if ((relaxed & 0x3) == 0 && (int64_t)relaxed >= -(1L << 19) && From 1fc3039d7172095d90df489c7a5e555f7128ee90 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 17 Dec 2025 04:14:44 -0800 Subject: [PATCH 02/10] gh-139038: Add JIT What's New for 3.15 (#142845) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Ken Jin --- Doc/whatsnew/3.15.rst | 86 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 24a51f87c0f410..53d613ffb3a471 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -73,6 +73,7 @@ Summary -- Release highlights ` * :pep:`782`: :ref:`A new PyBytesWriter C API to create a Python bytes object ` +* :ref:`The JIT compiler has been significantly upgraded ` * :ref:`Improved error messages ` @@ -850,6 +851,91 @@ csv (Contributed by Maurycy Pawłowski-Wieroński in :gh:`137628`.) +.. _whatsnew315-jit: + +Upgraded JIT compiler +===================== + +Results from the `pyperformance `__ +benchmark suite report +`3-4% `__ +geometric mean performance improvement for the JIT over the standard CPython +interpreter built with all optimizations enabled. The speedups for JIT +builds versus no JIT builds range from roughly 20% slowdown to over +100% speedup (ignoring the ``unpack_sequence`` microbenchmark) on +x86-64 Linux and AArch64 macOS systems. + +.. attention:: + These results are not yet final. + +The major upgrades to the JIT are: + +* LLVM 21 build-time dependency +* New tracing frontend +* Basic register allocation in the JIT +* More JIT optimizations +* Better machine code generation + +.. rubric:: LLVM 21 build-time dependency + +The JIT compiler now uses LLVM 21 for build-time stencil generation. As +always, LLVM is only needed when building CPython with the JIT enabled; +end users running Python do not need LLVM installed. Instructions for +installing LLVM can be found in the `JIT compiler documentation +`__ +for all supported platforms. + +(Contributed by Savannah Ostrowski in :gh:`140973`.) + +.. rubric:: A new tracing frontend + +The JIT compiler now supports significantly more bytecode operations and +control flow than in Python 3.14, enabling speedups on a wider variety of +code. For example, simple Python object creation is now understood by the +3.15 JIT compiler. Overloaded operations and generators are also partially +supported. 
This was made possible by an overhauled JIT tracing frontend +that records actual execution paths through code, rather than estimating +them as the previous implementation did. + +(Contributed by Ken Jin in :gh:`139109`. Support for Windows added by +Mark Shannon in :gh:`141703`.) + +.. rubric:: Basic register allocation in the JIT + +A basic form of register allocation has been added to the JIT compiler's +optimizer. This allows the JIT compiler to avoid certain stack operations +altogether and instead operate on registers. This allows the JIT to produce +more efficient traces by avoiding reads and writes to memory. + +(Contributed by Mark Shannon in :gh:`135379`.) + +.. rubric:: More JIT optimizations + +More `constant-propagation `__ +is now performed. This means when the JIT compiler detects that certain user +code results in constants, the code can be simplified by the JIT. + +(Contributed by Ken Jin and Savannah Ostrowski in :gh:`132732`.) + +The JIT avoids :term:`reference count`\ s where possible. This generally +reduces the cost of most operations in Python. + +(Contributed by Ken Jin, Donghee Na, Nadeshiko Manju, Savannah Ostrowski, +Noam Cohen, Tomas Roun, PuQing in :gh:`134584`.) + +.. rubric:: Better machine code generation + +The JIT compiler's machine code generator now produces better machine code +for x86-64 and AArch64 macOS and Linux targets. In general, users should +experience lower memory usage for generated machine code and more efficient +machine code versus the old JIT. + +(Contributed by Brandt Bucher in :gh:`136528` and :gh:`136528`. +Implementation for AArch64 contributed by Mark Shannon in :gh:`139855`. +Additional optimizations for AArch64 contributed by Mark Shannon and +Diego Russo in :gh:`140683` and :gh:`142305`.) + + Removed ======= From d4095f25e8573efb63196ae96a2f7ba8b5f06dce Mon Sep 17 00:00:00 2001 From: Keming Date: Wed, 17 Dec 2025 22:15:22 +0800 Subject: [PATCH 03/10] gh-142654: show the clear error message when sampling on an unknown PID in tachyon (#142655) Co-authored-by: Pablo Galindo Salgado --- Lib/profiling/sampling/__main__.py | 7 ++ Lib/profiling/sampling/cli.py | 9 ++- Lib/profiling/sampling/errors.py | 19 ++++++ Lib/profiling/sampling/sample.py | 68 +++++++++++-------- .../test_sampling_profiler/test_cli.py | 26 +++++-- .../test_integration.py | 8 +-- .../test_sampling_profiler/test_modes.py | 4 ++ ...-12-13-10-34-59.gh-issue-142654.fmm974.rst | 2 + 8 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 Lib/profiling/sampling/errors.py create mode 100644 Misc/NEWS.d/next/Library/2025-12-13-10-34-59.gh-issue-142654.fmm974.rst diff --git a/Lib/profiling/sampling/__main__.py b/Lib/profiling/sampling/__main__.py index 47bd3a0113eb3d..a45b645eae05fc 100644 --- a/Lib/profiling/sampling/__main__.py +++ b/Lib/profiling/sampling/__main__.py @@ -46,6 +46,7 @@ """ from .cli import main +from .errors import SamplingUnknownProcessError, SamplingModuleNotFoundError, SamplingScriptNotFoundError def handle_permission_error(): """Handle PermissionError by displaying appropriate error message.""" @@ -64,3 +65,9 @@ def handle_permission_error(): main() except PermissionError: handle_permission_error() + except SamplingUnknownProcessError as err: + print(f"Tachyon cannot find the process: {err}", file=sys.stderr) + sys.exit(1) + except (SamplingModuleNotFoundError, SamplingScriptNotFoundError) as err: + print(f"Tachyon cannot find the target: {err}", file=sys.stderr) + sys.exit(1) diff --git a/Lib/profiling/sampling/cli.py 
b/Lib/profiling/sampling/cli.py index e1ff3758c0d341..554167e43f5ed8 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -10,7 +10,8 @@ import time from contextlib import nullcontext -from .sample import sample, sample_live +from .errors import SamplingUnknownProcessError, SamplingModuleNotFoundError, SamplingScriptNotFoundError +from .sample import sample, sample_live, _is_process_running from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector, FlamegraphCollector from .heatmap_collector import HeatmapCollector @@ -743,6 +744,8 @@ def main(): def _handle_attach(args): """Handle the 'attach' command.""" + if not _is_process_running(args.pid): + raise SamplingUnknownProcessError(args.pid) # Check if live mode is requested if args.live: _handle_live_attach(args, args.pid) @@ -792,13 +795,13 @@ def _handle_run(args): added_cwd = True try: if importlib.util.find_spec(args.target) is None: - sys.exit(f"Error: Module not found: {args.target}") + raise SamplingModuleNotFoundError(args.target) finally: if added_cwd: sys.path.remove(cwd) else: if not os.path.exists(args.target): - sys.exit(f"Error: Script not found: {args.target}") + raise SamplingScriptNotFoundError(args.target) # Check if live mode is requested if args.live: diff --git a/Lib/profiling/sampling/errors.py b/Lib/profiling/sampling/errors.py new file mode 100644 index 00000000000000..0832ad2d4381e0 --- /dev/null +++ b/Lib/profiling/sampling/errors.py @@ -0,0 +1,19 @@ +"""Custom exceptions for the sampling profiler.""" + +class SamplingProfilerError(Exception): + """Base exception for sampling profiler errors.""" + +class SamplingUnknownProcessError(SamplingProfilerError): + def __init__(self, pid): + self.pid = pid + super().__init__(f"Process with PID '{pid}' does not exist.") + +class SamplingScriptNotFoundError(SamplingProfilerError): + def __init__(self, script_path): + self.script_path = script_path + super().__init__(f"Script '{script_path}' not found.") + +class SamplingModuleNotFoundError(SamplingProfilerError): + def __init__(self, module_name): + self.module_name = module_name + super().__init__(f"Module '{module_name}' not found.") diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 294ec3003fc6bc..d4c3b577a17c7b 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -34,23 +34,29 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD self.all_threads = all_threads self.mode = mode # Store mode for later use self.collect_stats = collect_stats + try: + self.unwinder = self._new_unwinder(native, gc, opcodes, skip_non_matching_threads) + except RuntimeError as err: + raise SystemExit(err) from err + # Track sample intervals and total sample count + self.sample_intervals = deque(maxlen=100) + self.total_samples = 0 + self.realtime_stats = False + + def _new_unwinder(self, native, gc, opcodes, skip_non_matching_threads): if _FREE_THREADED_BUILD: - self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, + unwinder = _remote_debugging.RemoteUnwinder( + self.pid, all_threads=self.all_threads, mode=self.mode, native=native, gc=gc, opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads, - cache_frames=True, stats=collect_stats + cache_frames=True, stats=self.collect_stats ) else: - only_active_threads = bool(self.all_threads) - self.unwinder = _remote_debugging.RemoteUnwinder( - 
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc, + unwinder = _remote_debugging.RemoteUnwinder( + self.pid, only_active_thread=bool(self.all_threads), mode=self.mode, native=native, gc=gc, opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads, - cache_frames=True, stats=collect_stats + cache_frames=True, stats=self.collect_stats ) - # Track sample intervals and total sample count - self.sample_intervals = deque(maxlen=100) - self.total_samples = 0 - self.realtime_stats = False + return unwinder def sample(self, collector, duration_sec=10, *, async_aware=False): sample_interval_sec = self.sample_interval_usec / 1_000_000 @@ -86,7 +92,7 @@ def sample(self, collector, duration_sec=10, *, async_aware=False): collector.collect_failed_sample() errors += 1 except Exception as e: - if not self._is_process_running(): + if not _is_process_running(self.pid): break raise e from None @@ -148,22 +154,6 @@ def sample(self, collector, duration_sec=10, *, async_aware=False): f"({(expected_samples - num_samples) / expected_samples * 100:.2f}%)" ) - def _is_process_running(self): - if sys.platform == "linux" or sys.platform == "darwin": - try: - os.kill(self.pid, 0) - return True - except ProcessLookupError: - return False - elif sys.platform == "win32": - try: - _remote_debugging.RemoteUnwinder(self.pid) - except Exception: - return False - return True - else: - raise ValueError(f"Unsupported platform: {sys.platform}") - def _print_realtime_stats(self): """Print real-time sampling statistics.""" if len(self.sample_intervals) < 2: @@ -279,6 +269,28 @@ def _print_unwinder_stats(self): print(f" {ANSIColors.YELLOW}Stale cache invalidations: {stale_invalidations}{ANSIColors.RESET}") +def _is_process_running(pid): + if pid <= 0: + return False + if os.name == "posix": + try: + os.kill(pid, 0) + return True + except ProcessLookupError: + return False + except PermissionError: + # EPERM means process exists but we can't signal it + return True + elif sys.platform == "win32": + try: + _remote_debugging.RemoteUnwinder(pid) + except Exception: + return False + return True + else: + raise ValueError(f"Unsupported platform: {sys.platform}") + + def sample( pid, collector, diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 4434335130c325..9b2b16d6e1965b 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -16,6 +16,7 @@ from test.support import is_emscripten, requires_remote_subprocess_debugging from profiling.sampling.cli import main +from profiling.sampling.errors import SamplingScriptNotFoundError, SamplingModuleNotFoundError, SamplingUnknownProcessError class TestSampleProfilerCLI(unittest.TestCase): @@ -203,12 +204,12 @@ def test_cli_mutually_exclusive_pid_script(self): with ( mock.patch("sys.argv", test_args), mock.patch("sys.stderr", io.StringIO()) as mock_stderr, - self.assertRaises(SystemExit) as cm, + self.assertRaises(SamplingScriptNotFoundError) as cm, ): main() # Verify the error is about the non-existent script - self.assertIn("12345", str(cm.exception.code)) + self.assertIn("12345", str(cm.exception)) def test_cli_no_target_specified(self): # In new CLI, must specify a subcommand @@ -436,6 +437,7 @@ def test_cli_default_collapsed_filename(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), 
mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -475,6 +477,7 @@ def test_cli_custom_output_filenames(self): for test_args, expected_filename, expected_format in test_cases: with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -513,6 +516,7 @@ def test_argument_parsing_basic(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -534,6 +538,7 @@ def test_sort_options(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -547,6 +552,7 @@ def test_async_aware_flag_defaults_to_running(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -562,6 +568,7 @@ def test_async_aware_with_async_mode_all(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -576,6 +583,7 @@ def test_async_aware_default_is_none(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): main() @@ -697,14 +705,20 @@ def test_async_aware_incompatible_with_all_threads(self): def test_run_nonexistent_script_exits_cleanly(self): """Test that running a non-existent script exits with a clean error.""" with mock.patch("sys.argv", ["profiling.sampling.cli", "run", "/nonexistent/script.py"]): - with self.assertRaises(SystemExit) as cm: + with self.assertRaisesRegex(SamplingScriptNotFoundError, "Script '[\\w/.]+' not found."): main() - self.assertIn("Script not found", str(cm.exception.code)) @unittest.skipIf(is_emscripten, "subprocess not available") def test_run_nonexistent_module_exits_cleanly(self): """Test that running a non-existent module exits with a clean error.""" with mock.patch("sys.argv", ["profiling.sampling.cli", "run", "-m", "nonexistent_module_xyz"]): - with self.assertRaises(SystemExit) as cm: + with self.assertRaisesRegex(SamplingModuleNotFoundError, "Module '[\\w/.]+' not found."): + main() + + def test_cli_attach_nonexistent_pid(self): + fake_pid = "99999" + with mock.patch("sys.argv", ["profiling.sampling.cli", "attach", fake_pid]): + with self.assertRaises(SamplingUnknownProcessError) as cm: main() - self.assertIn("Module not found", str(cm.exception.code)) + + self.assertIn(fake_pid, str(cm.exception)) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index 08a96d7eb459f0..e7f6c59d5cb770 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -17,7 +17,7 @@ import profiling.sampling.sample from profiling.sampling.pstats_collector import PstatsCollector from profiling.sampling.stack_collector import CollapsedStackCollector - from profiling.sampling.sample import SampleProfiler + from profiling.sampling.sample import SampleProfiler, 
_is_process_running except ImportError: raise unittest.SkipTest( "Test only runs when _remote_debugging is available" @@ -602,7 +602,7 @@ def test_sample_target_module(self): @requires_remote_subprocess_debugging() class TestSampleProfilerErrorHandling(unittest.TestCase): def test_invalid_pid(self): - with self.assertRaises((OSError, RuntimeError)): + with self.assertRaises((SystemExit, PermissionError)): collector = PstatsCollector(sample_interval_usec=100, skip_idle=False) profiling.sampling.sample.sample(-1, collector, duration_sec=1) @@ -638,7 +638,7 @@ def test_is_process_running(self): sample_interval_usec=1000, all_threads=False, ) - self.assertTrue(profiler._is_process_running()) + self.assertTrue(_is_process_running(profiler.pid)) self.assertIsNotNone(profiler.unwinder.get_stack_trace()) subproc.process.kill() subproc.process.wait() @@ -647,7 +647,7 @@ def test_is_process_running(self): ) # Exit the context manager to ensure the process is terminated - self.assertFalse(profiler._is_process_running()) + self.assertFalse(_is_process_running(profiler.pid)) self.assertRaises( ProcessLookupError, profiler.unwinder.get_stack_trace ) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py index f1293544776bc3..247416389daa07 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py @@ -252,6 +252,7 @@ def test_gil_mode_validation(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): try: @@ -313,6 +314,7 @@ def test_gil_mode_cli_argument_parsing(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): try: @@ -432,6 +434,7 @@ def test_exception_mode_validation(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): try: @@ -493,6 +496,7 @@ def test_exception_mode_cli_argument_parsing(self): with ( mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli._is_process_running", return_value=True), mock.patch("profiling.sampling.cli.sample") as mock_sample, ): try: diff --git a/Misc/NEWS.d/next/Library/2025-12-13-10-34-59.gh-issue-142654.fmm974.rst b/Misc/NEWS.d/next/Library/2025-12-13-10-34-59.gh-issue-142654.fmm974.rst new file mode 100644 index 00000000000000..7bb14cb499d850 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-13-10-34-59.gh-issue-142654.fmm974.rst @@ -0,0 +1,2 @@ +Show the clearer error message when using ``profiling.sampling`` on an +unknown PID. From 49627dc991aae534c09fe55cf64e3fc6cef71e56 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 17 Dec 2025 22:21:02 +0800 Subject: [PATCH 04/10] Use other name for JIT contributor (#142877) --- Doc/whatsnew/3.15.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 53d613ffb3a471..7e032fe5df2fdf 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -920,7 +920,7 @@ code results in constants, the code can be simplified by the JIT. The JIT avoids :term:`reference count`\ s where possible. This generally reduces the cost of most operations in Python. 
-(Contributed by Ken Jin, Donghee Na, Nadeshiko Manju, Savannah Ostrowski, +(Contributed by Ken Jin, Donghee Na, Zheao Li, Savannah Ostrowski, Noam Cohen, Tomas Roun, PuQing in :gh:`134584`.) .. rubric:: Better machine code generation From 2b466c47c333106dc9522ab77898e6972e25a2c6 Mon Sep 17 00:00:00 2001 From: Benjamin Johnson Date: Wed, 17 Dec 2025 07:09:57 -0800 Subject: [PATCH 05/10] gh-112127: Fix possible use-after-free in atexit.unregister() (GH-114092) Co-authored-by: Serhiy Storchaka --- Lib/test/_test_atexit.py | 13 +++++++++++++ Misc/ACKS | 1 + .../2025-12-17-14-41-09.gh-issue-112127.13OHQk.rst | 2 ++ Modules/atexitmodule.c | 3 ++- 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-17-14-41-09.gh-issue-112127.13OHQk.rst diff --git a/Lib/test/_test_atexit.py b/Lib/test/_test_atexit.py index f618c1fcbca52b..490b0686a0c179 100644 --- a/Lib/test/_test_atexit.py +++ b/Lib/test/_test_atexit.py @@ -135,6 +135,19 @@ def func(): finally: atexit.unregister(func) + def test_eq_unregister_clear(self): + # Issue #112127: callback's __eq__ may call unregister or _clear + class Evil: + def __eq__(self, other): + action(other) + return NotImplemented + + for action in atexit.unregister, lambda o: atexit._clear(): + with self.subTest(action=action): + atexit.register(lambda: None) + atexit.unregister(Evil()) + atexit._clear() + if __name__ == "__main__": unittest.main() diff --git a/Misc/ACKS b/Misc/ACKS index e3927ff0b3364e..a14089a39cce82 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -908,6 +908,7 @@ Jim Jewett Pedro Diaz Jimenez Orjan Johansen Fredrik Johansson +Benjamin Johnson Benjamin K. Johnson Gregory K. Johnson Kent Johnson diff --git a/Misc/NEWS.d/next/Library/2025-12-17-14-41-09.gh-issue-112127.13OHQk.rst b/Misc/NEWS.d/next/Library/2025-12-17-14-41-09.gh-issue-112127.13OHQk.rst new file mode 100644 index 00000000000000..c983683ebd5589 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-14-41-09.gh-issue-112127.13OHQk.rst @@ -0,0 +1,2 @@ +Fix possible use-after-free in :func:`atexit.unregister` when the callback +is unregistered during comparison. 
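For context on the use-after-free fixed below: atexit_unregister_locked compares the candidate callback against every registered (func, args, kwargs) tuple with PyObject_RichCompareBool, which may run arbitrary __eq__ code; if that code re-enters atexit and removes entries, the borrowed tuple can be freed while it is still being compared. A minimal Python sketch of the re-entrant pattern (hypothetical names, mirroring the new regression test above) looks like this:

    import atexit

    class Evil:
        def __eq__(self, other):
            # Re-enter atexit while unregister() is still iterating the
            # callback list; before the fix this could free the tuple
            # currently being compared.
            atexit._clear()
            return NotImplemented

    atexit.register(lambda: None)
    atexit.unregister(Evil())   # __eq__ runs against each registered callback

The one-line change in the hunk below takes a strong reference (Py_NewRef) to the tuple before the comparison and drops it afterwards, so the tuple outlives any re-entrant mutation of the list.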
diff --git a/Modules/atexitmodule.c b/Modules/atexitmodule.c index 4536b03fbc4de9..f81f0b5724799b 100644 --- a/Modules/atexitmodule.c +++ b/Modules/atexitmodule.c @@ -257,10 +257,11 @@ static int atexit_unregister_locked(PyObject *callbacks, PyObject *func) { for (Py_ssize_t i = 0; i < PyList_GET_SIZE(callbacks); ++i) { - PyObject *tuple = PyList_GET_ITEM(callbacks, i); + PyObject *tuple = Py_NewRef(PyList_GET_ITEM(callbacks, i)); assert(PyTuple_CheckExact(tuple)); PyObject *to_compare = PyTuple_GET_ITEM(tuple, 0); int cmp = PyObject_RichCompareBool(func, to_compare, Py_EQ); + Py_DECREF(tuple); if (cmp < 0) { return -1; From 568a819f677238095441d17b9d8ac6d3ea4e3314 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 17 Dec 2025 15:12:28 +0000 Subject: [PATCH 06/10] gh-138122: Validate base frame before caching in remote debugging frame cache (#142852) --- .../test_integration.py | 95 +++++++++++++++++++ ...-12-17-03-03-12.gh-issue-138122.m3EF9E.rst | 4 + Modules/_remote_debugging/_remote_debugging.h | 9 +- Modules/_remote_debugging/frame_cache.c | 16 +++- Modules/_remote_debugging/frames.c | 41 +++++--- Modules/_remote_debugging/threads.c | 4 +- 6 files changed, 152 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index e7f6c59d5cb770..b82474858ddd4a 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self): self.assertGreater(cpu_percentage, 90.0, f"cpu_leaf should dominate samples in 'running' mode, " f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})") + + +def _generate_deep_generators_script(chain_depth=20, recurse_depth=150): + """Generate a script with deep nested generators for stress testing.""" + lines = [ + 'import sys', + 'sys.setrecursionlimit(5000)', + '', + ] + # Generate chain of yield-from functions + for i in range(chain_depth - 1): + lines.extend([ + f'def deep_yield_chain_{i}(n):', + f' yield ("L{i}", n)', + f' yield from deep_yield_chain_{i + 1}(n)', + '', + ]) + # Last chain function calls recursive_diver + lines.extend([ + f'def deep_yield_chain_{chain_depth - 1}(n):', + f' yield ("L{chain_depth - 1}", n)', + f' yield from recursive_diver(n, {chain_depth})', + '', + 'def recursive_diver(n, depth):', + ' yield (f"DIVE_{depth}", n)', + f' if depth < {recurse_depth}:', + ' yield from recursive_diver(n, depth + 1)', + ' else:', + ' for i in range(5):', + ' yield (f"BOTTOM_{depth}", i)', + '', + 'def oscillating_generator(iterations=1000):', + ' for i in range(iterations):', + ' yield ("OSCILLATE", i)', + ' yield from deep_yield_chain_0(i)', + '', + 'def run_forever():', + ' while True:', + ' for _ in oscillating_generator(10):', + ' pass', + '', + '_test_sock.sendall(b"working")', + 'run_forever()', + ]) + return '\n'.join(lines) + + +@requires_remote_subprocess_debugging() +class TestDeepGeneratorFrameCache(unittest.TestCase): + """Test frame cache consistency with deep oscillating generator stacks.""" + + def test_all_stacks_share_same_base_frame(self): + """Verify all sampled stacks reach the entry point function. 
+ + When profiling deep generators that oscillate up and down the call + stack, every sample should include the entry point function + (run_forever) in its call chain. If the frame cache stores + incomplete stacks, some samples will be missing this base function, + causing broken flamegraphs. + """ + script = _generate_deep_generators_script() + with test_subprocess(script, wait_for_working=True) as subproc: + collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False) + + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2, + ) + + samples_with_entry_point = 0 + samples_without_entry_point = 0 + total_samples = 0 + + for (call_tree, _thread_id), count in collector.stack_counter.items(): + total_samples += count + if call_tree: + has_entry_point = call_tree and call_tree[0][2] == "" + if has_entry_point: + samples_with_entry_point += count + else: + samples_without_entry_point += count + + self.assertGreater(total_samples, 100, + f"Expected at least 100 samples, got {total_samples}") + + self.assertEqual(samples_without_entry_point, 0, + f"Found {samples_without_entry_point}/{total_samples} samples " + f"missing the entry point function 'run_forever'. This indicates " + f"incomplete stacks are being returned, likely due to frame cache " + f"storing partial stack traces.") diff --git a/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst new file mode 100644 index 00000000000000..e33a761aa61825 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst @@ -0,0 +1,4 @@ +Fix incomplete stack traces in the Tachyon profiler's frame cache when +profiling code with deeply nested generators. The frame cache now validates +that stack traces reach the base frame before caching, preventing broken +flamegraphs. Patch by Pablo Galindo. 
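The regression test above builds its stress workload programmatically; a condensed, hand-written equivalent (hypothetical names, for illustration only) shows the shape of code that exposed the bug: a generator stack that repeatedly deepens and unwinds between samples, so a partial frame walk could be cached as if it were the full stack:

    import sys
    sys.setrecursionlimit(5000)

    def diver(n, depth, limit=150):
        # Each level adds a generator frame; the stack oscillates as the
        # outer loop restarts, which is what defeated the frame cache.
        yield ("DIVE", depth)
        if depth < limit:
            yield from diver(n, depth + 1, limit)

    def workload(iterations):
        for i in range(iterations):
            yield ("TOP", i)
            yield from diver(i, 0)

    for _ in workload(10):
        pass

With the change below, frame_cache_store only accepts a stack whose walk actually reached the thread's base frame (last_frame_visited == base_frame_addr); partial walks are not cached and are simply re-read from the target process on the next sample.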
diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 2f3efedd1e0ed5..663479f235af09 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -429,7 +429,8 @@ extern int process_frame_chain( int *stopped_at_cached_frame, uintptr_t *frame_addrs, Py_ssize_t *num_addrs, - Py_ssize_t max_addrs + Py_ssize_t max_addrs, + uintptr_t *out_last_frame_addr ); /* Frame cache functions */ @@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend( Py_ssize_t *num_addrs, Py_ssize_t max_addrs); // Returns: 1 = stored, 0 = not stored (graceful), -1 = error +// Only stores complete stacks that reach base_frame_addr extern int frame_cache_store( RemoteUnwinderObject *unwinder, uint64_t thread_id, PyObject *frame_list, const uintptr_t *addrs, - Py_ssize_t num_addrs); + Py_ssize_t num_addrs, + uintptr_t base_frame_addr, + uintptr_t last_frame_visited); extern int collect_frames_with_cache( RemoteUnwinderObject *unwinder, uintptr_t frame_addr, StackChunkList *chunks, PyObject *frame_info, + uintptr_t base_frame_addr, uintptr_t gc_frame, uintptr_t last_profiled_frame, uint64_t thread_id); diff --git a/Modules/_remote_debugging/frame_cache.c b/Modules/_remote_debugging/frame_cache.c index 4598b9dc353278..ab7891445e07a3 100644 --- a/Modules/_remote_debugging/frame_cache.c +++ b/Modules/_remote_debugging/frame_cache.c @@ -194,6 +194,7 @@ frame_cache_lookup_and_extend( } // Store frame list with addresses in cache +// Only stores complete stacks that reach base_frame_addr (validation done internally) // Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error int frame_cache_store( @@ -201,12 +202,25 @@ frame_cache_store( uint64_t thread_id, PyObject *frame_list, const uintptr_t *addrs, - Py_ssize_t num_addrs) + Py_ssize_t num_addrs, + uintptr_t base_frame_addr, + uintptr_t last_frame_visited) { if (!unwinder->frame_cache || thread_id == 0) { return 0; } + // Validate we have a complete stack before caching. + // Only cache if last_frame_visited matches base_frame_addr (the sentinel + // at the bottom of the stack). Note: we use last_frame_visited rather than + // addrs[num_addrs-1] because the base frame is visited but not added to the + // addrs array (it returns frame==NULL from is_frame_valid due to + // owner==FRAME_OWNED_BY_INTERPRETER). 
+ if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) { + // Incomplete stack - don't cache (graceful degradation) + return 0; + } + // Clamp to max frames if (num_addrs > FRAME_CACHE_MAX_FRAMES) { num_addrs = FRAME_CACHE_MAX_FRAMES; diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index abde60c45766a5..47e34e9f945cbd 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -265,7 +265,8 @@ process_frame_chain( int *stopped_at_cached_frame, uintptr_t *frame_addrs, // optional: C array to receive frame addresses Py_ssize_t *num_addrs, // in/out: current count / updated count - Py_ssize_t max_addrs) // max capacity of frame_addrs array + Py_ssize_t max_addrs, // max capacity of frame_addrs array + uintptr_t *out_last_frame_addr) // optional: receives last frame address visited { uintptr_t frame_addr = initial_frame_addr; uintptr_t prev_frame_addr = 0; @@ -273,10 +274,13 @@ process_frame_chain( const size_t MAX_FRAMES = 1024 + 512; size_t frame_count = 0; - // Initialize output flag + // Initialize output parameters if (stopped_at_cached_frame) { *stopped_at_cached_frame = 0; } + if (out_last_frame_addr) { + *out_last_frame_addr = 0; + } // Quick check: if current_frame == last_profiled_frame, entire stack is unchanged if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) { @@ -390,6 +394,11 @@ process_frame_chain( return -1; } + // Set output parameter for caller (needed for cache validation) + if (out_last_frame_addr) { + *out_last_frame_addr = last_frame_addr; + } + return 0; } @@ -537,6 +546,7 @@ collect_frames_with_cache( uintptr_t frame_addr, StackChunkList *chunks, PyObject *frame_info, + uintptr_t base_frame_addr, uintptr_t gc_frame, uintptr_t last_profiled_frame, uint64_t thread_id) @@ -551,11 +561,13 @@ collect_frames_with_cache( uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; Py_ssize_t num_addrs = 0; Py_ssize_t frames_before = PyList_GET_SIZE(frame_info); + uintptr_t last_frame_visited = 0; int stopped_at_cached = 0; - if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame, + if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame, last_profiled_frame, &stopped_at_cached, - addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { + addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES, + &last_frame_visited) < 0) { return -1; } @@ -575,23 +587,28 @@ collect_frames_with_cache( // Cache miss - continue walking from last_profiled_frame to get the rest STATS_INC(unwinder, frame_cache_misses); Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info); - if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame, - 0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { + if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame, + 0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES, + &last_frame_visited) < 0) { return -1; } STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk); } else { - // Partial cache hit + // Partial cache hit - cache was validated when stored, so we trust it STATS_INC(unwinder, frame_cache_partial_hits); STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache); } - } else if (last_profiled_frame == 0) { - // No cache involvement (no last_profiled_frame or cache disabled) - STATS_INC(unwinder, frame_cache_misses); + } else { + if (last_profiled_frame == 0) { + // No cache 
involvement (no last_profiled_frame or cache disabled) + STATS_INC(unwinder, frame_cache_misses); + } } - // Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES) - if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) { + // Store in cache - frame_cache_store validates internally that we have a + // complete stack (reached base_frame_addr) before actually storing + if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs, + base_frame_addr, last_frame_visited) < 0) { return -1; } diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index 81c13ea48e3c49..6db774ecfc269e 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -430,7 +430,7 @@ unwind_stack_for_thread( uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.last_profiled_frame); if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info, - gc_frame, last_profiled_frame, tid) < 0) { + base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames"); goto error; } @@ -444,7 +444,7 @@ unwind_stack_for_thread( } else { // No caching - process entire frame chain with base_frame validation if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, - base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) { + base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } From 7d81eab923f71f6b7e80f130bc3ad5ca613b88fd Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 17 Dec 2025 16:33:09 +0100 Subject: [PATCH 07/10] gh-142225: Add PyABIInfo_VAR to to _testcapimodule & _testinternalcapi (GH-142833) --- Modules/_testcapimodule.c | 3 +++ Modules/_testinternalcapi.c | 3 +++ Tools/c-analyzer/cpython/_analyzer.py | 1 + 3 files changed, 7 insertions(+) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index c14f925b4e7632..de6d3cbce54fbe 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3523,7 +3523,10 @@ _testcapi_exec(PyObject *m) return 0; } +PyABIInfo_VAR(abi_info); + static PyModuleDef_Slot _testcapi_slots[] = { + {Py_mod_abi, &abi_info}, {Py_mod_exec, _testcapi_exec}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 4140cd23ded95e..a7fbb0f87b6e9c 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2696,7 +2696,10 @@ module_exec(PyObject *module) return 0; } +PyABIInfo_VAR(abi_info); + static struct PyModuleDef_Slot module_slots[] = { + {Py_mod_abi, &abi_info}, {Py_mod_exec, module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, diff --git a/Tools/c-analyzer/cpython/_analyzer.py b/Tools/c-analyzer/cpython/_analyzer.py index 6f0f464892845f..43ed552fcf75d9 100644 --- a/Tools/c-analyzer/cpython/_analyzer.py +++ b/Tools/c-analyzer/cpython/_analyzer.py @@ -76,6 +76,7 @@ 'PyBufferProcs', 'PyStructSequence_Field[]', 'PyStructSequence_Desc', + 'PyABIInfo', } # XXX We should normalize all cases to a single name, From 77c8e6a2b8e206ea8151ab1b431e32f1cad51ddd Mon Sep 17 00:00:00 2001 From: Damian Birchler Date: Wed, 17 Dec 2025 17:40:03 +0100 Subject: [PATCH 08/10] gh-142876: remove reference to thread in 
documentation of `asyncio.Queue.shutdown` (#142888) --- Doc/library/asyncio-queue.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/asyncio-queue.rst b/Doc/library/asyncio-queue.rst index d481a1921d532b..a9735ae80652df 100644 --- a/Doc/library/asyncio-queue.rst +++ b/Doc/library/asyncio-queue.rst @@ -107,7 +107,7 @@ Queue The queue can no longer grow. Future calls to :meth:`~Queue.put` raise :exc:`QueueShutDown`. Currently blocked callers of :meth:`~Queue.put` will be unblocked - and will raise :exc:`QueueShutDown` in the formerly blocked thread. + and will raise :exc:`QueueShutDown` in the formerly awaiting task. If *immediate* is false (the default), the queue can be wound down normally with :meth:`~Queue.get` calls to extract tasks From e61a447d0e0753621e8fc347e99dfc87884d68ab Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 17 Dec 2025 19:41:36 +0300 Subject: [PATCH 09/10] gh-142873: Do not check for `PyContextVar_CheckExact` twice in `PyContextVar_Set` (#142874) --- Python/context.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/Python/context.c b/Python/context.c index 620e78ab1f9ec8..606ce4b1c8f60a 100644 --- a/Python/context.c +++ b/Python/context.c @@ -343,12 +343,6 @@ PyContextVar_Set(PyObject *ovar, PyObject *val) ENSURE_ContextVar(ovar, NULL) PyContextVar *var = (PyContextVar *)ovar; - if (!PyContextVar_CheckExact(var)) { - PyErr_SetString( - PyExc_TypeError, "an instance of ContextVar was expected"); - return NULL; - } - PyContext *ctx = context_get(); if (ctx == NULL) { return NULL; @@ -1025,12 +1019,6 @@ static PyObject * _contextvars_ContextVar_get_impl(PyContextVar *self, PyObject *default_value) /*[clinic end generated code: output=0746bd0aa2ced7bf input=da66664d5d0af4ad]*/ { - if (!PyContextVar_CheckExact(self)) { - PyErr_SetString( - PyExc_TypeError, "an instance of ContextVar was expected"); - return NULL; - } - PyObject *val; if (PyContextVar_Get((PyObject *)self, default_value, &val) < 0) { return NULL; From fba4584ffccde789919799c38a55ce4c17ba4d8b Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 18 Dec 2025 01:05:21 +0800 Subject: [PATCH 10/10] gh-142849: Fix segfault in `executor_to_gv` (GH-142885) Fix segfault in `executor_to_gv` --- Python/optimizer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 18f54f9bc23476..fc984a5374a554 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1977,7 +1977,7 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out) #else fprintf(out, " %s op0=%" PRIu64 "\n", i, opname, inst->operand0); #endif - if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) { + if (base_opcode == _EXIT_TRACE || base_opcode == _JUMP_TO_TOP) { break; } } @@ -1990,7 +1990,7 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out) for (uint32_t i = 0; i < executor->code_size; i++) { _PyUOpInstruction const *inst = &executor->trace[i]; uint16_t base_opcode = _PyUop_Uncached[inst->opcode]; - uint16_t flags = _PyUop_Flags[inst->opcode]; + uint16_t flags = _PyUop_Flags[base_opcode]; _PyExitData *exit = NULL; if (base_opcode == _EXIT_TRACE) { exit = (_PyExitData *)inst->operand0;