tj-python · pull · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst
@@ -53,7 +53,7 @@ counts**, not direct measurements. Tachyon counts how many times each function
 appears in the collected samples, then multiplies by the sampling interval to
 estimate time.
 
-For example, with a 100 microsecond sampling interval over a 10-second profile,
+For example, with a 10 kHz sampling rate over a 10-second profile,
 Tachyon collects approximately 100,000 samples. If a function appears in 5,000
 samples (5% of total), Tachyon estimates it consumed 5% of the 10-second
 duration, or about 500 milliseconds. This is a statistical estimate, not a
@@ -142,7 +142,7 @@ Use live mode for real-time monitoring (press ``q`` to quit)::
 
 Profile for 60 seconds with a faster sampling rate::
 
-   python -m profiling.sampling run -d 60 -i 50 script.py
+   python -m profiling.sampling run -d 60 -r 20khz script.py
 
 Generate a line-by-line heatmap::
 
@@ -326,8 +326,8 @@ The default configuration works well for most use cases:
 
    * - Option
      - Default
-   * - Default for ``--interval`` / ``-i``
-     - 100 µs between samples (~10,000 samples/sec)
+   * - Default for ``--sampling-rate`` / ``-r``
+     - 1 kHz (~1,000 samples/sec)
    * - Default for ``--duration`` / ``-d``
      - 10 seconds
    * - Default for ``--all-threads`` / ``-a``
@@ -346,23 +346,22 @@ The default configuration works well for most use cases:
      - Disabled (non-blocking sampling)
 
 
-Sampling interval and duration
-------------------------------
+Sampling rate and duration
+--------------------------
 
-The two most fundamental parameters are the sampling interval and duration.
+The two most fundamental parameters are the sampling rate and duration.
 Together, these determine how many samples will be collected during a profiling
 session.
 
-The :option:`--interval` option (:option:`-i`) sets the time between samples in
-microseconds. The default is 100 microseconds, which produces approximately
-10,000 samples per second::
+The :option:`--sampling-rate` option (:option:`-r`) sets how frequently samples
+are collected. The default is 1 kHz (1,000 samples per second)::
 
-   python -m profiling.sampling run -i 50 script.py
+   python -m profiling.sampling run -r 20khz script.py
 
-Lower intervals capture more samples and provide finer-grained data at the
-cost of slightly higher profiler CPU usage. Higher intervals reduce profiler
+Higher rates capture more samples and provide finer-grained data at the
+cost of slightly higher profiler CPU usage. Lower rates reduce profiler
 overhead but may miss short-lived functions. For most applications, the
-default interval provides a good balance between accuracy and overhead.
+default rate provides a good balance between accuracy and overhead.
 
 The :option:`--duration` option (:option:`-d`) sets how long to profile in seconds. The
 default is 10 seconds::
@@ -573,9 +572,9 @@ appended:
 - For pstats format (which defaults to stdout), subprocesses produce files like
   ``profile_12345.pstats``
 
-The subprocess profilers inherit most sampling options from the parent (interval,
-duration, thread selection, native frames, GC frames, async-aware mode, and
-output format). All Python descendant processes are profiled recursively,
+The subprocess profilers inherit most sampling options from the parent (sampling
+rate, duration, thread selection, native frames, GC frames, async-aware mode,
+and output format). All Python descendant processes are profiled recursively,
 including grandchildren and further descendants.
 
 Subprocess detection works by periodically scanning for new descendants of
@@ -1389,9 +1388,9 @@ Global options
 Sampling options
 ----------------
 
-.. option:: -i <microseconds>, --interval <microseconds>
+.. option:: -r <rate>, --sampling-rate <rate>
 
-   Sampling interval in microseconds. Default: 100.
+   Sampling rate (for example, ``10000``, ``10khz``, ``10k``). Default: ``1khz``.
 
 .. option:: -d <seconds>, --duration <seconds>
 

diff --git a/InternalDocs/profiling_binary_format.md b/InternalDocs/profiling_binary_format.md
@@ -272,33 +272,85 @@ byte.
 
 ## Frame Table
 
-The frame table stores deduplicated frame entries:
+The frame table stores deduplicated frame entries with full source position
+information and bytecode opcode:
 
 ```
-+----------------------+
-| filename_idx: varint |
-| funcname_idx: varint |
-| lineno: svarint      |
-+----------------------+  (repeated for each frame)
++----------------------------+
+| filename_idx: varint       |
+| funcname_idx: varint       |
+| lineno: svarint            |
+| end_lineno_delta: svarint  |
+| column: svarint            |
+| end_column_delta: svarint  |
+| opcode: u8                 |
++----------------------------+  (repeated for each frame)
 ```
 
-Each unique (filename, funcname, lineno) combination gets one entry. Two
-calls to the same function at different line numbers produce different
-frame entries; two calls at the same line number share one entry.
+### Field Definitions
+
+| Field            | Type          | Description                                              |
+|------------------|---------------|----------------------------------------------------------|
+| filename_idx     | varint        | Index into string table for file name                    |
+| funcname_idx     | varint        | Index into string table for function name                |
+| lineno           | zigzag varint | Start line number (-1 for synthetic frames)              |
+| end_lineno_delta | zigzag varint | Delta from lineno (end_lineno = lineno + delta)          |
+| column           | zigzag varint | Start column offset in UTF-8 bytes (-1 if not available) |
+| end_column_delta | zigzag varint | Delta from column (end_column = column + delta)          |
+| opcode           | u8            | Python bytecode opcode (0-254) or 255 for None           |
+
+### Delta Encoding
+
+Position end values use delta encoding for efficiency:
+
+- `end_lineno = lineno + end_lineno_delta`
+- `end_column = column + end_column_delta`
+
+Typical values:
+- `end_lineno_delta`: Usually 0 (single-line expressions) → encodes to 1 byte
+- `end_column_delta`: Usually 5-20 (expression width) → encodes to 1 byte
+
+This saves ~1-2 bytes per frame compared to absolute encoding. When the base
+value (lineno or column) is -1 (not available), the delta is stored as 0 and
+the reconstructed value is -1.
+
+### Sentinel Values
+
+- `opcode = 255`: No opcode captured
+- `lineno = -1`: Synthetic frame (no source location)
+- `column = -1`: Column offset not available
+
+### Deduplication
+
+Each unique (filename, funcname, lineno, end_lineno, column, end_column,
+opcode) combination gets one entry. This enables instruction-level profiling
+where multiple bytecode instructions on the same line can be distinguished.
 
 Strings and frames are deduplicated separately because they have different
 cardinalities and reference patterns. A codebase might have hundreds of
 unique source files but thousands of unique functions. Many functions share
 the same filename, so storing the filename index in each frame entry (rather
 than the full string) provides an additional layer of deduplication. A frame
-entry is just three varints (typically 3-6 bytes) rather than two full
-strings plus a line number.
-
-Line numbers use signed varint (zigzag encoding) rather than unsigned to
-handle edge cases. Synthetic frames—generated frames that don't correspond
-directly to Python source code, such as C extension boundaries or internal
-interpreter frames—use line number 0 or -1 to indicate the absence of a
-source location. Zigzag encoding ensures these small negative values encode
+entry is typically 7-9 bytes rather than two full strings plus location data.
+
+### Size Analysis
+
+Typical frame size with delta encoding:
+- filename_idx: 1-2 bytes
+- funcname_idx: 1-2 bytes
+- lineno: 1-2 bytes
+- end_lineno_delta: 1 byte (usually 0)
+- column: 1 byte (usually < 64)
+- end_column_delta: 1 byte (usually < 64)
+- opcode: 1 byte
+
+**Total: ~7-9 bytes per frame**
+
+Line numbers and columns use signed varint (zigzag encoding) to handle
+sentinel values efficiently. Synthetic frames—generated frames that don't
+correspond directly to Python source code, such as C extension boundaries or
+internal interpreter frames—use -1 to indicate the absence of a source
+location. Zigzag encoding ensures these small negative values encode
 efficiently (−1 becomes 1, which is one byte) rather than requiring the
 maximum varint length.
 

diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
@@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''):
 
     """
     if charset == 'unknown-8bit':
-        bstring = string.encode('ascii', 'surrogateescape')
+        bstring = string.encode('utf-8', 'surrogateescape')
     else:
         bstring = string.encode(charset)
     if encoding is None:

diff --git a/Lib/profiling/sampling/_child_monitor.py b/Lib/profiling/sampling/_child_monitor.py
@@ -16,7 +16,7 @@
 _CHILD_POLL_INTERVAL_SEC = 0.1
 
 # Default timeout for waiting on child profilers
-_DEFAULT_WAIT_TIMEOUT = 30.0
+_DEFAULT_WAIT_TIMEOUT_SEC = 30.0
 
 # Maximum number of child profilers to spawn (prevents resource exhaustion)
 _MAX_CHILD_PROFILERS = 100
@@ -138,7 +138,7 @@ def spawned_profilers(self):
         with self._lock:
             return list(self._spawned_profilers)
 
-    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT):
+    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT_SEC):
         """
         Wait for all spawned child profilers to complete.
 

diff --git a/Lib/profiling/sampling/_sync_coordinator.py b/Lib/profiling/sampling/_sync_coordinator.py
@@ -73,8 +73,8 @@ def _validate_arguments(args: List[str]) -> tuple[int, str, List[str]]:
 
 # Constants for socket communication
 _MAX_RETRIES = 3
-_INITIAL_RETRY_DELAY = 0.1
-_SOCKET_TIMEOUT = 2.0
+_INITIAL_RETRY_DELAY_SEC = 0.1
+_SOCKET_TIMEOUT_SEC = 2.0
 _READY_MESSAGE = b"ready"
 
 
@@ -93,14 +93,14 @@ def _signal_readiness(sync_port: int) -> None:
     for attempt in range(_MAX_RETRIES):
         try:
             # Use context manager for automatic cleanup
-            with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT) as sock:
+            with socket.create_connection(("127.0.0.1", sync_port), timeout=_SOCKET_TIMEOUT_SEC) as sock:
                 sock.send(_READY_MESSAGE)
                 return
         except (socket.error, OSError) as e:
             last_error = e
             if attempt < _MAX_RETRIES - 1:
                 # Exponential backoff before retry
-                time.sleep(_INITIAL_RETRY_DELAY * (2 ** attempt))
+                time.sleep(_INITIAL_RETRY_DELAY_SEC * (2 ** attempt))
 
     # If we get here, all retries failed
     raise SyncError(f"Failed to signal readiness after {_MAX_RETRIES} attempts: {last_error}") from last_error