Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Lib/test/test_free_threading/test_lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,24 @@ def worker():
data = lzd.decompress(compressed, chunk_size)
self.assertEqual(len(data), chunk_size)
output.append(data)
# Read attributes concurrently with other threads decompressing
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
self.assertIsInstance(lzd.eof, bool)
self.assertIsInstance(lzd.needs_input, bool)
self.assertIsInstance(lzd.unused_data, bytes)

run_concurrently(worker_func=worker, nthreads=NTHREADS)
self.assertEqual(len(output), NTHREADS)
# Verify the expected chunks (order doesn't matter due to append race)
self.assertSetEqual(set(output), set(chunks))
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
self.assertTrue(lzd.eof)
self.assertFalse(lzd.needs_input)
# Each thread added full compressed data to the buffer, but only 1 copy
# is consumed to produce the output. The rest remains as unused_data.
self.assertEqual(
len(lzd.unused_data), len(compressed) * (NTHREADS - 1)
)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Make the attributes in :mod:`lzma` thread-safe on the :term:`free threaded
<free threading>` build.
35 changes: 27 additions & 8 deletions Modules/_lzmamodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "pycore_long.h" // _PyLong_UInt32_Converter()
// Blocks output buffer wrappers
#include "pycore_blocks_output_buffer.h"
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_*_RELAXED

#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
#error "The maximum block size accepted by liblzma is SIZE_MAX."
Expand Down Expand Up @@ -948,10 +949,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
goto error;
}
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
d->check = lzma_get_check(&d->lzs);
FT_ATOMIC_STORE_INT_RELAXED(d->check, lzma_get_check(&d->lzs));
}
if (lzret == LZMA_STREAM_END) {
d->eof = 1;
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
break;
} else if (lzs->avail_out == 0) {
/* Need to check lzs->avail_out before lzs->avail_in.
Expand Down Expand Up @@ -1038,7 +1039,7 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
}

if (d->eof) {
d->needs_input = 0;
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
if (lzs->avail_in > 0) {
Py_XSETREF(d->unused_data,
PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
Expand All @@ -1054,17 +1055,17 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
/* (avail_in==0 && avail_out==0)
Maybe lzs's internal state still have a few bytes can
be output, try to output them next time. */
d->needs_input = 0;
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);

/* If max_length < 0, lzs->avail_out always > 0 */
assert(max_length >= 0);
} else {
/* Input buffer exhausted, output buffer has space. */
d->needs_input = 1;
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 1);
}
}
else {
d->needs_input = 0;
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);

/* If we did not use the input buffer, we now have
to copy the tail from the caller's buffer into the
Expand Down Expand Up @@ -1314,15 +1315,32 @@ PyDoc_STRVAR(Decompressor_needs_input_doc,
PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");

static PyObject *
Decompressor_unused_data_get(PyObject *op, void *Py_UNUSED(ignored))
{
Decompressor *self = Decompressor_CAST(op);
PyMutex_Lock(&self->mutex);
PyObject *result = Py_XNewRef(self->unused_data);
PyMutex_Unlock(&self->mutex);
if (result == NULL) {
PyErr_SetString(PyExc_AttributeError, "unused_data");
}
Comment on lines +1325 to +1327
Copy link
Member

@emmatyping emmatyping Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused_data should be b"" if not set, so I don't think this check is needed. It's initialized in __new__ https://github.com/python/cpython/blob/6a4f10325d58deb1906b39d68dc8e84f4c2bf5a4/Modules/_lzmamodule.c#L1239C5-L1239C76

I think we should probably initialize unused_data in bz2 in __new__ as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One other possibility would be to standardize on the implementation in compression.zstd https://github.com/python/cpython/blob/main/Modules/_zstd/decompressor.c#L583-L611

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @emmatyping for pointing this out.

I actually wanted to assert(self->unused_data != NULL), but I was a little bit worried about the code block below. I think PyBytes_FromStringAndSize() failure is very unlikely in L1044, and I believe it could happen only with allocation errors, but this was the main reason. With Py_T_OBJECT_EX in PyMemberDef, reading a NULL
raises AttributeError, so I was trying to keep the current behaviour.

cpython/Modules/_lzmamodule.c

Lines 1040 to 1049 in 08bc03f

if (d->eof) {
d->needs_input = 0;
if (lzs->avail_in > 0) {
Py_XSETREF(d->unused_data,
PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
if (d->unused_data == NULL) {
goto error;
}
}
}

So, how do you think we should change it?

return result;
}

static PyGetSetDef Decompressor_getset[] = {
{"unused_data", Decompressor_unused_data_get, NULL,
Decompressor_unused_data_doc},
{NULL},
};

static PyMemberDef Decompressor_members[] = {
{"check", Py_T_INT, offsetof(Decompressor, check), Py_READONLY,
Decompressor_check_doc},
{"eof", Py_T_BOOL, offsetof(Decompressor, eof), Py_READONLY,
Decompressor_eof_doc},
{"needs_input", Py_T_BOOL, offsetof(Decompressor, needs_input), Py_READONLY,
Decompressor_needs_input_doc},
{"unused_data", Py_T_OBJECT_EX, offsetof(Decompressor, unused_data), Py_READONLY,
Decompressor_unused_data_doc},
{NULL}
};

Expand All @@ -1332,6 +1350,7 @@ static PyType_Slot lzma_decompressor_type_slots[] = {
{Py_tp_new, _lzma_LZMADecompressor},
{Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
{Py_tp_members, Decompressor_members},
{Py_tp_getset, Decompressor_getset},
{0, 0}
};

Expand Down
Loading