diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index df1c35e0dabee..0f2b380bc70bf 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -10,12 +10,7 @@ import warnings from pandas.compat import pickle_compat -from pandas.util._decorators import ( - doc, - set_module, -) - -from pandas.core.shared_docs import _shared_docs +from pandas.util._decorators import set_module from pandas.io.common import get_handle @@ -35,10 +30,6 @@ @set_module("pandas") -@doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "filepath_or_buffer", -) def to_pickle( obj: Any, filepath_or_buffer: FilePath | WriteBuffer[bytes], @@ -57,8 +48,21 @@ def to_pickle( String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``write()`` function. Also accepts URL. URL has to be of S3 or GCS. - {compression_options} - + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and + 'filepath_or_buffer' is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar', + '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, + ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression + and to create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. protocol : int Int which indicates which protocol should be used by the pickler, default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible @@ -67,8 +71,15 @@ def to_pickle( For Python >= 3.4, 4 is a valid value. 
A negative value for the protocol parameter is equivalent to setting its value to HIGHEST_PROTOCOL. - - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. .. [1] https://docs.python.org/3/library/pickle.html @@ -117,10 +128,6 @@ def to_pickle( @set_module("pandas") -@doc( - storage_options=_shared_docs["storage_options"], - decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer", -) def read_pickle( filepath_or_buffer: FilePath | ReadPickleBuffer, compression: CompressionOptions = "infer", @@ -140,10 +147,32 @@ def read_pickle( String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``readlines()`` function. Also accepts URL. URL is not limited to S3 and GCS. - - {decompression_options} - - {storage_options} + compression : str or dict, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer' and + 'filepath_or_buffer' is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar', + '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression). + If using 'zip' or 'tar', the ZIP file must contain only one data file + to be read in. + Set to ``None`` for no decompression.
+ Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, + ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for Zstandard decompression + using a custom compression dictionary: + ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. Returns -------