Skip to content

Commit cbb1a89

Browse files
committed
RF: Move gzip/zstd specific opening logic from openers into _compression module
1 parent 20df2cf commit cbb1a89

File tree

2 files changed

+74
-69
lines changed

2 files changed

+74
-69
lines changed

nibabel/_compression.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,69 @@
5555
if HAVE_ZSTD:
5656
COMPRESSED_FILE_LIKES += (zstd.ZstdFile,)
5757
COMPRESSION_ERRORS += (zstd.ZstdError,)
58+
59+
60+
61+
class DeterministicGzipFile(gzip.GzipFile):
    """Deterministic variant of GzipFile

    This writer does not add filename information to the header, and defaults
    to a modification time (``mtime``) of 0 seconds.

    Parameters
    ----------
    filename : str or None
        Path to open when ``fileobj`` is not given.  It is only used to open
        the file; an empty name is passed on to ``gzip.GzipFile``.
    mode : str or None
        File mode.  ``None`` is treated as ``'rb'``, and ``'b'`` is appended
        when the mode does not already contain it.
    compresslevel : int
        Forwarded unchanged to ``gzip.GzipFile``.
    fileobj : file-like or None
        Already-open file object to wrap.  When given, ``filename`` is not
        opened.
    mtime : int
        Forwarded unchanged to ``gzip.GzipFile``; defaults to 0.

    Raises
    ------
    TypeError
        If neither ``filename`` nor ``fileobj`` is supplied.
    """

    def __init__(
        self,
        filename: str | None = None,
        mode: Mode | None = None,
        compresslevel: int = 9,
        fileobj: io.FileIO | None = None,
        mtime: int = 0,
    ):
        if mode is None:
            mode = 'rb'
        modestr: str = mode

        # These two guards are adapted from
        # https://github.com/python/cpython/blob/6ab65c6/Lib/gzip.py#L171-L174
        if 'b' not in modestr:
            modestr = f'{mode}b'

        # Remember whether the handle is ours, so that a failure below only
        # closes a file this constructor opened, never a caller-supplied one.
        opened_here = fileobj is None
        if fileobj is None:
            if filename is None:
                raise TypeError('Must define either fileobj or filename')
            # Cast because GzipFile.myfileobj has type io.FileIO while open returns ty.IO
            fileobj = self.myfileobj = ty.cast('io.FileIO', open(filename, modestr))

        try:
            # An empty filename keeps the path out of the gzip header.
            super().__init__(
                filename='',
                mode=modestr,
                compresslevel=compresslevel,
                fileobj=fileobj,
                mtime=mtime,
            )
        except BaseException:
            # Do not leave the freshly opened handle to the garbage collector
            # if the parent initializer rejects the arguments.
            if opened_here:
                fileobj.close()
                self.myfileobj = None
            raise
96+
97+
def gzip_open(
    filename: str,
    mode: Mode = 'rb',
    compresslevel: int = 9,
    mtime: int = 0,
    keep_open: bool = False,
) -> gzip.GzipFile:
    """Open ``filename`` as a gzip file, using indexed_gzip when applicable.

    An ``IndexedGzipFile`` is returned only when ``HAVE_INDEXED_GZIP`` is
    true and ``mode`` is exactly ``'rb'``; in that case ``compresslevel`` and
    ``mtime`` are not used.  In every other case a ``DeterministicGzipFile``
    is returned and ``keep_open`` is not used.
    """
    # use indexed_gzip if possible for faster read access. If keep_open ==
    # True, we tell IndexedGzipFile to keep the file handle open. Otherwise
    # the IndexedGzipFile will close/open the file on each read.
    if HAVE_INDEXED_GZIP and mode == 'rb':
        return IndexedGzipFile(filename, drop_handles=not keep_open)

    return DeterministicGzipFile(filename, mode, compresslevel, mtime=mtime)
114+
115+
116+
def zstd_open(
    filename: str,
    mode: Mode = 'r',
    *,
    level_or_option: int | dict | None = None,
    zstd_dict: zstd.ZstdDict | None = None,
) -> zstd.ZstdFile:
    """Open ``filename`` as a ``zstd.ZstdFile``.

    ``filename`` and ``mode`` are passed positionally; ``level_or_option``
    and ``zstd_dict`` are forwarded unchanged as keyword arguments.
    """
    options = {'level_or_option': level_or_option, 'zstd_dict': zstd_dict}
    return zstd.ZstdFile(filename, mode, **options)

nibabel/openers.py

Lines changed: 8 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616
from bz2 import BZ2File
1717
from os.path import splitext
1818

19-
from ._compression import HAVE_INDEXED_GZIP, IndexedGzipFile, zstd
19+
from ._compression import (HAVE_INDEXED_GZIP,
20+
IndexedGzipFile,
21+
DeterministicGzipFile,
22+
zstd,
23+
gzip_open,
24+
zstd_open)
2025

2126
if ty.TYPE_CHECKING:
2227
from types import TracebackType
@@ -42,72 +47,6 @@ def read(self, size: int = -1, /) -> bytes: ...
4247
def write(self, b: bytes, /) -> int | None: ...
4348

4449

45-
class DeterministicGzipFile(gzip.GzipFile):
46-
"""Deterministic variant of GzipFile
47-
48-
This writer does not add filename information to the header, and defaults
49-
to a modification time (``mtime``) of 0 seconds.
50-
"""
51-
52-
def __init__(
53-
self,
54-
filename: str | None = None,
55-
mode: Mode | None = None,
56-
compresslevel: int = 9,
57-
fileobj: io.FileIO | None = None,
58-
mtime: int = 0,
59-
):
60-
if mode is None:
61-
mode = 'rb'
62-
modestr: str = mode
63-
64-
# These two guards are adapted from
65-
# https://github.com/python/cpython/blob/6ab65c6/Lib/gzip.py#L171-L174
66-
if 'b' not in modestr:
67-
modestr = f'{mode}b'
68-
if fileobj is None:
69-
if filename is None:
70-
raise TypeError('Must define either fileobj or filename')
71-
# Cast because GzipFile.myfileobj has type io.FileIO while open returns ty.IO
72-
fileobj = self.myfileobj = ty.cast('io.FileIO', open(filename, modestr))
73-
super().__init__(
74-
filename='',
75-
mode=modestr,
76-
compresslevel=compresslevel,
77-
fileobj=fileobj,
78-
mtime=mtime,
79-
)
80-
81-
82-
def _gzip_open(
83-
filename: str,
84-
mode: Mode = 'rb',
85-
compresslevel: int = 9,
86-
mtime: int = 0,
87-
keep_open: bool = False,
88-
) -> gzip.GzipFile:
89-
if not HAVE_INDEXED_GZIP or mode != 'rb':
90-
gzip_file = DeterministicGzipFile(filename, mode, compresslevel, mtime=mtime)
91-
92-
# use indexed_gzip if possible for faster read access. If keep_open ==
93-
# True, we tell IndexedGzipFile to keep the file handle open. Otherwise
94-
# the IndexedGzipFile will close/open the file on each read.
95-
else:
96-
gzip_file = IndexedGzipFile(filename, drop_handles=not keep_open)
97-
98-
return gzip_file
99-
100-
101-
def _zstd_open(
102-
filename: str,
103-
mode: Mode = 'r',
104-
*,
105-
level_or_option: int | dict | None = None,
106-
zstd_dict: zstd.ZstdDict | None = None,
107-
) -> zstd.ZstdFile:
108-
return zstd.ZstdFile(filename, mode, level_or_option=level_or_option, zstd_dict=zstd_dict)
109-
110-
11150
class Opener:
11251
r"""Class to accept, maybe open, and context-manage file-likes / filenames
11352
@@ -129,9 +68,9 @@ class Opener:
12968
for \*args
13069
"""
13170

132-
gz_def = (_gzip_open, ('mode', 'compresslevel', 'mtime', 'keep_open'))
71+
gz_def = (gzip_open, ('mode', 'compresslevel', 'mtime', 'keep_open'))
13372
bz2_def = (BZ2File, ('mode', 'buffering', 'compresslevel'))
134-
zstd_def = (_zstd_open, ('mode', 'level_or_option', 'zstd_dict'))
73+
zstd_def = (zstd_open, ('mode', 'level_or_option', 'zstd_dict'))
13574
compress_ext_map: dict[str | None, OpenerDef] = {
13675
'.gz': gz_def,
13776
'.bz2': bz2_def,

0 commit comments

Comments
 (0)