-
Notifications
You must be signed in to change notification settings - Fork 270
Open
Description
With HDF5, an attribute can hold either a scalar number or an array of size 1. These are two different things:
ATTRIBUTE "a_int" {
DATATYPE H5T_STD_I64LE
DATASPACE SCALAR
DATA {
(0): 10
}
}
ATTRIBUTE "a_list1" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): 10
}
}
Unfortunately, netcdf4-python saves a number as an array of size 1, so the two cases can no longer be told apart afterwards, which can easily lead to incorrect behavior.
This script demonstrates the issue:
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "h5netcdf",
# "h5py",
# "netcdf4",
# ]
# ///
"""
# use netCDF4 (bug)
$ ./bad_behavior.py net
# use h5py (works fine)
$ ./bad_behavior.py
"""
import sys
import subprocess
# Backend selection: the last command-line argument names the package to
# exercise; with no argument the script falls back to h5py.
if len(sys.argv) > 1:
    package = sys.argv[-1]
else:
    package = "h5py"

# Accept "h5py", "h5netcdf", or anything starting with "net" (netCDF4).
if package not in ["h5py", "h5netcdf"] and not package.startswith("net"):
    raise ValueError(f"wrong option '{package}'")

# Bind `File` to the chosen backend's file class and use a backend-specific
# output path so that runs with different backends do not overwrite each other.
if package == "h5py":
    from h5py import File

    path = "data.h5"
elif package == "h5netcdf":
    import h5netcdf

    File = h5netcdf.File
    print(f"h5netcdf {h5netcdf.__version__}")
    path = "data.nc"
else:
    import netCDF4

    File = netCDF4.Dataset
    print(f"netCDF4 {netCDF4.__version__}")
    path = "data_netCDF4.nc"

number = 10

# Write three root attributes: a plain number, a list of length 1 and a list
# of length 2. The h5* backends go through `.attrs`; netCDF4 uses attribute
# assignment on the dataset object.
if package.startswith("h5"):
    with File(path, "w") as file:
        file.attrs["a_int"] = number
        file.attrs["a_list1"] = [number]
        file.attrs["a_list2"] = [number, 2 * number]
else:
    with File(path, "w") as file:
        file.a_int = number
        file.a_list1 = [number]
        file.a_list2 = [number, 2 * number]

# Dump the file to show how each attribute was stored (requires the external
# `h5dump` tool; check=True aborts the script if it fails).
subprocess.run(["h5dump", str(path)], check=True)

# Read all root attributes back into a plain dict, whichever backend is used.
if package.startswith("h5"):
    with File(path, "r") as file:
        attrs = dict(file.attrs)
else:
    with File(path, "r") as file:
        attrs = {name: file.getncattr(name) for name in file.ncattrs()}

print(attrs)

# The length-2 list round-trips as a sequence.
assert len(attrs["a_list2"]) == 2
assert attrs["a_list2"][0] == attrs["a_int"]
# TypeError: object of type 'numpy.int64' has no len()
assert len(attrs["a_list1"]) == 1
# IndexError: invalid index to scalar variable.
assert attrs["a_list1"][0] == attrs["a_int"]
It would be great if numbers could be saved as scalars and lists of size 1 saved as SIMPLE { ( 1 ) / ( 1 ) }.
Related to h5netcdf/h5netcdf#301
CC @kmuehlbauer, @shoyer
Metadata
Metadata
Assignees
Labels
No labels