Skip to content

Save number attribute as scalar and not as array? #1445

@paugier

Description

@paugier

With HDF5, an attribute can store either a scalar number or an array of size 1. These are two different things:

   ATTRIBUTE "a_int" {
      DATATYPE  H5T_STD_I64LE
      DATASPACE  SCALAR
      DATA {
      (0): 10
      }
   }
   ATTRIBUTE "a_list1" {
      DATATYPE  H5T_STD_I64LE
      DATASPACE  SIMPLE { ( 1 ) / ( 1 ) }
      DATA {
      (0): 10
      }
   }

Unfortunately, netcdf4-python saves a number as an array of size 1, so the two cases can no longer be told apart, which easily leads to incorrect behavior.

This script demonstrates the issue:

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "h5netcdf",
#     "h5py",
#     "netcdf4",
# ]
# ///

"""
# use netCDF4 (bug)
$ ./bad_behavior.py net

# use h5py (works fine)
$ ./bad_behavior.py
"""

import sys
import subprocess

# Backend selection: the last command-line argument names the library used to
# write and read the attributes; without any argument, h5py is used.
package = sys.argv[-1] if len(sys.argv) > 1 else "h5py"

# Any name starting with "net" selects netCDF4; otherwise the name must be one
# of the two h5 packages.
if not (package in ("h5py", "h5netcdf") or package.startswith("net")):
    raise ValueError(f"wrong option '{package}'")

# Bind ``File`` (the callable that opens a file) and ``path`` (the output file
# name) for the selected backend.
if package == "h5py":
    from h5py import File

    path = "data.h5"
elif package == "h5netcdf":
    import h5netcdf

    File = h5netcdf.File
    print(f"h5netcdf {h5netcdf.__version__}")
    path = "data.nc"
else:
    import netCDF4

    File = netCDF4.Dataset
    print(f"netCDF4 {netCDF4.__version__}")
    path = "data_netCDF4.nc"

number = 10

# h5py and h5netcdf expose attributes through the ``.attrs`` mapping, whereas
# netCDF4 is driven through plain Python attributes on the dataset (write) and
# ncattrs()/getncattr() (read).
uses_attrs_mapping = package.startswith("h5")

# The same three attributes are written with every backend: a bare number, a
# one-element list and a two-element list.
to_write = {
    "a_int": number,
    "a_list1": [number],
    "a_list2": [number, 2 * number],
}

with File(path, "w") as handle:
    for key, value in to_write.items():
        if uses_attrs_mapping:
            handle.attrs[key] = value
        else:
            setattr(handle, key, value)

# Dump the file so the on-disk dataspace of each attribute is visible; this
# runs the external ``h5dump`` command and aborts if it exits with an error.
dump_command = ["h5dump", str(path)]
subprocess.run(dump_command, check=True)

# Read every attribute back into a plain dict.
with File(path, "r") as handle:
    if uses_attrs_mapping:
        attrs = dict(handle.attrs)
    else:
        attrs = {key: handle.getncattr(key) for key in handle.ncattrs()}

print(attrs)

# The two-element list round-trips as a sequence, and its first item matches
# the scalar attribute.
assert len(attrs["a_list2"]) == 2
assert attrs["a_list2"][0] == attrs["a_int"]

# The one-element list should come back as a sequence of length 1.
# Reported failure for this check:
#   TypeError: object of type 'numpy.int64' has no len()
assert len(attrs["a_list1"]) == 1

# ... and its single item should equal the scalar attribute.
# Reported failure for this check:
#   IndexError: invalid index to scalar variable.
assert attrs["a_list1"][0] == attrs["a_int"]

It would be great if numbers could be saved as scalars and lists of size 1 saved as SIMPLE { ( 1 ) / ( 1 ) }.

Related to h5netcdf/h5netcdf#301

CC @kmuehlbauer, @shoyer

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions