bioimageio.core.io

  1import collections.abc
  2import warnings
  3from pathlib import Path, PurePosixPath
  4from typing import Any, Mapping, Optional, Sequence, Tuple, Union
  5
  6import h5py
  7import numpy as np
  8from imageio.v3 import imread, imwrite
  9from loguru import logger
 10from numpy.typing import NDArray
 11from pydantic import BaseModel, ConfigDict, TypeAdapter
 12
 13from bioimageio.spec.utils import load_array, save_array
 14
 15from .axis import AxisLike
 16from .common import PerMember
 17from .sample import Sample
 18from .stat_measures import DatasetMeasure, MeasureValue
 19from .tensor import Tensor
 20
 21DEFAULT_H5_DATASET_PATH = "data"
 22
 23
 24def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
 25    """load a single image as numpy array
 26
 27    Args:
 28        path: image path
 29        is_volume: deprecated
 30    """
 31    if is_volume is not None:
 32        warnings.warn("**is_volume** is deprecated and will be removed soon.")
 33
 34    file_path, subpath = _split_dataset_path(Path(path))
 35
 36    if file_path.suffix == ".npy":
 37        if subpath is not None:
 38            raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
 39        return load_array(path)
 40    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
 41        if subpath is None:
 42            dataset_path = DEFAULT_H5_DATASET_PATH
 43        else:
 44            dataset_path = str(subpath)
 45
 46        with h5py.File(file_path, "r") as f:
 47            h5_dataset = f.get(  # pyright: ignore[reportUnknownVariableType]
 48                dataset_path
 49            )
 50            if not isinstance(h5_dataset, h5py.Dataset):
 51                raise ValueError(
 52                    f"{path} is not of type {h5py.Dataset}, but has type "
 53                    + str(
 54                        type(h5_dataset)  # pyright: ignore[reportUnknownArgumentType]
 55                    )
 56                )
 57            image: NDArray[Any]
 58            image = h5_dataset[:]  # pyright: ignore[reportUnknownVariableType]
 59            assert isinstance(image, np.ndarray), type(
 60                image  # pyright: ignore[reportUnknownArgumentType]
 61            )
 62            return image  # pyright: ignore[reportUnknownVariableType]
 63    else:
 64        return imread(path)  # pyright: ignore[reportUnknownVariableType]
 65
 66
 67def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor:
 68    # TODO: load axis meta data
 69    array = load_image(path)
 70
 71    return Tensor.from_numpy(array, dims=axes)
 72
 73
 74def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]:
 75    """Split off subpath (e.g. internal  h5 dataset path)
 76    from a file path following a file extension.
 77
 78    Examples:
 79        >>> _split_dataset_path(Path("my_file.h5/dataset"))
 80        (PosixPath('my_file.h5'), PurePosixPath('dataset'))
 81
 82        If no suffix is detected the path is returned with
 83        >>> _split_dataset_path(Path("my_plain_file"))
 84        (PosixPath('my_plain_file'), None)
 85
 86    """
 87    if path.suffix:
 88        return path, None
 89
 90    for p in path.parents:
 91        if p.suffix:
 92            return p, PurePosixPath(path.relative_to(p))
 93
 94    return path, None
 95
 96
 97def save_tensor(path: Path, tensor: Tensor) -> None:
 98    # TODO: save axis meta data
 99
100    data: NDArray[Any] = tensor.data.to_numpy()
101    file_path, subpath = _split_dataset_path(Path(path))
102    if not file_path.suffix:
103        raise ValueError(f"No suffix (needed to decide file format) found in {path}")
104
105    file_path.parent.mkdir(exist_ok=True, parents=True)
106    if file_path.suffix == ".npy":
107        if subpath is not None:
108            raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}")
109        save_array(file_path, data)
110    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
111        if subpath is None:
112            dataset_path = DEFAULT_H5_DATASET_PATH
113        else:
114            dataset_path = str(subpath)
115
116        with h5py.File(file_path, "a") as f:
117            if dataset_path in f:
118                del f[dataset_path]
119
120            _ = f.create_dataset(dataset_path, data=data, chunks=True)
121    else:
122        # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
123        #     tensor = tensor[{a: 0 for a in singleton_axes}]
124        #     singleton_axes_msg = f"(without singleton axes {singleton_axes}) "
125        # else:
126        singleton_axes_msg = ""
127
128        logger.debug(
129            "writing tensor {} {}to {}",
130            dict(tensor.tagged_shape),
131            singleton_axes_msg,
132            path,
133        )
134        imwrite(path, data)
135
136
def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None:
    """save a sample to path

    If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`,
    which are resolved with the `sample` object.
    """
    path_is_mapping = isinstance(path, collections.abc.Mapping)
    if not path_is_mapping and "{member_id}" not in str(path):
        raise ValueError(f"missing `{{member_id}}` in path {path}")

    for member_id, tensor in sample.members.items():
        if path_is_mapping:
            member_path = path[member_id]
        else:
            # resolve the placeholders with the sample's ids
            member_path = Path(
                str(path).format(sample_id=sample.id, member_id=member_id)
            )

        save_tensor(member_path, tensor)
154
155
# One serializable (measure, value) pair of a dataset statistic.
# `frozen=True` makes instances immutable; `arbitrary_types_allowed=True`
# is needed because MeasureValue may hold non-pydantic types (e.g. a Tensor).
class _SerializedDatasetStatsEntry(
    BaseModel, frozen=True, arbitrary_types_allowed=True
):
    measure: DatasetMeasure
    value: MeasureValue
161
162
# pydantic adapter used to (de)serialize a sequence of stat entries as JSON
# (see save_dataset_stat / load_dataset_stat)
_stat_adapter = TypeAdapter(
    Sequence[_SerializedDatasetStatsEntry],
    config=ConfigDict(arbitrary_types_allowed=True),
)
167
168
def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path):
    """Serialize dataset statistics `stat` to JSON and write them to `path`."""
    entries = [
        _SerializedDatasetStatsEntry(measure=measure, value=value)
        for measure, value in stat.items()
    ]
    _ = path.write_bytes(_stat_adapter.dump_json(entries))
174
175
def load_dataset_stat(path: Path):
    """Read dataset statistics back from the JSON file at `path`."""
    entries = _stat_adapter.validate_json(path.read_bytes())
    return {entry.measure: entry.value for entry in entries}
DEFAULT_H5_DATASET_PATH = 'data'
def load_image( path: pathlib.Path, is_volume: Optional[bool] = None) -> numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]:
25def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
26    """load a single image as numpy array
27
28    Args:
29        path: image path
30        is_volume: deprecated
31    """
32    if is_volume is not None:
33        warnings.warn("**is_volume** is deprecated and will be removed soon.")
34
35    file_path, subpath = _split_dataset_path(Path(path))
36
37    if file_path.suffix == ".npy":
38        if subpath is not None:
39            raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
40        return load_array(path)
41    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
42        if subpath is None:
43            dataset_path = DEFAULT_H5_DATASET_PATH
44        else:
45            dataset_path = str(subpath)
46
47        with h5py.File(file_path, "r") as f:
48            h5_dataset = f.get(  # pyright: ignore[reportUnknownVariableType]
49                dataset_path
50            )
51            if not isinstance(h5_dataset, h5py.Dataset):
52                raise ValueError(
53                    f"{path} is not of type {h5py.Dataset}, but has type "
54                    + str(
55                        type(h5_dataset)  # pyright: ignore[reportUnknownArgumentType]
56                    )
57                )
58            image: NDArray[Any]
59            image = h5_dataset[:]  # pyright: ignore[reportUnknownVariableType]
60            assert isinstance(image, np.ndarray), type(
61                image  # pyright: ignore[reportUnknownArgumentType]
62            )
63            return image  # pyright: ignore[reportUnknownVariableType]
64    else:
65        return imread(path)  # pyright: ignore[reportUnknownVariableType]

load a single image as numpy array

Arguments:
  • path: image path
  • is_volume: deprecated
def load_tensor( path: pathlib.Path, axes: Optional[Sequence[Union[bioimageio.spec.model.v0_5.AxisId, Literal['b', 'i', 't', 'c', 'z', 'y', 'x'], Annotated[Union[bioimageio.spec.model.v0_5.BatchAxis, bioimageio.spec.model.v0_5.ChannelAxis, bioimageio.spec.model.v0_5.IndexInputAxis, bioimageio.spec.model.v0_5.TimeInputAxis, bioimageio.spec.model.v0_5.SpaceInputAxis], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.model.v0_5.BatchAxis, bioimageio.spec.model.v0_5.ChannelAxis, bioimageio.spec.model.v0_5.IndexOutputAxis, Annotated[Union[Annotated[bioimageio.spec.model.v0_5.TimeOutputAxis, Tag(tag='wo_halo')], Annotated[bioimageio.spec.model.v0_5.TimeOutputAxisWithHalo, Tag(tag='with_halo')]], Discriminator(discriminator=<function _get_halo_axis_discriminator_value>, custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[Annotated[bioimageio.spec.model.v0_5.SpaceOutputAxis, Tag(tag='wo_halo')], Annotated[bioimageio.spec.model.v0_5.SpaceOutputAxisWithHalo, Tag(tag='with_halo')]], Discriminator(discriminator=<function _get_halo_axis_discriminator_value>, custom_error_type=None, custom_error_message=None, custom_error_context=None)]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], bioimageio.core.Axis]]] = None) -> bioimageio.core.Tensor:
68def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor:
69    # TODO: load axis meta data
70    array = load_image(path)
71
72    return Tensor.from_numpy(array, dims=axes)
def save_tensor(path: pathlib.Path, tensor: bioimageio.core.Tensor) -> None:
 98def save_tensor(path: Path, tensor: Tensor) -> None:
 99    # TODO: save axis meta data
100
101    data: NDArray[Any] = tensor.data.to_numpy()
102    file_path, subpath = _split_dataset_path(Path(path))
103    if not file_path.suffix:
104        raise ValueError(f"No suffix (needed to decide file format) found in {path}")
105
106    file_path.parent.mkdir(exist_ok=True, parents=True)
107    if file_path.suffix == ".npy":
108        if subpath is not None:
109            raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}")
110        save_array(file_path, data)
111    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
112        if subpath is None:
113            dataset_path = DEFAULT_H5_DATASET_PATH
114        else:
115            dataset_path = str(subpath)
116
117        with h5py.File(file_path, "a") as f:
118            if dataset_path in f:
119                del f[dataset_path]
120
121            _ = f.create_dataset(dataset_path, data=data, chunks=True)
122    else:
123        # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
124        #     tensor = tensor[{a: 0 for a in singleton_axes}]
125        #     singleton_axes_msg = f"(without singleton axes {singleton_axes}) "
126        # else:
127        singleton_axes_msg = ""
128
129        logger.debug(
130            "writing tensor {} {}to {}",
131            dict(tensor.tagged_shape),
132            singleton_axes_msg,
133            path,
134        )
135        imwrite(path, data)
def save_sample( path: Union[pathlib.Path, str, Mapping[bioimageio.spec.model.v0_5.TensorId, pathlib.Path]], sample: bioimageio.core.Sample) -> None:
138def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None:
139    """save a sample to path
140
141    If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`,
142    which are resolved with the `sample` object.
143    """
144
145    if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path):
146        raise ValueError(f"missing `{{member_id}}` in path {path}")
147
148    for m, t in sample.members.items():
149        if isinstance(path, collections.abc.Mapping):
150            p = path[m]
151        else:
152            p = Path(str(path).format(sample_id=sample.id, member_id=m))
153
154        save_tensor(p, t)

save a sample to path

If path is a pathlib.Path or a string it must contain {member_id} and may contain {sample_id}, which are resolved with the sample object.

def save_dataset_stat( stat: Mapping[Annotated[Union[bioimageio.core.stat_measures.DatasetMean, bioimageio.core.stat_measures.DatasetStd, bioimageio.core.stat_measures.DatasetVar, bioimageio.core.stat_measures.DatasetPercentile], Discriminator(discriminator='name', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Union[float, Annotated[bioimageio.core.Tensor, BeforeValidator(func=<function tensor_custom_before_validator>, json_schema_input_type=PydanticUndefined), PlainSerializer(func=<function tensor_custom_serializer>, return_type=PydanticUndefined, when_used='always')]]], path: pathlib.Path):
170def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path):
171    serializable = [
172        _SerializedDatasetStatsEntry(measure=k, value=v) for k, v in stat.items()
173    ]
174    _ = path.write_bytes(_stat_adapter.dump_json(serializable))
def load_dataset_stat(path: pathlib.Path):
177def load_dataset_stat(path: Path):
178    seq = _stat_adapter.validate_json(path.read_bytes())
179    return {e.measure: e.value for e in seq}