bioimageio.core.io
"""I/O helpers for loading and saving images, tensors, samples and dataset
statistics (npy, HDF5 and common image formats)."""

import collections.abc
import warnings
from pathlib import Path, PurePosixPath
from typing import Any, Mapping, Optional, Sequence, Tuple, Union

import h5py
import numpy as np
from imageio.v3 import imread, imwrite
from loguru import logger
from numpy.typing import NDArray
from pydantic import BaseModel, ConfigDict, TypeAdapter

from bioimageio.spec.utils import load_array, save_array

from .axis import AxisLike
from .common import PerMember
from .sample import Sample
from .stat_measures import DatasetMeasure, MeasureValue
from .tensor import Tensor

# dataset name used inside HDF5 files when the path carries no explicit subpath
DEFAULT_H5_DATASET_PATH = "data"


def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
    """load a single image as numpy array

    Args:
        path: image path; may carry an internal dataset subpath for HDF5 files,
            e.g. ``my_file.h5/dataset``
        is_volume: deprecated
    """
    if is_volume is not None:
        warnings.warn("**is_volume** is deprecated and will be removed soon.")

    file_path, subpath = _split_dataset_path(Path(path))

    if file_path.suffix == ".npy":
        if subpath is not None:
            raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
        return load_array(path)
    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
        if subpath is None:
            dataset_path = DEFAULT_H5_DATASET_PATH
        else:
            dataset_path = str(subpath)

        with h5py.File(file_path, "r") as f:
            h5_dataset = f.get(  # pyright: ignore[reportUnknownVariableType]
                dataset_path
            )
            if not isinstance(h5_dataset, h5py.Dataset):
                raise ValueError(
                    f"{path} is not of type {h5py.Dataset}, but has type "
                    + str(
                        type(h5_dataset)  # pyright: ignore[reportUnknownArgumentType]
                    )
                )
            image: NDArray[Any]
            # read the full dataset into memory; h5py returns a numpy array
            image = h5_dataset[:]  # pyright: ignore[reportUnknownVariableType]
            assert isinstance(image, np.ndarray), type(
                image  # pyright: ignore[reportUnknownArgumentType]
            )
            return image  # pyright: ignore[reportUnknownVariableType]
    else:
        # fall back to imageio for regular image formats (png, tif, ...)
        return imread(path)  # pyright: ignore[reportUnknownVariableType]


def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor:
    """load an image from **path** and wrap it as a `Tensor` with **axes**"""
    # TODO: load axis meta data
    array = load_image(path)

    return Tensor.from_numpy(array, dims=axes)


def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]:
    """Split off subpath (e.g. internal h5 dataset path)
    from a file path following a file extension.

    Examples:
        >>> _split_dataset_path(Path("my_file.h5/dataset"))
        (PosixPath('my_file.h5'), PurePosixPath('dataset'))

        If no suffix is detected the path is returned with `None` as subpath
        >>> _split_dataset_path(Path("my_plain_file"))
        (PosixPath('my_plain_file'), None)

    """
    if path.suffix:
        return path, None

    # walk up the parents until one carries a suffix; everything below it
    # is interpreted as the internal dataset subpath
    for p in path.parents:
        if p.suffix:
            return p, PurePosixPath(path.relative_to(p))

    return path, None


def save_tensor(path: Path, tensor: Tensor) -> None:
    """save **tensor** to **path**; format is chosen by the file suffix
    (.npy, .h5/.hdf/.hdf5, or any format imageio can write)"""
    # TODO: save axis meta data

    data: NDArray[Any] = tensor.data.to_numpy()
    file_path, subpath = _split_dataset_path(Path(path))
    if not file_path.suffix:
        raise ValueError(f"No suffix (needed to decide file format) found in {path}")

    file_path.parent.mkdir(exist_ok=True, parents=True)
    if file_path.suffix == ".npy":
        if subpath is not None:
            raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}")
        save_array(file_path, data)
    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
        if subpath is None:
            dataset_path = DEFAULT_H5_DATASET_PATH
        else:
            dataset_path = str(subpath)

        with h5py.File(file_path, "a") as f:
            # overwrite an existing dataset of the same name
            if dataset_path in f:
                del f[dataset_path]

            _ = f.create_dataset(dataset_path, data=data, chunks=True)
    else:
        # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
        #     tensor = tensor[{a: 0 for a in singleton_axes}]
        #     singleton_axes_msg = f"(without singleton axes {singleton_axes}) "
        # else:
        singleton_axes_msg = ""

        logger.debug(
            "writing tensor {} {}to {}",
            dict(tensor.tagged_shape),
            singleton_axes_msg,
            path,
        )
        imwrite(path, data)


def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None:
    """save a sample to path

    If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`,
    which are resolved with the `sample` object.
    """

    if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path):
        raise ValueError(f"missing `{{member_id}}` in path {path}")

    for m, t in sample.members.items():
        if isinstance(path, collections.abc.Mapping):
            p = path[m]
        else:
            p = Path(str(path).format(sample_id=sample.id, member_id=m))

        save_tensor(p, t)


class _SerializedDatasetStatsEntry(
    BaseModel, frozen=True, arbitrary_types_allowed=True
):
    # one (measure, value) pair of a dataset statistic for (de)serialization
    measure: DatasetMeasure
    value: MeasureValue


_stat_adapter = TypeAdapter(
    Sequence[_SerializedDatasetStatsEntry],
    config=ConfigDict(arbitrary_types_allowed=True),
)


def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path):
    """serialize dataset statistics **stat** as JSON to **path**"""
    serializable = [
        _SerializedDatasetStatsEntry(measure=k, value=v) for k, v in stat.items()
    ]
    _ = path.write_bytes(_stat_adapter.dump_json(serializable))


def load_dataset_stat(path: Path):
    """load dataset statistics saved with `save_dataset_stat` from **path**"""
    seq = _stat_adapter.validate_json(path.read_bytes())
    return {e.measure: e.value for e in seq}
DEFAULT_H5_DATASET_PATH = 'data'
def load_image(path: pathlib.Path, is_volume: Optional[bool] = None) -> numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]:
25def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]: 26 """load a single image as numpy array 27 28 Args: 29 path: image path 30 is_volume: deprecated 31 """ 32 if is_volume is not None: 33 warnings.warn("**is_volume** is deprecated and will be removed soon.") 34 35 file_path, subpath = _split_dataset_path(Path(path)) 36 37 if file_path.suffix == ".npy": 38 if subpath is not None: 39 raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}") 40 return load_array(path) 41 elif file_path.suffix in (".h5", ".hdf", ".hdf5"): 42 if subpath is None: 43 dataset_path = DEFAULT_H5_DATASET_PATH 44 else: 45 dataset_path = str(subpath) 46 47 with h5py.File(file_path, "r") as f: 48 h5_dataset = f.get( # pyright: ignore[reportUnknownVariableType] 49 dataset_path 50 ) 51 if not isinstance(h5_dataset, h5py.Dataset): 52 raise ValueError( 53 f"{path} is not of type {h5py.Dataset}, but has type " 54 + str( 55 type(h5_dataset) # pyright: ignore[reportUnknownArgumentType] 56 ) 57 ) 58 image: NDArray[Any] 59 image = h5_dataset[:] # pyright: ignore[reportUnknownVariableType] 60 assert isinstance(image, np.ndarray), type( 61 image # pyright: ignore[reportUnknownArgumentType] 62 ) 63 return image # pyright: ignore[reportUnknownVariableType] 64 else: 65 return imread(path) # pyright: ignore[reportUnknownVariableType]
load a single image as numpy array
Arguments:
- path: image path
- is_volume: deprecated
def load_tensor(path: pathlib.Path, axes: Optional[Sequence[AxisLike]] = None) -> bioimageio.core.Tensor:
98def save_tensor(path: Path, tensor: Tensor) -> None: 99 # TODO: save axis meta data 100 101 data: NDArray[Any] = tensor.data.to_numpy() 102 file_path, subpath = _split_dataset_path(Path(path)) 103 if not file_path.suffix: 104 raise ValueError(f"No suffix (needed to decide file format) found in {path}") 105 106 file_path.parent.mkdir(exist_ok=True, parents=True) 107 if file_path.suffix == ".npy": 108 if subpath is not None: 109 raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}") 110 save_array(file_path, data) 111 elif file_path.suffix in (".h5", ".hdf", ".hdf5"): 112 if subpath is None: 113 dataset_path = DEFAULT_H5_DATASET_PATH 114 else: 115 dataset_path = str(subpath) 116 117 with h5py.File(file_path, "a") as f: 118 if dataset_path in f: 119 del f[dataset_path] 120 121 _ = f.create_dataset(dataset_path, data=data, chunks=True) 122 else: 123 # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]: 124 # tensor = tensor[{a: 0 for a in singleton_axes}] 125 # singleton_axes_msg = f"(without singleton axes {singleton_axes}) " 126 # else: 127 singleton_axes_msg = "" 128 129 logger.debug( 130 "writing tensor {} {}to {}", 131 dict(tensor.tagged_shape), 132 singleton_axes_msg, 133 path, 134 ) 135 imwrite(path, data)
def save_sample(path: Union[pathlib.Path, str, Mapping[bioimageio.spec.model.v0_5.TensorId, pathlib.Path]], sample: bioimageio.core.Sample) -> None:
138def save_sample(path: Union[Path, str, PerMember[Path]], sample: Sample) -> None: 139 """save a sample to path 140 141 If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`, 142 which are resolved with the `sample` object. 143 """ 144 145 if not isinstance(path, collections.abc.Mapping) and "{member_id}" not in str(path): 146 raise ValueError(f"missing `{{member_id}}` in path {path}") 147 148 for m, t in sample.members.items(): 149 if isinstance(path, collections.abc.Mapping): 150 p = path[m] 151 else: 152 p = Path(str(path).format(sample_id=sample.id, member_id=m)) 153 154 save_tensor(p, t)
save a sample to path
If `path` is a pathlib.Path or a string it must contain `{member_id}` and may contain `{sample_id}`,
which are resolved with the `sample` object.
def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: pathlib.Path):
def load_dataset_stat(path: pathlib.Path):