Coverage for bioimageio/spec/_internal/io_utils.py: 81%
158 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-18 12:47 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-18 12:47 +0000
1import collections.abc
2import io
3import shutil
4import zipfile
5from contextlib import nullcontext
6from difflib import get_close_matches
7from pathlib import Path
8from types import MappingProxyType
9from typing import (
10 IO,
11 Any,
12 Dict,
13 Mapping,
14 Union,
15 cast,
16)
17from zipfile import ZipFile
19import httpx
20import numpy
21from loguru import logger
22from numpy.typing import NDArray
23from pydantic import BaseModel, FilePath, NewPath, RootModel
24from ruyaml import YAML
25from typing_extensions import Unpack
27from ._settings import settings
28from .io import (
29 BIOIMAGEIO_YAML,
30 BioimageioYamlContent,
31 BioimageioYamlContentView,
32 BytesReader,
33 FileDescr,
34 HashKwargs,
35 LightHttpFileDescr,
36 OpenedBioimageioYaml,
37 RelativeFilePath,
38 YamlValue,
39 extract_file_name,
40 find_bioimageio_yaml_file_name,
41 get_reader,
42 identify_bioimageio_yaml_file_name,
43 interprete_file_source,
44)
45from .io_basics import AbsoluteDirectory, FileName, ZipPath
46from .types import FileSource, PermissiveFileSource
47from .url import HttpUrl, RootHttpUrl
48from .utils import cache
49from .validation_context import ValidationContext
# YAML instance used by `read_yaml`; created with `typ="safe"`.
_yaml_load = YAML(typ="safe")
# YAML instance used by `write_yaml`; the statements below set the emitted YAML
# version to (1, 2), turn off the default flow style, and fix indentation
# (mapping=2, sequence=4, offset=2) and line width (88).
_yaml_dump = YAML()
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.default_flow_style = False
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]
def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from `file` with the module's safe loader.

    Args:
        file: A path (file system or inside a zip archive), whose text is read
            as UTF-8 first, or any other object, which is handed to the loader
            unchanged. Note that a plain `str` is passed to the loader as-is;
            it is not converted to a path here.

    Returns:
        The parsed YAML value.
    """
    is_path_like = isinstance(file, (ZipPath, Path))
    yaml_input = file.read_text(encoding="utf-8") if is_path_like else file
    content: YamlValue = _yaml_load.load(yaml_input)
    return content
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView, BaseModel],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Serialize `content` as YAML to `file`.

    Args:
        content: YAML-compatible data or a pydantic model. A model is converted
            with `model_dump(mode="json")` before it is dumped.
        file: Destination. A file system path or a path inside a zip archive is
            opened for writing as UTF-8 text and closed afterwards. Any other
            object is treated as an already open stream: it is written to
            directly and left open for the caller.
    """
    if isinstance(file, (ZipPath, Path)):
        # `ZipPath` is opened just like `Path` (mirroring `read_yaml`); otherwise
        # the dumper would receive the path object instead of a writable stream.
        cm = file.open("w", encoding="utf-8")
    else:
        # caller-owned stream: wrap it so the `with` below does not close it
        cm = nullcontext(file)

    if isinstance(content, BaseModel):
        content = content.model_dump(mode="json")

    with cm as f:
        _yaml_dump.dump(content, f)
def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML `content` is a mapping with string keys only.

    Only the top-level keys are inspected.

    Returns:
        `content` itself (not a copy), typed as `BioimageioYamlContent`.

    Raises:
        ValueError: If `content` is not a `dict`, or if any of its keys is not
            a `str` (the first offending key in iteration order is reported).
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    # a private sentinel lets us tell "no bad key" apart from a key that is `None`
    no_bad_key = object()
    key = next((k for k in content if not isinstance(k, str)), no_bad_key)
    if key is not no_bad_key:
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            + f"to be strings (got '{key}' of type {type(key)})."
        )

    return cast(BioimageioYamlContent, content)
def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
) -> OpenedBioimageioYaml:
    """Read, parse and sanitize the bioimageio YAML file at `path` in a zip archive.

    Args:
        path: Location of the YAML file inside the archive.
        original_root: Root recorded in the result as the origin of the file.

    Returns:
        The parsed content together with `original_root`, the file name
        extracted from `path`, and the raw (UTF-8 decoded) text.
    """
    with path.open("rb") as stream:
        # opened in binary mode; the assert rules out a text wrapper
        assert not isinstance(stream, io.TextIOWrapper)
        raw_bytes = stream.read()

    yaml_text = raw_bytes.decode(encoding="utf-8")
    parsed = read_yaml(io.StringIO(yaml_text))
    return OpenedBioimageioYaml(
        _sanitize_bioimageio_yaml(parsed),
        original_root,
        extract_file_name(path),
        unparsed_content=yaml_text,
    )
def _open_bioimageio_zip(
    source: ZipFile,
) -> OpenedBioimageioYaml:
    """Locate the bioimageio YAML file among the members of `source` and open it.

    The archive itself is recorded as the original root of the result.
    """
    member_names = [member.filename for member in source.filelist]
    yaml_name = identify_bioimageio_yaml_file_name(member_names)
    yaml_path = ZipPath(source, yaml_name)
    return _open_bioimageio_rdf_in_zip(yaml_path, source)
def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Open and parse a bioimageio YAML file from `source`.

    Resolution order:

    1. A `ZipFile` or `ZipPath` is opened directly from the archive.
    2. A directory is searched for its bioimageio YAML file; any other source
       is interpreted as a file source. The result is read via `get_reader`.
    3. If step 2 fails and `source` is a string (and `settings.id_map` is a
       string containing "/"), `source` is treated as a collection id: first
       `settings.collection_http_pattern` (if set) is tried, then the id map.

    Args:
        source: Where to load the bioimageio YAML from (see above).
        **kwargs: Forwarded to `get_reader` in step 2.

    Returns:
        The parsed content with its original root, original file name and the
        raw text.

    Raises:
        FileNotFoundError: If `source` is not in the id map but similar ids exist
            (the message lists them).
        Exception: The error from step 2 is re-raised unchanged if the id
            fallback does not apply or the id map has neither the id nor a close
            match.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source)
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(source, source.root)

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            # only build the URL here; no IO checks while validating it
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # best effort: fall through to the id map lookup below
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        # An empty id map is handled here as well, so the original error is
        # re-raised rather than a bare `KeyError` from the lookup further down.
        if source not in id_map:
            close_matches = get_close_matches(source, id_map)
            if not close_matches:
                # nothing to suggest: re-raise the error from the first attempt
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with ValidationContext(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )
# Root model over a dict from string keys to `LightHttpFileDescr` values; used to
# validate the raw JSON of a downloaded id map.
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]
def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download and validate the id map published at `url`.

    Args:
        url: Location of a JSON document mapping ids to file descriptions.

    Returns:
        The validated mapping, or an empty dict if `url` is not a string
        containing "/" or if the download fails (both cases are logged as
        errors attributed to the caller's frame).

    Raises:
        pydantic.ValidationError: If the downloaded JSON does not match the
            expected id map structure.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # a request to such a url cannot succeed; skip it (and the second,
        # redundant error message it would produce)
        return {}

    try:
        response = httpx.get(url, timeout=10)
        # treat HTTP error responses as a failed download instead of trying to
        # interpret their body as an id map
        _ = response.raise_for_status()
        id_map_raw: Any = response.json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root
@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return the (cached) read-only mapping of resource ids to file descriptions.

    If `settings.resolve_draft` is set, the map from `settings.id_map_draft` is
    loaded first; entries from `settings.id_map` are then merged on top. Any
    exception along the way is logged and yields an empty mapping.
    """
    try:
        id_map = (
            _get_id_map_impl(settings.id_map_draft) if settings.resolve_draft else {}
        )
        id_map.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        # discard anything collected so far
        id_map = {}

    return MappingProxyType(id_map)
def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Add every entry of `content` to an open, writable zip archive.

    Args:
        content: Maps archive member names to what is stored under that name:
            a mapping is dumped as YAML text, a string is stored as UTF-8 text,
            a `BytesReader` is copied as-is, and anything else is opened with
            `get_reader` and copied.
        zip: The archive to write to.
    """
    for arc_name, entry in content.items():
        if isinstance(entry, collections.abc.Mapping):
            # render mappings to YAML text; stored as a string below
            yaml_text = io.StringIO()
            write_yaml(entry, yaml_text)
            entry = yaml_text.getvalue()

        if isinstance(entry, str):
            zip.writestr(arc_name, entry.encode("utf-8"))
            continue

        reader = entry if isinstance(entry, BytesReader) else get_reader(entry)
        with zip.open(arc_name, "w") as dest:
            # stream in 8 KiB chunks
            shutil.copyfileobj(reader, dest, 1024 * 8)
def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` holding `content`.

    Args:
        path: Output path or binary stream to write the archive to.
        content: Maps archive member names to their content; entries are written
            by `write_content_to_zip`.
        compression: The numeric constant of the compression method.
        compression_level: Compression level used when writing the members.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive = ZipFile(
        path, "w", compression=compression, compresslevel=compression_level
    )
    with archive:
        write_content_to_zip(content, archive)
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`.

    Pickled data is only accepted if `settings.allow_pickle` is set, in which
    case a warning is logged before loading.
    """
    reader = get_reader(source)
    pickle_allowed = settings.allow_pickle
    if pickle_allowed:
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=pickle_allowed)
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Save `array` to `path` with `numpy.save`, with pickling disabled."""
    with path.open(mode="wb") as out:
        # opened in binary mode; the assert rules out a text wrapper
        assert not isinstance(out, io.TextIOWrapper)
        numpy.save(out, array, allow_pickle=False)