Coverage for bioimageio/spec/_internal/io_utils.py: 81%
156 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-27 09:20 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-27 09:20 +0000
1import collections.abc
2import io
3import shutil
4import zipfile
5from contextlib import nullcontext
6from difflib import get_close_matches
7from pathlib import Path
8from types import MappingProxyType
9from typing import (
10 IO,
11 Any,
12 Dict,
13 Mapping,
14 Union,
15 cast,
16)
17from zipfile import ZipFile
19import httpx
20import numpy
21from loguru import logger
22from numpy.typing import NDArray
23from pydantic import FilePath, NewPath, RootModel
24from ruyaml import YAML
25from typing_extensions import Unpack
27from ._settings import settings
28from .io import (
29 BIOIMAGEIO_YAML,
30 BioimageioYamlContent,
31 BioimageioYamlContentView,
32 BytesReader,
33 FileDescr,
34 HashKwargs,
35 LightHttpFileDescr,
36 OpenedBioimageioYaml,
37 RelativeFilePath,
38 YamlValue,
39 extract_file_name,
40 find_bioimageio_yaml_file_name,
41 get_reader,
42 identify_bioimageio_yaml_file_name,
43 interprete_file_source,
44)
45from .io_basics import AbsoluteDirectory, FileName, ZipPath
46from .types import FileSource, PermissiveFileSource
47from .url import HttpUrl, RootHttpUrl
48from .utils import cache
49from .validation_context import ValidationContext
# Parser used by `read_yaml`; created with the "safe" loader type.
_yaml_load = YAML(typ="safe")

# Emitter used by `write_yaml`: YAML 1.2, no flow style by default,
# 88 characters wide, with explicit mapping/sequence indentation.
_yaml_dump = YAML()
_yaml_dump.default_flow_style = False
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from a path, an open stream or a string.

    Args:
        file: A `Path` or `ZipPath`, whose content is read as UTF-8 text
            before parsing, or any other accepted source, which is handed to
            the YAML loader unchanged.

    Returns:
        The value produced by the module-level `_yaml_load` loader.
    """
    raw = (
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    parsed: YamlValue = _yaml_load.load(raw)
    return parsed
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to a path or to an already open stream.

    Args:
        content: The value to serialize with the module-level `_yaml_dump`.
        file: Destination. A `Path` or `ZipPath` is opened for writing as
            UTF-8 text and closed again afterwards. Any other object is
            treated as an open stream: it is written to directly and is not
            closed by this function.
    """
    if isinstance(file, (Path, ZipPath)):
        # `ZipPath` is an accepted destination, so it has to be opened as well
        # rather than being handed to the dumper as if it were a stream.
        # NOTE(review): assumes `ZipPath.open` accepts a text mode plus
        # `encoding`, like `zipfile.Path.open` does -- confirm.
        cm = file.open("w", encoding="utf-8")
    else:
        # Caller owns the stream; `nullcontext` leaves it open on exit.
        cm = nullcontext(file)

    with cm as f:
        _yaml_dump.dump(content, f)
def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML is a mapping with string keys only.

    Args:
        content: Parsed YAML value to check.

    Returns:
        The same `content` object, typed as `BioimageioYamlContent`.

    Raises:
        ValueError: If `content` is not a `dict` or if one of its keys is not
            a string (the first such key is reported).
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    non_string_keys = [k for k in content if not isinstance(k, str)]
    if non_string_keys:
        key = non_string_keys[0]
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            + f"to be strings (got '{key}' of type {type(key)})."
        )

    return cast(BioimageioYamlContent, content)
def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
) -> OpenedBioimageioYaml:
    """Read and parse the bioimageio YAML file at `path` inside a zip archive.

    Args:
        path: Location of the YAML file within the archive.
        original_root: Root to record on the returned object.

    Returns:
        An `OpenedBioimageioYaml` holding the sanitized content, the given
        root, the file name extracted from `path` and the raw UTF-8 text.
    """
    with path.open("rb") as stream:
        # Opened in binary mode, so this must not be a text wrapper.
        assert not isinstance(stream, io.TextIOWrapper)
        raw_bytes = stream.read()

    unparsed_content = raw_bytes.decode(encoding="utf-8")
    parsed = read_yaml(io.StringIO(unparsed_content))

    return OpenedBioimageioYaml(
        content=_sanitize_bioimageio_yaml(parsed),
        original_root=original_root,
        original_file_name=extract_file_name(path),
        unparsed_content=unparsed_content,
    )
def _open_bioimageio_zip(
    source: ZipFile,
) -> OpenedBioimageioYaml:
    """Find and open the bioimageio YAML file contained in a zip archive.

    Args:
        source: Opened archive; it is also recorded as the original root.

    Returns:
        The opened bioimageio YAML selected from the archive's member names
        by `identify_bioimageio_yaml_file_name`.
    """
    member_names = source.namelist()
    rdf_name = identify_bioimageio_yaml_file_name(member_names)
    rdf_path = ZipPath(source, rdf_name)
    return _open_bioimageio_rdf_in_zip(rdf_path, source)
def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Locate, read and parse a bioimageio YAML file.

    `source` is resolved in this order:

    1. A `RelativeFilePath` is made absolute first.
    2. A `ZipFile` or `ZipPath` is opened from within the archive.
    3. A directory is searched with `find_bioimageio_yaml_file_name`; any
       other source is interpreted with `interprete_file_source`. The result
       is opened with `get_reader`.
    4. If step 3 fails, `source` is a string and `settings.id_map` is a
       string containing "/", `source` is treated as a resource id: first
       `settings.collection_http_pattern` (if set) is tried, then the id map
       returned by `get_id_map`.

    Args:
        source: Where to load the bioimageio YAML file from (see above).
        **kwargs: Forwarded to `get_reader`.

    Returns:
        The parsed content together with its original root, original file
        name and the unparsed text.

    Raises:
        FileNotFoundError: If `source` is not a known id but similar ids
            exist in the id map; the message lists them.
        Exception: The error raised in step 3 is re-raised unchanged if the
            id fallback does not apply or yields neither an entry nor a
            close match.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source)
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(source, source.root)

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            # Build the URL without IO checks; the request below is the check.
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # Best effort only: fall through to the id map lookup.
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        # An empty id map (e.g. because it could not be fetched) is handled
        # here as well: it has no close matches, so the original error is
        # re-raised instead of a bare `KeyError` from the lookup below.
        if source not in id_map:
            close_matches = get_close_matches(source, id_map)
            if len(close_matches) == 0:
                # re-raise the error that sent us into the id fallback
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with ValidationContext(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    # The root is the containing archive for zip paths, else the parent.
    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )
# Validation model for a downloaded id map: ids mapped to file descriptions.
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]


def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download and validate a single id map.

    Args:
        url: Address of the JSON id map. It must be a string containing "/".

    Returns:
        The validated mapping, or an empty dict if `url` is invalid or the
        download/JSON decoding fails (an error is logged in both cases).

    Raises:
        Exception: Whatever `_IdMap.model_validate` raises if the downloaded
            JSON does not match the expected structure.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # Do not attempt a request with a URL already known to be invalid.
        return {}

    try:
        id_map_raw: Any = httpx.get(url, timeout=10).json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root
@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return the read-only mapping from resource ids to file descriptions.

    If `settings.resolve_draft` is set, the map at `settings.id_map_draft` is
    loaded first; entries from `settings.id_map` are then merged on top and
    win for ids present in both.

    Returns:
        A `MappingProxyType` around the merged map. It is empty if anything
        fails while building it (the failure is logged).
    """
    try:
        merged = (
            _get_id_map_impl(settings.id_map_draft) if settings.resolve_draft else {}
        )
        merged.update(_get_id_map_impl(settings.id_map))
    except Exception as err:
        logger.error("failed to get resource id mapping: {}", err)
        merged = {}

    return MappingProxyType(merged)
def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Add every entry of `content` to an opened zip archive.

    Args:
        content: Maps archive names to what is stored under them:
            mappings are serialized to YAML text, strings are stored UTF-8
            encoded, a `BytesReader` is copied as is, and anything else is
            opened with `get_reader` and copied.
        zip: Archive to write to.
    """
    for arc_name, item in content.items():
        if isinstance(item, collections.abc.Mapping):
            # Serialize to YAML text first, then store it like any string.
            yaml_text = io.StringIO()
            write_yaml(item, yaml_text)
            item = yaml_text.getvalue()

        if isinstance(item, str):
            zip.writestr(arc_name, item.encode("utf-8"))
            continue

        reader = item if isinstance(item, BytesReader) else get_reader(item)
        with zip.open(arc_name, "w") as dest:
            # copy in 8 KiB chunks
            shutil.copyfileobj(reader, dest, 1024 * 8)
def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` and fill it with `content`.

    Args:
        path: Output path or binary stream to write the archive to.
        content: Maps archive names to local file paths, strings (for text
            files) or dicts (for yaml files).
        compression: Numeric constant of the compression method.
        compression_level: Compression level used when writing files to the
            archive.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive = ZipFile(
        path, "w", compression=compression, compresslevel=compression_level
    )
    with archive:
        write_content_to_zip(content, archive)
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`.

    Pickled data is only accepted if `settings.allow_pickle` is set; a
    warning is logged in that case.

    Args:
        source: Where to read the array from; opened with `get_reader`.

    Returns:
        The result of `numpy.load` on the opened source.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        # Unpickling can run arbitrary code, hence the explicit warning.
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=allow_pickle)
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save`, with pickling disabled.

    Args:
        path: Destination; opened in binary write mode.
        array: The array to store.
    """
    with path.open(mode="wb") as binary_sink:
        # Opened in binary mode, so this must not be a text wrapper.
        assert not isinstance(binary_sink, io.TextIOWrapper)
        numpy.save(binary_sink, array, allow_pickle=False)