Coverage for src/bioimageio/spec/_internal/io_utils.py: 83%
161 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 08:37 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 08:37 +0000
1import collections.abc
2import io
3import shutil
4import zipfile
5from contextlib import nullcontext
6from difflib import get_close_matches
7from pathlib import Path
8from types import MappingProxyType
9from typing import (
10 IO,
11 Any,
12 Dict,
13 Mapping,
14 Union,
15 cast,
16)
17from zipfile import ZipFile
19import httpx
20import numpy
21from loguru import logger
22from numpy.typing import NDArray
23from pydantic import BaseModel, FilePath, NewPath, RootModel
24from ruyaml import YAML
25from typing_extensions import Unpack
27from ._settings import settings
28from .io import (
29 BIOIMAGEIO_YAML,
30 BioimageioYamlContent,
31 BioimageioYamlContentView,
32 BytesReader,
33 FileDescr,
34 HashKwargs,
35 LightHttpFileDescr,
36 OpenedBioimageioYaml,
37 RelativeFilePath,
38 YamlValue,
39 extract_file_name,
40 find_bioimageio_yaml_file_name,
41 get_reader,
42 identify_bioimageio_yaml_file_name,
43 interprete_file_source,
44)
45from .io_basics import AbsoluteDirectory, FileName, ZipPath
46from .types import FileSource, PermissiveFileSource
47from .url import HttpUrl, RootHttpUrl
48from .utils import cache
49from .validation_context import ValidationContext, get_validation_context
# Shared parser used by `read_yaml`; created with ruyaml's "safe" type.
_yaml_load = YAML(typ="safe")

# Shared emitter used by `write_yaml`: block style output, 2-space mappings,
# sequences indented by 4 with the dash offset by 2, lines wrapped at 88 columns.
_yaml_dump = YAML()
_yaml_dump.default_flow_style = False
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]
# NOTE(review): presumably makes the emitter target YAML 1.2 -- confirm against ruyaml docs
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]
def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from a path, a stream or a string.

    Args:
        file: A `Path`/`ZipPath` is read as UTF-8 text first; any other value
            (stream, reader or string) is handed to the YAML loader unchanged.

    Returns:
        The parsed YAML content.
    """
    stream = (
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    parsed: YamlValue = _yaml_load.load(stream)
    return parsed
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView, BaseModel],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    Args:
        content: YAML-serialisable data; a pydantic `BaseModel` is first
            converted with `model_dump(mode="json")`.
        file: Destination. A `Path` is opened for writing as UTF-8 text and
            closed afterwards; any other target is passed to the YAML dumper
            unchanged and is not closed here.
    """
    # Serialise *before* touching the destination: if `model_dump` fails we
    # must neither truncate an existing file nor leave an open handle behind.
    if isinstance(content, BaseModel):
        content = content.model_dump(mode="json")

    if isinstance(file, Path):
        cm = file.open("w", encoding="utf-8")
    else:
        # caller-owned target: wrap it so the `with` below does not close it
        cm = nullcontext(file)

    with cm as f:
        _yaml_dump.dump(content, f)
def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML is a mapping whose keys are all strings.

    Args:
        content: Parsed YAML content.

    Returns:
        The same object, typed as `BioimageioYamlContent`.

    Raises:
        ValueError: If `content` is not a dict or contains a non-string key.
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    for key in content:
        if isinstance(key, str):
            continue

        # report the first offending key only
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            f"to be strings (got '{key}' of type {type(key)})."
        )

    return cast(BioimageioYamlContent, content)
def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    *,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
    original_source_name: str,
) -> OpenedBioimageioYaml:
    """Read and parse a bioimageio.yaml file stored inside a zip archive.

    Args:
        path: Location of the YAML file within the archive.
        original_root: Root recorded on the returned object.
        original_source_name: Source name recorded on the returned object.

    Returns:
        The opened description, including the raw (unparsed) YAML text.
    """
    with path.open("rb") as raw_stream:
        # binary mode: this assert only narrows the type for static checkers
        assert not isinstance(raw_stream, io.TextIOWrapper)
        raw_bytes = raw_stream.read()
        yaml_text = raw_bytes.decode(encoding="utf-8")

    parsed = read_yaml(io.StringIO(yaml_text))
    sanitized = _sanitize_bioimageio_yaml(parsed)

    return OpenedBioimageioYaml(
        sanitized,
        original_root=original_root,
        original_file_name=extract_file_name(path),
        original_source_name=original_source_name,
        unparsed_content=yaml_text,
    )
def _open_bioimageio_zip(
    source: ZipFile,
    *,
    original_source_name: str,
) -> OpenedBioimageioYaml:
    """Locate the bioimageio.yaml file in a zip archive and open it.

    Args:
        source: Archive to search; it is also recorded as the original root.
        original_source_name: Source name recorded on the returned object.

    Returns:
        The opened description read from the identified archive member.
    """
    member_names = [member.filename for member in source.filelist]
    yaml_member = identify_bioimageio_yaml_file_name(member_names)
    yaml_path = ZipPath(source, yaml_member)
    return _open_bioimageio_rdf_in_zip(
        yaml_path,
        original_root=source,
        original_source_name=original_source_name,
    )
def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Open and parse a bioimageio.yaml file.

    Args:
        source: Where to read from. A `ZipFile` or `ZipPath` is read directly;
            a directory is searched for its bioimageio.yaml file; anything
            else is interpreted as a file source. If that fails and `source`
            is a string, it is tried as a collection id: first via
            `settings.collection_http_pattern` (if set), then via the id map.
        **kwargs: Forwarded to `get_reader`.

    Returns:
        The parsed content together with its origin and the raw YAML text.

    Raises:
        FileNotFoundError: `source` is not in the id map but resembles one or
            more known ids (the message lists the suggestions).
        Exception: The original error from interpreting/reading `source` is
            re-raised whenever no collection id fallback applies.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source, original_source_name=str(source))
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(
            source, original_root=source.root, original_source_name=str(source)
        )

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            # only build the URL here; the request below is made explicitly
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url, follow_redirects=True)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # best effort: fall through to the id map lookup below
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    original_source_name=source,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        if source not in id_map:
            # Also covers an empty id map (e.g. it could not be fetched):
            # there are no suggestions then, so the original error is
            # re-raised instead of an unrelated KeyError from the lookup below.
            close_matches = get_close_matches(source, id_map)
            if not close_matches:
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with get_validation_context().replace(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader), original_source_name=str(src))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_source_name=str(src),
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )
# pydantic model used to validate the downloaded id map (id -> file description)
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]


def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download and validate the id map published at `url`.

    Args:
        url: HTTP(S) location of the JSON id map.

    Returns:
        The validated mapping, or an empty dict if `url` is not usable or the
        download/decoding fails (the reason is logged as an error).

    Raises:
        pydantic.ValidationError: The downloaded JSON does not match `_IdMap`.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # no point in attempting (and logging) a request that cannot succeed
        return {}

    try:
        response = httpx.get(url, timeout=10, follow_redirects=True)
        # treat HTTP error responses as a failed download instead of trying
        # to validate an error payload as an id map
        _ = response.raise_for_status()
        id_map_raw: Any = response.json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root
@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return a read-only mapping from resource ids to file descriptions.

    Entries from `settings.id_map_draft` are loaded first when
    `settings.resolve_draft` is set; entries from `settings.id_map` are then
    merged on top. Any error is logged and results in an empty mapping.
    """
    merged: Dict[str, LightHttpFileDescr] = {}
    try:
        if settings.resolve_draft:
            merged = _get_id_map_impl(settings.id_map_draft)

        # published entries override draft entries with the same id
        merged.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        merged = {}

    return MappingProxyType(merged)
def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Write strings as text, mappings as YAML and files to a ZipFile.

    Args:
        content: Mapping of archive names to strings (written as UTF-8 text),
            mappings (dumped as YAML) or file sources/readers (copied as bytes).
        zip: ZipFile (opened for writing) that receives the entries.
    """
    for arc_name, file in content.items():
        if isinstance(file, collections.abc.Mapping):
            # render mappings as YAML text, then handle them like any string
            buf = io.StringIO()
            write_yaml(file, buf)
            file = buf.getvalue()

        if isinstance(file, str):
            zip.writestr(arc_name, file.encode("utf-8"))
            continue

        reader = file if isinstance(file, BytesReader) else get_reader(file)

        root = reader.original_root
        if (
            isinstance(root, ZipFile)
            and reader.original_file_name == arc_name
            and (
                root is zip
                # `filename` is None for archives backed by unnamed file
                # objects; two such archives must not be mistaken for one.
                or (root.filename is not None and root.filename == zip.filename)
            )
        ):
            logger.debug(
                f"Not copying {reader.original_root}/{reader.original_file_name} to itself."
            )
            continue

        with zip.open(arc_name, "w") as dest:
            shutil.copyfileobj(reader, dest, 1024 * 8)
def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` and fill it with `content`.

    Args:
        path: Output path (or writable binary stream) of the archive.
        content: Mapping of archive names to local file paths, strings
            (for text files), or mappings (for yaml files).
        compression: The numeric constant of the compression method.
        compression_level: Compression level to use when writing files to the
            archive.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive = ZipFile(
        path, "w", compression=compression, compresslevel=compression_level
    )
    # the context manager closes (and thereby finalizes) the archive
    with archive:
        write_content_to_zip(content, archive)
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array (``.npy`` data) from `source`.

    Pickled object arrays are only accepted if `settings.allow_pickle` is set,
    in which case a warning is logged before loading.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        # NOTE: unpickling untrusted data can execute arbitrary code
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=allow_pickle)
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` in numpy's ``.npy`` format (pickling disabled)."""
    with path.open("wb") as stream:
        # binary mode: this assert only narrows the type for static checkers
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=False)