Coverage for bioimageio/spec/_internal/io_utils.py: 81%

158 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-18 12:47 +0000

1import collections.abc 

2import io 

3import shutil 

4import zipfile 

5from contextlib import nullcontext 

6from difflib import get_close_matches 

7from pathlib import Path 

8from types import MappingProxyType 

9from typing import ( 

10 IO, 

11 Any, 

12 Dict, 

13 Mapping, 

14 Union, 

15 cast, 

16) 

17from zipfile import ZipFile 

18 

19import httpx 

20import numpy 

21from loguru import logger 

22from numpy.typing import NDArray 

23from pydantic import BaseModel, FilePath, NewPath, RootModel 

24from ruyaml import YAML 

25from typing_extensions import Unpack 

26 

27from ._settings import settings 

28from .io import ( 

29 BIOIMAGEIO_YAML, 

30 BioimageioYamlContent, 

31 BioimageioYamlContentView, 

32 BytesReader, 

33 FileDescr, 

34 HashKwargs, 

35 LightHttpFileDescr, 

36 OpenedBioimageioYaml, 

37 RelativeFilePath, 

38 YamlValue, 

39 extract_file_name, 

40 find_bioimageio_yaml_file_name, 

41 get_reader, 

42 identify_bioimageio_yaml_file_name, 

43 interprete_file_source, 

44) 

45from .io_basics import AbsoluteDirectory, FileName, ZipPath 

46from .types import FileSource, PermissiveFileSource 

47from .url import HttpUrl, RootHttpUrl 

48from .utils import cache 

49from .validation_context import ValidationContext 

50 

# Loader used for reading: created with typ="safe".
_yaml_load = YAML(typ="safe")

# Dumper used for writing: YAML 1.2, no flow style by default,
# fixed indentation and a line width of 88 characters.
_yaml_dump = YAML()
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
_yaml_dump.default_flow_style = False
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]

58 

59 

def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from a path, an open stream, or a string.

    A `Path` or `ZipPath` is first read as UTF-8 text; any other `file`
    is handed to the YAML loader unchanged.
    """
    source = (
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    parsed: YamlValue = _yaml_load.load(source)
    return parsed

70 

71 

def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView, BaseModel],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    Args:
        content: Data to serialize. A pydantic model is converted with
            `model_dump(mode="json")` first.
        file: Destination. A `Path` is opened for UTF-8 text writing and
            closed afterwards; anything else is passed to the YAML dumper
            as is and is not closed here.
    """
    # Serialize before touching the destination: opening a path in "w" mode
    # truncates it, so a serialization error must not leave behind an empty
    # file or an unclosed handle.
    if isinstance(content, BaseModel):
        content = content.model_dump(mode="json")

    if isinstance(file, Path):
        with file.open("w", encoding="utf-8") as f:
            _yaml_dump.dump(content, f)
    else:
        _yaml_dump.dump(content, file)

87 

88 

def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML content is a dict whose keys are all strings.

    Raises:
        ValueError: if `content` is not a dict or contains a non-string key.
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    offending = [key for key in content if not isinstance(key, str)]
    if offending:
        # report the first non-string key in iteration order
        first = offending[0]
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            + f"to be strings (got '{first}' of type {type(first)})."
        )

    return cast(BioimageioYamlContent, content)

103 

104 

def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
) -> OpenedBioimageioYaml:
    """Read and parse the bioimageio YAML file at `path` inside a zip archive.

    The file is read as bytes, decoded as UTF-8, parsed and sanitized; the
    decoded text is kept as `unparsed_content` of the result.
    """
    with path.open("rb") as zipped_file:
        assert not isinstance(zipped_file, io.TextIOWrapper)
        raw_bytes = zipped_file.read()

    yaml_text = raw_bytes.decode(encoding="utf-8")
    parsed = read_yaml(io.StringIO(yaml_text))
    return OpenedBioimageioYaml(
        _sanitize_bioimageio_yaml(parsed),
        original_root,
        extract_file_name(path),
        unparsed_content=yaml_text,
    )

121 

122 

def _open_bioimageio_zip(
    source: ZipFile,
) -> OpenedBioimageioYaml:
    """Open the bioimageio YAML file contained in the zip archive `source`.

    The file is picked from the archive's member names by
    `identify_bioimageio_yaml_file_name`.
    """
    member_names = source.namelist()
    yaml_name = identify_bioimageio_yaml_file_name(member_names)
    yaml_path = ZipPath(source, yaml_name)
    return _open_bioimageio_rdf_in_zip(yaml_path, source)

130 

131 

def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Open and parse a bioimageio YAML file from `source`.

    `source` may be a zip archive (`ZipFile`), a path inside one (`ZipPath`),
    a folder containing a bioimageio YAML file, or anything accepted by
    `interprete_file_source`. If a string `source` cannot be opened that way,
    it is treated as a resource id: first `settings.collection_http_pattern`
    is tried, then the id map returned by `get_id_map`.

    Args:
        source: Where to read the bioimageio YAML file from.
        **kwargs: Passed on to `get_reader`.

    Returns:
        The sanitized content together with its original root, original file
        name and the unparsed text.

    Raises:
        FileNotFoundError: if `source` is not in the id map but close matches
            exist (they are suggested in the message).
        Exception: the original error from opening `source` is re-raised if
            `source` cannot be resolved as a resource id either.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source)
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(source, source.root)

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # best effort only: fall through to the id map lookup below
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        # Check membership even for an empty id map; otherwise the lookup
        # below fails with a KeyError that hides the original error.
        if source not in id_map:
            close_matches = get_close_matches(source, id_map)
            if not close_matches:
                # nothing to suggest: surface the original error
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with ValidationContext(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )

226 

227 

# Validation model for the downloaded id map: resource id -> file description.
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]


def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download and validate the id map published at `url`.

    Args:
        url: Location of the id map JSON document.

    Returns:
        The validated id map, or an empty dict (after logging an error) if
        `url` is not a string containing "/" or if the download fails.

    Note:
        Validation errors of the downloaded document are not caught here.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # Do not attempt a request that cannot succeed (and would only log
        # a second error).
        return {}

    try:
        response = httpx.get(url, timeout=10)
        # Treat HTTP error responses as download failures instead of trying
        # to validate an error body as an id map.
        _ = response.raise_for_status()
        id_map_raw: Any = response.json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root

242 

243 

@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return a read-only mapping from resource id to file description.

    Entries from `settings.id_map_draft` are only fetched if
    `settings.resolve_draft` is set; entries from `settings.id_map` are then
    added on top. Any error is logged and yields an empty mapping.
    """
    try:
        id_map = (
            _get_id_map_impl(settings.id_map_draft) if settings.resolve_draft else {}
        )
        id_map.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        return MappingProxyType({})

    return MappingProxyType(id_map)

259 

260 

def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Add every entry of `content` to the open archive `zip`.

    Mappings are serialized as YAML, strings are stored as UTF-8 encoded
    text, and anything else is copied in chunks from a reader.

    Args:
        content: maps archive names to the data to store under that name.
        zip: archive to write into.
    """
    chunk_size = 1024 * 8
    for arc_name, entry in content.items():
        if isinstance(entry, collections.abc.Mapping):
            # render the mapping as YAML text and store it like any other string
            yaml_buffer = io.StringIO()
            write_yaml(entry, yaml_buffer)
            entry = yaml_buffer.getvalue()

        if isinstance(entry, str):
            zip.writestr(arc_name, entry.encode("utf-8"))
            continue

        reader = entry if isinstance(entry, BytesReader) else get_reader(entry)
        with zip.open(arc_name, "w") as dest:
            shutil.copyfileobj(reader, dest, chunk_size)

292 

293 

def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` and fill it with `content`.

    Args:
        path: output path (or binary stream) to write the archive to.
        content: maps archive names to local file paths, strings (stored as
            text files) or mappings (stored as yaml files).
        compression: numeric constant of the compression method.
        compression_level: compression level used when writing files to the
            archive.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    with ZipFile(
        path,
        mode="w",
        compression=compression,
        compresslevel=compression_level,
    ) as archive:
        write_content_to_zip(content, zip=archive)

317 

318 

def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source` via `get_reader`.

    `settings.allow_pickle` is forwarded to `numpy.load`; a warning is
    logged whenever it is enabled.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    array: NDArray[Any] = numpy.load(reader, allow_pickle=allow_pickle)
    return array

325 

326 

def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save`, with pickling disabled."""
    with path.open(mode="wb") as binary_file:
        # a binary-mode handle is expected here, never a text wrapper
        assert not isinstance(binary_file, io.TextIOWrapper)
        numpy.save(binary_file, array, allow_pickle=False)