Coverage for bioimageio/spec/_internal/io_utils.py: 81%

156 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-27 09:20 +0000

1import collections.abc 

2import io 

3import shutil 

4import zipfile 

5from contextlib import nullcontext 

6from difflib import get_close_matches 

7from pathlib import Path 

8from types import MappingProxyType 

9from typing import ( 

10 IO, 

11 Any, 

12 Dict, 

13 Mapping, 

14 Union, 

15 cast, 

16) 

17from zipfile import ZipFile 

18 

19import httpx 

20import numpy 

21from loguru import logger 

22from numpy.typing import NDArray 

23from pydantic import FilePath, NewPath, RootModel 

24from ruyaml import YAML 

25from typing_extensions import Unpack 

26 

27from ._settings import settings 

28from .io import ( 

29 BIOIMAGEIO_YAML, 

30 BioimageioYamlContent, 

31 BioimageioYamlContentView, 

32 BytesReader, 

33 FileDescr, 

34 HashKwargs, 

35 LightHttpFileDescr, 

36 OpenedBioimageioYaml, 

37 RelativeFilePath, 

38 YamlValue, 

39 extract_file_name, 

40 find_bioimageio_yaml_file_name, 

41 get_reader, 

42 identify_bioimageio_yaml_file_name, 

43 interprete_file_source, 

44) 

45from .io_basics import AbsoluteDirectory, FileName, ZipPath 

46from .types import FileSource, PermissiveFileSource 

47from .url import HttpUrl, RootHttpUrl 

48from .utils import cache 

49from .validation_context import ValidationContext 

50 

# Loader of type "safe"; used by `read_yaml`.
_yaml_load = YAML(typ="safe")

# Dumper used by `write_yaml`: YAML version (1, 2), block style output,
# fixed indentation and a line width of 88.
_yaml_dump = YAML()
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
_yaml_dump.default_flow_style = False
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]

58 

59 

def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from a path, a stream/reader or a string.

    Args:
        file: A `Path` or `ZipPath` is read as UTF-8 text first; any other
            input is handed to the YAML loader unchanged.

    Returns:
        The value produced by the module's YAML loader.
    """
    source = (
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    parsed: YamlValue = _yaml_load.load(source)
    return parsed

70 

71 

def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file` using the module's YAML dumper.

    Args:
        content: The value to serialize.
        file: Destination. A `Path` is opened for UTF-8 text writing and closed
            again when done; any other object is passed to the dumper as given
            and is not closed here.
    """
    stream_cm = (
        file.open("w", encoding="utf-8")
        if isinstance(file, Path)
        else nullcontext(file)
    )
    with stream_cm as stream:
        _yaml_dump.dump(content, stream)

84 

85 

def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML has the basic shape of a bioimageio description.

    Args:
        content: Parsed YAML value.

    Returns:
        The same object, typed as `BioimageioYamlContent`.

    Raises:
        ValueError: If `content` is not a dict or has a key that is not a string.
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    for key in content:
        if isinstance(key, str):
            continue

        # report the first offending key
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            + f"to be strings (got '{key}' of type {type(key)})."
        )

    return cast(BioimageioYamlContent, content)

100 

101 

def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
) -> OpenedBioimageioYaml:
    """Read and parse a bioimageio YAML file located inside a zip archive.

    Args:
        path: Location of the YAML file within the archive.
        original_root: Root recorded in the returned `OpenedBioimageioYaml`.

    Returns:
        The sanitized content along with `original_root`, the file name
        extracted from `path` and the raw UTF-8 decoded text.
    """
    with path.open("rb") as f:
        assert not isinstance(f, io.TextIOWrapper)
        raw = f.read()

    unparsed_content = raw.decode(encoding="utf-8")
    parsed = read_yaml(io.StringIO(unparsed_content))

    return OpenedBioimageioYaml(
        _sanitize_bioimageio_yaml(parsed),
        original_root,
        extract_file_name(path),
        unparsed_content=unparsed_content,
    )

118 

119 

def _open_bioimageio_zip(
    source: ZipFile,
) -> OpenedBioimageioYaml:
    """Locate and open the bioimageio YAML file of an opened zip archive.

    The YAML file is chosen by `identify_bioimageio_yaml_file_name` from the
    names of all archive members; the archive itself is used as original root.
    """
    member_names = [info.filename for info in source.filelist]
    rdf_name = identify_bioimageio_yaml_file_name(member_names)
    rdf_path = ZipPath(source, rdf_name)
    return _open_bioimageio_rdf_in_zip(rdf_path, source)

127 

128 

def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Open and parse a bioimageio YAML file.

    Args:
        source: An opened zip file, a path inside a zip file, a folder, or any
            file source accepted by `interprete_file_source`. A string that
            cannot be opened this way is additionally tried as a resource id,
            first via `settings.collection_http_pattern` (if set) and then via
            the id map returned by `get_id_map`.
        **kwargs: Forwarded to `get_reader` when opening `source` directly
            (not used for the resource id fallback).

    Returns:
        The sanitized content together with its original root, original file
        name and the unparsed text.

    Raises:
        FileNotFoundError: If `source` is not in the id map, but similar ids
            exist (they are suggested in the message).
        Exception: The error raised while opening `source` directly is
            re-raised if the id fallback does not apply or finds no candidate.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source)
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(source, source.root)

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # best effort only: fall through to the id map lookup below
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        if source not in id_map:
            # This also covers an empty id map (e.g. it could not be downloaded):
            # without any candidate the original error is re-raised instead of
            # failing with an unrelated KeyError on the lookup below.
            close_matches = get_close_matches(source, id_map)
            if not close_matches:
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with ValidationContext(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )

223 

224 

# Root model used to validate the downloaded id map (resource id -> file description).
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]


def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download and validate the id map found at `url`.

    Args:
        url: Location of the id map JSON document.

    Returns:
        The validated id map, or an empty dict if `url` is obviously invalid,
        the request fails, the server answers with an error status or the
        response is not valid JSON. Each of these cases is logged as an error.

    Raises:
        pydantic.ValidationError: If the downloaded JSON does not match the
            expected id map structure.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # such a url cannot be requested; skip the attempt (and a second error log)
        return {}

    try:
        response = httpx.get(url, timeout=10)
        # do not try to interpret the body of an error response as an id map
        _ = response.raise_for_status()
        id_map_raw: Any = response.json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root

239 

240 

@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return a cached, read-only mapping of resource ids to file descriptions.

    If `settings.resolve_draft` is set, the draft id map is loaded first and
    then updated with the entries of the regular id map. Any error results in
    an empty mapping and is logged.
    """
    try:
        merged = (
            _get_id_map_impl(settings.id_map_draft) if settings.resolve_draft else {}
        )
        merged.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        merged = {}

    return MappingProxyType(merged)

256 

257 

def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Write strings as text, mappings as YAML and files to a ZipFile.

    Args:
        content: Maps archive names to strings (written as UTF-8 text),
            mappings (dumped as YAML), or file sources/readers whose bytes are
            copied into the archive.
        zip: The opened archive to write to.
    """
    for arc_name, file in content.items():
        if isinstance(file, collections.abc.Mapping):
            # serialize to YAML text; handled as a string below
            yaml_buffer = io.StringIO()
            write_yaml(file, yaml_buffer)
            file = yaml_buffer.getvalue()

        if isinstance(file, str):
            zip.writestr(arc_name, file.encode("utf-8"))
            continue

        reader = file if isinstance(file, BytesReader) else get_reader(file)
        with zip.open(arc_name, "w") as dest:
            shutil.copyfileobj(reader, dest, 1024 * 8)

289 

290 

def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` and fill it with `content`.

    Args:
        path: Output path (or binary stream) to write the archive to.
        content: Maps archive names to local file paths, strings (for text
            files), or mappings (for yaml files); see `write_content_to_zip`.
        compression: The numeric constant of the compression method.
        compression_level: Compression level to use when writing files to the
            archive.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive_cm = ZipFile(
        path, "w", compression=compression, compresslevel=compression_level
    )
    with archive_cm as archive:
        write_content_to_zip(content, archive)

314 

315 

def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`.

    Pickled data is only accepted if `settings.allow_pickle` is set, in which
    case a warning is logged.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=allow_pickle)

322 

323 

def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Save `array` to `path` in numpy's binary format with pickling disabled."""
    with path.open(mode="wb") as binary_file:
        assert not isinstance(binary_file, io.TextIOWrapper)
        numpy.save(binary_file, array, allow_pickle=False)