Coverage for src/bioimageio/spec/_internal/io_utils.py: 83%

161 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 08:37 +0000

1import collections.abc 

2import io 

3import shutil 

4import zipfile 

5from contextlib import nullcontext 

6from difflib import get_close_matches 

7from pathlib import Path 

8from types import MappingProxyType 

9from typing import ( 

10 IO, 

11 Any, 

12 Dict, 

13 Mapping, 

14 Union, 

15 cast, 

16) 

17from zipfile import ZipFile 

18 

19import httpx 

20import numpy 

21from loguru import logger 

22from numpy.typing import NDArray 

23from pydantic import BaseModel, FilePath, NewPath, RootModel 

24from ruyaml import YAML 

25from typing_extensions import Unpack 

26 

27from ._settings import settings 

28from .io import ( 

29 BIOIMAGEIO_YAML, 

30 BioimageioYamlContent, 

31 BioimageioYamlContentView, 

32 BytesReader, 

33 FileDescr, 

34 HashKwargs, 

35 LightHttpFileDescr, 

36 OpenedBioimageioYaml, 

37 RelativeFilePath, 

38 YamlValue, 

39 extract_file_name, 

40 find_bioimageio_yaml_file_name, 

41 get_reader, 

42 identify_bioimageio_yaml_file_name, 

43 interprete_file_source, 

44) 

45from .io_basics import AbsoluteDirectory, FileName, ZipPath 

46from .types import FileSource, PermissiveFileSource 

47from .url import HttpUrl, RootHttpUrl 

48from .utils import cache 

49from .validation_context import ValidationContext, get_validation_context 

50 

# YAML loader created with ruyaml's "safe" type; used by `read_yaml`.
_yaml_load = YAML(typ="safe")

# YAML dumper used by `write_yaml`: version (1, 2), flow style disabled by
# default, mapping indent 2, sequence indent 4 with dash offset 2, width 88.
_yaml_dump = YAML()
_yaml_dump.version = (1, 2)  # pyright: ignore[reportAttributeAccessIssue]
_yaml_dump.default_flow_style = False
_yaml_dump.indent(mapping=2, sequence=4, offset=2)
_yaml_dump.width = 88  # pyright: ignore[reportAttributeAccessIssue]

58 

59 

def read_yaml(
    file: Union[FilePath, ZipPath, IO[str], IO[bytes], BytesReader, str],
) -> YamlValue:
    """Parse YAML from a path, an open stream, a reader or a YAML string.

    `Path`/`ZipPath` inputs are first read as UTF-8 text; every other input is
    handed to the module's safe YAML loader unchanged.
    """
    raw = (
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    parsed: YamlValue = _yaml_load.load(raw)
    return parsed

70 

71 

def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView, BaseModel],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    Args:
        content: The data to dump. A pydantic `BaseModel` is converted with
            `model_dump(mode="json")` first.
        file: Where to write to. A `Path` is opened for writing as UTF-8 text
            and closed afterwards; anything else is passed to the YAML dumper
            as is and is not closed here.
    """
    # Serialize before touching the file system: if `model_dump` raises, the
    # target file must not have been created/truncated and no handle may leak.
    if isinstance(content, BaseModel):
        content = content.model_dump(mode="json")

    if isinstance(file, Path):
        cm = file.open("w", encoding="utf-8")
    else:
        cm = nullcontext(file)

    with cm as f:
        _yaml_dump.dump(content, f)

87 

88 

def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Ensure parsed YAML is a dict whose keys are all strings and return it.

    Raises:
        ValueError: If `content` is not a dict, or if any of its keys is not a
            string (the first such key is reported).
    """
    if not isinstance(content, dict):
        raise ValueError(
            f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
        )

    non_string_keys = [k for k in content if not isinstance(k, str)]
    if non_string_keys:
        key = non_string_keys[0]
        raise ValueError(
            f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
            + f"to be strings (got '{key}' of type {type(key)})."
        )

    return cast(BioimageioYamlContent, content)

103 

104 

def _open_bioimageio_rdf_in_zip(
    path: ZipPath,
    *,
    original_root: Union[AbsoluteDirectory, RootHttpUrl, ZipFile],
    original_source_name: str,
) -> OpenedBioimageioYaml:
    """Read, decode (UTF-8) and parse the YAML file at `path` inside a zip archive.

    `original_root` and `original_source_name` are stored unchanged on the
    returned `OpenedBioimageioYaml`, together with the raw text.
    """
    with path.open("rb") as stream:
        # binary mode must not yield a text wrapper (also narrows the type)
        assert not isinstance(stream, io.TextIOWrapper)
        raw_bytes = stream.read()

    unparsed_content = raw_bytes.decode(encoding="utf-8")
    parsed = read_yaml(io.StringIO(unparsed_content))
    return OpenedBioimageioYaml(
        _sanitize_bioimageio_yaml(parsed),
        original_root=original_root,
        original_file_name=extract_file_name(path),
        original_source_name=original_source_name,
        unparsed_content=unparsed_content,
    )

124 

125 

def _open_bioimageio_zip(
    source: ZipFile,
    *,
    original_source_name: str,
) -> OpenedBioimageioYaml:
    """Pick the bioimageio YAML file among the members of `source` and open it.

    The archive itself becomes the `original_root` of the result.
    """
    member_names = [member.filename for member in source.filelist]
    yaml_name = identify_bioimageio_yaml_file_name(member_names)
    yaml_path = ZipPath(source, yaml_name)
    return _open_bioimageio_rdf_in_zip(
        yaml_path,
        original_root=source,
        original_source_name=original_source_name,
    )

139 

140 

def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile, ZipPath],
    /,
    **kwargs: Unpack[HashKwargs],
) -> OpenedBioimageioYaml:
    """Open and parse a bioimageio YAML file.

    Args:
        source: One of
            - a `ZipFile` (the YAML file is identified among its members),
            - a `ZipPath` pointing at the YAML file inside an archive,
            - a directory containing a bioimageio YAML file,
            - any other file source accepted by `interprete_file_source`,
            - a string that is looked up as a collection id if opening it as
              a file source failed.
        **kwargs: Passed on to `get_reader`.

    Returns:
        The parsed content together with its origin and the unparsed text.

    Raises:
        FileNotFoundError: If `source` could not be opened, is not a known id,
            but similar ids exist (they are suggested in the message).
        Exception: The original error from opening `source` if it cannot be
            resolved as a collection id either.
    """
    if isinstance(source, RelativeFilePath):
        source = source.absolute()

    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source, original_source_name=str(source))
    elif isinstance(source, ZipPath):
        return _open_bioimageio_rdf_in_zip(
            source, original_root=source.root, original_source_name=str(source)
        )

    try:
        if isinstance(source, (Path, str)) and (source_dir := Path(source)).is_dir():
            # open bioimageio yaml from a folder
            src = source_dir / find_bioimageio_yaml_file_name(source_dir)
        else:
            src = interprete_file_source(source)

        reader = get_reader(src, **kwargs)

    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        if settings.collection_http_pattern:
            # first attempt: build the URL directly from the configured pattern
            with ValidationContext(perform_io_checks=False):
                url = HttpUrl(
                    settings.collection_http_pattern.format(bioimageio_id=source)
                )

            try:
                r = httpx.get(url, follow_redirects=True)
                _ = r.raise_for_status()
                unparsed_content = r.content.decode(encoding="utf-8")
                content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))
            except Exception as e:
                # best effort only; fall through to the id map lookup below
                logger.warning("Failed to get bioimageio.yaml from {}: {}", url, e)
            else:
                original_file_name = (
                    "rdf.yaml" if url.path is None else url.path.split("/")[-1]
                )
                return OpenedBioimageioYaml(
                    content=content,
                    original_root=url.parent,
                    original_file_name=original_file_name,
                    original_source_name=source,
                    unparsed_content=unparsed_content,
                )

        id_map = get_id_map()
        # Also covers an empty id map (e.g. it could not be fetched): there are
        # no close matches then, so the original error is re-raised instead of
        # failing with an unrelated `KeyError` on the lookup below.
        if source not in id_map:
            close_matches = get_close_matches(source, id_map)
            if len(close_matches) == 0:
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        reader = entry.get_reader()
        with get_validation_context().replace(perform_io_checks=False):
            src = HttpUrl(entry.source)

    if reader.is_zipfile:
        return _open_bioimageio_zip(ZipFile(reader), original_source_name=str(src))

    unparsed_content = reader.read().decode(encoding="utf-8")
    content = _sanitize_bioimageio_yaml(read_yaml(unparsed_content))

    if isinstance(src, RelativeFilePath):
        src = src.absolute()

    # the root is the archive for zip members and the parent location otherwise
    if isinstance(src, ZipPath):
        root = src.root
    else:
        root = src.parent

    return OpenedBioimageioYaml(
        content,
        original_root=root,
        original_source_name=str(src),
        original_file_name=extract_file_name(src),
        unparsed_content=unparsed_content,
    )

239 

240 

# Root model used by `_get_id_map_impl` to validate the downloaded id map JSON:
# a dict with string keys and `LightHttpFileDescr` values.
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]

242 

243 

def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Download the id map JSON from `url` and validate it.

    Args:
        url: Location of the id map; must be a string containing a "/".

    Returns:
        The validated mapping, or an empty dict if `url` is invalid or the
        download fails (both cases are logged as errors).

    Raises:
        pydantic.ValidationError: If the downloaded JSON does not validate as
            `_IdMap`.
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # Requesting an invalid url can only fail (and log a second error).
        return {}

    try:
        response = httpx.get(url, timeout=10, follow_redirects=True)
        # Do not feed the body of an HTTP error response into validation.
        _ = response.raise_for_status()
        id_map_raw: Any = response.json()
    except Exception as e:
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root

255 

256 

@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return a read-only mapping from id to file description.

    If `settings.resolve_draft` is set, the draft id map is fetched first; the
    entries from `settings.id_map` are then merged on top and take precedence.
    Any error is logged and results in an empty mapping.
    """
    collected: Dict[str, LightHttpFileDescr] = {}
    try:
        if settings.resolve_draft:
            collected = _get_id_map_impl(settings.id_map_draft)

        collected.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        collected = {}

    return MappingProxyType(collected)

272 

273 

def write_content_to_zip(
    content: Mapping[
        FileName,
        Union[
            str, FilePath, ZipPath, BioimageioYamlContentView, FileDescr, BytesReader
        ],
    ],
    zip: zipfile.ZipFile,
):
    """Write strings as text, mappings as YAML and files to a ZipFile.

    Args:
        content: Mapping of archive names to strings (written as UTF-8 text),
            mappings (dumped as YAML), or file sources/readers (copied).
        zip: The archive to write to (opened for writing by the caller).

    A file is not copied if it already is the member of the same name in `zip`
    itself.
    """
    for arc_name, file in content.items():
        if isinstance(file, collections.abc.Mapping):
            buf = io.StringIO()
            write_yaml(file, buf)
            file = buf.getvalue()

        if isinstance(file, str):
            zip.writestr(arc_name, file.encode("utf-8"))
            continue

        reader = file if isinstance(file, BytesReader) else get_reader(file)

        source_root = reader.original_root
        # `ZipFile.filename` is None for archives backed by unnamed in-memory
        # streams, so equal filenames only identify the same archive when they
        # are not None; otherwise fall back to object identity.
        if (
            isinstance(source_root, ZipFile)
            and reader.original_file_name == arc_name
            and (
                source_root is zip
                or (
                    source_root.filename is not None
                    and source_root.filename == zip.filename
                )
            )
        ):
            logger.debug(
                f"Not copying {reader.original_root}/{reader.original_file_name} to itself."
            )
            continue

        with zip.open(arc_name, "w") as dest:
            shutil.copyfileobj(reader, dest, 1024 * 8)

315 

316 

def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[
        FileName, Union[str, FilePath, ZipPath, BioimageioYamlContentView, BytesReader]
    ],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive at `path` and fill it with `content`.

    Args:
        path: Output path (or binary stream) to write the archive to.
        content: Mapping of archive names to local file paths, strings (for
            text files), or dicts (for yaml files).
        compression: Numeric constant of the compression method.
        compression_level: Compression level used when writing the members.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive = ZipFile(
        path, "w", compression=compression, compresslevel=compression_level
    )
    with archive:
        write_content_to_zip(content, archive)

340 

341 

def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`.

    Pickled data is only accepted if `settings.allow_pickle` is set, in which
    case a warning is logged.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=allow_pickle)

348 

349 

def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Save `array` with `numpy.save` to `path`, with pickling disabled."""
    with path.open(mode="wb") as stream:
        # binary mode must not yield a text wrapper (also narrows the type)
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=False)