Coverage for bioimageio/spec/_internal/io_utils.py: 28%

123 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-02-05 13:53 +0000

1import io 

2import zipfile 

3from contextlib import nullcontext 

4from difflib import get_close_matches 

5from pathlib import Path 

6from types import MappingProxyType 

7from typing import ( 

8 IO, 

9 Any, 

10 Dict, 

11 Mapping, 

12 Union, 

13 cast, 

14) 

15from zipfile import ZipFile, is_zipfile 

16 

17import numpy 

18import requests 

19from loguru import logger 

20from numpy.typing import NDArray 

21from pydantic import FilePath, NewPath, RootModel 

22from ruyaml import YAML 

23from typing_extensions import Unpack 

24 

25from ._settings import settings 

26from .io import ( 

27 BIOIMAGEIO_YAML, 

28 BioimageioYamlContent, 

29 FileDescr, 

30 FileInZip, 

31 HashKwargs, 

32 LightHttpFileDescr, 

33 OpenedBioimageioYaml, 

34 YamlValue, 

35 download, 

36 find_bioimageio_yaml_file_name, 

37 identify_bioimageio_yaml_file_name, 

38) 

39from .io_basics import FileName, ZipPath 

40from .types import FileSource, PermissiveFileSource 

41from .utils import cache 

42 

# Shared YAML reader/writer created with typ="safe"; `read_yaml` and
# `write_yaml` in this module load and dump through this single instance.
yaml = YAML(typ="safe")

44 

45 

def read_yaml(file: Union[FilePath, ZipPath, IO[str], IO[bytes]]) -> YamlValue:
    """Parse YAML from a path or from an already opened stream.

    Args:
        file: A file system path or a path inside a zip archive (its content
            is read as UTF-8 text first), or an open text/binary stream
            (handed to the YAML loader unchanged).

    Returns:
        The parsed YAML content.
    """
    # Paths are read eagerly into a string; anything else is assumed to be a
    # stream the loader can consume directly.
    parsed: YamlValue = yaml.load(
        file.read_text(encoding="utf-8")
        if isinstance(file, (ZipPath, Path))
        else file
    )
    return parsed

54 

55 

def write_yaml(
    content: YamlValue,
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Serialize `content` as YAML to a path or to an open stream.

    Args:
        content: The YAML value to dump.
        file: Destination. A file system path or a path inside a zip archive
            is opened for writing as UTF-8 text and closed afterwards; an
            already opened stream is written to as is and is left open.
    """
    # The signature accepts `ZipPath`, and `read_yaml` checks for it
    # separately from `Path`. Checking only `Path` here let a zip path fall
    # through to the stream branch, where the dumper received a path object
    # instead of a writable file.
    # NOTE(review): writing through a `ZipPath` presumably requires the
    # underlying archive to be opened in a writable mode -- confirm at callers.
    if isinstance(file, (ZipPath, Path)):
        cm = file.open("w", encoding="utf-8")
    else:
        # Wrap the caller's stream so the `with` below does not close it.
        cm = nullcontext(file)

    with cm as f:
        yaml.dump(content, f)

68 

69 

def _sanitize_bioimageio_yaml(content: YamlValue) -> BioimageioYamlContent:
    """Check that parsed YAML content is a mapping with string keys only.

    Args:
        content: Parsed YAML content.

    Returns:
        The very same object, typed as `BioimageioYamlContent`.

    Raises:
        ValueError: If `content` is not a dict, or if any of its keys is not
            a string (the first offending key is reported).
    """
    if isinstance(content, dict):
        # Collect offending keys in iteration order; only the first one is
        # reported in the error message.
        non_str_keys = [k for k in content if not isinstance(k, str)]
        if non_str_keys:
            key = non_str_keys[0]
            raise ValueError(
                f"Expected all keys (field names) in a {BIOIMAGEIO_YAML} "
                f"need to be strings (got '{key}' of type {type(key)})."
            )

        return cast(BioimageioYamlContent, content)

    raise ValueError(
        f"Expected {BIOIMAGEIO_YAML} content to be a mapping (got {type(content)})."
    )

84 

85 

def _open_bioimageio_rdf_in_zip(source: ZipFile, rdf_name: str) -> OpenedBioimageioYaml:
    """Read and sanitize the YAML member `rdf_name` of an opened zip archive.

    Args:
        source: The opened zip archive.
        rdf_name: Name of the archive member holding the YAML description.

    Returns:
        The sanitized content together with the archive itself and the
        archive's file name ("bioimageio.zip" if the archive has none).
    """
    with source.open(rdf_name) as rdf_stream:
        raw_content = read_yaml(rdf_stream)

    sanitized = _sanitize_bioimageio_yaml(raw_content)
    # In-memory archives have no file name; fall back to a generic one.
    archive_name = source.filename or "bioimageio.zip"
    return OpenedBioimageioYaml(sanitized, source, archive_name)

91 

92 

def _open_bioimageio_zip(source: ZipFile) -> OpenedBioimageioYaml:
    """Locate the bioimageio YAML member of a zip archive and open it.

    Args:
        source: The opened zip archive.

    Returns:
        The opened description found among the archive's member names.
    """
    # `namelist()` yields the file name of every member, in archive order.
    member_names = source.namelist()
    yaml_member = identify_bioimageio_yaml_file_name(member_names)
    return _open_bioimageio_rdf_in_zip(source, yaml_member)

98 

99 

def open_bioimageio_yaml(
    source: Union[PermissiveFileSource, ZipFile], /, **kwargs: Unpack[HashKwargs]
) -> OpenedBioimageioYaml:
    """Open a bioimageio YAML description from various kinds of sources.

    Args:
        source: An opened zip archive, anything `download` accepts, or -- as
            a fallback when `download` fails for a string -- a key of the id
            map returned by `get_id_map`.
        **kwargs: Forwarded to `download`.

    Returns:
        The sanitized YAML content together with its root and file name.

    Raises:
        FileNotFoundError: If downloading failed, `source` is not in the id
            map, but similar ids exist (they are suggested in the message).
        Exception: The original error raised by `download` if `source`
            cannot be resolved through the id map either.
    """
    if isinstance(source, ZipFile):
        return _open_bioimageio_zip(source)

    try:
        downloaded = download(source, **kwargs)
    except Exception:
        # check if `source` is a collection id
        if (
            not isinstance(source, str)
            or not isinstance(settings.id_map, str)
            or "/" not in settings.id_map
        ):
            raise

        id_map = get_id_map()
        # An empty id map (e.g. it could not be fetched) must take this
        # branch too; otherwise the lookup below fails with an unrelated
        # KeyError that hides the original download error.
        if source not in id_map:
            close_matches = get_close_matches(source, id_map)
            if not close_matches:
                # nothing to suggest: surface the original download error
                raise

            if len(close_matches) == 1:
                did_you_mean = f" Did you mean '{close_matches[0]}'?"
            else:
                did_you_mean = f" Did you mean any of {close_matches}?"

            raise FileNotFoundError(f"'{source}' not found.{did_you_mean}")

        entry = id_map[source]
        logger.info("loading {} from {}", source, entry.source)
        downloaded = entry.download()

    local_source = downloaded.path
    if isinstance(local_source, ZipPath):
        # the description itself lives inside a zip archive
        return _open_bioimageio_rdf_in_zip(local_source.root, local_source.name)
    elif is_zipfile(local_source):
        # The archive is intentionally left open: it is handed on as part of
        # the returned `OpenedBioimageioYaml`.
        return _open_bioimageio_zip(ZipFile(local_source))

    if local_source.is_dir():
        # a folder was given: look for the description file inside of it
        root = local_source
        local_source = local_source / find_bioimageio_yaml_file_name(local_source)
    else:
        root = downloaded.original_root

    content = _sanitize_bioimageio_yaml(read_yaml(local_source))
    return OpenedBioimageioYaml(
        content,
        root.original_root if isinstance(root, FileInZip) else root,
        downloaded.original_file_name,
    )

152 

153 

# Pydantic root model over a dict mapping str keys to `LightHttpFileDescr`
# values. `_get_id_map_impl` validates the fetched JSON with it and returns
# the wrapped dict via `.root`; `open_bioimageio_yaml` looks entries up by
# the given `source` string.
_IdMap = RootModel[Dict[str, LightHttpFileDescr]]

155 

156 

def _get_id_map_impl(url: str) -> Dict[str, LightHttpFileDescr]:
    """Fetch the id map published at `url` and validate it.

    Args:
        url: Location of a JSON document mapping ids to file descriptions.

    Returns:
        The validated mapping, or an empty dict if `url` is obviously invalid
        or the document could not be fetched/decoded (an error is logged in
        both cases).

    Raises:
        pydantic.ValidationError: If the fetched JSON does not validate
            against `_IdMap` (validation happens outside the `try` below).
    """
    if not isinstance(url, str) or "/" not in url:
        logger.opt(depth=1).error("invalid id map url: {}", url)
        # Do not attempt the request: without this return the invalid URL was
        # requested anyway and a second, redundant error was logged when that
        # request failed.
        return {}

    try:
        id_map_raw: Any = requests.get(url, timeout=10).json()
    except Exception as e:
        # best effort: a missing id map must not crash the caller
        logger.opt(depth=1).error("failed to get {}: {}", url, e)
        return {}

    id_map = _IdMap.model_validate(id_map_raw)
    return id_map.root

168 

169 

@cache
def get_id_map() -> Mapping[str, LightHttpFileDescr]:
    """Return a read-only view of the (cached) resource id map.

    If `settings.resolve_draft` is set, the map from `settings.id_map_draft`
    is fetched first and then updated with the map from `settings.id_map`,
    so entries of the latter take precedence. Any error while building the
    map is logged and yields an empty mapping.
    """
    merged: Dict[str, LightHttpFileDescr]
    try:
        merged = (
            _get_id_map_impl(settings.id_map_draft) if settings.resolve_draft else {}
        )
        merged.update(_get_id_map_impl(settings.id_map))
    except Exception as e:
        logger.error("failed to get resource id mapping: {}", e)
        merged = {}

    return MappingProxyType(merged)

185 

186 

def write_content_to_zip(
    content: Mapping[FileName, Union[str, FilePath, ZipPath, Dict[Any, Any]]],
    zip: zipfile.ZipFile,
):
    """Add text, YAML documents and files to an opened zip archive.

    Args:
        content: Maps archive member names to what is stored under that name:
            a dict (dumped as YAML), a string (stored as UTF-8 text), a path
            inside another zip archive (its bytes are copied), or a local
            file path (the file is added).
        zip: The archive to write to.
    """
    for member_name, item in content.items():
        payload = item
        if isinstance(payload, dict):
            # dump to an in-memory buffer, then treat the result like any
            # other text entry below
            yaml_buffer = io.StringIO()
            write_yaml(payload, yaml_buffer)
            payload = yaml_buffer.getvalue()

        if isinstance(payload, str):
            zip.writestr(member_name, payload.encode("utf-8"))
            continue

        if isinstance(payload, ZipPath):
            zip.writestr(member_name, payload.read_bytes())
            continue

        zip.write(payload, arcname=member_name)

209 

210 

def write_zip(
    path: Union[FilePath, IO[bytes]],
    content: Mapping[FileName, Union[str, FilePath, ZipPath, Dict[Any, Any]]],
    *,
    compression: int,
    compression_level: int,
) -> None:
    """Create a zip archive holding `content`.

    Args:
        path: Where to write the archive (a path or a binary stream).
        content: Maps archive names to local file paths, strings (for text
            files), or dicts (for yaml files).
        compression: Numeric constant of the compression method.
        compression_level: Compression level used when writing files to the
            archive.
            See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
    """
    archive = ZipFile(
        path, mode="w", compression=compression, compresslevel=compression_level
    )
    # the context manager closes (and thereby finalizes) the archive
    with archive:
        write_content_to_zip(content, archive)

232 

233 

def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`.

    `source` is resolved through `download`; the resulting file is opened in
    binary mode and read with `numpy.load` with pickling disabled.
    """
    local_path = download(source).path
    with local_path.open(mode="rb") as stream:
        # narrows the stream type: binary mode never yields a text wrapper
        assert not isinstance(stream, io.TextIOWrapper)
        loaded = numpy.load(stream, allow_pickle=False)

    return loaded

239 

240 

def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save`, pickling disabled.

    Args:
        path: Destination; opened in binary write mode.
        array: The array to store.
    """
    with path.open(mode="wb") as stream:
        # narrows the stream type: binary mode never yields a text wrapper
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=False)