Coverage for bioimageio/spec/generic/v0_3.py: 91%

193 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-27 09:20 +0000

1from __future__ import annotations 

2 

3import string 

4from functools import partial 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 ClassVar, 

10 Dict, 

11 List, 

12 Literal, 

13 Optional, 

14 Sequence, 

15 Type, 

16 TypeVar, 

17 Union, 

18 cast, 

19) 

20 

21import annotated_types 

22from annotated_types import Len, LowerCase, MaxLen, MinLen 

23from pydantic import Field, RootModel, ValidationInfo, field_validator, model_validator 

24from typing_extensions import Annotated 

25 

26from bioimageio.spec._internal.type_guards import is_dict 

27 

28from .._internal.common_nodes import Node, ResourceDescrBase 

29from .._internal.constants import TAG_CATEGORIES 

30from .._internal.field_validation import validate_gh_user 

31from .._internal.field_warning import as_warning, warn 

32from .._internal.io import ( 

33 BioimageioYamlContent, 

34 FileDescr, 

35 WithSuffix, 

36 is_yaml_value, 

37) 

38from .._internal.io_basics import Sha256 

39from .._internal.io_packaging import FileDescr_ 

40from .._internal.license_id import DeprecatedLicenseId, LicenseId 

41from .._internal.node_converter import Converter 

42from .._internal.types import FileSource_, NotEmpty, RelativeFilePath 

43from .._internal.url import HttpUrl 

44from .._internal.validated_string import ValidatedString 

45from .._internal.validator_annotations import ( 

46 Predicate, 

47 RestrictCharacters, 

48) 

49from .._internal.version_type import Version 

50from .._internal.warning_levels import ALERT, INFO 

51from ._v0_3_converter import convert_from_older_format 

52from .v0_2 import Author as _Author_v0_2 

53from .v0_2 import BadgeDescr, Doi, FileSource_cover, OrcidId, Uploader 

54from .v0_2 import Maintainer as _Maintainer_v0_2 

55 

# Public names of this module: the classes and constants defined below plus
# names re-exported from `.v0_2` and `.._internal` (see the imports above).
__all__ = [
    "Author",
    "BadgeDescr",
    "CiteEntry",
    "DeprecatedLicenseId",
    "Doi",
    "FileDescr",
    "GenericDescr",
    "HttpUrl",
    "KNOWN_SPECIFIC_RESOURCE_TYPES",
    "LicenseId",
    "LinkedResource",
    "Maintainer",
    "OrcidId",
    "RelativeFilePath",
    "ResourceId",
    "Sha256",
    "Uploader",
    "VALID_COVER_IMAGE_EXTENSIONS",
    "Version",
]

77 

# Resource types that `GenericDescr.check_specific_types` rejects with a
# ValueError pointing the user to the type-specific description instead.
KNOWN_SPECIFIC_RESOURCE_TYPES = (
    "application",
    "collection",
    "dataset",
    "model",
    "notebook",
)

# File suffixes listed as supported image formats in the description text of
# the `covers` field of `GenericModelDescrBase`.
VALID_COVER_IMAGE_EXTENSIONS = (
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
)

92 

93 

class ResourceId(ValidatedString):
    # Accepted values: non-empty strings built from lowercase ASCII letters,
    # digits and the characters "_-/." that neither start nor end with "/".
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            RestrictCharacters(string.ascii_lowercase + string.digits + "_-/."),
            annotated_types.Predicate(
                lambda s: not s.startswith("/") and not s.endswith("/")
            ),
        ]
    ]

104 

105 

def _has_no_slash(s: str) -> bool:
    """Return True if `s` contains neither a forward slash nor a backslash."""
    return not any(separator in s for separator in ("/", "\\"))

108 

109 

class Author(_Author_v0_2):
    # Redeclares `name` and `github_user` of the v0.2 author description:
    # `name` has to pass `_has_no_slash`, `github_user` stays optional.
    name: Annotated[str, Predicate(_has_no_slash)]
    github_user: Optional[str] = None

    @field_validator("github_user", mode="after")
    def _validate_gh_user(cls, value: Optional[str]):
        # An unset GitHub user name is allowed and skips the check entirely.
        return None if value is None else validate_gh_user(value)

120 

121 

class _AuthorConv(Converter[_Author_v0_2, Author]):
    # Converts a v0.2 author into whatever `tgt` constructs (an `Author` or a
    # plain dict) by copying the five author fields by name.
    def _convert(
        self, src: _Author_v0_2, tgt: "type[Author] | type[dict[str, Any]]"
    ) -> "Author | dict[str, Any]":
        copied_fields = dict(
            name=src.name,
            github_user=src.github_user,
            affiliation=src.affiliation,
            email=src.email,
            orcid=src.orcid,
        )
        return tgt(**copied_fields)


# Module-level converter instance for v0.2 -> v0.3 authors.
_author_conv = _AuthorConv(_Author_v0_2, Author)

136 

137 

class Maintainer(_Maintainer_v0_2):
    # Redeclares two fields of the v0.2 maintainer description: `name` is
    # optional but, if given, has to pass `_has_no_slash`; `github_user` is
    # required.
    name: Optional[Annotated[str, Predicate(_has_no_slash)]] = None
    github_user: str

    @field_validator("github_user", mode="after")
    def validate_gh_user(cls, value: str):
        # Inside this body `validate_gh_user` resolves to the function imported
        # at module level, not to this method: class scope is not searched from
        # within a function body, so there is no recursion here.
        return validate_gh_user(value)

145 

146 

class _MaintainerConv(Converter[_Maintainer_v0_2, Maintainer]):
    # Converts a v0.2 maintainer into whatever `tgt` constructs (a `Maintainer`
    # or a plain dict) by copying the five maintainer fields by name.
    def _convert(
        self, src: _Maintainer_v0_2, tgt: "type[Maintainer] | type[dict[str, Any]]"
    ) -> "Maintainer | dict[str, Any]":
        # `tgt` is annotated in the same form as in `_AuthorConv._convert`;
        # the annotation is a string and has no effect at runtime.
        return tgt(
            name=src.name,
            github_user=src.github_user,
            affiliation=src.affiliation,
            email=src.email,
            orcid=src.orcid,
        )


# Module-level converter instance for v0.2 -> v0.3 maintainers.
_maintainer_conv = _MaintainerConv(_Maintainer_v0_2, Maintainer)

161 

162 

class CiteEntry(Node):
    """A citation that should be referenced in work using this resource."""

    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the prefered citation reference.
    See https://www.doi.org/ for details.
    Note:
        Either **doi** or **url** have to be specified.
    """

    url: Optional[HttpUrl] = None
    """URL to cite (preferably specify a **doi** instead/also).
    Note:
        Either **doi** or **url** have to be specified.
    """

    @model_validator(mode="after")
    def _check_doi_or_url(self):
        # One truthy reference (DOI or URL) is enough for a valid citation.
        if self.doi or self.url:
            return self

        raise ValueError("Either 'doi' or 'url' is required")

188 

189 

class LinkedResourceBase(Node):
    # Base of `LinkedResource`; contributes the optional `version` field and
    # the handling of the `version_number` input key.

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        # Anything that is not a dict is handed on untouched.
        if not is_dict(value):
            return value

        # `version_number` is always removed from the input; its value is only
        # carried over when `version` is missing or None.
        legacy_version = value.pop("version_number", None)
        if legacy_version is not None and value.get("version") is None:
            value["version"] = legacy_version

        return value

    version: Optional[Version] = None
    """The version of the linked resource following SemVer 2.0."""

203 

204 

class LinkedResource(LinkedResourceBase):
    """Reference to a bioimage.io resource"""

    # Validated as a `ResourceId`; the optional `version` field is inherited
    # from `LinkedResourceBase`.
    id: ResourceId
    """A valid resource `id` from the official bioimage.io collection."""

210 

211 

# Declares no fields of its own; the class keyword `extra="allow"` is the
# pydantic setting that keeps undeclared fields on the instance.
class BioimageioConfig(Node, extra="allow"):
    """bioimage.io internal metadata."""

214 

215 

class Config(Node, extra="allow"):
    """A place to store additional metadata (often tool specific).

    Such additional metadata is typically set programmatically by the respective tool
    or by people with specific insights into the tool.
    If you want to store additional metadata that does not match any of the other
    fields, think of a key unlikely to collide with anyone elses use-case/tool and save
    it here.

    Please consider creating [an issue in the bioimageio.spec repository](https://github.com/bioimage-io/spec-bioimage-io/issues/new?template=Blank+issue)
    if you are not sure if an existing field could cover your use case
    or if you think such a field should exist.
    """

    bioimageio: BioimageioConfig = Field(default_factory=BioimageioConfig)
    """bioimage.io internal metadata."""

    @model_validator(mode="after")
    def _validate_extra_fields(self):
        # Every undeclared field has to be a `Node` or pass `is_yaml_value`;
        # the first offending key aborts validation.
        for key, extra_value in (self.model_extra or {}).items():
            if isinstance(extra_value, Node) or is_yaml_value(extra_value):
                continue

            raise ValueError(
                f"config.{key} is not a valid YAML value or `Node` instance"
            )

        return self

    def __getitem__(self, key: str) -> Any:
        """Allows to access the config as a dictionary."""
        # Item access is plain attribute access under the hood.
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        """Allows to set the config as a dictionary."""
        # Item assignment is plain attribute assignment under the hood.
        setattr(self, key, value)

251 

252 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including of model descriptions"""

    # NOTE(review): `name` is annotated twice in a row. In a class body the
    # later annotation replaces the earlier one, so the character restriction,
    # `MinLen(5)` and the 64-character warning declared first presumably never
    # take effect, while the field docstring still describes the restricted
    # character set. Confirm which of the two declarations is intended.
    name: Annotated[
        Annotated[
            str, RestrictCharacters(string.ascii_letters + string.digits + "_+- ()")
        ],
        MinLen(5),
        MaxLen(128),
        warn(MaxLen(64), "Name longer than 64 characters.", INFO),
    ]
    name: Annotated[NotEmpty[str], MaxLen(128)]
    """A human-friendly name of the resource description.
    May only contains letters, digits, underscore, minus, parentheses and spaces."""

    # At most 1024 characters; the `warn(...)` annotation carries a second,
    # lower limit of 512 with its own message.
    description: Annotated[
        str, MaxLen(1024), warn(MaxLen(512), "Description longer than 512 characters.")
    ]
    """A string containing a brief description."""

    # Defaults to an empty list; the `cast` only gives the `list` factory a
    # precise static type.
    covers: List[FileSource_cover] = Field(
        default_factory=cast(Callable[[], List[FileSource_cover]], list),
        description=(
            "Cover images. Please use an image smaller than 500KB and an aspect"
            " ratio width to height of 2:1 or 1:1.\nThe supported image formats"
            f" are: {VALID_COVER_IMAGE_EXTENSIONS}"
        ),
        examples=[["cover.png"]],
    )
    """Cover images."""

    # Optional string of length 1 or 2.
    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=2), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    # Required field without a default.
    authors: NotEmpty[List[Author]]
    """The authors are the creators of this resource description and the primary points of contact."""

    attachments: List[FileDescr_] = Field(
        default_factory=cast(Callable[[], List[FileDescr_]], list)
    )
    """file attachments"""

    # Required field without a default.
    cite: NotEmpty[List[CiteEntry]]
    """citations"""

    # The union is tried left to right, i.e. `LicenseId` before
    # `DeprecatedLicenseId`; the `warn(LicenseId, ...)` annotation carries the
    # deprecation message template.
    license: Annotated[
        Annotated[
            Union[LicenseId, DeprecatedLicenseId], Field(union_mode="left_to_right")
        ],
        warn(
            LicenseId,
            "{value} is deprecated, see https://spdx.org/licenses/{value}.html",
        ),
        Field(examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
    ]
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose)
    to discuss your intentions with the community."""

    git_repo: Annotated[
        Optional[HttpUrl],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    # Either a string of length 1 or 2, a file source, or None (the default).
    icon: Union[Annotated[str, Len(min_length=1, max_length=2)], FileSource_, None] = (
        None
    )
    """An icon for illustration, e.g. on bioimage.io"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    maintainers: List[Maintainer] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """Maintainers of this resource.
    If not specified, `authors` are maintainers and at least some of them has to specify their `github_user` name"""

    # Wrapped in `as_warning` with severity ALERT; presumably this reports the
    # ValueError below as a warning instead of failing validation (confirm in
    # `.._internal.field_warning`).
    @partial(as_warning, severity=ALERT)
    @field_validator("maintainers", mode="after")
    @classmethod
    def check_maintainers_exist(
        cls, maintainers: List[Maintainer], info: ValidationInfo
    ) -> List[Maintainer]:
        # Only checked when no maintainers were given and `authors` is present
        # in the already validated data.
        if not maintainers and "authors" in info.data:
            authors: List[Author] = info.data["authors"]
            # Complain only if not a single author has a `github_user`.
            if all(a.github_user is None for a in authors):
                raise ValueError(
                    "Missing `maintainers` or any author in `authors` with a specified"
                    + " `github_user` name."
                )

        return maintainers

    tags: Annotated[
        List[str],
        Field(examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]),
    ] = Field(default_factory=list)
    """Associated tags"""

    # Wrapped in `as_warning` (default severity); see the note on
    # `check_maintainers_exist` above the `maintainers` validator's decorators.
    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        # NOTE(review): unguarded lookup, unlike the `"authors" in info.data`
        # check in `check_maintainers_exist`; this raises KeyError whenever
        # "type" is absent from `info.data`. No `type` field is declared in
        # this class, so confirm where it comes from and that it is validated
        # before `tags`.
        categories = TAG_CATEGORIES.get(info.data["type"], {})
        missing_categories: List[Dict[str, Sequence[str]]] = []
        # A category counts as covered if at least one of its entries is a tag.
        for cat, entries in categories.items():
            if not any(e in value for e in entries):
                missing_categories.append({cat: entries})

        if missing_categories:
            raise ValueError(
                f"Missing tags from bioimage.io categories: {missing_categories}"
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        # For dict input, `version_number` is always removed (the dict is
        # modified in place); its value only fills in `version` when that key
        # is missing or None.
        if is_dict(value):
            vn = value.pop("version_number", None)
            if vn is not None and value.get("version") is None:
                value["version"] = vn

        return value

405 

406 

# Annotated file source used for the `documentation` field of
# `GenericDescrBase`; `WithSuffix` presumably enforces the `.md` suffix
# case-sensitively (confirm in `.._internal.io`).
FileSource_documentation = Annotated[
    FileSource_,
    WithSuffix(".md", case_sensitive=True),
    Field(
        examples=[
            "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
            "README.md",
        ],
    ),
]

417 

418 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.3.0"]] = "0.3.0"
    # Static type checkers see a default value for `format_version`; at runtime
    # the field is declared without one.
    if TYPE_CHECKING:
        format_version: Literal["0.3.0"] = "0.3.0"
    else:
        format_version: Literal["0.3.0"]
        """The **format** version of this resource specification"""

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        # The conversion returns None, so it works on `data` in place; the same
        # object is then handed on to validation.
        cls.convert_from_old_format_wo_validation(data)
        return data

    @classmethod
    def convert_from_old_format_wo_validation(cls, data: BioimageioYamlContent) -> None:
        """Convert metadata following an older format version to this classes' format
        without validating the result.
        """
        convert_from_older_format(data)

    documentation: Optional[FileSource_documentation] = None
    """URL or relative path to a markdown file encoded in UTF-8 with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    badges: List[BadgeDescr] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """badges associated with this resource"""

    # Defaults to a fresh, empty `Config`.
    config: Config = Field(default_factory=Config)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a GitHub repo URL in `config` since there is a `git_repo` field.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        giraffe_neckometer:  # here is the domain name
            length: 3837283
            address:
                home: zoo
        imagej:  # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource.
    (Packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files.)"""

472 

473 

# Type variable for generic code over `GenericDescrBase` and its subclasses.
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

475 

476 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    implemented_type: ClassVar[Literal["generic"]] = "generic"
    # Static type checkers see a default value for `type`; at runtime the
    # field is declared without one.
    if TYPE_CHECKING:
        type: Annotated[str, LowerCase] = "generic"
        """The resource type assigns a broad category to the resource."""
    else:
        type: Annotated[str, LowerCase]
        """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    parent: Optional[ResourceId] = None
    """The description from which this one is derived"""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        # Any type outside the known specific ones is fine for a generic
        # description.
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            + f" your '{value}' resource."
        )