Coverage for bioimageio/spec/generic/v0_3.py: 93%

190 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-12 17:44 +0000

1from __future__ import annotations 

2 

3import string 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Callable, 

8 ClassVar, 

9 Dict, 

10 List, 

11 Literal, 

12 Optional, 

13 Sequence, 

14 Type, 

15 TypeVar, 

16 Union, 

17 cast, 

18) 

19 

20import annotated_types 

21from annotated_types import Len, LowerCase, MaxLen, MinLen 

22from pydantic import Field, RootModel, ValidationInfo, field_validator, model_validator 

23from typing_extensions import Annotated 

24 

25from .._internal.common_nodes import Node, ResourceDescrBase 

26from .._internal.constants import TAG_CATEGORIES 

27from .._internal.field_validation import validate_github_user 

28from .._internal.field_warning import as_warning, issue_warning, warn 

29from .._internal.io import ( 

30 BioimageioYamlContent, 

31 FileDescr, 

32 WithSuffix, 

33 is_yaml_value, 

34) 

35from .._internal.io_basics import Sha256 

36from .._internal.io_packaging import FileDescr_ 

37from .._internal.license_id import DeprecatedLicenseId, LicenseId 

38from .._internal.node_converter import Converter 

39from .._internal.type_guards import is_dict 

40from .._internal.types import FAIR, FileSource_, NotEmpty, RelativeFilePath 

41from .._internal.url import HttpUrl 

42from .._internal.validated_string import ValidatedString 

43from .._internal.validator_annotations import ( 

44 Predicate, 

45 RestrictCharacters, 

46) 

47from .._internal.version_type import Version 

48from .._internal.warning_levels import ALERT, INFO 

49from ._v0_3_converter import convert_from_older_format 

50from .v0_2 import Author as _Author_v0_2 

51from .v0_2 import BadgeDescr, Doi, FileSource_cover, OrcidId, Uploader 

52from .v0_2 import Maintainer as _Maintainer_v0_2 

53 

# Public names of this module. Besides the classes and constants defined
# below, this re-exports several names imported from `.v0_2` and `.._internal`.
__all__ = [
    "Author",
    "BadgeDescr",
    "CiteEntry",
    "DeprecatedLicenseId",
    "Doi",
    "FileDescr",
    "GenericDescr",
    "HttpUrl",
    "KNOWN_SPECIFIC_RESOURCE_TYPES",
    "LicenseId",
    "LinkedResource",
    "Maintainer",
    "OrcidId",
    "RelativeFilePath",
    "ResourceId",
    "Sha256",
    "Uploader",
    "VALID_COVER_IMAGE_EXTENSIONS",
    "Version",
]

75 

# Resource types that `GenericDescr.check_specific_types` rejects in favor of
# a type-specific description.
KNOWN_SPECIFIC_RESOURCE_TYPES = (
    "application", "collection", "dataset", "model", "notebook"
)

# File extensions listed as supported image formats in the description of the
# `covers` field.
VALID_COVER_IMAGE_EXTENSIONS = (".gif", ".jpeg", ".jpg", ".png", ".svg")

90 

91 

# Characters handed to `RestrictCharacters` for `ResourceId`:
# lowercase ASCII letters, digits and `_`, `-`, `/`, `.`.
_RESOURCE_ID_CHARACTERS = string.ascii_lowercase + string.digits + "_-/."


class ResourceId(ValidatedString):
    # Validation is delegated to `root_model`: `NotEmpty[str]` combined with
    # `RestrictCharacters` and a predicate that rejects a leading or trailing "/".
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            RestrictCharacters(_RESOURCE_ID_CHARACTERS),
            annotated_types.Predicate(
                lambda s: not s.startswith("/") and not s.endswith("/")
            ),
        ]
    ]

102 

103 

def _has_no_slash(s: str) -> bool:
    """Return `True` if `s` contains neither a forward slash nor a backslash."""
    return not any(separator in s for separator in ("/", "\\"))

106 

107 

class Author(_Author_v0_2):
    # Extends the v0.2 author: `name` has to satisfy `_has_no_slash` and a
    # given `github_user` is passed through `validate_github_user`.
    name: Annotated[str, Predicate(_has_no_slash)]
    github_user: Optional[str] = None

    @field_validator("github_user", mode="after")
    def _validate_github_user(cls, value: Optional[str]):
        """Validate `github_user`; `None` is passed through unchanged."""
        if value is not None:
            return validate_github_user(value)

        return None

118 

119 

class _AuthorConv(Converter[_Author_v0_2, Author]):
    """Converter from a v0.2 `Author` to the `Author` of this module."""

    def _convert(
        self, src: _Author_v0_2, tgt: "type[Author] | type[dict[str, Any]]"
    ) -> "Author | dict[str, Any]":
        """Call `tgt` with the author fields of `src` as keyword arguments."""
        author_fields: Dict[str, Any] = {
            "name": src.name,
            "github_user": src.github_user,
            "affiliation": src.affiliation,
            "email": src.email,
            "orcid": src.orcid,
        }
        return tgt(**author_fields)


# module-level converter instance (v0.2 `Author` -> `Author`)
_author_conv = _AuthorConv(_Author_v0_2, Author)

134 

135 

class Maintainer(_Maintainer_v0_2):
    # Extends the v0.2 maintainer: an optional `name` has to satisfy
    # `_has_no_slash`; `github_user` is required.
    name: Optional[Annotated[str, Predicate(_has_no_slash)]] = None
    github_user: str

    @field_validator("github_user", mode="after")
    def validate_github_user(cls, value: str):
        """Pass `github_user` through the imported `validate_github_user`."""
        # Inside a method body the name resolves to the module-level import,
        # not to this method (class scope is not searched from function bodies).
        validated = validate_github_user(value)
        return validated

143 

144 

class _MaintainerConv(Converter[_Maintainer_v0_2, Maintainer]):
    """Converter from a v0.2 `Maintainer` to the `Maintainer` of this module."""

    def _convert(
        self,
        src: _Maintainer_v0_2,
        # spelled like the `tgt` annotation of `_AuthorConv._convert`
        tgt: "type[Maintainer] | type[dict[str, Any]]",
    ) -> "Maintainer | dict[str, Any]":
        """Call `tgt` with the maintainer fields of `src` as keyword arguments."""
        return tgt(
            name=src.name,
            github_user=src.github_user,
            affiliation=src.affiliation,
            email=src.email,
            orcid=src.orcid,
        )


# module-level converter instance (v0.2 `Maintainer` -> `Maintainer`)
_maintainer_conv = _MaintainerConv(_Maintainer_v0_2, Maintainer)

159 

160 

class CiteEntry(Node):
    """A citation that should be referenced in work using this resource."""

    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the prefered citation reference.
    See https://www.doi.org/ for details.
    Note:
        Either **doi** or **url** have to be specified.
    """

    url: Optional[HttpUrl] = None
    """URL to cite (preferably specify a **doi** instead/also).
    Note:
        Either **doi** or **url** have to be specified.
    """

    @model_validator(mode="after")
    def _check_doi_or_url(self):
        """Require that at least one of `doi` and `url` is set (truthy)."""
        if self.doi or self.url:
            return self

        raise ValueError("Either 'doi' or 'url' is required")

186 

187 

class LinkedResourceBase(Node):
    # Shared base of linked resources: an optional `version` plus handling of
    # a `version_number` key in the raw input.

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        """Drop `version_number` from dict input, using it as `version` if unset.

        The dict is modified in place and returned; non-dict input is returned
        untouched.
        """
        if not is_dict(value):
            return value

        legacy_version = value.pop("version_number", None)
        # an explicitly given `version` takes precedence over `version_number`
        if legacy_version is not None and value.get("version") is None:
            value["version"] = legacy_version

        return value

    version: Optional[Version] = None
    """The version of the linked resource following SemVer 2.0."""

201 

202 

class LinkedResource(LinkedResourceBase):
    """Reference to a bioimage.io resource"""

    # required; `version` is inherited from `LinkedResourceBase`
    id: ResourceId
    """A valid resource `id` from the official bioimage.io collection."""

208 

209 

# Declares no fields of its own; `extra="allow"` is passed as a class keyword.
class BioimageioConfig(Node, extra="allow"):
    """bioimage.io internal metadata."""

212 

213 

class Config(Node, extra="allow"):
    """A place to store additional metadata (often tool specific).

    Such additional metadata is typically set programmatically by the respective tool
    or by people with specific insights into the tool.
    If you want to store additional metadata that does not match any of the other
    fields, think of a key unlikely to collide with anyone elses use-case/tool and save
    it here.

    Please consider creating [an issue in the bioimageio.spec repository](https://github.com/bioimage-io/spec-bioimage-io/issues/new?template=Blank+issue)
    if you are not sure if an existing field could cover your use case
    or if you think such a field should exist.
    """

    bioimageio: BioimageioConfig = Field(default_factory=BioimageioConfig)
    """bioimage.io internal metadata."""

    @model_validator(mode="after")
    def _validate_extra_fields(self):
        """Reject extra fields that are neither a `Node` nor a YAML value."""
        # `model_extra` may be falsy (nothing to check)
        for key, extra_value in (self.model_extra or {}).items():
            if isinstance(extra_value, Node) or is_yaml_value(extra_value):
                continue

            raise ValueError(
                f"config.{key} is not a valid YAML value or `Node` instance"
            )

        return self

    def __getitem__(self, key: str) -> Any:
        """Dictionary-style read access; forwards to `getattr`."""
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        """Dictionary-style write access; forwards to `setattr`."""
        setattr(self, key, value)

249 

250 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including of model descriptions"""

    name: Annotated[
        Annotated[
            str, RestrictCharacters(string.ascii_letters + string.digits + "_+- ()")
        ],
        MinLen(5),
        MaxLen(128),
        warn(MaxLen(64), "Name longer than 64 characters.", INFO),
    ]
    """A human-friendly name of the resource description.
    May only contains letters, digits, underscore, minus, parentheses and spaces."""

    description: FAIR[
        Annotated[
            str,
            MaxLen(1024),
            warn(MaxLen(512), "Description longer than 512 characters."),
        ]
    ] = ""
    """A string containing a brief description."""

    covers: List[FileSource_cover] = Field(
        default_factory=cast(Callable[[], List[FileSource_cover]], list),
        description=(
            "Cover images. Please use an image smaller than 500KB and an aspect"
            " ratio width to height of 2:1 or 1:1.\nThe supported image formats"
            f" are: {VALID_COVER_IMAGE_EXTENSIONS}"
        ),
        examples=[["cover.png"]],
    )
    """Cover images."""

    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=2), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    authors: FAIR[List[Author]] = Field(
        default_factory=cast(Callable[[], List[Author]], list)
    )
    """The authors are the creators of this resource description and the primary points of contact."""

    attachments: List[FileDescr_] = Field(
        default_factory=cast(Callable[[], List[FileDescr_]], list)
    )
    """file attachments"""

    cite: FAIR[List[CiteEntry]] = Field(
        default_factory=cast(Callable[[], List[CiteEntry]], list)
    )
    """citations"""

    license: FAIR[
        Annotated[
            Annotated[
                Union[LicenseId, DeprecatedLicenseId, None],
                Field(union_mode="left_to_right"),
            ],
            warn(
                Optional[LicenseId],
                "{value} is deprecated, see https://spdx.org/licenses/{value}.html",
            ),
            Field(examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
        ]
    ] = None
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose)
    to discuss your intentions with the community."""

    git_repo: Annotated[
        Optional[HttpUrl],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    icon: Union[Annotated[str, Len(min_length=1, max_length=2)], FileSource_, None] = (
        None
    )
    """An icon for illustration, e.g. on bioimage.io"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    maintainers: List[Maintainer] = Field(
        default_factory=cast(Callable[[], List[Maintainer]], list)
    )
    """Maintainers of this resource.
    If not specified, `authors` are maintainers and at least some of them has to specify their `github_user` name"""

    @model_validator(mode="after")
    def _check_maintainers_exist(self):
        """Warn (severity `ALERT`) if nobody can be contacted via GitHub.

        The warning is issued when `maintainers` is empty, `authors` is not
        empty and no author specifies a `github_user`.
        """
        if not self.maintainers and self.authors:
            if all(a.github_user is None for a in self.authors):
                issue_warning(
                    "Missing `maintainers` or any author in `authors` with a specified"
                    + " `github_user` name.",
                    value=self.authors,
                    field="authors",
                    severity=ALERT,
                )

        return self

    tags: FAIR[
        Annotated[
            List[str],
            Field(
                examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]
            ),
        ]
    ] = Field(default_factory=list)
    """Associated tags"""

    # NOTE(review): decorators apply bottom-up, so `as_warning` receives the
    # object returned by `field_validator` -- confirm that `as_warning` keeps
    # the validator registered with pydantic.
    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        """Check that `value` holds at least one tag of every tag category.

        The categories are looked up in `TAG_CATEGORIES` by the resource `type`.

        Raises:
            ValueError: if any category has none of its entries in `value`.
        """
        # `info.data` only holds fields validated before `tags`; `type` is
        # absent if it failed validation or is validated later. Fall back to
        # "no categories" instead of raising a `KeyError`.
        resource_type = info.data.get("type")
        categories = TAG_CATEGORIES.get(resource_type, {})
        missing_categories: List[Dict[str, Sequence[str]]] = []
        for cat, entries in categories.items():
            if not any(e in value for e in entries):
                missing_categories.append({cat: entries})

        if missing_categories:
            raise ValueError(
                f"Missing tags from bioimage.io categories: {missing_categories}"
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        """Drop `version_number` from dict input, using it as `version` if unset.

        The dict is modified in place and returned; non-dict input is returned
        untouched.
        """
        if is_dict(value):
            vn = value.pop("version_number", None)
            # an explicitly given `version` takes precedence
            if vn is not None and value.get("version") is None:
                value["version"] = vn

        return value

    version_comment: Optional[Annotated[str, MaxLen(512)]] = None
    """A comment on the version of the resource."""

418 

419 

# `FileSource_` annotated with a case-sensitive ".md" suffix requirement and
# example values; used for the `documentation` field of `GenericDescrBase`.
FileSource_documentation = Annotated[
    FileSource_,
    WithSuffix(".md", case_sensitive=True),
    Field(
        examples=[
            "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
            "README.md",
        ],
    ),
]

430 

431 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.3.0"]] = "0.3.0"
    # Static type checkers see a default value; at runtime the field is
    # declared without one (presumably the value is supplied elsewhere --
    # TODO confirm).
    if TYPE_CHECKING:
        format_version: Literal["0.3.0"] = "0.3.0"
    else:
        format_version: Literal["0.3.0"]
        """The **format** version of this resource specification"""

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        """Run `convert_from_old_format_wo_validation` on the raw input.

        Returns the very same `data` object that was passed in.
        """
        cls.convert_from_old_format_wo_validation(data)
        return data

    @classmethod
    def convert_from_old_format_wo_validation(cls, data: BioimageioYamlContent) -> None:
        """Convert metadata following an older format version to this classes' format
        without validating the result.
        """
        # delegates to the imported `convert_from_older_format`; returns nothing
        convert_from_older_format(data)

    documentation: FAIR[Optional[FileSource_documentation]] = None
    """URL or relative path to a markdown file encoded in UTF-8 with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    badges: List[BadgeDescr] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """badges associated with this resource"""

    config: Config = Field(default_factory=Config.model_construct)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a GitHub repo URL in `config` since there is a `git_repo` field.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        giraffe_neckometer: # here is the domain name
            length: 3837283
            address:
                home: zoo
        imagej: # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource.
    (Packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files.)"""


# type variable for any subclass of `GenericDescrBase`
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

488 

489 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    implemented_type: ClassVar[Literal["generic"]] = "generic"
    # Static type checkers see a default value; at runtime the field is
    # declared without one.
    if TYPE_CHECKING:
        type: Annotated[str, LowerCase] = "generic"
        """The resource type assigns a broad category to the resource."""
    else:
        type: Annotated[str, LowerCase]
        """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    parent: Optional[ResourceId] = None
    """The description from which this one is derived"""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        """Reject resource types listed in `KNOWN_SPECIFIC_RESOURCE_TYPES`.

        Raises:
            ValueError: if `value` is one of the known specific types.
        """
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            f" your '{value}' resource."
        )