Coverage for bioimageio/spec/generic/v0_3.py: 93%

190 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-11 07:34 +0000

1from __future__ import annotations 

2 

3import string 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Callable, 

8 ClassVar, 

9 Dict, 

10 List, 

11 Literal, 

12 Optional, 

13 Sequence, 

14 Type, 

15 TypeVar, 

16 Union, 

17 cast, 

18) 

19 

20import annotated_types 

21from annotated_types import Len, LowerCase, MaxLen, MinLen 

22from pydantic import Field, RootModel, ValidationInfo, field_validator, model_validator 

23from typing_extensions import Annotated 

24 

25from .._internal.common_nodes import Node, ResourceDescrBase 

26from .._internal.constants import TAG_CATEGORIES 

27from .._internal.field_validation import validate_github_user 

28from .._internal.field_warning import as_warning, issue_warning, warn 

29from .._internal.io import ( 

30 BioimageioYamlContent, 

31 FileDescr, 

32 WithSuffix, 

33 is_yaml_value, 

34) 

35from .._internal.io_basics import Sha256 

36from .._internal.io_packaging import FileDescr_ 

37from .._internal.license_id import DeprecatedLicenseId, LicenseId 

38from .._internal.node_converter import Converter 

39from .._internal.type_guards import is_dict 

40from .._internal.types import FAIR, FileSource_, NotEmpty, RelativeFilePath 

41from .._internal.url import HttpUrl 

42from .._internal.validated_string import ValidatedString 

43from .._internal.validator_annotations import ( 

44 Predicate, 

45 RestrictCharacters, 

46) 

47from .._internal.version_type import Version 

48from .._internal.warning_levels import ALERT, INFO 

49from ._v0_3_converter import convert_from_older_format 

50from .v0_2 import Author as _Author_v0_2 

51from .v0_2 import BadgeDescr, Doi, FileSource_cover, OrcidId, Uploader 

52from .v0_2 import Maintainer as _Maintainer_v0_2 

53 

# Public names exported by this module (several are re-exports of imported names).
__all__ = [
    "Author",
    "BadgeDescr",
    "CiteEntry",
    "DeprecatedLicenseId",
    "Doi",
    "FileDescr",
    "GenericDescr",
    "HttpUrl",
    "KNOWN_SPECIFIC_RESOURCE_TYPES",
    "LicenseId",
    "LinkedResource",
    "Maintainer",
    "OrcidId",
    "RelativeFilePath",
    "ResourceId",
    "Sha256",
    "Uploader",
    "VALID_COVER_IMAGE_EXTENSIONS",
    "Version",
]

75 

# Resource types that `GenericDescr.check_specific_types` rejects in favour of
# their own, type-specific description.
KNOWN_SPECIFIC_RESOURCE_TYPES = (
    "application",
    "collection",
    "dataset",
    "model",
    "notebook",
)

# File suffixes advertised as supported cover image formats.
VALID_COVER_IMAGE_EXTENSIONS = (".gif", ".jpeg", ".jpg", ".png", ".svg")

90 

91 

# Validated string for resource identifiers: non-empty, limited to lowercase
# ASCII letters, digits and the characters "_", "-", "/", ".", and neither
# starting nor ending with a slash.
class ResourceId(ValidatedString):
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            RestrictCharacters(string.ascii_lowercase + string.digits + "_-/."),
            annotated_types.Predicate(
                # a slash is only allowed in the interior of the id
                lambda s: not s.startswith("/") and not s.endswith("/")
            ),
        ]
    ]

102 

103 

def _has_no_slash(s: str) -> bool:
    """Return True if `s` contains neither a forward slash nor a backslash."""
    return not any(separator in s for separator in ("/", "\\"))

106 

107 

# Author entry derived from the v0_2 `Author`: `name` must satisfy
# `_has_no_slash` and `github_user` is optional.
class Author(_Author_v0_2):
    name: Annotated[str, Predicate(_has_no_slash)]
    github_user: Optional[str] = None

    @field_validator("github_user", mode="after")
    @classmethod
    def _validate_github_user(cls, value: Optional[str]):
        """Pass a given `github_user` through `validate_github_user`.

        Args:
            value: the already type-validated field value.

        Returns:
            `None` if no user name was given, otherwise whatever
            `validate_github_user` returns for `value`.
        """
        if value is None:
            return None

        return validate_github_user(value)

118 

119 

# Converter from the v0_2 author description to this module's `Author`.
class _AuthorConv(Converter[_Author_v0_2, Author]):
    def _convert(
        self, src: _Author_v0_2, tgt: "type[Author] | type[dict[str, Any]]"
    ) -> "Author | dict[str, Any]":
        """Instantiate `tgt` from the fields of `src`.

        Args:
            src: the v0_2 author to read from.
            tgt: the class to call with the copied fields as keyword arguments
                (`Author` or `dict`).

        Returns:
            The object returned by `tgt(...)`.
        """
        copied_fields: Dict[str, Any] = {
            "name": src.name,
            "github_user": src.github_user,
            "affiliation": src.affiliation,
            "email": src.email,
            "orcid": src.orcid,
        }
        return tgt(**copied_fields)


_author_conv = _AuthorConv(_Author_v0_2, Author)

134 

135 

# Maintainer entry derived from the v0_2 `Maintainer`: `name` is optional (and
# must satisfy `_has_no_slash` when given) while `github_user` is required.
class Maintainer(_Maintainer_v0_2):
    name: Optional[Annotated[str, Predicate(_has_no_slash)]] = None
    github_user: str

    @field_validator("github_user", mode="after")
    @classmethod
    def validate_github_user(cls, value: str):
        """Pass `github_user` through the module-level `validate_github_user`.

        Args:
            value: the already type-validated field value.

        Returns:
            Whatever the imported `validate_github_user` returns for `value`.
        """
        # Inside a method body the class namespace is not searched, so this
        # name resolves to the imported function, not to this method.
        return validate_github_user(value)

143 

144 

# Converter from the v0_2 maintainer description to this module's `Maintainer`.
class _MaintainerConv(Converter[_Maintainer_v0_2, Maintainer]):
    def _convert(
        self, src: _Maintainer_v0_2, tgt: "type[Maintainer] | type[dict[str, Any]]"
    ) -> "Maintainer | dict[str, Any]":
        """Instantiate `tgt` from the fields of `src`.

        Args:
            src: the v0_2 maintainer to read from.
            tgt: the class to call with the copied fields as keyword arguments
                (`Maintainer` or `dict`).

        Returns:
            The object returned by `tgt(...)`.
        """
        return tgt(
            name=src.name,
            github_user=src.github_user,
            affiliation=src.affiliation,
            email=src.email,
            orcid=src.orcid,
        )


_maintainer_conv = _MaintainerConv(_Maintainer_v0_2, Maintainer)

159 

160 

class CiteEntry(Node):
    """A citation that should be referenced in work using this resource."""

    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the preferred citation reference.
    See https://www.doi.org/ for details.
    Note:
        Either **doi** or **url** has to be specified.
    """

    url: Optional[HttpUrl] = None
    """URL to cite (preferably specify a **doi** instead/also).
    Note:
        Either **doi** or **url** has to be specified.
    """

    @model_validator(mode="after")
    def _check_doi_or_url(self):
        """Ensure that at least one of `doi` and `url` is set.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: if both `doi` and `url` are falsy.
        """
        if self.doi or self.url:
            return self

        raise ValueError("Either 'doi' or 'url' is required")

186 

187 

# Base node for linked resources; carries the optional `version` field and
# accepts `version_number` as an alternative input key for it.
class LinkedResourceBase(Node):
    @model_validator(mode="before")
    @classmethod
    def _remove_version_number(cls, value: Any):
        """Move a `version_number` entry of the raw input to `version`.

        Only dict input is touched. `version_number` is popped from the dict
        (in place) and used as `version` when it is not `None` and the dict has
        no non-`None` `version` yet; otherwise the popped value is discarded.

        Args:
            value: the raw, unvalidated input.

        Returns:
            The (possibly modified) input.
        """
        if is_dict(value):
            vn = value.pop("version_number", None)
            if vn is not None and value.get("version") is None:
                value["version"] = vn

        return value

    version: Optional[Version] = None
    """The version of the linked resource following SemVer 2.0."""

200 

201 

class LinkedResource(LinkedResourceBase):
    """Reference to a bioimage.io resource"""

    # the only field added on top of `LinkedResourceBase`
    id: ResourceId
    """A valid resource `id` from the official bioimage.io collection."""

207 

208 

class BioimageioConfig(Node, extra="allow"):
    """bioimage.io internal metadata."""

    # No declared fields; arbitrary extra keys are accepted (extra="allow").

211 

212 

class Config(Node, extra="allow"):
    """A place to store additional metadata (often tool specific).

    Such additional metadata is typically set programmatically by the respective tool
    or by people with specific insights into the tool.
    If you want to store additional metadata that does not match any of the other
    fields, think of a key unlikely to collide with anyone else's use-case/tool and save
    it here.

    Please consider creating [an issue in the bioimageio.spec repository](https://github.com/bioimage-io/spec-bioimage-io/issues/new?template=Blank+issue)
    if you are not sure if an existing field could cover your use case
    or if you think such a field should exist.
    """

    bioimageio: BioimageioConfig = Field(default_factory=BioimageioConfig)
    """bioimage.io internal metadata."""

    @model_validator(mode="after")
    def _validate_extra_fields(self):
        """Check every extra (undeclared) entry of the config.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: if an extra value is neither a `Node` instance nor
                accepted by `is_yaml_value`.
        """
        # `model_extra` may be falsy (no extras); then there is nothing to check
        for k, v in (self.model_extra or {}).items():
            if not isinstance(v, Node) and not is_yaml_value(v):
                raise ValueError(
                    f"config.{k} is not a valid YAML value or `Node` instance"
                )

        return self

    def __getitem__(self, key: str) -> Any:
        """Read a config entry with dictionary syntax (delegates to `getattr`)."""
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        """Set a config entry with dictionary syntax (delegates to `setattr`)."""
        setattr(self, key, value)

248 

249 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including model descriptions"""

    name: Annotated[
        Annotated[
            str, RestrictCharacters(string.ascii_letters + string.digits + "_+- ()")
        ],
        MinLen(5),
        MaxLen(128),
        warn(MaxLen(64), "Name longer than 64 characters.", INFO),
    ]
    """A human-friendly name of the resource description.
    May only contain letters, digits, underscore, plus, minus, parentheses and spaces."""

    description: FAIR[
        Annotated[
            str,
            MaxLen(1024),
            warn(MaxLen(512), "Description longer than 512 characters."),
        ]
    ] = ""
    """A string containing a brief description."""

    covers: List[FileSource_cover] = Field(
        default_factory=cast(Callable[[], List[FileSource_cover]], list),
        description=(
            "Cover images. Please use an image smaller than 500KB and an aspect"
            " ratio width to height of 2:1 or 1:1.\nThe supported image formats"
            f" are: {VALID_COVER_IMAGE_EXTENSIONS}"
        ),
        examples=[["cover.png"]],
    )
    """Cover images."""

    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=2), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    authors: FAIR[List[Author]] = Field(
        default_factory=cast(Callable[[], List[Author]], list)
    )
    """The authors are the creators of this resource description and the primary points of contact."""

    attachments: List[FileDescr_] = Field(
        default_factory=cast(Callable[[], List[FileDescr_]], list)
    )
    """file attachments"""

    cite: FAIR[List[CiteEntry]] = Field(
        default_factory=cast(Callable[[], List[CiteEntry]], list)
    )
    """citations"""

    license: FAIR[
        Annotated[
            Annotated[
                Union[LicenseId, DeprecatedLicenseId, None],
                Field(union_mode="left_to_right"),
            ],
            warn(
                Optional[LicenseId],
                "{value} is deprecated, see https://spdx.org/licenses/{value}.html",
            ),
            Field(examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
        ]
    ] = None
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose)
    to discuss your intentions with the community."""

    git_repo: Annotated[
        Optional[HttpUrl],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    icon: Union[Annotated[str, Len(min_length=1, max_length=2)], FileSource_, None] = (
        None
    )
    """An icon for illustration, e.g. on bioimage.io"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    maintainers: List[Maintainer] = Field(
        default_factory=cast(Callable[[], List[Maintainer]], list)
    )
    """Maintainers of this resource.
    If not specified, `authors` are maintainers and at least one of them has to specify their `github_user` name"""

    @model_validator(mode="after")
    def _check_maintainers_exist(self):
        """Warn if nobody with a `github_user` can be identified as maintainer.

        A warning of severity `ALERT` is issued on the `authors` field when
        `maintainers` is empty, `authors` is not, and no author has a
        `github_user`.

        Returns:
            The validated instance itself.
        """
        if not self.maintainers and self.authors:
            if all(a.github_user is None for a in self.authors):
                issue_warning(
                    "Missing `maintainers` or any author in `authors` with a specified"
                    + " `github_user` name.",
                    value=self.authors,
                    field="authors",
                    severity=ALERT,
                )

        return self

    tags: FAIR[
        Annotated[
            List[str],
            Field(
                examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]
            ),
        ]
    ] = Field(default_factory=list)
    """Associated tags"""

    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        """Check that `tags` covers every tag category known for the resource type.

        The categories are looked up in `TAG_CATEGORIES` by the already
        validated `type`; a category counts as covered if at least one of its
        entries is in `value`.

        Args:
            value: the validated list of tags.
            info: validation info; only `info.data` is read.

        Returns:
            `value`, unchanged.

        Raises:
            ValueError: if at least one category has no matching tag.
                NOTE(review): presumably reported as a warning because of the
                `as_warning` decorator - confirm against its implementation.
        """
        # `info.data` only holds fields that were validated successfully before
        # `tags`; a missing `type` means "no categories" rather than a KeyError.
        resource_type = info.data.get("type")
        if resource_type is None:
            categories = {}
        else:
            categories = TAG_CATEGORIES.get(resource_type, {})

        missing_categories: List[Dict[str, Sequence[str]]] = [
            {cat: entries}
            for cat, entries in categories.items()
            if not any(e in value for e in entries)
        ]

        if missing_categories:
            raise ValueError(
                f"Missing tags from bioimage.io categories: {missing_categories}"
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    @model_validator(mode="before")
    @classmethod
    def _remove_version_number(cls, value: Any):
        """Move a `version_number` entry of the raw input to `version`.

        Only dict input is touched. `version_number` is popped from the dict
        (in place) and used as `version` when it is not `None` and the dict has
        no non-`None` `version` yet; otherwise the popped value is discarded.

        Args:
            value: the raw, unvalidated input.

        Returns:
            The (possibly modified) input.
        """
        if is_dict(value):
            vn = value.pop("version_number", None)
            if vn is not None and value.get("version") is None:
                value["version"] = vn

        return value

    version_comment: Optional[Annotated[str, MaxLen(512)]] = None
    """A comment on the version of the resource."""

417 

418 

# File source annotated with a case-sensitive ".md" suffix requirement and
# example values; used for the `documentation` field.
FileSource_documentation = Annotated[
    FileSource_,
    WithSuffix(".md", case_sensitive=True),
    Field(
        examples=[
            "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
            "README.md",
        ],
    ),
]

429 

430 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.3.0"]] = "0.3.0"
    # Type checkers see a default value; at runtime the field is declared
    # without one.
    if TYPE_CHECKING:
        format_version: Literal["0.3.0"] = "0.3.0"
    else:
        format_version: Literal["0.3.0"]
        """The **format** version of this resource specification"""

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        """Run the old-format conversion on the raw input before validation.

        The conversion works on `data` itself; the same object is returned.
        """
        cls.convert_from_old_format_wo_validation(data)
        return data

    @classmethod
    def convert_from_old_format_wo_validation(cls, data: BioimageioYamlContent) -> None:
        """Apply `convert_from_older_format` to `data` without validating the result.

        Nothing is returned; any change is made to `data` directly.
        """
        convert_from_older_format(data)

    documentation: FAIR[Optional[FileSource_documentation]] = None
    """URL or relative path to a markdown file encoded in UTF-8 with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    badges: List[BadgeDescr] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """badges associated with this resource"""

    config: Config = Field(default_factory=Config.model_construct)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a GitHub repo URL in `config` since there is a `git_repo` field.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        giraffe_neckometer:  # here is the domain name
            length: 3837283
            address:
                home: zoo
        imagej:  # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource.
    (Packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files.)"""


# Type variable bound to `GenericDescrBase` (any resource description type
# derived from it).
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

487 

488 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    implemented_type: ClassVar[Literal["generic"]] = "generic"
    # Type checkers see a default value; at runtime the field is declared
    # without one.
    if TYPE_CHECKING:
        type: Annotated[str, LowerCase] = "generic"
        """The resource type assigns a broad category to the resource."""
    else:
        type: Annotated[str, LowerCase]
        """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    parent: Optional[ResourceId] = None
    """The description from which this one is derived"""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        """Reject resource types that have their own specific description.

        Args:
            value: the validated `type` string.

        Returns:
            `value`, unchanged, if it is not in `KNOWN_SPECIFIC_RESOURCE_TYPES`.

        Raises:
            ValueError: if `value` is one of `KNOWN_SPECIFIC_RESOURCE_TYPES`.
        """
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            f" your '{value}' resource."
        )