Coverage for bioimageio/spec/generic/v0_3.py: 91%

189 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-02 14:21 +0000

1from __future__ import annotations 

2 

3import string 

4from functools import partial 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 ClassVar, 

9 Dict, 

10 List, 

11 Literal, 

12 Optional, 

13 Sequence, 

14 Type, 

15 TypeVar, 

16 Union, 

17) 

18 

19import annotated_types 

20from annotated_types import Len, LowerCase, MaxLen, MinLen 

21from pydantic import Field, RootModel, ValidationInfo, field_validator, model_validator 

22from typing_extensions import Annotated 

23 

24from bioimageio.spec._internal.type_guards import is_dict 

25 

26from .._internal.common_nodes import Node, ResourceDescrBase 

27from .._internal.constants import TAG_CATEGORIES 

28from .._internal.field_validation import validate_gh_user 

29from .._internal.field_warning import as_warning, warn 

30from .._internal.io import ( 

31 BioimageioYamlContent, 

32 FileDescr, 

33 V_suffix, 

34 include_in_package_serializer, 

35 is_yaml_value, 

36 validate_suffix, 

37) 

38from .._internal.io_basics import AbsoluteFilePath, Sha256 

39from .._internal.license_id import DeprecatedLicenseId, LicenseId 

40from .._internal.node_converter import Converter 

41from .._internal.types import ImportantFileSource, NotEmpty, RelativeFilePath 

42from .._internal.url import HttpUrl 

43from .._internal.validated_string import ValidatedString 

44from .._internal.validator_annotations import ( 

45 AfterValidator, 

46 Predicate, 

47 RestrictCharacters, 

48) 

49from .._internal.version_type import Version 

50from .._internal.warning_levels import ALERT, INFO 

51from ._v0_3_converter import convert_from_older_format 

52from .v0_2 import Author as _Author_v0_2 

53from .v0_2 import BadgeDescr, CoverImageSource, Doi, OrcidId, Uploader 

54from .v0_2 import Maintainer as _Maintainer_v0_2 

55 

# Public names exported by this module. Several of them (e.g. `BadgeDescr`, `Doi`,
# `FileDescr`, `HttpUrl`, `Sha256`, `Version`) are imported from other modules above
# and only re-exported here. The list is kept in alphabetical order.
__all__ = [
    "Author",
    "BadgeDescr",
    "CiteEntry",
    "DeprecatedLicenseId",
    "Doi",
    "FileDescr",
    "GenericDescr",
    "HttpUrl",
    "KNOWN_SPECIFIC_RESOURCE_TYPES",
    "LicenseId",
    "LinkedResource",
    "Maintainer",
    "OrcidId",
    "RelativeFilePath",
    "ResourceId",
    "Sha256",
    "Uploader",
    "VALID_COVER_IMAGE_EXTENSIONS",
    "Version",
]

77 

# Resource types that `GenericDescr.check_specific_types` rejects, telling the user to
# use the type-specific description instead of the generic one.
KNOWN_SPECIFIC_RESOURCE_TYPES = (
    "application",
    "collection",
    "dataset",
    "model",
    "notebook",
)
# File extensions listed as supported in the description of the `covers` field of
# `GenericModelDescrBase`.
VALID_COVER_IMAGE_EXTENSIONS = (
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
)

92 

93 

# String type for resource identifiers. The root model combines `NotEmpty[str]`,
# a `RestrictCharacters` annotation built from lowercase ASCII letters, digits and
# "_-/." (presumably the set of allowed characters -- confirm against
# `RestrictCharacters`), and a predicate rejecting a leading or trailing "/".
class ResourceId(ValidatedString):
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            RestrictCharacters(string.ascii_lowercase + string.digits + "_-/."),
            annotated_types.Predicate(
                lambda s: not s.startswith("/") and not s.endswith("/")
            ),
        ]
    ]

104 

105 

def _validate_md_suffix(value: V_suffix) -> V_suffix:
    """Validate that `value` has the suffix `.md`, compared case-sensitively.

    Delegates to `validate_suffix` and returns its result unchanged.
    """
    checked = validate_suffix(value, suffix=".md", case_sensitive=True)
    return checked

108 

109 

# Annotated type for a reference to a documentation file: an absolute file path, a
# relative file path or an HTTP URL. The union members are tried left to right, and
# the validated value must additionally pass `_validate_md_suffix`.
# `include_in_package_serializer` is attached as well (presumably so the referenced
# file gets bundled when a resource is packaged -- confirm in `_internal.io`).
DocumentationSource = Annotated[
    Union[AbsoluteFilePath, RelativeFilePath, HttpUrl],
    Field(union_mode="left_to_right"),
    AfterValidator(_validate_md_suffix),
    include_in_package_serializer,
]

116 

117 

118def _has_no_slash(s: str) -> bool: 

119 return "/" not in s and "\\" not in s 

120 

121 

# Author entry derived from the v0.2 `Author`. It re-declares `name` with a
# predicate that forbids "/" and "\" and runs `validate_gh_user` on a given
# `github_user`.
class Author(_Author_v0_2):
    name: Annotated[str, Predicate(_has_no_slash)]
    github_user: Optional[str] = None

    @field_validator("github_user", mode="after")
    def _validate_gh_user(cls, value: Optional[str]):
        """Pass `None` through; otherwise return `validate_gh_user(value)`."""
        return None if value is None else validate_gh_user(value)

132 

133 

# Converter from a v0.2 author to either a v0.3 `Author` or a plain dict.
class _AuthorConv(Converter[_Author_v0_2, Author]):
    def _convert(
        self, src: _Author_v0_2, tgt: "type[Author] | type[dict[str, Any]]"
    ) -> "Author | dict[str, Any]":
        """Instantiate `tgt` with the five author fields copied from `src`."""
        copied_fields: Dict[str, Any] = {
            "name": src.name,
            "github_user": src.github_user,
            "affiliation": src.affiliation,
            "email": src.email,
            "orcid": src.orcid,
        }
        return tgt(**copied_fields)


# Module-level converter instance for v0.2 -> v0.3 authors.
_author_conv = _AuthorConv(_Author_v0_2, Author)

148 

149 

# Maintainer entry derived from the v0.2 `Maintainer`. `name` is optional and, if
# given, must not contain "/" or "\"; `github_user` is required.
class Maintainer(_Maintainer_v0_2):
    name: Optional[Annotated[str, Predicate(_has_no_slash)]] = None
    github_user: str

    @field_validator("github_user", mode="after")
    def validate_gh_user(cls, value: str):
        """Return `value` as processed by the module-level `validate_gh_user`."""
        # Inside a method body the bare name resolves to the imported module-level
        # function, not to this method of the same name (class scope is skipped).
        checked_user = validate_gh_user(value)
        return checked_user

157 

158 

# Converter from a v0.2 maintainer to either a v0.3 `Maintainer` or a plain dict.
class _MaintainerConv(Converter[_Maintainer_v0_2, Maintainer]):
    def _convert(
        self, src: _Maintainer_v0_2, tgt: "type[Maintainer | dict[str, Any]]"
    ) -> "Maintainer | dict[str, Any]":
        """Instantiate `tgt` with the five maintainer fields copied from `src`."""
        copied_fields: Dict[str, Any] = {
            "name": src.name,
            "github_user": src.github_user,
            "affiliation": src.affiliation,
            "email": src.email,
            "orcid": src.orcid,
        }
        return tgt(**copied_fields)


# Module-level converter instance for v0.2 -> v0.3 maintainers.
_maintainer_conv = _MaintainerConv(_Maintainer_v0_2, Maintainer)

173 

174 

class CiteEntry(Node):
    """A citation that should be referenced in work using this resource."""

    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the prefered citation reference.
    See https://www.doi.org/ for details.
    Note:
        Either **doi** or **url** have to be specified.
    """

    url: Optional[HttpUrl] = None
    """URL to cite (preferably specify a **doi** instead/also).
    Note:
        Either **doi** or **url** have to be specified.
    """

    @model_validator(mode="after")
    def _check_doi_or_url(self):
        """Reject entries in which both `doi` and `url` are falsy."""
        if self.doi or self.url:
            return self

        raise ValueError("Either 'doi' or 'url' is required")

200 

201 

# Base for links to other resources; carries an optional `version` and accepts the
# key `version_number` as a fallback for it.
class LinkedResourceBase(Node):

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        """Drop `version_number` from dict input, using it as `version` if unset.

        Non-dict input is returned untouched. The dict is modified in place.
        """
        if not is_dict(value):
            return value

        legacy_version = value.pop("version_number", None)
        version_missing = value.get("version") is None
        if legacy_version is not None and version_missing:
            value["version"] = legacy_version

        return value

    version: Optional[Version] = None
    """The version of the linked resource following SemVer 2.0."""

215 

216 

class LinkedResource(LinkedResourceBase):
    """Reference to a bioimage.io resource"""

    # The optional `version` field and the `version_number` handling are inherited
    # from `LinkedResourceBase`; only the mandatory `id` is added here.
    id: ResourceId
    """A valid resource `id` from the official bioimage.io collection."""

222 

223 

# Declares no fields of its own; `extra="allow"` is passed as a class keyword
# (presumably the pydantic model config that keeps undeclared keys -- confirm on `Node`).
class BioimageioConfig(Node, extra="allow"):
    """bioimage.io internal metadata."""

226 

227 

class Config(Node, extra="allow"):
    """A place to store additional metadata (often tool specific).

    Such additional metadata is typically set programmatically by the respective tool
    or by people with specific insights into the tool.
    If you want to store additional metadata that does not match any of the other
    fields, think of a key unlikely to collide with anyone elses use-case/tool and save
    it here.

    Please consider creating [an issue in the bioimageio.spec repository](https://github.com/bioimage-io/spec-bioimage-io/issues/new?template=Blank+issue)
    if you are not sure if an existing field could cover your use case
    or if you think such a field should exist.
    """

    bioimageio: BioimageioConfig = Field(default_factory=BioimageioConfig)
    """bioimage.io internal metadata."""

    @model_validator(mode="after")
    def _validate_extra_fields(self):
        """Require every extra field to be a `Node` or to satisfy `is_yaml_value`."""
        # `model_extra` may be falsy (None or empty); then there is nothing to check.
        extra_fields = self.model_extra or {}
        for key, entry in extra_fields.items():
            if isinstance(entry, Node) or is_yaml_value(entry):
                continue

            raise ValueError(
                f"config.{key} is not a valid YAML value or `Node` instance"
            )

        return self

255 

256 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including of model descriptions"""

    # NOTE(review): `name` is annotated twice. The second annotation replaces the
    # first in the class annotations, so only `NotEmpty[str]` + `MaxLen(128)` takes
    # effect; the character restriction, `MinLen(5)` and the 64-character warning
    # below are dead. Confirm which constraint set is intended and drop the other
    # (the docstring still describes a restricted character set).
    name: Annotated[
        Annotated[
            str, RestrictCharacters(string.ascii_letters + string.digits + "_+- ()")
        ],
        MinLen(5),
        MaxLen(128),
        warn(MaxLen(64), "Name longer than 64 characters.", INFO),
    ]
    name: Annotated[NotEmpty[str], MaxLen(128)]
    """A human-friendly name of the resource description.
    May only contains letters, digits, underscore, minus, parentheses and spaces."""

    description: Annotated[
        str, MaxLen(1024), warn(MaxLen(512), "Description longer than 512 characters.")
    ]
    """A string containing a brief description."""

    covers: Annotated[
        List[CoverImageSource],
        Field(
            examples=[],
            description=(
                "Cover images. Please use an image smaller than 500KB and an aspect"
                " ratio width to height of 2:1 or 1:1.\nThe supported image formats"
                f" are: {VALID_COVER_IMAGE_EXTENSIONS}"
            ),
        ),
    ] = Field(default_factory=list)
    """∈📦 Cover images."""

    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=2), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    authors: NotEmpty[List[Author]]
    """The authors are the creators of this resource description and the primary points of contact."""

    attachments: List[FileDescr] = Field(default_factory=list)
    """file attachments"""

    cite: NotEmpty[List[CiteEntry]]
    """citations"""

    license: Annotated[
        Annotated[
            Union[LicenseId, DeprecatedLicenseId], Field(union_mode="left_to_right")
        ],
        warn(
            LicenseId,
            "{value} is deprecated, see https://spdx.org/licenses/{value}.html",
        ),
        Field(examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
    ]
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose)
    to discuss your intentions with the community."""

    git_repo: Annotated[
        Optional[HttpUrl],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    icon: Union[
        Annotated[str, Len(min_length=1, max_length=2)], ImportantFileSource, None
    ] = None
    """An icon for illustration, e.g. on bioimage.io"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    maintainers: List[Maintainer] = Field(default_factory=list)
    """Maintainers of this resource.
    If not specified, `authors` are maintainers and at least some of them has to specify their `github_user` name"""

    @partial(as_warning, severity=ALERT)
    @field_validator("maintainers", mode="after")
    @classmethod
    def check_maintainers_exist(
        cls, maintainers: List[Maintainer], info: ValidationInfo
    ) -> List[Maintainer]:
        """Check that somebody with a `github_user` is responsible for the resource.

        Raises `ValueError` if `maintainers` is empty and no validated author has a
        `github_user`. The validator is wrapped by `as_warning` with severity
        `ALERT` (presumably turning the error into a warning -- confirm in
        `field_warning`).
        """
        # `info.data` only contains fields that validated successfully, so the
        # check is skipped when `authors` is not available.
        if not maintainers and "authors" in info.data:
            authors: List[Author] = info.data["authors"]
            if all(a.github_user is None for a in authors):
                raise ValueError(
                    "Missing `maintainers` or any author in `authors` with a specified"
                    + " `github_user` name."
                )

        return maintainers

    tags: Annotated[
        List[str],
        Field(examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]),
    ] = Field(default_factory=list)
    """Associated tags"""

    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        """Report tag categories of the resource type that no given tag covers.

        Looks up the categories registered in `TAG_CATEGORIES` for the resource's
        `type` and raises `ValueError` listing every category for which none of its
        entries appears in `value`. The validator is wrapped by `as_warning`.
        """
        # `info.data` only holds fields that were already validated successfully.
        # "type" may therefore be missing (not validated yet or invalid); indexing
        # it directly would raise a KeyError instead of a validation result.
        resource_type = info.data.get("type")
        if resource_type is None:
            return value

        categories = TAG_CATEGORIES.get(resource_type, {})
        missing_categories: List[Dict[str, Sequence[str]]] = [
            {cat: entries}
            for cat, entries in categories.items()
            if not any(e in value for e in entries)
        ]

        if missing_categories:
            raise ValueError(
                f"Missing tags from bioimage.io categories: {missing_categories}"
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    @model_validator(mode="before")
    def _remove_version_number(cls, value: Any):
        """Drop `version_number` from dict input, using it as `version` if unset.

        Non-dict input is returned untouched. The dict is modified in place.
        """
        if is_dict(value):
            vn = value.pop("version_number", None)
            if vn is not None and value.get("version") is None:
                value["version"] = vn

        return value

407 

408 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.3.0"]] = "0.3.0"
    # Static type checkers see `format_version` with the default "0.3.0"; at runtime
    # the field is declared without a default value.
    if TYPE_CHECKING:
        format_version: Literal["0.3.0"] = "0.3.0"
    else:
        format_version: Literal["0.3.0"]
        """The **format** version of this resource specification"""

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        """Run the old-format conversion on the raw input before field validation.

        `convert_from_old_format_wo_validation` returns None, so `data` is expected
        to be updated in place; the same object is returned.
        """
        cls.convert_from_old_format_wo_validation(data)
        return data

    @classmethod
    def convert_from_old_format_wo_validation(cls, data: BioimageioYamlContent) -> None:
        """Convert metadata following an older format version to this classes' format
        without validating the result.
        """
        convert_from_older_format(data)

    documentation: Annotated[
        Optional[DocumentationSource],
        Field(
            examples=[
                "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
                "README.md",
            ],
        ),
    ] = None
    """∈📦 URL or relative path to a markdown file encoded in UTF-8 with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    badges: List[BadgeDescr] = Field(default_factory=list)
    """badges associated with this resource"""

    config: Config = Field(default_factory=Config)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a GitHub repo URL in `config` since there is a `git_repo` field.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        giraffe_neckometer:  # here is the domain name
            length: 3837283
            address:
                home: zoo
        imagej:              # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource.
    (Packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files.)"""

468 

469 

# Type variable restricted to `GenericDescrBase` and its subclasses.
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

471 

472 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    type: Annotated[str, LowerCase] = Field("generic", frozen=True)
    """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    parent: Optional[ResourceId] = None
    """The description from which this one is derived"""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        """Refuse resource types listed in `KNOWN_SPECIFIC_RESOURCE_TYPES`.

        Any other `value` is returned unchanged; a listed one raises `ValueError`
        pointing the user to the type-specific description.
        """
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            + f" your '{value}' resource."
        )