Coverage for bioimageio/spec/generic/v0_2.py: 93%

188 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-18 12:47 +0000

1import string 

2from typing import ( 

3 TYPE_CHECKING, 

4 Any, 

5 Callable, 

6 ClassVar, 

7 Dict, 

8 List, 

9 Literal, 

10 Mapping, 

11 Optional, 

12 Sequence, 

13 Type, 

14 TypeVar, 

15 Union, 

16 cast, 

17) 

18 

19import annotated_types 

20import pydantic 

21from annotated_types import Len, LowerCase, MaxLen 

22from pydantic import ( 

23 EmailStr, 

24 Field, 

25 RootModel, 

26 ValidationInfo, 

27 field_validator, 

28 model_validator, 

29) 

30from typing_extensions import Annotated, Self, assert_never 

31 

32from .._internal.common_nodes import Node, ResourceDescrBase 

33from .._internal.constants import TAG_CATEGORIES 

34from .._internal.field_warning import as_warning, issue_warning, warn 

35from .._internal.io import ( 

36 BioimageioYamlContent, 

37 WithSuffix, 

38 YamlValue, 

39 wo_special_file_name, 

40) 

41from .._internal.io_packaging import FileSource_, include_in_package 

42from .._internal.type_guards import is_sequence 

43from .._internal.types import ( 

44 DeprecatedLicenseId, 

45 FilePath, 

46 FileSource, 

47 LicenseId, 

48 NotEmpty, 

49) 

50from .._internal.types import Doi as Doi 

51from .._internal.types import OrcidId as OrcidId 

52from .._internal.types import RelativeFilePath as RelativeFilePath 

53from .._internal.url import HttpUrl as HttpUrl 

54from .._internal.validated_string import ValidatedString 

55from .._internal.validator_annotations import AfterValidator, RestrictCharacters 

56from .._internal.version_type import Version as Version 

57from ._v0_2_converter import convert_from_older_format as _convert_from_older_format 

58 

59 

class ResourceId(ValidatedString):
    # Validation pipeline applied to the wrapped string, in order:
    # non-empty -> lower-cased -> character check -> slash-position check.
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            # lower-case first so upper-case input is converted on the fly
            AfterValidator(str.lower),
            RestrictCharacters(string.ascii_lowercase + string.digits + "_-/."),
            # "/" may only appear inside the id, never at either end
            annotated_types.Predicate(
                lambda s: not s.startswith("/") and not s.endswith("/")
            ),
        ]
    ]

71 

72 

# Resource types that have a dedicated description class.
KNOWN_SPECIFIC_RESOURCE_TYPES = ("application", "collection", "dataset", "model", "notebook")

# File suffixes accepted for cover images.
VALID_COVER_IMAGE_EXTENSIONS = (".gif", ".jpeg", ".jpg", ".png", ".svg", ".tif", ".tiff")

90 

91 

# File source for cover images: a `FileSource_` annotated with the accepted
# image suffixes (the suffix check is configured as case-insensitive).
FileSource_cover = Annotated[
    FileSource_, WithSuffix(VALID_COVER_IMAGE_EXTENSIONS, case_sensitive=False)
]

96 

97 

class AttachmentsDescr(Node):
    # Start from the base node configuration and override only `extra`.
    model_config = {**Node.model_config, "extra": "allow"}
    """update pydantic model config to allow additional unknown keys"""

    # `cast` merely gives the bare `list` factory a precise static type.
    files: List[FileSource_] = Field(
        default_factory=cast(Callable[[], List[FileSource_]], list)
    )
    """File attachments"""

106 

107 

108def _remove_slashes(s: str): 

109 return s.replace("/", "").replace("\\", "") 

110 

111 

class Uploader(Node):
    # Required contact address of the uploader.
    email: EmailStr
    """Email"""

    # Optional display name; slashes and backslashes are stripped after validation.
    name: Optional[Annotated[str, AfterValidator(_remove_slashes)]] = None
    """name"""

117 

118 

class _Person(Node):
    # Shared optional contact fields of `Author` and `Maintainer`.
    affiliation: Optional[str] = None
    """Affiliation"""

    email: Optional[EmailStr] = None
    """Email"""

    orcid: Annotated[Optional[OrcidId], Field(examples=["0000-0001-2345-6789"])] = None
    """An [ORCID iD](https://support.orcid.org/hc/en-us/sections/360001495313-What-is-ORCID
    ) in hyphenated groups of 4 digits, (and [valid](
    https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
    ) as per ISO 7064 11,2.)
    """

132 

133 

class Author(_Person):
    # An author must be named; slashes and backslashes are stripped from the name.
    name: Annotated[str, AfterValidator(_remove_slashes)]

    # TODO: validate github_user
    github_user: Optional[str] = None

137 

138 

class Maintainer(_Person):
    # Unlike `Author`, the name is optional here ...
    name: Optional[Annotated[str, AfterValidator(_remove_slashes)]] = None

    # ... while the GitHub user name is required.
    github_user: str

142 

143 

class BadgeDescr(Node):
    """A custom badge"""

    label: Annotated[str, Field(examples=["Open in Colab"])]
    """badge label to display on hover"""

    # Either a local file (absolute or relative path, marked for inclusion in
    # the package) or an HTTP URL; may be omitted.
    icon: Annotated[
        Optional[
            Union[
                Annotated[
                    Union[FilePath, RelativeFilePath],
                    AfterValidator(wo_special_file_name),
                    include_in_package,
                ],
                Union[HttpUrl, pydantic.HttpUrl],
            ]
        ],
        Field(examples=["https://colab.research.google.com/assets/colab-badge.svg"]),
    ] = None
    """badge icon (included in bioimage.io package if not a URL)"""

    url: Annotated[
        HttpUrl,
        Field(
            examples=[
                "https://colab.research.google.com/github/HenriquesLab/ZeroCostDL4Mic/blob/master/Colab_notebooks/U-net_2D_ZeroCostDL4Mic.ipynb"
            ]
        ),
    ]
    """target URL"""

174 

175 

class CiteEntry(Node):
    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the prefered citation reference.
    See https://www.doi.org/ for details. (alternatively specify `url`)"""

    @field_validator("doi", mode="before")
    @classmethod
    def accept_prefixed_doi(cls, doi: Any) -> Any:
        # Runs before the `Doi` validation: strip the first matching resolver
        # prefix from string input; anything else is passed through untouched.
        if not isinstance(doi, str):
            return doi

        for resolver in ("https://doi.org/", "http://dx.doi.org/"):
            if doi.startswith(resolver):
                return doi[len(resolver) :]

        return doi

    url: Optional[str] = None
    """URL to cite (preferably specify a `doi` instead)"""

    @model_validator(mode="after")
    def _check_doi_or_url(self) -> Self:
        # A citation needs at least one (truthy) reference besides the text.
        if not (self.doi or self.url):
            raise ValueError("Either 'doi' or 'url' is required")

        return self

204 

205 

class LinkedResource(Node):
    """Reference to a bioimage.io resource"""

    # Identifier of the referenced resource.
    id: ResourceId
    """A valid resource `id` from the bioimage.io collection."""

    # Optional; None leaves the referenced version unspecified.
    version_number: Optional[int] = None
    """version number (n-th published version, not the semantic version) of linked resource"""

214 

215 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including of model descriptions"""

    # Names longer than 128 characters only trigger a warning.
    name: Annotated[NotEmpty[str], warn(MaxLen(128), "Longer than 128 characters.")]
    """A human-friendly name of the resource description"""

    description: str

    covers: List[FileSource_cover] = Field(
        default_factory=cast(Callable[[], List[FileSource_cover]], list),
        examples=[["cover.png"]],
        description=(
            "Cover images. Please use an image smaller than 500KB and an aspect"
            " ratio width to height of 2:1.\nThe supported image formats are:"
            f" {VALID_COVER_IMAGE_EXTENSIONS}"
        ),
    )
    """Cover images. Please use an image smaller than 500KB and an aspect ratio width to height of 2:1."""

    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=1), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    authors: List[Author] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """The authors are the creators of the RDF and the primary points of contact."""

    @field_validator("authors", mode="before")
    @classmethod
    def accept_author_strings(cls, authors: Union[Any, Sequence[Any]]) -> Any:
        """we unofficially accept strings as author entries"""
        # Wrap bare strings as `{"name": ...}`; other entries are kept as given.
        if is_sequence(authors):
            authors = [{"name": a} if isinstance(a, str) else a for a in authors]

        # An empty/missing author list is only reported, not rejected here.
        if not authors:
            issue_warning("missing", value=authors, field="authors")

        return authors

    attachments: Optional[AttachmentsDescr] = None
    """file and other attachments"""

    cite: List[CiteEntry] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """citations"""

    @field_validator("cite", mode="after")
    @classmethod
    def _warn_empty_cite(cls, value: Any):
        # Report (but accept) resources without any citation.
        if not value:
            issue_warning("missing", value=value, field="cite")

        return value

    config: Annotated[
        Dict[str, YamlValue],
        Field(
            examples=[
                dict(
                    bioimageio={
                        "my_custom_key": 3837283,
                        "another_key": {"nested": "value"},
                    },
                    imagej={"macro_dir": "path/to/macro/file"},
                )
            ],
        ),
    ] = Field(default_factory=dict)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a github repo URL in `config` since we already have the
    `git_repo` field defined in the spec.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        bioimageio:  # here is the domain name
            my_custom_key: 3837283
            another_key:
                nested: value
        imagej:  # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource
    (packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files)"""

    download_url: Optional[HttpUrl] = None
    """URL to download the resource from (deprecated)"""

    git_repo: Annotated[
        Optional[str],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    # Either a very short string (1-2 characters) or a file source.
    icon: Union[Annotated[str, Len(min_length=1, max_length=2)], FileSource, None] = (
        None
    )
    """An icon for illustration"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    # TODO: (py>3.8) remove pyright ignore
    maintainers: List[Maintainer] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """Maintainers of this resource.
    If not specified `authors` are maintainers and at least some of them should specify their `github_user` name"""

    rdf_source: Optional[FileSource] = None
    """Resource description file (RDF) source; used to keep track of where an rdf.yaml was loaded from.
    Do not set this field in a YAML file."""

    tags: Annotated[
        List[str],
        Field(examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]),
    ] = Field(default_factory=list)
    """Associated tags"""

    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        """Check that `tags` covers every tag category of the resource type.

        The categories for the resource `type` are looked up in
        `TAG_CATEGORIES`; a category counts as covered when at least one of
        its entries is among the given tags.

        NOTE(review): `as_warning` presumably downgrades the raised error to a
        warning - confirm against its implementation.

        Raises:
            ValueError: listing each category (and its entries) that has no
                matching tag.
        """
        # `info.data` only holds fields validated so far; fall back to "no
        # categories" instead of failing with a KeyError if `type` is absent.
        categories = TAG_CATEGORIES.get(info.data.get("type"), {})
        missing_categories: List[str] = [
            f"{cat}: {', '.join(map(str, entries))}"
            for cat, entries in categories.items()
            if not any(e in value for e in entries)
        ]

        if missing_categories:
            # Interpolate eagerly (the placeholder used to be emitted verbatim).
            # NOTE(review): other warnings in this file use '{value}'-style
            # templates, so the text is presumably formatted again downstream;
            # keep the rendered list free of curly braces.
            raise ValueError(
                "Missing tags from bioimage.io categories: "
                + "; ".join(missing_categories)
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    version_number: Optional[int] = None
    """version number (n-th published version, not the semantic version)"""

383 

384 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.2.4"]] = "0.2.4"
    # Static type checkers see a default value; at runtime the field is
    # declared without one.
    if TYPE_CHECKING:
        format_version: Literal["0.2.4"] = "0.2.4"
    else:
        format_version: Literal["0.2.4"]
        """The format version of this resource specification
        (not the `version` of the resource description)
        When creating a new resource always use the latest micro/patch version described here.
        The `format_version` is important for any consumer software to understand how to parse the fields.
        """

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        # Inside the method body this name resolves to the module-level
        # converter import, not to this classmethod; the converter's return
        # value is not used and the same `data` object is returned.
        _convert_from_older_format(data)
        return data

    badges: List[BadgeDescr] = Field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=list
    )
    """badges associated with this resource"""

    documentation: Annotated[
        Optional[FileSource],
        Field(
            examples=[
                "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
                "README.md",
            ],
        ),
    ] = None
    """URL or relative path to a markdown file with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    # Union members are tried left to right, so a plain `str` is the fallback.
    license: Annotated[
        Union[LicenseId, DeprecatedLicenseId, str, None],
        Field(union_mode="left_to_right", examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
    ] = None
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose
    ) to discuss your intentions with the community."""

    @field_validator("license", mode="after")
    @classmethod
    def deprecated_spdx_license(
        cls, value: Optional[Union[LicenseId, DeprecatedLicenseId, str]]
    ):
        # Warn for everything that is not a current license id; the value
        # itself is always returned unchanged.
        if value is None:
            issue_warning("missing", value=value, field="license")
        elif isinstance(value, LicenseId):
            pass  # current identifier: nothing to report
        elif isinstance(value, DeprecatedLicenseId):
            issue_warning(
                "'{value}' is a deprecated license identifier.",
                value=value,
                field="license",
            )
        elif isinstance(value, str):
            issue_warning(
                "'{value}' is an unknown license identifier.",
                value=value,
                field="license",
            )
        else:
            assert_never(value)

        return value

458 

459 

# Type variable constrained to `GenericDescrBase` and its subclasses.
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

461 

462 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    type: Annotated[str, LowerCase, Field(frozen=True)] = "generic"
    """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        # Types with a dedicated description must not use the generic one.
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            f" your '{value}' resource."
        )