Coverage for bioimageio/spec/generic/v0_2.py: 93%

188 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-02 14:21 +0000

1import string 

2from typing import ( 

3 TYPE_CHECKING, 

4 Any, 

5 ClassVar, 

6 Dict, 

7 List, 

8 Literal, 

9 Mapping, 

10 Optional, 

11 Sequence, 

12 Type, 

13 TypeVar, 

14 Union, 

15) 

16 

17import annotated_types 

18from annotated_types import Len, LowerCase, MaxLen 

19from pydantic import ( 

20 EmailStr, 

21 Field, 

22 RootModel, 

23 ValidationInfo, 

24 field_validator, 

25 model_validator, 

26) 

27from typing_extensions import Annotated, Self, assert_never 

28 

29from .._internal.common_nodes import Node, ResourceDescrBase 

30from .._internal.constants import TAG_CATEGORIES 

31from .._internal.field_warning import as_warning, issue_warning, warn 

32from .._internal.io import ( 

33 BioimageioYamlContent, 

34 InPackageIfLocalFileSource, 

35 WithSuffix, 

36 YamlValue, 

37 include_in_package_serializer, 

38) 

39from .._internal.io_basics import AbsoluteFilePath as AbsoluteFilePath 

40from .._internal.type_guards import is_sequence 

41from .._internal.types import ( 

42 DeprecatedLicenseId, 

43 FileSource, 

44 ImportantFileSource, 

45 LicenseId, 

46 NotEmpty, 

47) 

48from .._internal.types import Doi as Doi 

49from .._internal.types import OrcidId as OrcidId 

50from .._internal.types import RelativeFilePath as RelativeFilePath 

51from .._internal.url import HttpUrl as HttpUrl 

52from .._internal.validated_string import ValidatedString 

53from .._internal.validator_annotations import AfterValidator, RestrictCharacters 

54from .._internal.version_type import Version as Version 

55from ._v0_2_converter import convert_from_older_format as _convert_from_older_format 

56 

57 

# Validated string type for resource identifiers.
# The wrapped `str` is declared non-empty, is lower-cased on validation, is
# checked against an alphabet of lowercase ASCII letters, digits and "_-/.",
# and must neither start nor end with "/".
class ResourceId(ValidatedString):
    # Root model whose annotation chain defines the validation of the string value.
    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[
        Annotated[
            NotEmpty[str],
            AfterValidator(str.lower),  # convert upper case on the fly
            # presumably rejects any character outside this alphabet -- confirm
            # against `RestrictCharacters`
            RestrictCharacters(string.ascii_lowercase + string.digits + "_-/."),
            # "/" is allowed inside an id, but not as its first or last character
            annotated_types.Predicate(
                lambda s: not (s.startswith("/") or s.endswith("/"))
            ),
        ]
    ]

69 

70 

# Resource types that `GenericDescr.check_specific_types` rejects: a resource of
# one of these types is told to use its type-specific description instead.
KNOWN_SPECIFIC_RESOURCE_TYPES = (
    "application",
    "collection",
    "dataset",
    "model",
    "notebook",
)

# File suffixes accepted for cover images; also interpolated into the
# description of the `covers` field.
VALID_COVER_IMAGE_EXTENSIONS = (
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
    ".tif",
    ".tiff",
)

# Suffix annotation built from the extensions above, with case_sensitive=False.
_WithImageSuffix = WithSuffix(VALID_COVER_IMAGE_EXTENSIONS, case_sensitive=False)
# A cover image source: absolute path, relative path or HTTP URL (the union is
# tried left to right), annotated with the image suffix check and the
# `include_in_package_serializer`.
CoverImageSource = Annotated[
    Union[AbsoluteFilePath, RelativeFilePath, HttpUrl],
    Field(union_mode="left_to_right"),
    _WithImageSuffix,
    include_in_package_serializer,
]

96 

97 

class AttachmentsDescr(Node):
    # Attachments of a resource description: a list of files plus any
    # additional keys, which the config override below permits.
    model_config = {**Node.model_config, "extra": "allow"}
    """update pydantic model config to allow additional unknown keys"""
    # Defaults to a fresh empty list per instance (default_factory).
    files: List[ImportantFileSource] = Field(default_factory=list)
    """∈📦 File attachments"""

103 

104 

105def _remove_slashes(s: str): 

106 return s.replace("/", "").replace("\\", "") 

107 

108 

class Uploader(Node):
    # Identifies who uploaded a resource: a mandatory email and an optional name.
    email: EmailStr
    """Email"""
    # Optional; forward slashes and backslashes are stripped from the validated
    # name by `_remove_slashes`.
    name: Optional[Annotated[str, AfterValidator(_remove_slashes)]] = None
    """name"""

114 

115 

class _Person(Node):
    # Contact fields shared by `Author` and `Maintainer`; every field is optional.
    affiliation: Optional[str] = None
    """Affiliation"""

    email: Optional[EmailStr] = None
    """Email"""

    # Validation of the identifier itself is delegated to the `OrcidId` type.
    orcid: Annotated[Optional[OrcidId], Field(examples=["0000-0001-2345-6789"])] = None
    """An [ORCID iD](https://support.orcid.org/hc/en-us/sections/360001495313-What-is-ORCID
    ) in hyphenated groups of 4 digits, (and [valid](
    https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
    ) as per ISO 7064 11,2.)
    """

129 

130 

class Author(_Person):
    # A person with a mandatory `name`; `github_user` is optional here
    # (the reverse of `Maintainer`).
    # Slashes and backslashes are stripped from the validated name.
    name: Annotated[str, AfterValidator(_remove_slashes)]
    github_user: Optional[str] = None  # TODO: validate github_user

134 

135 

class Maintainer(_Person):
    # A person with a mandatory `github_user`; `name` is optional here
    # (the reverse of `Author`).
    # Slashes and backslashes are stripped from the validated name, if given.
    name: Optional[Annotated[str, AfterValidator(_remove_slashes)]] = None
    github_user: str

139 

140 

class BadgeDescr(Node):
    """A custom badge"""

    # Required label text.
    label: Annotated[str, Field(examples=["Open in Colab"])]
    """badge label to display on hover"""

    # Optional icon; the `InPackageIfLocalFileSource` type name suggests a local
    # icon file is included when packaging -- confirm against that type.
    icon: Annotated[
        Optional[InPackageIfLocalFileSource],
        Field(examples=["https://colab.research.google.com/assets/colab-badge.svg"]),
    ] = None
    """badge icon"""

    # Required link target; must validate as `HttpUrl`.
    url: Annotated[
        HttpUrl,
        Field(
            examples=[
                "https://colab.research.google.com/github/HenriquesLab/ZeroCostDL4Mic/blob/master/Colab_notebooks/U-net_2D_ZeroCostDL4Mic.ipynb"
            ]
        ),
    ]
    """target URL"""

162 

163 

class CiteEntry(Node):
    # A citation: free text plus at least one of `doi` / `url`.
    text: str
    """free text description"""

    doi: Optional[Doi] = None
    """A digital object identifier (DOI) is the prefered citation reference.
    See https://www.doi.org/ for details. (alternatively specify `url`)"""

    @field_validator("doi", mode="before")
    @classmethod
    def accept_prefixed_doi(cls, doi: Any) -> Any:
        """Strip a leading DOI resolver URL from string input before validation.

        Non-string input is returned unchanged. At most one prefix (the first
        one that matches) is removed.
        """
        if not isinstance(doi, str):
            return doi

        resolver_prefixes = ("https://doi.org/", "http://dx.doi.org/")
        matched = next(
            (prefix for prefix in resolver_prefixes if doi.startswith(prefix)), None
        )
        return doi if matched is None else doi[len(matched) :]

    url: Optional[str] = None
    """URL to cite (preferably specify a `doi` instead)"""

    @model_validator(mode="after")
    def _check_doi_or_url(self) -> Self:
        """Require that at least one of `doi` and `url` is set (truthy).

        Raises:
            ValueError: if both `doi` and `url` are missing or empty.
        """
        if self.doi or self.url:
            return self

        raise ValueError("Either 'doi' or 'url' is required")

192 

193 

class LinkedResource(Node):
    """Reference to a bioimage.io resource"""

    # Required; validated by the `ResourceId` string type.
    id: ResourceId
    """A valid resource `id` from the bioimage.io collection."""

    # Optional; omitted (None) by default.
    version_number: Optional[int] = None
    """version number (n-th published version, not the semantic version) of linked resource"""

202 

203 

class GenericModelDescrBase(ResourceDescrBase):
    """Base for all resource descriptions including of model descriptions"""

    # Names longer than 128 characters trigger a warning (via `warn`).
    name: Annotated[NotEmpty[str], warn(MaxLen(128), "Longer than 128 characters.")]
    """A human-friendly name of the resource description"""

    description: str

    covers: Annotated[
        List[CoverImageSource],
        Field(
            examples=["cover.png"],
            description=(
                "Cover images. Please use an image smaller than 500KB and an aspect"
                " ratio width to height of 2:1.\nThe supported image formats are:"
                f" {VALID_COVER_IMAGE_EXTENSIONS}"
            ),
        ),
    ] = Field(
        default_factory=list,
    )
    """∈📦 Cover images. Please use an image smaller than 500KB and an aspect ratio width to height of 2:1."""

    id_emoji: Optional[
        Annotated[str, Len(min_length=1, max_length=1), Field(examples=["🦈", "🦥"])]
    ] = None
    """UTF-8 emoji for display alongside the `id`."""

    authors: List[Author] = Field(default_factory=list)
    """The authors are the creators of the RDF and the primary points of contact."""

    @field_validator("authors", mode="before")
    @classmethod
    def accept_author_strings(cls, authors: Union[Any, Sequence[Any]]) -> Any:
        """we unofficially accept strings as author entries"""
        # Plain strings become `{"name": <string>}`; other entries pass through.
        if is_sequence(authors):
            authors = [{"name": a} if isinstance(a, str) else a for a in authors]

        # An empty (or otherwise falsy) authors value is reported, not rejected here.
        if not authors:
            issue_warning("missing", value=authors, field="authors")

        return authors

    attachments: Optional[AttachmentsDescr] = None
    """file and other attachments"""

    cite: List[CiteEntry] = Field(default_factory=list)
    """citations"""

    @field_validator("cite", mode="after")
    @classmethod
    def _warn_empty_cite(cls, value: Any):
        """Report a "missing" warning when no citation is given; return `value` as is."""
        if not value:
            issue_warning("missing", value=value, field="cite")

        return value

    config: Annotated[
        Dict[str, YamlValue],
        Field(
            examples=[
                dict(
                    bioimageio={
                        "my_custom_key": 3837283,
                        "another_key": {"nested": "value"},
                    },
                    imagej={"macro_dir": "path/to/macro/file"},
                )
            ],
        ),
    ] = Field(default_factory=dict)
    """A field for custom configuration that can contain any keys not present in the RDF spec.
    This means you should not store, for example, a github repo URL in `config` since we already have the
    `git_repo` field defined in the spec.
    Keys in `config` may be very specific to a tool or consumer software. To avoid conflicting definitions,
    it is recommended to wrap added configuration into a sub-field named with the specific domain or tool name,
    for example:
    ```yaml
    config:
        bioimageio:  # here is the domain name
            my_custom_key: 3837283
            another_key:
                nested: value
        imagej:  # config specific to ImageJ
            macro_dir: path/to/macro/file
    ```
    If possible, please use [`snake_case`](https://en.wikipedia.org/wiki/Snake_case) for keys in `config`.
    You may want to list linked files additionally under `attachments` to include them when packaging a resource
    (packaging a resource means downloading/copying important linked files and creating a ZIP archive that contains
    an altered rdf.yaml file with local references to the downloaded files)"""

    download_url: Optional[HttpUrl] = None
    """URL to download the resource from (deprecated)"""

    git_repo: Annotated[
        Optional[str],
        Field(
            examples=[
                "https://github.com/bioimage-io/spec-bioimage-io/tree/main/example_descriptions/models/unet2d_nuclei_broad"
            ],
        ),
    ] = None
    """A URL to the Git repository where the resource is being developed."""

    # Either a string of one or two characters, a file source, or nothing.
    icon: Union[
        Annotated[str, Len(min_length=1, max_length=2)], ImportantFileSource, None
    ] = None
    """An icon for illustration"""

    links: Annotated[
        List[str],
        Field(
            examples=[
                (
                    "ilastik/ilastik",
                    "deepimagej/deepimagej",
                    "zero/notebook_u-net_3d_zerocostdl4mic",
                )
            ],
        ),
    ] = Field(default_factory=list)
    """IDs of other bioimage.io resources"""

    uploader: Optional[Uploader] = None
    """The person who uploaded the model (e.g. to bioimage.io)"""

    maintainers: List[Maintainer] = Field(default_factory=list)
    """Maintainers of this resource.
    If not specified `authors` are maintainers and at least some of them should specify their `github_user` name"""

    rdf_source: Optional[FileSource] = None
    """Resource description file (RDF) source; used to keep track of where an rdf.yaml was loaded from.
    Do not set this field in a YAML file."""

    tags: Annotated[
        List[str],
        Field(examples=[("unet2d", "pytorch", "nucleus", "segmentation", "dsb2018")]),
    ] = Field(default_factory=list)
    """Associated tags"""

    @as_warning
    @field_validator("tags")
    @classmethod
    def warn_about_tag_categories(
        cls, value: List[str], info: ValidationInfo
    ) -> List[str]:
        """Check that `value` holds at least one tag from every known tag category.

        The categories are looked up in `TAG_CATEGORIES` by the resource type
        found in `info.data["type"]`; an unknown type has no categories.

        Raises:
            ValueError: listing each category (with its candidate tags) for which
                no tag is present. The `as_warning` decorator presumably
                downgrades this error to a warning -- confirm against its
                implementation.
        """
        # NOTE(review): assumes `type` is already present in `info.data` when
        # `tags` is validated -- confirm against the field order of subclasses.
        categories = TAG_CATEGORIES.get(info.data["type"], {})
        missing_categories: List[Mapping[str, Sequence[str]]] = [
            {cat: entries}
            for cat, entries in categories.items()
            if not any(e in value for e in entries)
        ]

        if missing_categories:
            # f-string: the message must contain the actual missing categories,
            # not the literal placeholder text.
            raise ValueError(
                f"Missing tags from bioimage.io categories: {missing_categories}"
            )

        return value

    version: Optional[Version] = None
    """The version of the resource following SemVer 2.0."""

    version_number: Optional[int] = None
    """version number (n-th published version, not the semantic version)"""

368 

369 

class GenericDescrBase(GenericModelDescrBase):
    """Base for all resource descriptions except for the model descriptions"""

    implemented_format_version: ClassVar[Literal["0.2.4"]] = "0.2.4"
    # Static type checkers see a default value for `format_version`; at runtime
    # the field is declared without one.
    if TYPE_CHECKING:
        format_version: Literal["0.2.4"] = "0.2.4"
    else:
        format_version: Literal["0.2.4"]
        """The format version of this resource specification
        (not the `version` of the resource description)
        When creating a new resource always use the latest micro/patch version described here.
        The `format_version` is important for any consumer software to understand how to parse the fields.
        """

    @model_validator(mode="before")
    @classmethod
    def _convert_from_older_format(
        cls, data: BioimageioYamlContent, /
    ) -> BioimageioYamlContent:
        # Inside the method body this name resolves to the module-level
        # converter imported under the same name, not to this classmethod.
        _convert_from_older_format(data)
        return data

    badges: List[BadgeDescr] = Field(default_factory=list)
    """badges associated with this resource"""

    documentation: Annotated[
        Optional[ImportantFileSource],
        Field(
            examples=[
                "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/unet2d_nuclei_broad/README.md",
                "README.md",
            ],
        ),
    ] = None
    """∈📦 URL or relative path to a markdown file with additional documentation.
    The recommended documentation file name is `README.md`. An `.md` suffix is mandatory."""

    license: Annotated[
        Union[LicenseId, DeprecatedLicenseId, str, None],
        Field(union_mode="left_to_right", examples=["CC0-1.0", "MIT", "BSD-2-Clause"]),
    ] = None
    """A [SPDX license identifier](https://spdx.org/licenses/).
    We do not support custom license beyond the SPDX license list, if you need that please
    [open a GitHub issue](https://github.com/bioimage-io/spec-bioimage-io/issues/new/choose
    ) to discuss your intentions with the community."""

    @field_validator("license", mode="after")
    @classmethod
    def deprecated_spdx_license(
        cls, value: Optional[Union[LicenseId, DeprecatedLicenseId, str]]
    ):
        """Issue a warning for a missing, deprecated or unknown license.

        A `LicenseId` passes silently. The value itself is always returned
        unchanged.
        """
        if value is None:
            issue_warning("missing", value=value, field="license")
            return value

        if isinstance(value, LicenseId):
            return value

        # The plain `str` check stays last so the more specific id type is
        # tested before it.
        if isinstance(value, DeprecatedLicenseId):
            template = "'{value}' is a deprecated license identifier."
        elif isinstance(value, str):
            template = "'{value}' is an unknown license identifier."
        else:
            assert_never(value)

        issue_warning(
            template,
            value=value,
            field="license",
        )
        return value

441 

442 

# Type variable for generic code over resource descriptions; bound to
# `GenericDescrBase`, so it ranges over that class and its subclasses.
ResourceDescrType = TypeVar("ResourceDescrType", bound=GenericDescrBase)

444 

445 

class GenericDescr(GenericDescrBase, extra="ignore"):
    """Specification of the fields used in a generic bioimage.io-compliant resource description file (RDF).

    An RDF is a YAML file that describes a resource such as a model, a dataset, or a notebook.
    Note that those resources are described with a type-specific RDF.
    Use this generic resource description, if none of the known specific types matches your resource.
    """

    type: Annotated[str, LowerCase, Field(frozen=True)] = "generic"
    """The resource type assigns a broad category to the resource."""

    id: Optional[
        Annotated[ResourceId, Field(examples=["affable-shark", "ambitious-sloth"])]
    ] = None
    """bioimage.io-wide unique resource identifier
    assigned by bioimage.io; version **un**specific."""

    source: Optional[HttpUrl] = None
    """The primary source of the resource"""

    @field_validator("type", mode="after")
    @classmethod
    def check_specific_types(cls, value: str) -> str:
        """Reject resource types that have a dedicated, type-specific description.

        Raises:
            ValueError: if `value` is one of `KNOWN_SPECIFIC_RESOURCE_TYPES`.
        """
        if value not in KNOWN_SPECIFIC_RESOURCE_TYPES:
            return value

        raise ValueError(
            f"Use the {value} description instead of this generic description for"
            f" your '{value}' resource."
        )