bioimageio.spec.utils
import json
from typing import List, TypedDict

from ._description import ensure_description_is_dataset, ensure_description_is_model
from ._internal.io import (
    download,
    extract_file_name,
    get_sha256,
    identify_bioimageio_yaml_file_name,
    is_valid_bioimageio_yaml_name,
)
from ._internal.io_utils import load_array, save_array, write_yaml
from ._internal.utils import files

# Public names of this module: helpers re-exported from the imports above plus
# the SPDX license typings and loader defined below.
__all__ = [
    "download",
    "ensure_description_is_dataset",
    "ensure_description_is_model",
    "extract_file_name",
    "get_sha256",
    "get_spdx_licenses",
    "identify_bioimageio_yaml_file_name",
    "is_valid_bioimageio_yaml_name",
    "load_array",
    "save_array",
    "SpdxLicenseEntry",
    "SpdxLicenses",
    "write_yaml",
]


class SpdxLicenseEntry(TypedDict):
    """Typed dictionary for one element of the `licenses` list in `SpdxLicenses`."""

    isDeprecatedLicenseId: bool
    isKnownByZenodo: bool
    isOsiApproved: bool
    licenseId: str
    name: str
    reference: str


class SpdxLicenses(TypedDict):
    """Typed dictionary describing the return value of `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]
    releaseDate: str


def get_spdx_licenses() -> SpdxLicenses:
    """get details of the SPDX licenses known to bioimageio.spec

    The data is read from the file `static/spdx_licenses.json` located via
    `files("bioimageio.spec")` and parsed as JSON on every call.
    """
    with files("bioimageio.spec").joinpath("static/spdx_licenses.json").open(
        "r", encoding="utf-8"
    ) as f:
        return json.load(f)
def
download( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], str, Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.io.FileDescr, zipp.Path], /, progressbar: Union[bioimageio.spec._internal.io.Progressbar, bool, NoneType] = None, **kwargs: Unpack[bioimageio.spec._internal.io.HashKwargs]) -> Union[bioimageio.spec._internal.io.LocalFile, bioimageio.spec._internal.io.FileInZip]:
def resolve(
    source: Union[PermissiveFileSource, FileDescr, ZipPath],
    /,
    progressbar: Union[Progressbar, bool, None] = None,
    **kwargs: Unpack[HashKwargs],
) -> Union[LocalFile, FileInZip]:
    """Resolve file `source` (download if needed)

    Args:
        source: What to resolve. A `str` is first converted with
            `interprete_file_source`; a `RelativeFilePath` is made absolute.
        progressbar: `True`/`False` to enable/disable a default tqdm progress
            bar for cacheless downloads, or a progress bar object to use
            instead. `None` selects `False` if `settings.CI` is set and `True`
            otherwise.
        **kwargs: Hash values of the file. A download only goes through the
            pooch cache if at least one of them is not `None` (and caching is
            configured and not disabled in the validation context).

    Returns:
        A `LocalFile` for existing local files and cached downloads, or a
        `FileInZip` for zip members and for cacheless downloads (which are
        stored in an in-memory zip file).

    Raises:
        FileNotFoundError: If a local `source` is a directory or does not exist.
        NotImplementedError: If the URL scheme is neither http nor https.
    """

    if isinstance(source, str):
        source = interprete_file_source(source)

    if isinstance(source, RelativeFilePath):
        source = source.absolute()
        if isinstance(source, ZipPath):
            return FileInZip(source, source.root, extract_file_name(source))

    if isinstance(source, pydantic.AnyUrl):
        # convert without io checks; the URL is accessed below anyway
        with get_validation_context().replace(perform_io_checks=False):
            source = HttpUrl(source)

    if isinstance(source, FileDescr):
        return source.download()
    elif isinstance(source, ZipPath):
        zip_root = source.root
        assert isinstance(zip_root, ZipFile)
        return FileInZip(
            source,
            zip_root,
            extract_file_name(source),
        )
    elif isinstance(source, Path):
        if source.is_dir():
            raise FileNotFoundError(f"{source} is a directory, not a file")

        if not source.exists():
            raise FileNotFoundError(source)

        return LocalFile(
            source,
            source.parent,
            extract_file_name(source),
        )
    elif isinstance(source, HttpUrl):
        if source.scheme not in ("http", "https"):
            raise NotImplementedError(source.scheme)

        if settings.CI:
            headers = {"User-Agent": "ci"}
            if progressbar is None:
                progressbar = False
        else:
            headers = {}
            if progressbar is None:
                progressbar = True

        # an explicitly configured user agent takes precedence over the CI default
        if settings.user_agent is not None:
            headers["User-Agent"] = settings.user_agent

        chunk_size = 1024
        if (
            settings.cache_path
            and not get_validation_context().disable_cache
            and any(v is not None for v in kwargs.values())
        ):
            downloader = pooch.HTTPDownloader(
                headers=headers,
                progressbar=progressbar,  # pyright: ignore[reportArgumentType]
                chunk_size=chunk_size,
            )
            fname = _get_unique_file_name(source)
            _ls: Any = pooch.retrieve(
                url=str(source),
                known_hash=_get_known_hash(kwargs),
                downloader=downloader,
                fname=fname,
                path=settings.cache_path,
            )
            local_source = Path(_ls).absolute()
            return LocalFile(
                local_source,
                source.parent,
                extract_file_name(source),
            )
        else:
            # cacheless download to memory using an in memory zip file
            # NOTE(review): `headers` is not passed to this request, so the
            # configured user agent only applies to the cached branch - confirm
            # whether that is intended.
            r = requests.get(str(source), stream=True)
            r.raise_for_status()

            zf = zipfile.ZipFile(io.BytesIO(), "w")
            fn = extract_file_name(source)
            total = int(r.headers.get("content-length", 0))

            if isinstance(progressbar, bool):
                if progressbar:
                    use_ascii = bool(sys.platform == "win32")
                    # a single progress bar carrying both the layout settings
                    # and the description (previously a second, unconfigured
                    # bar replaced the configured one, which was never closed)
                    pbar = tqdm(
                        total=total,
                        ncols=79,
                        ascii=use_ascii,
                        unit="B",
                        unit_scale=True,
                        leave=True,
                        desc=f"Downloading {fn}",
                    )
                else:
                    pbar = None
            else:
                pbar = progressbar

            zp = ZipPath(zf, fn)
            with zp.open("wb") as z:
                assert not isinstance(z, io.TextIOWrapper)
                for chunk in r.iter_content(chunk_size=chunk_size):
                    n = z.write(chunk)
                    if pbar is not None:
                        _ = pbar.update(n)

            # Make sure the progress bar gets filled even if the actual number
            # of chunks is smaller than expected. This happens when streaming
            # text files that are compressed by the server when sending (gzip).
            # Binary files don't experience this.
            # (adapted from pooch.HttpDownloader)
            if pbar is not None:
                pbar.reset()
                _ = pbar.update(total)
                pbar.close()

            return FileInZip(
                path=zp,
                original_root=source.parent,
                original_file_name=fn,
            )

    else:
        assert_never(source)
Resolve file source
(download if needed)
def
ensure_description_is_dataset( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')]:
def ensure_description_is_dataset(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyDatasetDescr:
    """Return `rd` unchanged if it is a valid dataset description.

    Raises:
        ValueError: for invalid or non-dataset resources
            (the validation summary is displayed before raising)
    """
    if not isinstance(rd, InvalidDescr) and rd.type == "dataset":
        # a generic description is not expected to have type "dataset"
        assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
        return rd

    # both failure modes show the validation summary before raising
    rd.validation_summary.display()
    if isinstance(rd, InvalidDescr):
        raise ValueError(f"Invalid {rd.type} description.")

    raise ValueError(
        f"Expected a dataset resource, but got resource type '{rd.type}'"
    )
def
ensure_description_is_model( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')]:
def ensure_description_is_model(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyModelDescr:
    """Return `rd` unchanged if it is a valid model description.

    Raises:
        ValueError: for invalid or non-model resources
            (the validation summary is displayed before raising)
    """
    if not isinstance(rd, InvalidDescr) and rd.type == "model":
        # a generic description is not expected to have type "model"
        assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
        return rd

    # both failure modes show the validation summary before raising
    rd.validation_summary.display()
    if isinstance(rd, InvalidDescr):
        raise ValueError(f"Invalid {rd.type} description")

    raise ValueError(
        f"Expected a model resource, but got resource type '{rd.type}'"
    )
Raises:
- ValueError: for invalid or non-model resources
def
extract_file_name( src: Union[Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.url.HttpUrl, pathlib.PurePath, bioimageio.spec._internal.io.RelativeFilePath, zipp.Path]) -> str:
def extract_file_name(
    src: Union[pydantic.HttpUrl, HttpUrl, PurePath, RelativeFilePath, ZipPath],
) -> FileName:
    """Derive a file name from a path-like or URL source.

    - zip paths: the member name, else the archive's file name, else
      "bioimageio.zip"
    - relative and pure paths: the final path component
    - URLs: the last segment of the URL path, except for
      `https://zenodo.org/api/records/.../content` URLs, where the second to
      last segment is used
    """
    if isinstance(src, ZipPath):
        return src.name or src.root.filename or "bioimageio.zip"

    if isinstance(src, RelativeFilePath):
        return src.path.name

    if isinstance(src, PurePath):
        return src.name

    parsed = urlparse(str(src))
    segments = parsed.path.split("/")
    is_zenodo_content_url = (
        parsed.scheme == "https"
        and parsed.hostname == "zenodo.org"
        and parsed.path.startswith("/api/records/")
        and parsed.path.endswith("/content")
    )
    # for zenodo content URLs the file name precedes the trailing "/content"
    return segments[-2] if is_zenodo_content_url else segments[-1]
def
get_sha256( path: Union[pathlib.Path, zipp.Path]) -> bioimageio.spec._internal.io_basics.Sha256:
def get_sha256(path: Union[Path, ZipPath]) -> Sha256:
    """Compute the SHA-256 hex digest of the file at `path`.

    Zip members are read into memory in one go; regular files are hashed in
    chunks of 128 KiB through a reusable buffer
    (chunked reading adapted from https://stackoverflow.com/a/44873382).
    """
    if isinstance(path, ZipPath):
        # no buffered reading available
        archive = path.root
        assert isinstance(archive, ZipFile)
        content = path.read_bytes()
        assert isinstance(content, bytes)
        hasher = hashlib.sha256(content)
    else:
        hasher = hashlib.sha256()
        view = memoryview(bytearray(128 * 1024))
        with open(path, "rb", buffering=0) as stream:
            # readinto reports 0 bytes once the end of the file is reached
            while (n_read := stream.readinto(view)) != 0:
                hasher.update(view[:n_read])

    digest = hasher.hexdigest()
    assert len(digest) == 64
    return Sha256(digest)
def get_spdx_licenses() -> SpdxLicenses:
    """Load the details of the SPDX licenses known to bioimageio.spec.

    Reads `static/spdx_licenses.json` located via `files("bioimageio.spec")`
    and returns the parsed JSON content.
    """
    license_file = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_file.open("r", encoding="utf-8") as stream:
        spdx_licenses = json.load(stream)

    return spdx_licenses
get details of the SPDX licenses known to bioimageio.spec
def
identify_bioimageio_yaml_file_name(file_names: Iterable[str]) -> str:
def identify_bioimageio_yaml_file_name(file_names: Iterable[FileName]) -> FileName:
    """Pick the bioimageio YAML file name out of `file_names`.

    A file name matches if it equals one of `ALL_BIOIMAGEIO_YAML_NAMES` or ends
    with "." followed by one of them. Names listed earlier in
    `ALL_BIOIMAGEIO_YAML_NAMES` take precedence; among several candidates for
    the same name the first in sorted order is returned.

    Raises:
        ValueError: if none of `file_names` matches
    """
    # sorting makes the choice independent of the input order
    file_names = sorted(file_names)
    for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES:
        for file_name in file_names:
            if file_name == bioimageio_name or file_name.endswith(
                "." + bioimageio_name
            ):
                return file_name

    raise ValueError(
        f"No {BIOIMAGEIO_YAML} found in {file_names}. (Looking for '{BIOIMAGEIO_YAML}'"
        + " or any of the alternative file names:"
        + f" {ALTERNATIVE_BIOIMAGEIO_YAML_NAMES}, or any file with an extension of"
        + f" those, e.g. 'anything.{BIOIMAGEIO_YAML}')."
    )
def
is_valid_bioimageio_yaml_name(file_name: str) -> bool:
def
load_array( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], bioimageio.spec._internal.io.FileDescr, zipp.Path]) -> numpy.ndarray[tuple[int, ...], numpy.dtype[typing.Any]]:
def
save_array( path: Union[pathlib.Path, zipp.Path], array: numpy.ndarray[tuple[int, ...], numpy.dtype[typing.Any]]) -> None:
class
SpdxLicenseEntry(typing.TypedDict):
class
SpdxLicenses(typing.TypedDict):
class SpdxLicenses(TypedDict):
    """Typed dictionary with the keys `licenseListVersion`, `licenses` and `releaseDate`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]
    releaseDate: str
licenses: List[SpdxLicenseEntry]
def
write_yaml( content: Union[YamlValue, Dict[str, YamlValue]], /, file: Union[Annotated[pathlib.Path, PathType(path_type='new')], Annotated[pathlib.Path, PathType(path_type='file')], IO[str], IO[bytes], zipp.Path]):
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContent],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    A `Path` is opened for writing with UTF-8 encoding (and closed afterwards);
    any other `file` is handed to the YAML dumper as is.
    """
    target_cm = (
        file.open("w", encoding="utf-8")
        if isinstance(file, Path)
        else nullcontext(file)
    )
    with target_cm as target:
        _yaml_dump.dump(content, target)