bioimageio.spec.utils

 1import json
 2from typing import List, TypedDict
 3
 4from ._description import ensure_description_is_dataset, ensure_description_is_model
 5from ._internal.io import (
 6    download,
 7    extract_file_name,
 8    get_sha256,
 9    identify_bioimageio_yaml_file_name,
10    is_valid_bioimageio_yaml_name,
11)
12from ._internal.io_utils import load_array, save_array
13from ._internal.utils import files
14
# Names exported by `from bioimageio.spec.utils import *`: helpers imported
# above plus the SPDX license helpers defined in this module.
__all__ = [
    "download",
    "ensure_description_is_dataset",
    "ensure_description_is_model",
    "extract_file_name",
    "get_sha256",
    "get_spdx_licenses",
    "identify_bioimageio_yaml_file_name",
    "is_valid_bioimageio_yaml_name",
    "load_array",
    "save_array",
    "SpdxLicenseEntry",
    "SpdxLicenses",
]
29
30
class SpdxLicenseEntry(TypedDict):
    """Typed dictionary describing one entry of `SpdxLicenses.licenses`.

    NOTE(review): the camelCase keys presumably mirror the keys used in the
    packaged `static/spdx_licenses.json` -- confirm against that file.
    """

    isDeprecatedLicenseId: bool
    isKnownByZenodo: bool
    isOsiApproved: bool
    licenseId: str
    name: str
    reference: str
38
39
class SpdxLicenses(TypedDict):
    """Typed dictionary returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]  # one `SpdxLicenseEntry` per license
    releaseDate: str
44
45
def get_spdx_licenses() -> SpdxLicenses:
    """Load the SPDX license details known to bioimageio.spec.

    Returns:
        The parsed content of the `static/spdx_licenses.json` resource that is
        looked up relative to the `bioimageio.spec` package.
    """
    license_file = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_file.open("r", encoding="utf-8") as stream:
        spdx_licenses = json.load(stream)

    return spdx_licenses
def download( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], str, Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.io.FileDescr, zipp.Path], /, progressbar: Union[bioimageio.spec._internal.io.Progressbar, bool, NoneType] = None, **kwargs: Unpack[bioimageio.spec._internal.io.HashKwargs]) -> Union[bioimageio.spec._internal.io.LocalFile, bioimageio.spec._internal.io.FileInZip]:
def resolve(
    source: Union[PermissiveFileSource, FileDescr, ZipPath],
    /,
    progressbar: Union[Progressbar, bool, None] = None,
    **kwargs: Unpack[HashKwargs],
) -> Union[LocalFile, FileInZip]:
    """Resolve file `source` (download if needed)

    Args:
        source: The file to resolve (positional-only).
        progressbar: Passed on to the HTTP downloader. If `None`, it defaults
            to `False` when `settings.CI` is set and to `True` otherwise.
        **kwargs: Hash information; converted by `_get_known_hash` and passed
            as `known_hash` to `pooch.retrieve` when a download is needed.

    Returns:
        A `FileInZip` for sources located inside a zip archive, otherwise a
        `LocalFile`. For a `FileDescr` the result of its `download()` method
        is returned.

    Raises:
        FileNotFoundError: if `source` resolves to a local path that does not
            exist.
        NotImplementedError: if `source` resolves to a URL whose scheme is
            neither "http" nor "https".
    """
    # file descriptions know how to download themselves
    if isinstance(source, FileDescr):
        return source.download()
    elif isinstance(source, ZipPath):
        # already points into a zip archive; nothing to download
        zip_root = source.root
        assert isinstance(zip_root, ZipFile)
        return FileInZip(
            source,
            zip_root,
            extract_file_name(source),
        )

    strict_source = interprete_file_source(source)
    if isinstance(strict_source, RelativeFilePath):
        # make the relative path absolute; the result may point into a zip archive
        strict_source = strict_source.absolute()
        if isinstance(strict_source, ZipPath):
            return FileInZip(
                strict_source, strict_source.root, extract_file_name(strict_source)
            )

    if isinstance(strict_source, PurePath):
        # local file: no download required, but the file has to exist
        if not strict_source.exists():
            raise FileNotFoundError(strict_source)
        local_source = strict_source
        root: Union[RootHttpUrl, DirectoryPath] = strict_source.parent
    else:
        # remote file: only http(s) URLs are supported
        if strict_source.scheme not in ("http", "https"):
            raise NotImplementedError(strict_source.scheme)

        # in CI: identify as "ci" and keep the progress bar off unless requested
        if settings.CI:
            headers = {"User-Agent": "ci"}
            if progressbar is None:
                progressbar = False
        else:
            headers = {}
            if progressbar is None:
                progressbar = True

        # an explicitly configured user agent takes precedence over the CI default
        if settings.user_agent is not None:
            headers["User-Agent"] = settings.user_agent

        downloader = pooch.HTTPDownloader(
            headers=headers,
            progressbar=progressbar,  # pyright: ignore[reportArgumentType]
        )
        fname = _get_unique_file_name(strict_source)
        # retrieve the file into `settings.cache_path` under the name `fname`
        _ls: Any = pooch.retrieve(
            url=str(strict_source),
            known_hash=_get_known_hash(kwargs),
            downloader=downloader,
            fname=fname,
            path=settings.cache_path,
        )
        local_source = Path(_ls).absolute()
        root = strict_source.parent

    return LocalFile(
        local_source,
        root,
        extract_file_name(strict_source),
    )

Resolve file source (download if needed)

def ensure_description_is_dataset( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[bioimageio.spec.application.v0_2.ApplicationDescr, bioimageio.spec.ApplicationDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.dataset.v0_2.DatasetDescr, bioimageio.spec.DatasetDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.model.v0_4.ModelDescr, bioimageio.spec.ModelDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.NotebookDescr, bioimageio.spec.NotebookDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.generic.v0_2.GenericDescr, bioimageio.spec.GenericDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]]) -> Annotated[Union[bioimageio.spec.dataset.v0_2.DatasetDescr, bioimageio.spec.DatasetDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]:
def ensure_description_is_dataset(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyDatasetDescr:
    """Return `rd` unchanged after checking that it is a valid dataset description.

    The validation summary of `rd` is displayed before an error is raised.

    Raises:
        ValueError: for invalid or non-dataset resources
    """
    if isinstance(rd, InvalidDescr) or rd.type != "dataset":
        # show the validation summary first, then report what went wrong
        rd.validation_summary.display()
        if isinstance(rd, InvalidDescr):
            raise ValueError("resource description is invalid")

        raise ValueError(
            f"expected a dataset resource, but got resource type '{rd.type}'"
        )

    # rule out the generic description classes before returning
    assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
    return rd
def ensure_description_is_model( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[bioimageio.spec.application.v0_2.ApplicationDescr, bioimageio.spec.ApplicationDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.dataset.v0_2.DatasetDescr, bioimageio.spec.DatasetDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.model.v0_4.ModelDescr, bioimageio.spec.ModelDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.NotebookDescr, bioimageio.spec.NotebookDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], Annotated[Union[bioimageio.spec.generic.v0_2.GenericDescr, bioimageio.spec.GenericDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]]) -> Annotated[Union[bioimageio.spec.model.v0_4.ModelDescr, bioimageio.spec.ModelDescr], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None)]:
def ensure_description_is_model(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyModelDescr:
    """Return `rd` unchanged after checking that it is a valid model description.

    The validation summary of `rd` is displayed before an error is raised.

    Raises:
        ValueError: for invalid or non-model resources
    """
    if isinstance(rd, InvalidDescr) or rd.type != "model":
        # show the validation summary first, then report what went wrong
        rd.validation_summary.display()
        if isinstance(rd, InvalidDescr):
            raise ValueError("resource description is invalid")

        raise ValueError(
            f"expected a model resource, but got resource type '{rd.type}'"
        )

    # rule out the generic description classes before returning
    assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
    return rd
Raises:
  • ValueError: for invalid or non-model resources
def extract_file_name( src: Union[Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.url.HttpUrl, pathlib.PurePath, bioimageio.spec._internal.io.RelativeFilePath, zipp.Path]) -> str:
def extract_file_name(
    src: Union[pydantic.HttpUrl, HttpUrl, PurePath, RelativeFilePath, ZipPath],
) -> FileName:
    """Derive a file name from a path, a zip member or a URL.

    Args:
        src: The file source to take the name from.

    Returns:
        - for a zip path: its name, else the file name of the zip archive, else
          "bioimageio.zip"
        - for a (relative) file path: the name of the path
        - for a URL: the last segment of the URL path, except for
          `https://zenodo.org/api/records/.../content` URLs, where the second
          to last segment is used.
    """
    if isinstance(src, ZipPath):
        return src.name or src.root.filename or "bioimageio.zip"

    if isinstance(src, RelativeFilePath):
        return src.path.name

    if isinstance(src, PurePath):
        return src.name

    # anything else is treated as a URL
    parsed = urlparse(str(src))
    segments = parsed.path.split("/")
    is_zenodo_content_url = (
        parsed.scheme == "https"
        and parsed.hostname == "zenodo.org"
        and parsed.path.startswith("/api/records/")
        and parsed.path.endswith("/content")
    )
    # zenodo API URLs end in ".../<file name>/content"
    return segments[-2] if is_zenodo_content_url else segments[-1]
@lru_cache
def get_sha256( path: Union[pathlib.Path, zipp.Path]) -> bioimageio.spec._internal.io_basics.Sha256:
@lru_cache
def get_sha256(path: Union[Path, ZipPath]) -> Sha256:
    """Compute the SHA-256 hex digest of the file at `path`.

    A progress bar is shown while hashing.
    Adapted from https://stackoverflow.com/a/44873382

    Note:
        Results are memoized per `path` by `lru_cache`; while an entry is
        cached, the digest is not recomputed even if the file content changed.

    Args:
        path: A file on disk or a member of a zip archive.

    Returns:
        The 64 character hex digest wrapped in `Sha256`.
    """
    desc = f"computing SHA256 of {path.name}"
    if isinstance(path, ZipPath):
        zf = path.root
        assert isinstance(zf, ZipFile)
        file_size = zf.NameToInfo[path.at].file_size
    else:
        file_size = path.stat().st_size

    # the context manager closes the progress bar even if reading fails
    with tqdm(desc=desc, total=file_size) as pbar:
        if isinstance(path, ZipPath):
            # no buffered reading available: hash the whole member at once
            data = path.read_bytes()
            assert isinstance(data, bytes)
            h = hashlib.sha256(data)
            _ = pbar.update(len(data))
        else:
            h = hashlib.sha256()
            chunksize = 128 * 1024
            # a single reusable buffer avoids one allocation per chunk
            mv = memoryview(bytearray(chunksize))
            with open(path, "rb", buffering=0) as f:
                # `readinto` returns the number of bytes read; 0 marks the end
                for n in iter(lambda: f.readinto(mv), 0):
                    h.update(mv[:n])
                    _ = pbar.update(n)

        sha = h.hexdigest()
        pbar.set_description(desc=desc + f" (result: {sha})")

    assert len(sha) == 64
    return Sha256(sha)
def get_spdx_licenses() -> SpdxLicenses:
def get_spdx_licenses() -> SpdxLicenses:
    """Load the SPDX license details known to bioimageio.spec.

    Returns:
        The parsed content of the `static/spdx_licenses.json` resource that is
        looked up relative to the `bioimageio.spec` package.
    """
    license_file = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_file.open("r", encoding="utf-8") as stream:
        spdx_licenses = json.load(stream)

    return spdx_licenses

get details of the SPDX licenses known to bioimageio.spec

def identify_bioimageio_yaml_file_name(file_names: Iterable[str]) -> str:
def identify_bioimageio_yaml_file_name(file_names: Iterable[FileName]) -> FileName:
    """Pick the bioimageio YAML file name out of `file_names`.

    The names in `ALL_BIOIMAGEIO_YAML_NAMES` are tried in order; for each of
    them the sorted `file_names` are searched for a name that either equals it
    or ends with `"." + <that name>`.

    Args:
        file_names: The file names to search.

    Returns:
        The first matching file name.

    Raises:
        ValueError: if none of `file_names` matches.
    """
    candidates = sorted(file_names)
    for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES:
        suffix = "." + bioimageio_name
        for file_name in candidates:
            if file_name == bioimageio_name or file_name.endswith(suffix):
                return file_name

    raise ValueError(
        f"No {BIOIMAGEIO_YAML} found in {candidates}. (Looking for '{BIOIMAGEIO_YAML}'"
        + " or any of the alternative file names:"
        + f" {ALTERNATIVE_BIOIMAGEIO_YAML_NAMES}, or any file with an extension of"
        + f" those, e.g. 'anything.{BIOIMAGEIO_YAML}')."
    )
def is_valid_bioimageio_yaml_name(file_name: str) -> bool:
def is_valid_bioimageio_yaml_name(file_name: FileName) -> bool:
    """Check whether `file_name` is a bioimageio YAML file name.

    Returns:
        `True` if `file_name` equals one of `ALL_BIOIMAGEIO_YAML_NAMES` or ends
        with a dot followed by one of them, `False` otherwise.
    """
    return any(
        file_name == bioimageio_name or file_name.endswith("." + bioimageio_name)
        for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES
    )
def load_array( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], bioimageio.spec._internal.io.FileDescr, zipp.Path]) -> numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]:
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a NumPy array from `source`.

    `source` is resolved with `download` first; the resulting path is opened in
    binary mode and read with `numpy.load` (pickled data is not allowed).
    """
    local_path = download(source).path
    with local_path.open(mode="rb") as stream:
        assert not isinstance(stream, io.TextIOWrapper)
        array = numpy.load(stream, allow_pickle=False)

    return array
def save_array( path: Union[pathlib.Path, zipp.Path], array: numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]) -> None:
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save` (pickling is not allowed).

    Args:
        path: Destination; opened in binary write mode.
        array: The array to store.
    """
    with path.open(mode="wb") as stream:
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=False)
class SpdxLicenseEntry(typing.TypedDict):
class SpdxLicenseEntry(TypedDict):
    """Typed dictionary describing one entry of `SpdxLicenses.licenses`.

    NOTE(review): the camelCase keys presumably mirror the keys used in the
    packaged `static/spdx_licenses.json` -- confirm against that file.
    """

    isDeprecatedLicenseId: bool
    isKnownByZenodo: bool
    isOsiApproved: bool
    licenseId: str
    name: str
    reference: str
isDeprecatedLicenseId: bool
isKnownByZenodo: bool
isOsiApproved: bool
licenseId: str
name: str
reference: str
class SpdxLicenses(typing.TypedDict):
class SpdxLicenses(TypedDict):
    """Typed dictionary returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]  # one `SpdxLicenseEntry` per license
    releaseDate: str
licenseListVersion: str
licenses: List[SpdxLicenseEntry]
releaseDate: str