bioimageio.spec.utils

 1import json
 2from typing import List, TypedDict
 3
 4from ._description import ensure_description_is_dataset, ensure_description_is_model
 5from ._internal.io import (
 6    download,
 7    extract_file_name,
 8    get_reader,
 9    get_sha256,
10    identify_bioimageio_yaml_file_name,
11    is_valid_bioimageio_yaml_name,
12)
13from ._internal.io_utils import load_array, save_array, write_yaml
14from ._internal.utils import files
15
# Names exported by `from bioimageio.spec.utils import *`.
__all__ = [
    "download",
    "ensure_description_is_dataset",
    "ensure_description_is_model",
    "extract_file_name",
    "get_file_name",
    "get_reader",
    "get_sha256",
    "get_spdx_licenses",
    "identify_bioimageio_yaml_file_name",
    "is_valid_bioimageio_yaml_name",
    "load_array",
    "save_array",
    "SpdxLicenseEntry",
    "SpdxLicenses",
    "write_yaml",
]

# `get_file_name` is a second name bound to the very same function object as
# `extract_file_name`.
get_file_name = extract_file_name
35
36
class SpdxLicenseEntry(TypedDict):
    """Typed dictionary describing one element of `SpdxLicenses["licenses"]`."""

    isDeprecatedLicenseId: bool
    isKnownByZenodo: bool
    isOsiApproved: bool
    licenseId: str
    name: str
    reference: str
44
45
class SpdxLicenses(TypedDict):
    """Typed dictionary for the value returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]
    releaseDate: str
50
51
def get_spdx_licenses() -> SpdxLicenses:
    """get details of the SPDX licenses known to bioimageio.spec

    Returns:
        The parsed JSON content of the `static/spdx_licenses.json` resource
        located via `files("bioimageio.spec")`.
    """
    license_resource = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_resource.open("r", encoding="utf-8") as stream:
        spdx_licenses = json.load(stream)

    return spdx_licenses
def download( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], str, pydantic.networks.HttpUrl, bioimageio.spec._internal.io.FileDescr, zipp.Path], /, progressbar: Union[bioimageio.spec._internal.progress.Progressbar, Callable[[], bioimageio.spec._internal.progress.Progressbar], bool, NoneType] = None, **kwargs: Unpack[bioimageio.spec._internal.io.HashKwargs]) -> bioimageio.spec._internal.io_basics.BytesReader:
def get_reader(
    source: Union[PermissiveFileSource, FileDescr, ZipPath],
    /,
    progressbar: Union[Progressbar, Callable[[], Progressbar], bool, None] = None,
    **kwargs: Unpack[HashKwargs],
) -> BytesReader:
    """Open a file `source` (download if needed)

    Args:
        source: What to open. A `FileDescr` is replaced by its `source`
            attribute, a `str` is passed through `interprete_file_source`
            and a `RelativeFilePath` is made absolute before opening.
        progressbar: Only forwarded to the URL opener, i.e. it has no effect
            for local paths and zip members.
        **kwargs: Hash options. If `sha256` is present and not `None`, the
            content of a local file or zip member is hashed and compared
            against it.

    Returns:
        A `BytesReader` positioned at the start of the file content. For URLs
        the result of `_open_url` is returned.

    Raises:
        FileNotFoundError: if a local path or zip member does not exist, or
            if a local path is a directory.
        ValueError: if the SHA-256 of a local file or zip member differs from
            the expected one.
    """
    if isinstance(source, FileDescr):
        # an explicitly given `sha256` takes precedence over the descriptor's
        if "sha256" not in kwargs:
            kwargs["sha256"] = source.sha256

        source = source.source
    elif isinstance(source, str):
        source = interprete_file_source(source)

    if isinstance(source, RelativeFilePath):
        source = source.absolute()
    elif isinstance(source, pydantic.AnyUrl):
        # only convert the URL type here; IO checks are disabled for the conversion
        with get_validation_context().replace(perform_io_checks=False):
            source = HttpUrl(source)

    if isinstance(source, HttpUrl):
        return _open_url(source, progressbar=progressbar, **kwargs)

    if isinstance(source, ZipPath):
        if not source.exists():
            raise FileNotFoundError(source)

        f = source.open(mode="rb")
        assert not isinstance(f, TextIOWrapper)
        root = source.root
    elif isinstance(source, Path):
        if source.is_dir():
            raise FileNotFoundError(f"{source} is a directory, not a file")

        if not source.exists():
            raise FileNotFoundError(source)

        f = source.open("rb")
        root = source.parent
    else:
        assert_never(source)

    expected_sha = kwargs.get("sha256")
    try:
        if expected_sha is None:
            sha = None
        else:
            sha = get_sha256(f)
            # hashing consumed the stream; rewind so the caller reads from the start
            _ = f.seek(0)
            if sha != expected_sha:
                raise ValueError(
                    f"SHA256 mismatch for {source}. Expected {expected_sha}, got {sha}."
                )
    except BaseException:
        # the handle is not handed to a `BytesReader` on failure, so nobody
        # else could close it; release it before propagating the error
        f.close()
        raise

    return BytesReader(
        f,
        sha256=sha,
        suffix=source.suffix,
        original_file_name=source.name,
        original_root=root,
        is_zipfile=None,
    )

Open a file source (download if needed)

def ensure_description_is_dataset( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')]:
def ensure_description_is_dataset(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyDatasetDescr:
    """Return `rd` unchanged if it is a valid dataset description.

    For rejected input the validation summary of `rd` is displayed before the
    error is raised.

    Raises:
        ValueError: for invalid or non-dataset resources
    """
    if isinstance(rd, InvalidDescr):
        rd.validation_summary.display()
        raise ValueError(f"Invalid {rd.type} description.")

    if rd.type == "dataset":
        # generic descriptions are ruled out explicitly for the declared return type
        assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
        return rd

    rd.validation_summary.display()
    raise ValueError(f"Expected a dataset resource, but got resource type '{rd.type}'")
def ensure_description_is_model( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')]:
def ensure_description_is_model(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyModelDescr:
    """Return `rd` unchanged if it is a valid model description.

    For rejected input the validation summary of `rd` is displayed before the
    error is raised.

    Raises:
        ValueError: for invalid or non-model resources
    """
    if isinstance(rd, InvalidDescr):
        rd.validation_summary.display()
        raise ValueError(f"Invalid {rd.type} description")

    if rd.type == "model":
        # generic descriptions are ruled out explicitly for the declared return type
        assert not isinstance(rd, (GenericDescr_v0_2, GenericDescr_v0_3))
        return rd

    rd.validation_summary.display()
    raise ValueError(f"Expected a model resource, but got resource type '{rd.type}'")
Raises:
  • ValueError: for invalid or non-model resources
def extract_file_name( src: Union[pydantic.networks.HttpUrl, bioimageio.spec._internal.root_url.RootHttpUrl, pathlib.PurePath, bioimageio.spec._internal.io.RelativeFilePath, zipp.Path, bioimageio.spec._internal.io.FileDescr]) -> str:
def extract_file_name(
    src: Union[
        pydantic.HttpUrl, RootHttpUrl, PurePath, RelativeFilePath, ZipPath, FileDescr
    ],
) -> FileName:
    """Derive a file name from a path, zip member, URL or file description.

    A `FileDescr` is replaced by its `source` first. Paths yield their `name`;
    a zip path without a name falls back to the archive's file name and then
    to "bioimageio.zip". Anything else is parsed as a URL, whose last path
    segment is used; only for `https://zenodo.org/api/records/.../content`
    URLs the second to last segment is used instead.
    """
    if isinstance(src, FileDescr):
        src = src.source

    if isinstance(src, ZipPath):
        return src.name or src.root.filename or "bioimageio.zip"

    if isinstance(src, RelativeFilePath):
        return src.path.name

    if isinstance(src, PurePath):
        return src.name

    parsed = urlparse(str(src))
    is_zenodo_content_url = (
        parsed.scheme == "https"
        and parsed.hostname == "zenodo.org"
        and parsed.path.startswith("/api/records/")
        and parsed.path.endswith("/content")
    )
    segments = parsed.path.split("/")
    # for zenodo content URLs the last segment is the literal "content"
    return segments[-2] if is_zenodo_content_url else segments[-1]
def get_file_name( src: Union[pydantic.networks.HttpUrl, bioimageio.spec._internal.root_url.RootHttpUrl, pathlib.PurePath, bioimageio.spec._internal.io.RelativeFilePath, zipp.Path, bioimageio.spec._internal.io.FileDescr]) -> str:
def extract_file_name(
    src: Union[
        pydantic.HttpUrl, RootHttpUrl, PurePath, RelativeFilePath, ZipPath, FileDescr
    ],
) -> FileName:
    """Derive a file name from a path, zip member, URL or file description.

    A `FileDescr` is replaced by its `source` first. Paths yield their `name`;
    a zip path without a name falls back to the archive's file name and then
    to "bioimageio.zip". Anything else is parsed as a URL, whose last path
    segment is used; only for `https://zenodo.org/api/records/.../content`
    URLs the second to last segment is used instead.
    """
    if isinstance(src, FileDescr):
        src = src.source

    if isinstance(src, ZipPath):
        return src.name or src.root.filename or "bioimageio.zip"

    if isinstance(src, RelativeFilePath):
        return src.path.name

    if isinstance(src, PurePath):
        return src.name

    parsed = urlparse(str(src))
    is_zenodo_content_url = (
        parsed.scheme == "https"
        and parsed.hostname == "zenodo.org"
        and parsed.path.startswith("/api/records/")
        and parsed.path.endswith("/content")
    )
    segments = parsed.path.split("/")
    # for zenodo content URLs the last segment is the literal "content"
    return segments[-2] if is_zenodo_content_url else segments[-1]
def get_reader( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], str, pydantic.networks.HttpUrl, bioimageio.spec._internal.io.FileDescr, zipp.Path], /, progressbar: Union[bioimageio.spec._internal.progress.Progressbar, Callable[[], bioimageio.spec._internal.progress.Progressbar], bool, NoneType] = None, **kwargs: Unpack[bioimageio.spec._internal.io.HashKwargs]) -> bioimageio.spec._internal.io_basics.BytesReader:
def get_reader(
    source: Union[PermissiveFileSource, FileDescr, ZipPath],
    /,
    progressbar: Union[Progressbar, Callable[[], Progressbar], bool, None] = None,
    **kwargs: Unpack[HashKwargs],
) -> BytesReader:
    """Open a file `source` (download if needed)

    Args:
        source: What to open. A `FileDescr` is replaced by its `source`
            attribute, a `str` is passed through `interprete_file_source`
            and a `RelativeFilePath` is made absolute before opening.
        progressbar: Only forwarded to the URL opener, i.e. it has no effect
            for local paths and zip members.
        **kwargs: Hash options. If `sha256` is present and not `None`, the
            content of a local file or zip member is hashed and compared
            against it.

    Returns:
        A `BytesReader` positioned at the start of the file content. For URLs
        the result of `_open_url` is returned.

    Raises:
        FileNotFoundError: if a local path or zip member does not exist, or
            if a local path is a directory.
        ValueError: if the SHA-256 of a local file or zip member differs from
            the expected one.
    """
    if isinstance(source, FileDescr):
        # an explicitly given `sha256` takes precedence over the descriptor's
        if "sha256" not in kwargs:
            kwargs["sha256"] = source.sha256

        source = source.source
    elif isinstance(source, str):
        source = interprete_file_source(source)

    if isinstance(source, RelativeFilePath):
        source = source.absolute()
    elif isinstance(source, pydantic.AnyUrl):
        # only convert the URL type here; IO checks are disabled for the conversion
        with get_validation_context().replace(perform_io_checks=False):
            source = HttpUrl(source)

    if isinstance(source, HttpUrl):
        return _open_url(source, progressbar=progressbar, **kwargs)

    if isinstance(source, ZipPath):
        if not source.exists():
            raise FileNotFoundError(source)

        f = source.open(mode="rb")
        assert not isinstance(f, TextIOWrapper)
        root = source.root
    elif isinstance(source, Path):
        if source.is_dir():
            raise FileNotFoundError(f"{source} is a directory, not a file")

        if not source.exists():
            raise FileNotFoundError(source)

        f = source.open("rb")
        root = source.parent
    else:
        assert_never(source)

    expected_sha = kwargs.get("sha256")
    try:
        if expected_sha is None:
            sha = None
        else:
            sha = get_sha256(f)
            # hashing consumed the stream; rewind so the caller reads from the start
            _ = f.seek(0)
            if sha != expected_sha:
                raise ValueError(
                    f"SHA256 mismatch for {source}. Expected {expected_sha}, got {sha}."
                )
    except BaseException:
        # the handle is not handed to a `BytesReader` on failure, so nobody
        # else could close it; release it before propagating the error
        f.close()
        raise

    return BytesReader(
        f,
        sha256=sha,
        suffix=source.suffix,
        original_file_name=source.name,
        original_root=root,
        is_zipfile=None,
    )

Open a file source (download if needed)

def get_sha256(source: Union[BytesReaderP, BytesReaderIntoP, Path]) -> Sha256:
    """Compute the SHA-256 hex digest of `source`, reading 128 KiB at a time.

    Args:
        source: A reader offering `readinto`, a reader offering `read`, or a
            `Path`. A `Path` is opened in binary mode and closed again; a
            reader is consumed from its current position and is neither
            rewound nor closed here.

    Returns:
        The hexadecimal digest wrapped in `Sha256`.
    """
    chunksize = 128 * 1024
    digest = hashlib.sha256()

    if isinstance(source, BytesReaderIntoP):
        # reuse a single buffer instead of allocating a new one per chunk
        view = memoryview(bytearray(chunksize))
        while True:
            n_read = source.readinto(view)
            if n_read == 0:
                break

            digest.update(view[:n_read])
    else:
        if isinstance(source, Path):
            reader_cm = source.open(mode="rb")
        else:
            # a reader passed in by the caller stays open
            reader_cm = nullcontext(source)

        with reader_cm as reader:
            while True:
                chunk = reader.read(chunksize)
                if chunk == b"":
                    break

                digest.update(chunk)

    return Sha256(digest.hexdigest())
def get_spdx_licenses() -> SpdxLicenses:
def get_spdx_licenses() -> SpdxLicenses:
    """get details of the SPDX licenses known to bioimageio.spec

    Returns:
        The parsed JSON content of the `static/spdx_licenses.json` resource
        located via `files("bioimageio.spec")`.
    """
    license_resource = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_resource.open("r", encoding="utf-8") as stream:
        spdx_licenses = json.load(stream)

    return spdx_licenses

get details of the SPDX licenses known to bioimageio.spec

def identify_bioimageio_yaml_file_name(file_names: Iterable[str]) -> str:
def identify_bioimageio_yaml_file_name(file_names: Iterable[FileName]) -> FileName:
    """Pick the bioimageio YAML file among `file_names`.

    A file name matches if it equals one of `ALL_BIOIMAGEIO_YAML_NAMES` or
    ends with "." followed by one of them. Candidates are tried in the order
    of `ALL_BIOIMAGEIO_YAML_NAMES`; among several matches for the same name
    the first one in sorted order wins.

    Args:
        file_names: The file names to search; consumed once and sorted.

    Returns:
        The matching file name.

    Raises:
        ValueError: if no file name matches.
    """
    file_names = sorted(file_names)
    for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES:
        for file_name in file_names:
            if file_name == bioimageio_name or file_name.endswith(
                "." + bioimageio_name
            ):
                return file_name

    raise ValueError(
        f"No {BIOIMAGEIO_YAML} found in {file_names}. (Looking for '{BIOIMAGEIO_YAML}'"
        + " or any of the alternative file names:"
        + f" {ALTERNATIVE_BIOIMAGEIO_YAML_NAMES}, or any file with an extension of"
        + f" those, e.g. 'anything.{BIOIMAGEIO_YAML}')."
    )
def is_valid_bioimageio_yaml_name(file_name: str) -> bool:
def is_valid_bioimageio_yaml_name(file_name: FileName) -> bool:
    """Tell whether `file_name` is an accepted bioimageio YAML file name.

    Accepted are the names in `ALL_BIOIMAGEIO_YAML_NAMES` themselves and any
    name ending with "." followed by one of them.
    """
    return any(
        file_name == bioimageio_name or file_name.endswith("." + bioimageio_name)
        for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES
    )
def load_array( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], bioimageio.spec._internal.io.FileDescr, zipp.Path]) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[typing.Any]]:
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Load a numpy array from `source`, opened via `get_reader`.

    Whether pickled objects may be loaded is taken from
    `settings.allow_pickle`; a warning is logged when it is enabled.
    """
    reader = get_reader(source)
    allow_pickle = settings.allow_pickle
    if allow_pickle:
        # NOTE: unpickling executes arbitrary code; only enable for trusted sources
        logger.warning("Loading numpy array with `allow_pickle=True`.")

    return numpy.load(reader, allow_pickle=allow_pickle)
def save_array( path: Union[pathlib.Path, zipp.Path], array: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[typing.Any]]) -> None:
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save`, never using pickle.

    Args:
        path: Destination; opened in binary write mode and closed afterwards.
        array: The array to store.
    """
    with path.open(mode="wb") as stream:
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=False)
class SpdxLicenseEntry(typing.TypedDict):
class SpdxLicenseEntry(TypedDict):
    """Typed dictionary describing one element of `SpdxLicenses["licenses"]`."""

    isDeprecatedLicenseId: bool
    isKnownByZenodo: bool
    isOsiApproved: bool
    licenseId: str
    name: str
    reference: str
isDeprecatedLicenseId: bool
isKnownByZenodo: bool
isOsiApproved: bool
licenseId: str
name: str
reference: str
class SpdxLicenses(typing.TypedDict):
class SpdxLicenses(TypedDict):
    """Typed dictionary for the value returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]
    releaseDate: str
licenseListVersion: str
licenses: List[SpdxLicenseEntry]
releaseDate: str
def write_yaml( content: Union[YamlValue, Mapping[str, YamlValueView]], /, file: Union[Annotated[pathlib.Path, PathType(path_type='new')], Annotated[pathlib.Path, PathType(path_type='file')], IO[str], IO[bytes], zipp.Path]):
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContentView],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    Args:
        content: The data to serialize.
        file: Where to write. A `Path` is opened for writing with UTF-8
            encoding and closed afterwards; anything else is handed to the
            YAML dumper as is and is not closed here.
    """
    if isinstance(file, Path):
        target = file.open("w", encoding="utf-8")
    else:
        # not opened here, hence not closed here either
        target = nullcontext(file)

    with target as stream:
        _yaml_dump.dump(content, stream)