bioimageio.spec.utils

 1import json
 2from typing import List, TypedDict
 3
 4from ._description import ensure_description_is_dataset, ensure_description_is_model
 5from ._internal.io import (
 6    download,
 7    extract_file_name,
 8    get_sha256,
 9    identify_bioimageio_yaml_file_name,
10    is_valid_bioimageio_yaml_name,
11)
12from ._internal.io_utils import load_array, save_array, write_yaml
13from ._internal.utils import files
14
# Public names of this module: helpers imported above from the internal
# modules, plus the SPDX license accessor and its TypedDicts defined below.
__all__ = [
    "download",
    "ensure_description_is_dataset",
    "ensure_description_is_model",
    "extract_file_name",
    "get_sha256",
    "get_spdx_licenses",
    "identify_bioimageio_yaml_file_name",
    "is_valid_bioimageio_yaml_name",
    "load_array",
    "save_array",
    "SpdxLicenseEntry",
    "SpdxLicenses",
    "write_yaml",
]
30
31
class SpdxLicenseEntry(TypedDict):
    """Typed shape of a single item in the `licenses` list of `SpdxLicenses`."""

    # NOTE(review): the per-field notes are inferred from the field names only;
    # confirm against static/spdx_licenses.json.
    isDeprecatedLicenseId: bool  # presumably: the identifier is deprecated
    isKnownByZenodo: bool  # presumably: Zenodo recognizes this license
    isOsiApproved: bool  # presumably: the license is OSI approved
    licenseId: str
    name: str
    reference: str
39
40
class SpdxLicenses(TypedDict):
    """Typed shape of the object returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]  # one `SpdxLicenseEntry` per license
    releaseDate: str  # typed as a plain string; not parsed into a date here
45
46
def get_spdx_licenses() -> SpdxLicenses:
    """get details of the SPDX licenses known to bioimageio.spec

    The data is read from the `static/spdx_licenses.json` resource that is
    located inside the `bioimageio.spec` package.
    """
    license_resource = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_resource.open("r", encoding="utf-8") as stream:
        licenses = json.load(stream)

    return licenses
def download( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], str, Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.io.FileDescr, zipp.Path], /, progressbar: Union[bioimageio.spec._internal.io.Progressbar, bool, NoneType] = None, **kwargs: Unpack[bioimageio.spec._internal.io.HashKwargs]) -> Union[bioimageio.spec._internal.io.LocalFile, bioimageio.spec._internal.io.FileInZip]:
def resolve(
    source: Union[PermissiveFileSource, FileDescr, ZipPath],
    /,
    progressbar: Union[Progressbar, bool, None] = None,
    **kwargs: Unpack[HashKwargs],
) -> Union[LocalFile, FileInZip]:
    """Resolve file `source` (download if needed)

    Args:
        source: What to resolve. Strings are interpreted with
            `interprete_file_source`, relative file paths are made absolute and
            generic pydantic URLs are converted to `HttpUrl` (without IO checks).
        progressbar: Progress bar to update while downloading. `True`/`False`
            switches a default `tqdm` bar on/off in the cacheless download.
            `None` means no bar if `settings.CI` is set and a default bar
            otherwise.
        **kwargs: Hash keyword arguments. A download is only cached on disk if
            at least one of them is not `None` (and caching is enabled).

    Returns:
        A `LocalFile` for local paths and cached downloads, a `FileInZip` for
        zip members and for cacheless (in-memory) downloads, or the result of
        `source.download()` for a `FileDescr`.

    Raises:
        FileNotFoundError: if a local `source` is a directory or does not exist
        NotImplementedError: for URL schemes other than http and https

    A failed cacheless HTTP request raises whatever `raise_for_status` raises.
    """

    if isinstance(source, str):
        source = interprete_file_source(source)

    if isinstance(source, RelativeFilePath):
        source = source.absolute()
        if isinstance(source, ZipPath):
            return FileInZip(source, source.root, extract_file_name(source))

    if isinstance(source, pydantic.AnyUrl):
        # only convert the type here; do not trigger IO checks on the URL
        with get_validation_context().replace(perform_io_checks=False):
            source = HttpUrl(source)

    if isinstance(source, FileDescr):
        return source.download()
    elif isinstance(source, ZipPath):
        zip_root = source.root
        assert isinstance(zip_root, ZipFile)
        return FileInZip(
            source,
            zip_root,
            extract_file_name(source),
        )
    elif isinstance(source, Path):
        if source.is_dir():
            raise FileNotFoundError(f"{source} is a directory, not a file")

        if not source.exists():
            raise FileNotFoundError(source)

        return LocalFile(
            source,
            source.parent,
            extract_file_name(source),
        )
    elif isinstance(source, HttpUrl):
        if source.scheme not in ("http", "https"):
            raise NotImplementedError(source.scheme)

        if settings.CI:
            headers = {"User-Agent": "ci"}
            if progressbar is None:
                progressbar = False
        else:
            headers = {}
            if progressbar is None:
                progressbar = True

        # an explicitly configured user agent takes precedence over the CI one
        if settings.user_agent is not None:
            headers["User-Agent"] = settings.user_agent

        chunk_size = 1024
        if (
            settings.cache_path
            and not get_validation_context().disable_cache
            and any(v is not None for v in kwargs.values())
        ):
            # cached download to `settings.cache_path` via pooch
            downloader = pooch.HTTPDownloader(
                headers=headers,
                progressbar=progressbar,  # pyright: ignore[reportArgumentType]
                chunk_size=chunk_size,
            )
            fname = _get_unique_file_name(source)
            _ls: Any = pooch.retrieve(
                url=str(source),
                known_hash=_get_known_hash(kwargs),
                downloader=downloader,
                fname=fname,
                path=settings.cache_path,
            )
            local_source = Path(_ls).absolute()
            return LocalFile(
                local_source,
                source.parent,
                extract_file_name(source),
            )
        else:
            # cacheless download to memory using an in memory zip file
            # (send the same headers as the cached download does)
            r = requests.get(str(source), stream=True, headers=headers)
            r.raise_for_status()

            zf = zipfile.ZipFile(io.BytesIO(), "w")
            fn = extract_file_name(source)
            # 0 if the server does not announce a content length
            total = int(r.headers.get("content-length", 0))

            if isinstance(progressbar, bool):
                if progressbar:
                    use_ascii = bool(sys.platform == "win32")
                    # one bar carrying both the size settings and the
                    # description; creating a second bar would discard these
                    # settings and leave the first bar unclosed
                    pbar = tqdm(
                        total=total,
                        ncols=79,
                        ascii=use_ascii,
                        unit="B",
                        unit_scale=True,
                        leave=True,
                        desc=f"Downloading {fn}",
                    )
                else:
                    pbar = None
            else:
                # caller supplied progress bar object
                pbar = progressbar

            zp = ZipPath(zf, fn)
            with zp.open("wb") as z:
                assert not isinstance(z, io.TextIOWrapper)
                for chunk in r.iter_content(chunk_size=chunk_size):
                    n = z.write(chunk)
                    if pbar is not None:
                        _ = pbar.update(n)

            # Make sure the progress bar gets filled even if the actual number
            # of chunks is smaller than expected. This happens when streaming
            # text files that are compressed by the server when sending (gzip).
            # Binary files don't experience this.
            # (adapted from pooch.HttpDownloader)
            if pbar is not None:
                pbar.reset()
                _ = pbar.update(total)
                pbar.close()

            return FileInZip(
                path=zp,
                original_root=source.parent,
                original_file_name=fn,
            )

    else:
        assert_never(source)

Resolve file source (download if needed)

def ensure_description_is_dataset( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')]:
def ensure_description_is_dataset(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyDatasetDescr:
    """Return `rd` unchanged if it is a valid dataset description.

    The validation summary of `rd` is displayed before raising.

    Raises:
        ValueError: for invalid or non-dataset resources
    """
    if isinstance(rd, InvalidDescr):
        problem = f"Invalid {rd.type} description."
    elif rd.type != "dataset":
        problem = f"Expected a dataset resource, but got resource type '{rd.type}'"
    else:
        # a description of type "dataset" is not expected to be a generic one
        generic_classes = (GenericDescr_v0_2, GenericDescr_v0_3)
        assert not isinstance(rd, generic_classes)
        return rd

    rd.validation_summary.display()
    raise ValueError(problem)
def ensure_description_is_model( rd: Union[bioimageio.spec.InvalidDescr, Annotated[Union[Annotated[Union[Annotated[bioimageio.spec.application.v0_2.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.2')], Annotated[bioimageio.spec.ApplicationDescr, FieldInfo(annotation=NoneType, required=True, title='application 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='application')], Annotated[Union[Annotated[bioimageio.spec.dataset.v0_2.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.2')], Annotated[bioimageio.spec.DatasetDescr, FieldInfo(annotation=NoneType, required=True, title='dataset 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='dataset')], Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')], Annotated[Union[Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.2')], Annotated[bioimageio.spec.NotebookDescr, FieldInfo(annotation=NoneType, required=True, title='notebook 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='notebook')]], Discriminator(discriminator='type', custom_error_type=None, custom_error_message=None, custom_error_context=None)], 
Annotated[Union[Annotated[bioimageio.spec.generic.v0_2.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.2')], Annotated[bioimageio.spec.GenericDescr, FieldInfo(annotation=NoneType, required=True, title='generic 0.3')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='generic')]]) -> Annotated[Union[Annotated[bioimageio.spec.model.v0_4.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.4')], Annotated[bioimageio.spec.ModelDescr, FieldInfo(annotation=NoneType, required=True, title='model 0.5')]], Discriminator(discriminator='format_version', custom_error_type=None, custom_error_message=None, custom_error_context=None), FieldInfo(annotation=NoneType, required=True, title='model')]:
def ensure_description_is_model(
    rd: Union[InvalidDescr, ResourceDescr],
) -> AnyModelDescr:
    """Return `rd` unchanged if it is a valid model description.

    The validation summary of `rd` is displayed before raising.

    Raises:
        ValueError: for invalid or non-model resources
    """
    if isinstance(rd, InvalidDescr):
        problem = f"Invalid {rd.type} description"
    elif rd.type != "model":
        problem = f"Expected a model resource, but got resource type '{rd.type}'"
    else:
        # a description of type "model" is not expected to be a generic one
        generic_classes = (GenericDescr_v0_2, GenericDescr_v0_3)
        assert not isinstance(rd, generic_classes)
        return rd

    rd.validation_summary.display()
    raise ValueError(problem)
Raises:
  • ValueError: for invalid or non-model resources
def extract_file_name( src: Union[Annotated[pydantic_core._pydantic_core.Url, UrlConstraints(max_length=2083, allowed_schemes=['http', 'https'], host_required=None, default_host=None, default_port=None, default_path=None)], bioimageio.spec._internal.url.HttpUrl, pathlib.PurePath, bioimageio.spec._internal.io.RelativeFilePath, zipp.Path]) -> str:
def extract_file_name(
    src: Union[pydantic.HttpUrl, HttpUrl, PurePath, RelativeFilePath, ZipPath],
) -> FileName:
    """Derive a file name from a path, zip member or URL.

    For URLs the last path segment is used, except for zenodo.org
    `/api/records/.../content` URLs, where the second to last segment is used.
    """
    if isinstance(src, ZipPath):
        # fall back to the archive's own file name, then to a fixed default
        return src.name or src.root.filename or "bioimageio.zip"

    if isinstance(src, RelativeFilePath):
        return src.path.name

    if isinstance(src, PurePath):
        return src.name

    parsed = urlparse(str(src))
    segments = parsed.path.split("/")
    is_zenodo_content_url = (
        parsed.scheme == "https"
        and parsed.hostname == "zenodo.org"
        and parsed.path.startswith("/api/records/")
        and parsed.path.endswith("/content")
    )
    return segments[-2] if is_zenodo_content_url else segments[-1]
def get_sha256( path: Union[pathlib.Path, zipp.Path]) -> bioimageio.spec._internal.io_basics.Sha256:
def get_sha256(path: Union[Path, ZipPath]) -> Sha256:
    """Compute the SHA-256 hex digest of the file at `path`.

    Chunked reading adapted from https://stackoverflow.com/a/44873382
    """
    if isinstance(path, ZipPath):
        # no buffered reading available
        archive = path.root
        assert isinstance(archive, ZipFile)
        content = path.read_bytes()
        assert isinstance(content, bytes)
        digest = hashlib.sha256(content)
    else:
        digest = hashlib.sha256()
        # reuse a single buffer for all reads
        buffer_view = memoryview(bytearray(128 * 1024))
        with open(path, "rb", buffering=0) as stream:
            while True:
                n_read = stream.readinto(buffer_view)
                if n_read == 0:
                    break

                digest.update(buffer_view[:n_read])

    hex_digest = digest.hexdigest()
    assert len(hex_digest) == 64
    return Sha256(hex_digest)
def get_spdx_licenses() -> SpdxLicenses:
def get_spdx_licenses() -> SpdxLicenses:
    """get details of the SPDX licenses known to bioimageio.spec

    The data is read from the `static/spdx_licenses.json` resource that is
    located inside the `bioimageio.spec` package.
    """
    license_resource = files("bioimageio.spec").joinpath("static/spdx_licenses.json")
    with license_resource.open("r", encoding="utf-8") as stream:
        licenses = json.load(stream)

    return licenses

get details of the SPDX licenses known to bioimageio.spec

def identify_bioimageio_yaml_file_name(file_names: Iterable[str]) -> str:
def identify_bioimageio_yaml_file_name(file_names: Iterable[FileName]) -> FileName:
    """Pick the bioimageio YAML file name out of `file_names`.

    The names in `ALL_BIOIMAGEIO_YAML_NAMES` are tried in order. For each of
    them the first file name (in sorted order) that equals it or ends with
    ".<name>" is returned.

    Raises:
        ValueError: if none of `file_names` matches
    """
    file_names = sorted(file_names)
    for bioimageio_name in ALL_BIOIMAGEIO_YAML_NAMES:
        extension = "." + bioimageio_name
        for file_name in file_names:
            if file_name == bioimageio_name or file_name.endswith(extension):
                return file_name

    raise ValueError(
        f"No {BIOIMAGEIO_YAML} found in {file_names}. (Looking for '{BIOIMAGEIO_YAML}'"
        + " or any of the alternative file names:"
        + f" {ALTERNATIVE_BIOIMAGEIO_YAML_NAMES}, or any file with an extension of"
        + f" those, e.g. 'anything.{BIOIMAGEIO_YAML}')."
    )
def is_valid_bioimageio_yaml_name(file_name: str) -> bool:
def is_valid_bioimageio_yaml_name(file_name: FileName) -> bool:
    """Tell whether `file_name` equals, or ends with "." plus, any of the
    names in `ALL_BIOIMAGEIO_YAML_NAMES`."""
    return any(
        file_name == known_name or file_name.endswith("." + known_name)
        for known_name in ALL_BIOIMAGEIO_YAML_NAMES
    )
def load_array( source: Union[Annotated[Union[bioimageio.spec._internal.url.HttpUrl, bioimageio.spec._internal.io.RelativeFilePath, Annotated[pathlib.Path, PathType(path_type='file'), FieldInfo(annotation=NoneType, required=True, title='FilePath')]], FieldInfo(annotation=NoneType, required=True, metadata=[_PydanticGeneralMetadata(union_mode='left_to_right')])], bioimageio.spec._internal.io.FileDescr, zipp.Path]) -> numpy.ndarray[tuple[int, ...], numpy.dtype[typing.Any]]:
def load_array(source: Union[FileSource, FileDescr, ZipPath]) -> NDArray[Any]:
    """Resolve `source` and load it with `numpy.load`.

    Pickled content is only accepted if `settings.allow_pickle` is set.
    """
    resolved_path = resolve(source).path
    with resolved_path.open(mode="rb") as stream:
        assert not isinstance(stream, io.TextIOWrapper)
        array = numpy.load(stream, allow_pickle=settings.allow_pickle)
        return array
def save_array( path: Union[pathlib.Path, zipp.Path], array: numpy.ndarray[tuple[int, ...], numpy.dtype[typing.Any]]) -> None:
def save_array(path: Union[Path, ZipPath], array: NDArray[Any]) -> None:
    """Write `array` to `path` with `numpy.save`.

    Pickling is only permitted if `settings.allow_pickle` is set.
    """
    with path.open(mode="wb") as stream:
        assert not isinstance(stream, io.TextIOWrapper)
        numpy.save(stream, array, allow_pickle=settings.allow_pickle)
class SpdxLicenseEntry(typing.TypedDict):
class SpdxLicenseEntry(TypedDict):
    """Typed shape of a single item in the `licenses` list of `SpdxLicenses`."""

    # NOTE(review): the per-field notes are inferred from the field names only;
    # confirm against static/spdx_licenses.json.
    isDeprecatedLicenseId: bool  # presumably: the identifier is deprecated
    isKnownByZenodo: bool  # presumably: Zenodo recognizes this license
    isOsiApproved: bool  # presumably: the license is OSI approved
    licenseId: str
    name: str
    reference: str
isDeprecatedLicenseId: bool
isKnownByZenodo: bool
isOsiApproved: bool
licenseId: str
name: str
reference: str
class SpdxLicenses(typing.TypedDict):
class SpdxLicenses(TypedDict):
    """Typed shape of the object returned by `get_spdx_licenses`."""

    licenseListVersion: str
    licenses: List[SpdxLicenseEntry]  # one `SpdxLicenseEntry` per license
    releaseDate: str  # typed as a plain string; not parsed into a date here
licenseListVersion: str
licenses: List[SpdxLicenseEntry]
releaseDate: str
def write_yaml( content: Union[YamlValue, Dict[str, YamlValue]], /, file: Union[Annotated[pathlib.Path, PathType(path_type='new')], Annotated[pathlib.Path, PathType(path_type='file')], IO[str], IO[bytes], zipp.Path]):
def write_yaml(
    content: Union[YamlValue, BioimageioYamlContent],
    /,
    file: Union[NewPath, FilePath, IO[str], IO[bytes], ZipPath],
):
    """Dump `content` as YAML to `file`.

    A `Path` is opened for writing as UTF-8 text (and closed afterwards); any
    other `file` is handed to the YAML dumper as is and is not closed here.
    """
    if isinstance(file, Path):
        with file.open("w", encoding="utf-8") as stream:
            _yaml_dump.dump(content, stream)
    else:
        _yaml_dump.dump(content, file)