Coverage for bioimageio/core/cli.py: 83%
368 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-01 09:51 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-01 09:51 +0000
1"""bioimageio CLI
3Note: Some docstrings use a hair space ' '
4 to place the added '(default: ...)' on a new line.
5"""
7import json
8import shutil
9import subprocess
10import sys
11from abc import ABC
12from argparse import RawTextHelpFormatter
13from difflib import SequenceMatcher
14from functools import cached_property
15from io import StringIO
16from pathlib import Path
17from pprint import pformat, pprint
18from typing import (
19 Any,
20 Dict,
21 Iterable,
22 List,
23 Literal,
24 Mapping,
25 Optional,
26 Sequence,
27 Set,
28 Tuple,
29 Type,
30 Union,
31)
import rich.console
import rich.markdown
34from loguru import logger
35from pydantic import AliasChoices, BaseModel, Field, model_validator
36from pydantic_settings import (
37 BaseSettings,
38 CliPositionalArg,
39 CliSettingsSource,
40 CliSubCommand,
41 JsonConfigSettingsSource,
42 PydanticBaseSettingsSource,
43 SettingsConfigDict,
44 YamlConfigSettingsSource,
45)
46from tqdm import tqdm
47from typing_extensions import assert_never
49from bioimageio.spec import (
50 AnyModelDescr,
51 InvalidDescr,
52 ResourceDescr,
53 load_description,
54 save_bioimageio_yaml_only,
55 settings,
56 update_format,
57 update_hashes,
58)
59from bioimageio.spec._internal.io import is_yaml_value
60from bioimageio.spec._internal.io_basics import ZipPath
61from bioimageio.spec._internal.io_utils import open_bioimageio_yaml
62from bioimageio.spec._internal.types import NotEmpty
63from bioimageio.spec.dataset import DatasetDescr
64from bioimageio.spec.model import ModelDescr, v0_4, v0_5
65from bioimageio.spec.notebook import NotebookDescr
66from bioimageio.spec.utils import download, ensure_description_is_model, write_yaml
68from .commands import WeightFormatArgAll, WeightFormatArgAny, package, test
69from .common import MemberId, SampleId, SupportedWeightsFormat
70from .digest_spec import get_member_ids, load_sample_for_model
71from .io import load_dataset_stat, save_dataset_stat, save_sample
72from .prediction import create_prediction_pipeline
73from .proc_setup import (
74 DatasetMeasure,
75 Measure,
76 MeasureValue,
77 StatsCalculator,
78 get_required_dataset_measures,
79)
80from .sample import Sample
81from .stat_measures import Stat
82from .utils import VERSION, compare
83from .weight_converters._add_weights import add_weights
# All accepted CLI spellings of the weight format option; they map to one field.
WEIGHT_FORMAT_ALIASES = AliasChoices(
    "weight-format",
    "weights-format",
    "weight_format",
    "weights_format",
)
class CmdBase(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True):
    # Base class for CLI subcommands; attribute docstrings become CLI help text.
    pass
class ArgMixin(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True):
    # Base class for argument mixins shared between subcommands.
    pass
class WithSummaryLogging(ArgMixin):
    # Mixin adding a `--summary` option and a helper to emit validation summaries.
    summary: Union[
        Literal["display"], Path, Sequence[Union[Literal["display"], Path]]
    ] = Field(
        "display",
        examples=[
            "display",
            Path("summary.md"),
            Path("bioimageio_summaries/"),
            ["display", Path("summary.md")],
        ],
    )
    """Display the validation summary or save it as JSON, Markdown or HTML.
    The format is chosen based on the suffix: `.json`, `.md`, `.html`.
    If a folder is given (path w/o suffix) the summary is saved in all formats.
    Choose/add `"display"` to render the validation summary to the terminal.
    """

    def log(self, descr: Union[ResourceDescr, InvalidDescr]):
        """Render and/or save the description's validation summary per `self.summary`."""
        _ = descr.validation_summary.log(self.summary)
class WithSource(ArgMixin):
    # Mixin adding the positional `source` argument and lazy description loading.
    source: CliPositionalArg[str]
    """Url/path to a (folder with a) bioimageio.yaml/rdf.yaml file
    or a bioimage.io resource identifier, e.g. 'affable-shark'"""

    @cached_property
    def descr(self):
        # Loaded lazily and cached; subclasses may override (e.g. ValidateFormatCmd
        # passes `perform_io_checks`).
        return load_description(self.source)

    @property
    def descr_id(self) -> str:
        """a more user-friendly description id
        (replacing legacy ids with their nicknames)
        """
        if isinstance(self.descr, InvalidDescr):
            # NOTE(review): `getattr(..., "id", ...)` falls back to `name` only if the
            # `id` attribute is missing entirely; an `id` that is None becomes "None".
            # Confirm whether falling back to `name` for `id is None` was intended.
            return str(getattr(self.descr, "id", getattr(self.descr, "name")))

        nickname = None
        if (
            isinstance(self.descr.config, v0_5.Config)
            and (bio_config := self.descr.config.bioimageio)
            and bio_config.model_extra is not None
        ):
            nickname = bio_config.model_extra.get("nickname")

        return str(nickname or self.descr.id or self.descr.name)
class ValidateFormatCmd(CmdBase, WithSource, WithSummaryLogging):
    """Validate the meta data format of a bioimageio resource."""

    perform_io_checks: bool = Field(
        settings.perform_io_checks, alias="perform-io-checks"
    )
    """Whether or not to perform validations that require downloading remote files.
    Note: Default value is set by `BIOIMAGEIO_PERFORM_IO_CHECKS` environment variable.
    """

    @cached_property
    def descr(self):
        # Override `WithSource.descr` to honor the `perform-io-checks` flag.
        return load_description(self.source, perform_io_checks=self.perform_io_checks)

    def run(self):
        """Log the validation summary and exit 0 if valid, 1 otherwise."""
        self.log(self.descr)
        sys.exit(
            0
            if self.descr.validation_summary.status in ("valid-format", "passed")
            else 1
        )
class TestCmd(CmdBase, WithSource, WithSummaryLogging):
    """Test a bioimageio resource (beyond meta data formatting)."""

    weight_format: WeightFormatArgAll = Field(
        "all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to limit testing to.

    (only relevant for model resources)"""

    devices: Optional[Union[str, Sequence[str]]] = None
    """Device(s) to use for testing"""

    runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field(
        "currently-active", alias="runtime-env"
    )
    """The python environment to run the tests in
    - `"currently-active"`: use active Python interpreter
    - `"as-described"`: generate a conda environment YAML file based on the model
    weights description.
    - A path to a conda environment YAML.
    Note: The `bioimageio.core` dependency will be added automatically if not present.
    """

    determinism: Literal["seed_only", "full"] = "seed_only"
    """Modes to improve reproducibility of test outputs."""

    stop_early: bool = Field(
        False, alias="stop-early", validation_alias=AliasChoices("stop-early", "x")
    )
    """Do not run further subtests after a failed one."""

    def run(self):
        """Run the resource tests and exit with the `test` command's return code.

        NOTE(review): `self.stop_early` is defined as a CLI flag but is not
        forwarded to `test(...)` below — confirm whether `test` accepts it and
        whether the flag is currently a no-op.
        """
        sys.exit(
            test(
                self.descr,
                weight_format=self.weight_format,
                devices=self.devices,
                summary=self.summary,
                runtime_env=self.runtime_env,
                determinism=self.determinism,
            )
        )
class PackageCmd(CmdBase, WithSource, WithSummaryLogging):
    """Save a resource's metadata with its associated files."""

    path: CliPositionalArg[Path]
    """The path to write the (zipped) package to.
    If it does not have a `.zip` suffix
    this command will save the package as an unzipped folder instead."""

    weight_format: WeightFormatArgAll = Field(
        "all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to include in the package (for model descriptions only)."""

    def run(self):
        """Package the resource and exit with the `package` command's return code.

        Raises:
            ValueError: if `source` describes an invalid resource.
        """
        if isinstance(self.descr, InvalidDescr):
            self.log(self.descr)
            raise ValueError(f"Invalid {self.descr.type} description.")

        sys.exit(
            package(
                self.descr,
                self.path,
                weight_format=self.weight_format,
            )
        )
def _get_stat(
    model_descr: AnyModelDescr,
    dataset: Iterable[Sample],
    dataset_length: int,
    stats_path: Path,
) -> Mapping[DatasetMeasure, MeasureValue]:
    """Load or compute the dataset measures required by `model_descr`.

    Returns an empty mapping if the model requires no dataset measures.
    If `stats_path` exists, precomputed measures are loaded from it;
    otherwise they are computed from `dataset` and saved to `stats_path`.

    Raises:
        ValueError: if `stats_path` exists but lacks a required measure.
    """
    # fix: the original called get_required_dataset_measures twice in a row
    req_dataset_meas, _ = get_required_dataset_measures(model_descr)
    if not req_dataset_meas:
        return {}

    if stats_path.exists():
        logger.info("loading precomputed dataset measures from {}", stats_path)
        stat = load_dataset_stat(stats_path)
        for m in req_dataset_meas:
            if m not in stat:
                raise ValueError(f"Missing {m} in {stats_path}")

        return stat

    stats_calc = StatsCalculator(req_dataset_meas)

    for sample in tqdm(
        dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample"
    ):
        stats_calc.update(sample)

    stat = stats_calc.finalize()
    save_dataset_stat(stat, stats_path)

    return stat
class UpdateCmdBase(CmdBase, WithSource, ABC):
    # Shared logic for the `update-format` and `update-hashes` subcommands;
    # subclasses provide the `updated` cached_property.
    output: Union[Literal["display", "stdout"], Path] = "display"
    """Output updated bioimageio.yaml to the terminal or write to a file.
    Notes:
    - `"display"`: Render to the terminal with syntax highlighting.
    - `"stdout"`: Write to sys.stdout without syntax highlighting.
    (More convenient for copying the updated bioimageio.yaml from the terminal.)
    """

    diff: Union[bool, Path] = Field(True, alias="diff")
    """Output a diff of original and updated bioimageio.yaml.
    If a given path has an `.html` extension, a standalone HTML file is written,
    otherwise the diff is saved in unified diff format (pure text).
    """

    exclude_unset: bool = Field(True, alias="exclude-unset")
    """Exclude fields that have not explicitly been set."""

    exclude_defaults: bool = Field(False, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly)."""

    @cached_property
    def updated(self) -> Union[ResourceDescr, InvalidDescr]:
        """The updated resource description (implemented by subclasses)."""
        raise NotImplementedError

    def run(self):
        """Compute the update, show/save a diff, and emit the updated YAML."""
        original_yaml = open_bioimageio_yaml(self.source).unparsed_content
        assert isinstance(original_yaml, str)
        stream = StringIO()

        save_bioimageio_yaml_only(
            self.updated,
            stream,
            exclude_unset=self.exclude_unset,
            exclude_defaults=self.exclude_defaults,
        )
        updated_yaml = stream.getvalue()

        diff = compare(
            original_yaml.split("\n"),
            updated_yaml.split("\n"),
            diff_format=(
                "html"
                if isinstance(self.diff, Path) and self.diff.suffix == ".html"
                else "unified"
            ),
        )

        if isinstance(self.diff, Path):
            _ = self.diff.write_text(diff, encoding="utf-8")
        elif self.diff:
            console = rich.console.Console()
            diff_md = f"## Diff\n\n````````diff\n{diff}\n````````"
            console.print(rich.markdown.Markdown(diff_md))

        if isinstance(self.output, Path):
            _ = self.output.write_text(updated_yaml, encoding="utf-8")
            logger.info(f"written updated description to {self.output}")
        elif self.output == "display":
            updated_md = f"## Updated bioimageio.yaml\n\n```yaml\n{updated_yaml}\n```"
            rich.console.Console().print(rich.markdown.Markdown(updated_md))
        elif self.output == "stdout":
            print(updated_yaml)
        else:
            assert_never(self.output)

        if isinstance(self.updated, InvalidDescr):
            logger.warning("Update resulted in invalid description")
            _ = self.updated.validation_summary.display()
class UpdateFormatCmd(UpdateCmdBase):
    """Update the metadata format to the latest format version."""

    exclude_defaults: bool = Field(True, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly).

    Note:
        The update process sets most unset fields explicitly with their default value.
    """

    perform_io_checks: bool = Field(
        settings.perform_io_checks, alias="perform-io-checks"
    )
    """Whether or not to attempt validation that may require file download.
    If `True` file hash values are added if not present."""

    @cached_property
    def updated(self):
        # Implements `UpdateCmdBase.updated`.
        return update_format(
            self.source,
            exclude_defaults=self.exclude_defaults,
            perform_io_checks=self.perform_io_checks,
        )
class UpdateHashesCmd(UpdateCmdBase):
    """Create a bioimageio.yaml description with updated file hashes."""

    @cached_property
    def updated(self):
        # Implements `UpdateCmdBase.updated`.
        return update_hashes(self.source)
class PredictCmd(CmdBase, WithSource):
    """Run inference on your data with a bioimage.io model."""

    inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = (
        "{input_id}/001.tif",
    )
    """Model input sample paths (for each input tensor)

    The input paths are expected to have shape...
    - (n_samples,) or (n_samples,1) for models expecting a single input tensor
    - (n_samples,) containing the substring '{input_id}', or
    - (n_samples, n_model_inputs) to provide each input tensor path explicitly.

    All substrings that are replaced by metadata from the model description:
    - '{model_id}'
    - '{input_id}'

    Example inputs to process sample 'a' and 'b'
    for a model expecting a 'raw' and a 'mask' input tensor:
    --inputs="[[\\"a_raw.tif\\",\\"a_mask.tif\\"],[\\"b_raw.tif\\",\\"b_mask.tif\\"]]"
    (Note that JSON double quotes need to be escaped.)

    Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file
    may provide the arguments, e.g.:
    ```yaml
    inputs:
    - [a_raw.tif, a_mask.tif]
    - [b_raw.tif, b_mask.tif]
    ```

    `.npy` and any file extension supported by imageio are supported.
    Available formats are listed at
    https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats.
    Some formats have additional dependencies.

    """

    outputs: Union[str, NotEmpty[Tuple[str, ...]]] = (
        "outputs_{model_id}/{output_id}/{sample_id}.tif"
    )
    """Model output path pattern (per output tensor)

    All substrings that are replaced:
    - '{model_id}' (from model description)
    - '{output_id}' (from model description)
    - '{sample_id}' (extracted from input paths)

    """

    overwrite: bool = False
    """allow overwriting existing output files"""

    blockwise: bool = False
    """process inputs blockwise"""

    stats: Path = Path("dataset_statistics.json")
    """path to dataset statistics
    (will be written if it does not exist,
    but the model requires statistical dataset measures)
    """

    preview: bool = False
    """preview which files would be processed
    and what outputs would be generated."""

    weight_format: WeightFormatArgAny = Field(
        "any",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to use."""

    example: bool = False
    """generate and run an example

    1. downloads example model inputs
    2. creates a `{model_id}_example` folder
    3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml`
    4. executes a preview dry-run
    5. executes prediction with example input

    """

    def _example(self):
        """Download example inputs, write a CLI config, and run an example prediction."""
        model_descr = ensure_description_is_model(self.descr)
        input_ids = get_member_ids(model_descr.inputs)
        example_inputs = (
            model_descr.sample_inputs
            if isinstance(model_descr, v0_4.ModelDescr)
            else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs]
        )
        if not example_inputs:
            raise ValueError(f"{self.descr_id} does not specify any example inputs.")

        inputs001: List[str] = []
        example_path = Path(f"{self.descr_id}_example")
        example_path.mkdir(exist_ok=True)

        # download each example input to <example_path>/<input_id>/001.<suffix>
        for t, src in zip(input_ids, example_inputs):
            local = download(src).path
            dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}")
            dst.parent.mkdir(parents=True, exist_ok=True)
            inputs001.append(dst.as_posix())
            if isinstance(local, Path):
                shutil.copy(local, dst)
            elif isinstance(local, ZipPath):
                _ = local.root.extract(local.at, path=dst)
            else:
                assert_never(local)

        inputs = [tuple(inputs001)]
        output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif"

        bioimageio_cli_path = example_path / YAML_FILE
        stats_file = "dataset_statistics.json"
        stats = (example_path / stats_file).as_posix()
        cli_example_args = dict(
            inputs=inputs,
            outputs=output_pattern,
            stats=stats_file,
            blockwise=self.blockwise,
        )
        assert is_yaml_value(cli_example_args)
        write_yaml(
            cli_example_args,
            bioimageio_cli_path,
        )

        yaml_file_content = None

        # escaped double quotes
        inputs_json = json.dumps(inputs)
        inputs_escaped = inputs_json.replace('"', r"\"")
        source_escaped = self.source.replace('"', r"\"")

        def get_example_command(preview: bool, escape: bool = False):
            """Build the `bioimageio predict` CLI invocation for the example."""
            q: str = '"' if escape else ""

            return [
                "bioimageio",
                "predict",
                # --no-preview not supported for py=3.8
                *(["--preview"] if preview else []),
                "--overwrite",
                *(["--blockwise"] if self.blockwise else []),
                f"--stats={q}{stats}{q}",
                f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}",
                f"--outputs={q}{output_pattern}{q}",
                f"{q}{source_escaped if escape else self.source}{q}",
            ]

        # a local CLI config file would override the example arguments;
        # move it out of the way temporarily
        if Path(YAML_FILE).exists():
            logger.info(
                "temporarily removing '{}' to execute example prediction", YAML_FILE
            )
            yaml_file_content = Path(YAML_FILE).read_bytes()
            Path(YAML_FILE).unlink()

        try:
            _ = subprocess.run(get_example_command(True), check=True)
            _ = subprocess.run(get_example_command(False), check=True)
        finally:
            if yaml_file_content is not None:
                _ = Path(YAML_FILE).write_bytes(yaml_file_content)
                logger.debug("restored '{}'", YAML_FILE)

        print(
            "🎉 Successfully ran example prediction!\n"
            + "To predict the example input using the CLI example config file"
            + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n"
            + f"$ cd {str(example_path)}\n"
            + f'$ bioimageio predict "{source_escaped}"\n\n'
            + "Alternatively run the following command"
            + " in the current working directory, not the example folder:\n$ "
            + " ".join(get_example_command(False, escape=True))
            + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)"
        )

    def run(self):
        """Run prediction (or the example workflow if `--example` is set)."""
        if self.example:
            return self._example()

        model_descr = ensure_description_is_model(self.descr)

        input_ids = get_member_ids(model_descr.inputs)
        output_ids = get_member_ids(model_descr.outputs)

        # required vs. all (incl. optional) input tensor ids
        minimum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
            if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional
        )
        maximum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
        )

        def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]:
            """Expand one input pattern into one concrete path per input tensor."""
            if isinstance(ipt, str):
                ipts = tuple(
                    ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids
                )
            else:
                ipts = tuple(
                    p.format(model_id=self.descr_id, input_id=t)
                    for t, p in zip(input_ids, ipt)
                )

            if len(set(ipts)) < len(ipts):
                if len(minimum_input_ids) == len(maximum_input_ids):
                    n = len(minimum_input_ids)
                else:
                    n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}"

                raise ValueError(
                    f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})"
                )

            if len(ipts) < len(minimum_input_ids):
                raise ValueError(
                    f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}"
                )

            if len(ipts) > len(maximum_input_ids):
                raise ValueError(
                    f"Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipts}"
                )

            return ipts

        inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)]

        sample_paths_in = [
            {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs
        ]

        sample_ids = _get_sample_ids(sample_paths_in)

        def expand_outputs():
            """Expand the output pattern(s) into one path per output tensor and sample."""
            if isinstance(self.outputs, str):
                outputs = [
                    tuple(
                        Path(
                            self.outputs.format(
                                model_id=self.descr_id, output_id=t, sample_id=s
                            )
                        )
                        for t in output_ids
                    )
                    for s in sample_ids
                ]
            else:
                outputs = [
                    tuple(
                        Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s))
                        for t, p in zip(output_ids, self.outputs)
                    )
                    for s in sample_ids
                ]

            for i, out in enumerate(outputs, start=1):
                if len(set(out)) < len(out):
                    raise ValueError(
                        f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})"
                    )

                if len(out) != len(output_ids):
                    raise ValueError(
                        f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}"
                    )

            return outputs

        outputs = expand_outputs()

        sample_paths_out = [
            {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs
        ]

        if not self.overwrite:
            for sample_paths in sample_paths_out:
                for p in sample_paths.values():
                    if p.exists():
                        raise FileExistsError(
                            f"{p} already exists. use --overwrite to (re-)write outputs anyway."
                        )
        if self.preview:
            print("🛈 bioimageio prediction preview structure:")
            pprint(
                {
                    "{sample_id}": dict(
                        inputs={"{input_id}": "<input path>"},
                        outputs={"{output_id}": "<output path>"},
                    )
                }
            )
            print("🔎 bioimageio prediction preview output:")
            pprint(
                {
                    s: dict(
                        inputs={t: p.as_posix() for t, p in sp_in.items()},
                        outputs={t: p.as_posix() for t, p in sp_out.items()},
                    )
                    for s, sp_in, sp_out in zip(
                        sample_ids, sample_paths_in, sample_paths_out
                    )
                }
            )
            return

        def input_dataset(stat: Stat):
            """Yield one loaded model sample per input path set."""
            for s, sp_in in zip(sample_ids, sample_paths_in):
                yield load_sample_for_model(
                    model=model_descr,
                    paths=sp_in,
                    stat=stat,
                    sample_id=s,
                )

        # precompute (or load) the dataset measures the model requires
        stat: Dict[Measure, MeasureValue] = dict(
            _get_stat(
                model_descr, input_dataset({}), len(sample_ids), self.stats
            ).items()
        )

        pp = create_prediction_pipeline(
            model_descr,
            weight_format=None if self.weight_format == "any" else self.weight_format,
        )
        predict_method = (
            pp.predict_sample_with_blocking
            if self.blockwise
            else pp.predict_sample_without_blocking
        )

        for sample_in, sp_out in tqdm(
            zip(input_dataset(dict(stat)), sample_paths_out),
            total=len(inputs),
            desc=f"predict with {self.descr_id}",
            unit="sample",
        ):
            sample_out = predict_method(sample_in)
            save_sample(sp_out, sample_out)
class AddWeightsCmd(CmdBase, WithSource, WithSummaryLogging):
    # Convert available model weights to additional formats and repackage.
    output: CliPositionalArg[Path]
    """The path to write the updated model package to."""

    source_format: Optional[SupportedWeightsFormat] = Field(None, alias="source-format")
    """Exclusively use these weights to convert to other formats."""

    target_format: Optional[SupportedWeightsFormat] = Field(None, alias="target-format")
    """Exclusively add this weight format."""

    verbose: bool = False
    """Log more (error) output."""

    def run(self):
        """Convert/add weights and log the updated model's validation summary.

        Raises:
            TypeError: for model descriptions in the legacy 0.4 format.
        """
        model_descr = ensure_description_is_model(self.descr)
        if isinstance(model_descr, v0_4.ModelDescr):
            raise TypeError(
                f"model format {model_descr.format_version} not supported."
                + " Please update the model first."
            )
        updated_model_descr = add_weights(
            model_descr,
            output_path=self.output,
            source_format=self.source_format,
            target_format=self.target_format,
            verbose=self.verbose,
        )
        if updated_model_descr is None:
            # nothing was added — presumably `add_weights` logged why; TODO confirm
            return

        self.log(updated_model_descr)
# Names of local CLI config files (looked up in the current working directory;
# see `Bioimageio.model_config` and `PredictCmd._example`).
JSON_FILE = "bioimageio-cli.json"
YAML_FILE = "bioimageio-cli.yaml"
class Bioimageio(
    BaseSettings,
    cli_implicit_flags=True,
    cli_parse_args=True,
    cli_prog_name="bioimageio",
    cli_use_class_docs_for_groups=True,
    use_attribute_docstrings=True,
):
    """bioimageio - CLI for bioimage.io resources 🦒"""

    # arguments may also be provided via local bioimageio-cli.{json,yaml} files
    model_config = SettingsConfigDict(
        json_file=JSON_FILE,
        yaml_file=YAML_FILE,
    )

    validate_format: CliSubCommand[ValidateFormatCmd] = Field(alias="validate-format")
    "Check a resource's metadata format"

    test: CliSubCommand[TestCmd]
    "Test a bioimageio resource (beyond meta data formatting)"

    package: CliSubCommand[PackageCmd]
    "Package a resource"

    predict: CliSubCommand[PredictCmd]
    "Predict with a model resource"

    update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format")
    """Update the metadata format"""

    update_hashes: CliSubCommand[UpdateHashesCmd] = Field(alias="update-hashes")
    """Create a bioimageio.yaml description with updated file hashes."""

    add_weights: CliSubCommand[AddWeightsCmd] = Field(alias="add-weights")
    """Add additional weights to the model descriptions converted from available
    formats to improve deployability."""

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """Settings precedence: CLI > init kwargs > YAML file > JSON file.

        NOTE(review): `env_settings`, `dotenv_settings` and `file_secret_settings`
        are not returned, i.e. environment-based sources are dropped — confirm
        this is intentional.
        """
        cli: CliSettingsSource[BaseSettings] = CliSettingsSource(
            settings_cls,
            cli_parse_args=True,
            formatter_class=RawTextHelpFormatter,
        )
        sys_args = pformat(sys.argv)
        logger.info("starting CLI with arguments:\n{}", sys_args)
        return (
            cli,
            init_settings,
            YamlConfigSettingsSource(settings_cls),
            JsonConfigSettingsSource(settings_cls),
        )

    @model_validator(mode="before")
    @classmethod
    def _log(cls, data: Any):
        # Log the raw (pre-validation) CLI input, omitting unset values.
        logger.info(
            "loaded CLI input:\n{}",
            pformat({k: v for k, v in data.items() if v is not None}),
        )
        return data

    def run(self):
        """Dispatch to whichever single subcommand was provided."""
        logger.info(
            "executing CLI command:\n{}",
            pformat({k: v for k, v in self.model_dump().items() if v is not None}),
        )
        cmd = (
            self.add_weights
            or self.package
            or self.predict
            or self.test
            or self.update_format
            or self.update_hashes
            or self.validate_format
        )
        assert cmd is not None
        cmd.run()
assert isinstance(Bioimageio.__doc__, str)
# Append version information to the CLI help text.
# NOTE(review): both library version lines interpolate `VERSION` from
# bioimageio.core; the bioimageio.spec line presumably should report
# bioimageio.spec's own version — confirm.
Bioimageio.__doc__ += f"""

library versions:
  bioimageio.core {VERSION}
  bioimageio.spec {VERSION}

spec format versions:
        model RDF {ModelDescr.implemented_format_version}
      dataset RDF {DatasetDescr.implemented_format_version}
     notebook RDF {NotebookDescr.implemented_format_version}

"""
875def _get_sample_ids(
876 input_paths: Sequence[Mapping[MemberId, Path]],
877) -> Sequence[SampleId]:
878 """Get sample ids for given input paths, based on the common path per sample.
880 Falls back to sample01, samle02, etc..."""
882 matcher = SequenceMatcher()
884 def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]:
885 """extract a common sequence from multiple sequences
886 (order sensitive; strips whitespace and slashes)
887 """
888 common = seqs[0]
890 for seq in seqs[1:]:
891 if not seq:
892 continue
893 matcher.set_seqs(common, seq)
894 i, _, size = matcher.find_longest_match()
895 common = common[i : i + size]
897 if isinstance(common, str):
898 common = common.strip().strip("/")
899 else:
900 common = [cs for c in common if (cs := c.strip().strip("/"))]
902 if not common:
903 raise ValueError(f"failed to find common sequence for {seqs}")
905 return common
907 def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]:
908 """get a shorter sequence whose entries are still unique
909 (order sensitive, not minimal sequence)
910 """
911 min_seq_len = min(len(s) for s in seqs)
912 # cut from the start
913 for start in range(min_seq_len - 1, -1, -1):
914 shortened = [s[start:] for s in seqs]
915 if len(set(shortened)) == len(seqs):
916 min_seq_len -= start
917 break
918 else:
919 seen: Set[Sequence[str]] = set()
920 dupes = [s for s in seqs if s in seen or seen.add(s)]
921 raise ValueError(f"Found duplicate entries {dupes}")
923 # cut from the end
924 for end in range(min_seq_len - 1, 1, -1):
925 shortened = [s[:end] for s in shortened]
926 if len(set(shortened)) == len(seqs):
927 break
929 return shortened
931 full_tensor_ids = [
932 sorted(
933 p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values()
934 )
935 for input_sample_paths in input_paths
936 ]
937 try:
938 long_sample_ids = [get_common_seq(t) for t in full_tensor_ids]
939 sample_ids = get_shorter_diff(long_sample_ids)
940 except ValueError as e:
941 raise ValueError(f"failed to extract sample ids: {e}")
943 return sample_ids