Coverage for bioimageio/core/cli.py: 83%

367 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 13:47 +0000

1"""bioimageio CLI 

2 

3Note: Some docstrings use a hair space ' ' 

4 to place the added '(default: ...)' on a new line. 

5""" 

6 

import json
import shutil
import subprocess
import sys
from abc import ABC
from argparse import RawTextHelpFormatter
from difflib import SequenceMatcher
from functools import cached_property
from io import StringIO
from pathlib import Path
from pprint import pformat, pprint
from typing import (
    Any,
    Dict,
    Iterable,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    Union,
)

import rich.markdown
from loguru import logger
from pydantic import AliasChoices, BaseModel, Field, model_validator
from pydantic_settings import (
    BaseSettings,
    CliPositionalArg,
    CliSettingsSource,
    CliSubCommand,
    JsonConfigSettingsSource,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
    YamlConfigSettingsSource,
)
from tqdm import tqdm
from typing_extensions import assert_never

from bioimageio.spec import (
    AnyModelDescr,
    InvalidDescr,
    ResourceDescr,
    load_description,
    save_bioimageio_yaml_only,
    settings,
    update_format,
    update_hashes,
)
from bioimageio.spec import __version__ as SPEC_VERSION
from bioimageio.spec._internal.io import is_yaml_value
from bioimageio.spec._internal.io_utils import open_bioimageio_yaml
from bioimageio.spec._internal.types import FormatVersionPlaceholder, NotEmpty
from bioimageio.spec.dataset import DatasetDescr
from bioimageio.spec.model import ModelDescr, v0_4, v0_5
from bioimageio.spec.notebook import NotebookDescr
from bioimageio.spec.utils import ensure_description_is_model, get_reader, write_yaml

from .commands import WeightFormatArgAll, WeightFormatArgAny, package, test
from .common import MemberId, SampleId, SupportedWeightsFormat
from .digest_spec import get_member_ids, load_sample_for_model
from .io import load_dataset_stat, save_dataset_stat, save_sample
from .prediction import create_prediction_pipeline
from .proc_setup import (
    DatasetMeasure,
    Measure,
    MeasureValue,
    StatsCalculator,
    get_required_dataset_measures,
)
from .sample import Sample
from .stat_measures import Stat
from .utils import VERSION, compare
from .weight_converters._add_weights import add_weights

83 

# Accepted spellings of the weight format option (singular/plural,
# hyphen/underscore); used as `validation_alias` by several commands.
WEIGHT_FORMAT_ALIASES = AliasChoices(
    "weight-format", "weights-format", "weight_format", "weights_format"
)

90 

91 

# Common base of all CLI sub-commands: attribute docstrings become field
# descriptions and boolean fields are exposed as implicit flags.
class CmdBase(
    BaseModel,
    use_attribute_docstrings=True,
    cli_implicit_flags=True,
):
    pass

94 

95 

# Base for reusable argument groups that are mixed into several commands;
# configured like `CmdBase` (attribute docstrings, implicit flags).
class ArgMixin(
    BaseModel,
    use_attribute_docstrings=True,
    cli_implicit_flags=True,
):
    pass

98 

99 

class WithSummaryLogging(ArgMixin):
    summary: Union[
        Literal["display"],
        Path,
        Sequence[Union[Literal["display"], Path]],
    ] = Field(
        "display",
        examples=[
            "display",
            Path("summary.md"),
            Path("bioimageio_summaries/"),
            ["display", Path("summary.md")],
        ],
    )
    """Display the validation summary or save it as JSON, Markdown or HTML.
    The format is chosen based on the suffix: `.json`, `.md`, `.html`.
    If a folder is given (path w/o suffix) the summary is saved in all formats.
    Choose/add `"display"` to render the validation summary to the terminal.
    """

    def log(self, descr: Union[ResourceDescr, InvalidDescr]):
        # hand the configured target(s) to the validation summary of `descr`
        validation_summary = descr.validation_summary
        _ = validation_summary.log(self.summary)

120 

121 

class WithSource(ArgMixin):
    source: CliPositionalArg[str]
    """Url/path to a (folder with a) bioimageio.yaml/rdf.yaml file
    or a bioimage.io resource identifier, e.g. 'affable-shark'"""

    @cached_property
    def descr(self):
        # loaded once per command instance and cached
        return load_description(self.source)

    @property
    def descr_id(self) -> str:
        """a more user-friendly description id
        (replacing legacy ids with their nicknames)

        Raises:
            AttributeError: for an invalid description that has neither
                an `id` nor a `name`.
        """
        if isinstance(self.descr, InvalidDescr):
            # Look up `name` lazily: as an eagerly evaluated `getattr` default
            # it raised AttributeError for a missing `name`
            # even if an `id` was available.
            invalid_id = getattr(self.descr, "id", None)
            if invalid_id is None:
                invalid_id = getattr(self.descr, "name")

            return str(invalid_id)

        nickname = None
        if (
            isinstance(self.descr.config, v0_5.Config)
            and (bio_config := self.descr.config.bioimageio)
            and bio_config.model_extra is not None
        ):
            nickname = bio_config.model_extra.get("nickname")

        # first truthy of: nickname, id, name
        return str(nickname or self.descr.id or self.descr.name)

148 

149 

class ValidateFormatCmd(CmdBase, WithSource, WithSummaryLogging):
    """Validate the meta data format of a bioimageio resource."""

    perform_io_checks: bool = Field(
        settings.perform_io_checks,
        alias="perform-io-checks",
    )
    """Wether or not to perform validations that requires downloading remote files.
    Note: Default value is set by `BIOIMAGEIO_PERFORM_IO_CHECKS` environment variable.
    """

    @cached_property
    def descr(self):
        # overrides `WithSource.descr` to forward the io-check setting
        return load_description(self.source, perform_io_checks=self.perform_io_checks)

    def run(self):
        # log the summary, then exit with 0 only for a valid/passed status
        self.log(self.descr)
        status = self.descr.validation_summary.status
        exit_code = 0 if status in ("valid-format", "passed") else 1
        sys.exit(exit_code)

171 

172 

class TestCmd(CmdBase, WithSource, WithSummaryLogging):
    """Test a bioimageio resource (beyond meta data formatting)."""

    weight_format: WeightFormatArgAll = Field(
        "all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to limit testing to.

    (only relevant for model resources)"""

    devices: Optional[Union[str, Sequence[str]]] = None
    """Device(s) to use for testing"""

    runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field(
        "currently-active", alias="runtime-env"
    )
    """The python environment to run the tests in
      - `"currently-active"`: use active Python interpreter
      - `"as-described"`: generate a conda environment YAML file based on the model
        weights description.
      - A path to a conda environment YAML.
        Note: The `bioimageio.core` dependency will be added automatically if not present.
    """

    determinism: Literal["seed_only", "full"] = "seed_only"
    """Modes to improve reproducibility of test outputs."""

    stop_early: bool = Field(
        False, alias="stop-early", validation_alias=AliasChoices("stop-early", "x")
    )
    """Do not run further subtests after a failed one."""

    format_version: Union[FormatVersionPlaceholder, str] = Field(
        "discover", alias="format-version"
    )
    """The format version to use for testing.
      - 'latest': Use the latest implemented format version for the given resource type (may trigger auto updating)
      - 'discover': Use the format version as described in the resource description
      - '0.4', '0.5', ...: Use the specified format version (may trigger auto updating)
    """

    def run(self):
        # NOTE(review): `stop_early` is declared above but not forwarded
        # to `test()` here -- confirm whether that is intended.
        exit_code = test(
            self.descr,
            weight_format=self.weight_format,
            devices=self.devices,
            summary=self.summary,
            runtime_env=self.runtime_env,
            determinism=self.determinism,
            format_version=self.format_version,
        )
        # the return value of `test()` becomes the process exit status
        sys.exit(exit_code)

228 

229 

class PackageCmd(CmdBase, WithSource, WithSummaryLogging):
    """Save a resource's metadata with its associated files."""

    path: CliPositionalArg[Path]
    """The path to write the (zipped) package to.
    If it does not have a `.zip` suffix
    this command will save the package as an unzipped folder instead."""

    weight_format: WeightFormatArgAll = Field(
        "all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to include in the package (for model descriptions only)."""

    def run(self):
        descr = self.descr
        if isinstance(descr, InvalidDescr):
            # log the validation summary before refusing to package
            self.log(descr)
            raise ValueError(f"Invalid {descr.type} description.")

        # the return value of `package()` becomes the process exit status
        exit_code = package(descr, self.path, weight_format=self.weight_format)
        sys.exit(exit_code)

257 

258 

def _get_stat(
    model_descr: AnyModelDescr,
    dataset: Iterable[Sample],
    dataset_length: int,
    stats_path: Path,
) -> Mapping[DatasetMeasure, MeasureValue]:
    """Load or compute the dataset measures required by `model_descr`.

    Args:
        model_descr: Model whose required dataset measures are determined.
        dataset: Samples to compute the measures from
            (only iterated if `stats_path` does not exist).
        dataset_length: Number of samples; only used for the progress bar.
        stats_path: File to load precomputed measures from if it exists;
            otherwise freshly computed measures are saved to it.

    Returns:
        The dataset measures; empty if the model requires none.

    Raises:
        ValueError: If `stats_path` exists but lacks a required measure.
    """
    # determine the required measures once (this used to be computed twice)
    req_dataset_meas, _ = get_required_dataset_measures(model_descr)
    if not req_dataset_meas:
        return {}

    if stats_path.exists():
        logger.info("loading precomputed dataset measures from {}", stats_path)
        stat = load_dataset_stat(stats_path)
        for m in req_dataset_meas:
            if m not in stat:
                raise ValueError(f"Missing {m} in {stats_path}")

        return stat

    stats_calc = StatsCalculator(req_dataset_meas)

    for sample in tqdm(
        dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample"
    ):
        stats_calc.update(sample)

    stat = stats_calc.finalize()
    # save the computed measures to `stats_path` (loaded from there if it exists)
    save_dataset_stat(stat, stats_path)

    return stat

291 

292 

class UpdateCmdBase(CmdBase, WithSource, ABC):
    output: Union[Literal["display", "stdout"], Path] = "display"
    """Output updated bioimageio.yaml to the terminal or write to a file.
    Notes:
    - `"display"`: Render to the terminal with syntax highlighting.
    - `"stdout"`: Write to sys.stdout without syntax highligthing.
      (More convenient for copying the updated bioimageio.yaml from the terminal.)
    """

    diff: Union[bool, Path] = Field(True, alias="diff")
    """Output a diff of original and updated bioimageio.yaml.
    If a given path has an `.html` extension, a standalone HTML file is written,
    otherwise the diff is saved in unified diff format (pure text).
    """

    exclude_unset: bool = Field(True, alias="exclude-unset")
    """Exclude fields that have not explicitly be set."""

    exclude_defaults: bool = Field(False, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly)."""

    @cached_property
    def updated(self) -> Union[ResourceDescr, InvalidDescr]:
        # to be provided by the concrete update command
        raise NotImplementedError

    def run(self):
        original_yaml = open_bioimageio_yaml(self.source).unparsed_content
        assert isinstance(original_yaml, str)

        # serialize the updated description into memory
        buffer = StringIO()
        save_bioimageio_yaml_only(
            self.updated,
            buffer,
            exclude_unset=self.exclude_unset,
            exclude_defaults=self.exclude_defaults,
        )
        updated_yaml = buffer.getvalue()

        # an '.html' diff path selects the HTML diff format
        wants_html = isinstance(self.diff, Path) and self.diff.suffix == ".html"
        diff = compare(
            original_yaml.split("\n"),
            updated_yaml.split("\n"),
            diff_format="html" if wants_html else "unified",
        )

        if isinstance(self.diff, Path):
            _ = self.diff.write_text(diff, encoding="utf-8")
        elif self.diff:
            diff_md = f"## Diff\n\n````````diff\n{diff}\n````````"
            rich.console.Console().print(rich.markdown.Markdown(diff_md))

        if isinstance(self.output, Path):
            _ = self.output.write_text(updated_yaml, encoding="utf-8")
            logger.info(f"written updated description to {self.output}")
        elif self.output == "display":
            updated_md = f"## Updated bioimageio.yaml\n\n```yaml\n{updated_yaml}\n```"
            rich.console.Console().print(rich.markdown.Markdown(updated_md))
        elif self.output == "stdout":
            print(updated_yaml)
        else:
            assert_never(self.output)

        if isinstance(self.updated, InvalidDescr):
            logger.warning("Update resulted in invalid description")
            _ = self.updated.validation_summary.display()

362 

363 

class UpdateFormatCmd(UpdateCmdBase):
    """Update the metadata format to the latest format version."""

    exclude_defaults: bool = Field(True, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly).

    Note:
        The update process sets most unset fields explicitly with their default value.
    """

    perform_io_checks: bool = Field(
        settings.perform_io_checks,
        alias="perform-io-checks",
    )
    """Wether or not to attempt validation that may require file download.
    If `True` file hash values are added if not present."""

    @cached_property
    def updated(self):
        # forward the command's options to `update_format`
        options = dict(
            exclude_defaults=self.exclude_defaults,
            perform_io_checks=self.perform_io_checks,
        )
        return update_format(self.source, **options)

387 

388 

class UpdateHashesCmd(UpdateCmdBase):
    """Create a bioimageio.yaml description with updated file hashes."""

    @cached_property
    def updated(self):
        # the description returned by `update_hashes` for the given source
        with_updated_hashes = update_hashes(self.source)
        return with_updated_hashes

395 

396 

class PredictCmd(CmdBase, WithSource):
    """Run inference on your data with a bioimage.io model."""

    inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = (
        "{input_id}/001.tif",
    )
    """Model input sample paths (for each input tensor)

    The input paths are expected to have shape...
    - (n_samples,) or (n_samples,1) for models expecting a single input tensor
    - (n_samples,) containing the substring '{input_id}', or
    - (n_samples, n_model_inputs) to provide each input tensor path explicitly.

    All substrings that are replaced by metadata from the model description:
    - '{model_id}'
    - '{input_id}'

    Example inputs to process sample 'a' and 'b'
    for a model expecting a 'raw' and a 'mask' input tensor:
    --inputs="[[\\"a_raw.tif\\",\\"a_mask.tif\\"],[\\"b_raw.tif\\",\\"b_mask.tif\\"]]"
    (Note that JSON double quotes need to be escaped.)

    Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file
    may provide the arguments, e.g.:
    ```yaml
    inputs:
    - [a_raw.tif, a_mask.tif]
    - [b_raw.tif, b_mask.tif]
    ```

    `.npy` and any file extension supported by imageio are supported.
    Aavailable formats are listed at
    https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats.
    Some formats have additional dependencies.

     
    """

    outputs: Union[str, NotEmpty[Tuple[str, ...]]] = (
        "outputs_{model_id}/{output_id}/{sample_id}.tif"
    )
    """Model output path pattern (per output tensor)

    All substrings that are replaced:
    - '{model_id}' (from model description)
    - '{output_id}' (from model description)
    - '{sample_id}' (extracted from input paths)

     
    """

    overwrite: bool = False
    """allow overwriting existing output files"""

    blockwise: bool = False
    """process inputs blockwise"""

    stats: Path = Path("dataset_statistics.json")
    """path to dataset statistics
    (will be written if it does not exist,
    but the model requires statistical dataset measures)
     """

    preview: bool = False
    """preview which files would be processed
    and what outputs would be generated."""

    weight_format: WeightFormatArgAny = Field(
        "any",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to use."""

    example: bool = False
    """generate and run an example

    1. downloads example model inputs
    2. creates a `{model_id}_example` folder
    3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml`
    4. executes a preview dry-run
    5. executes prediction with example input

     
    """

    def _example(self):
        """Create an example folder, then run a preview and a prediction on it."""
        model_descr = ensure_description_is_model(self.descr)
        input_ids = get_member_ids(model_descr.inputs)
        example_inputs = (
            model_descr.sample_inputs
            if isinstance(model_descr, v0_4.ModelDescr)
            else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs]
        )
        if not example_inputs:
            raise ValueError(f"{self.descr_id} does not specify any example inputs.")

        inputs001: List[str] = []
        example_path = Path(f"{self.descr_id}_example")
        example_path.mkdir(exist_ok=True)

        # copy each example input to '<example folder>/<input id>/001<suffix>'
        for t, src in zip(input_ids, example_inputs):
            reader = get_reader(src)
            dst = Path(f"{example_path}/{t}/001{reader.suffix}")
            dst.parent.mkdir(parents=True, exist_ok=True)
            inputs001.append(dst.as_posix())
            with dst.open("wb") as f:
                shutil.copyfileobj(reader, f)

        inputs = [inputs001]
        output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif"

        bioimageio_cli_path = example_path / YAML_FILE
        stats_file = "dataset_statistics.json"
        # `stats` is used in the commands run from the current directory;
        # the written config file holds the bare file name instead.
        stats = (example_path / stats_file).as_posix()
        cli_example_args = dict(
            inputs=inputs,
            outputs=output_pattern,
            stats=stats_file,
            blockwise=self.blockwise,
        )
        assert is_yaml_value(cli_example_args), cli_example_args
        write_yaml(
            cli_example_args,
            bioimageio_cli_path,
        )

        yaml_file_content = None

        # escaped double quotes
        inputs_json = json.dumps(inputs)
        inputs_escaped = inputs_json.replace('"', r"\"")
        source_escaped = self.source.replace('"', r"\"")

        def get_example_command(preview: bool, escape: bool = False):
            # `escape=True` yields the quoted variant that is printed for the user
            q: str = '"' if escape else ""

            return [
                "bioimageio",
                "predict",
                # --no-preview not supported for py=3.8
                *(["--preview"] if preview else []),
                "--overwrite",
                *(["--blockwise"] if self.blockwise else []),
                f"--stats={q}{stats}{q}",
                f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}",
                f"--outputs={q}{output_pattern}{q}",
                f"{q}{source_escaped if escape else self.source}{q}",
            ]

        # move a CLI config file in the current directory out of the way
        # while the example commands run; it is restored afterwards
        if Path(YAML_FILE).exists():
            logger.info(
                "temporarily removing '{}' to execute example prediction", YAML_FILE
            )
            yaml_file_content = Path(YAML_FILE).read_bytes()
            Path(YAML_FILE).unlink()

        try:
            _ = subprocess.run(get_example_command(True), check=True)
            _ = subprocess.run(get_example_command(False), check=True)
        finally:
            if yaml_file_content is not None:
                _ = Path(YAML_FILE).write_bytes(yaml_file_content)
                logger.debug("restored '{}'", YAML_FILE)

        print(
            "🎉 Sucessfully ran example prediction!\n"
            + "To predict the example input using the CLI example config file"
            + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n"
            + f"$ cd {str(example_path)}\n"
            + f'$ bioimageio predict "{source_escaped}"\n\n'
            + "Alternatively run the following command"
            + " in the current workind directory, not the example folder:\n$ "
            + " ".join(get_example_command(False, escape=True))
            + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)"
        )

    def run(self):
        """Expand input/output path patterns, then preview or run the prediction.

        Raises:
            ValueError: for ambiguous, missing or surplus input/output paths.
            FileExistsError: if an output exists and `overwrite` is not set.
        """
        if self.example:
            return self._example()

        model_descr = ensure_description_is_model(self.descr)

        input_ids = get_member_ids(model_descr.inputs)
        output_ids = get_member_ids(model_descr.outputs)

        # ids of the non-optional inputs / of all inputs
        minimum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
            if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional
        )
        maximum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
        )

        def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]:
            if isinstance(ipt, str):
                ipts = tuple(
                    ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids
                )
            else:
                # Check for surplus paths before zipping: `zip` would silently
                # drop them, which made the former check after it unreachable.
                if len(ipt) > len(maximum_input_ids):
                    raise ValueError(
                        f"[input sample #{i}] Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipt}"
                    )

                ipts = tuple(
                    p.format(model_id=self.descr_id, input_id=t)
                    for t, p in zip(input_ids, ipt)
                )

            if len(set(ipts)) < len(ipts):
                if len(minimum_input_ids) == len(maximum_input_ids):
                    n = len(minimum_input_ids)
                else:
                    n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}"

                raise ValueError(
                    f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})"
                )

            if len(ipts) < len(minimum_input_ids):
                raise ValueError(
                    f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}"
                )

            return ipts

        inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)]

        sample_paths_in = [
            {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs
        ]

        sample_ids = _get_sample_ids(sample_paths_in)

        def expand_outputs():
            if isinstance(self.outputs, str):
                outputs = [
                    tuple(
                        Path(
                            self.outputs.format(
                                model_id=self.descr_id, output_id=t, sample_id=s
                            )
                        )
                        for t in output_ids
                    )
                    for s in sample_ids
                ]
            else:
                # Surplus patterns would be dropped silently by `zip` below
                # (too few patterns are caught by the length check further down).
                if len(self.outputs) > len(output_ids):
                    raise ValueError(
                        f"Expected {len(output_ids)} output path patterns {output_ids}, got {self.outputs}"
                    )

                outputs = [
                    tuple(
                        Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s))
                        for t, p in zip(output_ids, self.outputs)
                    )
                    for s in sample_ids
                ]

            for i, out in enumerate(outputs, start=1):
                if len(set(out)) < len(out):
                    raise ValueError(
                        f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})"
                    )

                if len(out) != len(output_ids):
                    raise ValueError(
                        f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}"
                    )

            return outputs

        outputs = expand_outputs()

        sample_paths_out = [
            {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs
        ]

        if not self.overwrite:
            for sample_paths in sample_paths_out:
                for p in sample_paths.values():
                    if p.exists():
                        raise FileExistsError(
                            f"{p} already exists. use --overwrite to (re-)write outputs anyway."
                        )
        if self.preview:
            print("🛈 bioimageio prediction preview structure:")
            pprint(
                {
                    "{sample_id}": dict(
                        inputs={"{input_id}": "<input path>"},
                        outputs={"{output_id}": "<output path>"},
                    )
                }
            )
            print("🔎 bioimageio prediction preview output:")
            pprint(
                {
                    s: dict(
                        inputs={t: p.as_posix() for t, p in sp_in.items()},
                        outputs={t: p.as_posix() for t, p in sp_out.items()},
                    )
                    for s, sp_in, sp_out in zip(
                        sample_ids, sample_paths_in, sample_paths_out
                    )
                }
            )
            return

        def input_dataset(stat: Stat):
            for s, sp_in in zip(sample_ids, sample_paths_in):
                yield load_sample_for_model(
                    model=model_descr,
                    paths=sp_in,
                    stat=stat,
                    sample_id=s,
                )

        # dataset measures are loaded/computed from samples loaded w/o any stat
        stat: Dict[Measure, MeasureValue] = dict(
            _get_stat(
                model_descr, input_dataset({}), len(sample_ids), self.stats
            ).items()
        )

        pp = create_prediction_pipeline(
            model_descr,
            weight_format=None if self.weight_format == "any" else self.weight_format,
        )
        predict_method = (
            pp.predict_sample_with_blocking
            if self.blockwise
            else pp.predict_sample_without_blocking
        )

        for sample_in, sp_out in tqdm(
            zip(input_dataset(dict(stat)), sample_paths_out),
            total=len(inputs),
            desc=f"predict with {self.descr_id}",
            unit="sample",
        ):
            sample_out = predict_method(sample_in)
            save_sample(sp_out, sample_out)

739 

740 

class AddWeightsCmd(CmdBase, WithSource, WithSummaryLogging):
    output: CliPositionalArg[Path]
    """The path to write the updated model package to."""

    source_format: Optional[SupportedWeightsFormat] = Field(None, alias="source-format")
    """Exclusively use these weights to convert to other formats."""

    target_format: Optional[SupportedWeightsFormat] = Field(None, alias="target-format")
    """Exclusively add this weight format."""

    verbose: bool = False
    """Log more (error) output."""

    def run(self):
        model_descr = ensure_description_is_model(self.descr)
        if isinstance(model_descr, v0_4.ModelDescr):
            # v0_4 model descriptions are rejected; they need updating first
            raise TypeError(
                f"model format {model_descr.format_version} not supported."
                + " Please update the model first."
            )

        updated_model_descr = add_weights(
            model_descr,
            output_path=self.output,
            source_format=self.source_format,
            target_format=self.target_format,
            verbose=self.verbose,
        )
        # only log a summary if `add_weights` returned a description
        if updated_model_descr is not None:
            self.log(updated_model_descr)

772 

773 

# File names of the optional CLI config files (JSON and YAML flavor)
# that are registered as settings sources of the `Bioimageio` CLI.
JSON_FILE, YAML_FILE = "bioimageio-cli.json", "bioimageio-cli.yaml"

776 

777 

class Bioimageio(
    BaseSettings,
    cli_implicit_flags=True,
    cli_parse_args=True,
    cli_prog_name="bioimageio",
    cli_use_class_docs_for_groups=True,
    use_attribute_docstrings=True,
):
    """bioimageio - CLI for bioimage.io resources 🦒"""

    model_config = SettingsConfigDict(
        json_file=JSON_FILE,
        yaml_file=YAML_FILE,
    )

    validate_format: CliSubCommand[ValidateFormatCmd] = Field(alias="validate-format")
    "Check a resource's metadata format"

    test: CliSubCommand[TestCmd]
    "Test a bioimageio resource (beyond meta data formatting)"

    package: CliSubCommand[PackageCmd]
    "Package a resource"

    predict: CliSubCommand[PredictCmd]
    "Predict with a model resource"

    update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format")
    """Update the metadata format"""

    update_hashes: CliSubCommand[UpdateHashesCmd] = Field(alias="update-hashes")
    """Create a bioimageio.yaml description with updated file hashes."""

    add_weights: CliSubCommand[AddWeightsCmd] = Field(alias="add-weights")
    """Add additional weights to the model descriptions converted from available
    formats to improve deployability."""

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        # Sources returned: CLI arguments, init arguments, YAML and JSON config
        # file. The env, dotenv and file-secret sources are not included.
        cli: CliSettingsSource[BaseSettings] = CliSettingsSource(
            settings_cls,
            cli_parse_args=True,
            formatter_class=RawTextHelpFormatter,
        )
        sys_args = pformat(sys.argv)
        logger.info("starting CLI with arguments:\n{}", sys_args)
        yaml_source = YamlConfigSettingsSource(settings_cls)
        json_source = JsonConfigSettingsSource(settings_cls)
        return (cli, init_settings, yaml_source, json_source)

    @model_validator(mode="before")
    @classmethod
    def _log(cls, data: Any):
        # log the raw input (omitting `None` values) and pass it on unchanged
        given = {k: v for k, v in data.items() if v is not None}
        logger.info("loaded CLI input:\n{}", pformat(given))
        return data

    def run(self):
        dumped = {k: v for k, v in self.model_dump().items() if v is not None}
        logger.info("executing CLI command:\n{}", pformat(dumped))

        # run the first sub-command that is set
        candidates = (
            self.add_weights,
            self.package,
            self.predict,
            self.test,
            self.update_format,
            self.update_hashes,
            self.validate_format,
        )
        cmd = next((c for c in candidates if c), None)
        assert cmd is not None
        cmd.run()

863 

864 

# Extend the CLI description with library and spec format versions.
assert isinstance(Bioimageio.__doc__, str)
# `SPEC_VERSION` is the version of bioimageio.spec
# (bioimageio.core's `VERSION` used to be reported for both libraries).
Bioimageio.__doc__ += f"""

library versions:
  bioimageio.core {VERSION}
  bioimageio.spec {SPEC_VERSION}

spec format versions:
        model RDF {ModelDescr.implemented_format_version}
      dataset RDF {DatasetDescr.implemented_format_version}
     notebook RDF {NotebookDescr.implemented_format_version}

"""

878 

879 

880def _get_sample_ids( 

881 input_paths: Sequence[Mapping[MemberId, Path]], 

882) -> Sequence[SampleId]: 

883 """Get sample ids for given input paths, based on the common path per sample. 

884 

885 Falls back to sample01, samle02, etc...""" 

886 

887 matcher = SequenceMatcher() 

888 

889 def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]: 

890 """extract a common sequence from multiple sequences 

891 (order sensitive; strips whitespace and slashes) 

892 """ 

893 common = seqs[0] 

894 

895 for seq in seqs[1:]: 

896 if not seq: 

897 continue 

898 matcher.set_seqs(common, seq) 

899 i, _, size = matcher.find_longest_match() 

900 common = common[i : i + size] 

901 

902 if isinstance(common, str): 

903 common = common.strip().strip("/") 

904 else: 

905 common = [cs for c in common if (cs := c.strip().strip("/"))] 

906 

907 if not common: 

908 raise ValueError(f"failed to find common sequence for {seqs}") 

909 

910 return common 

911 

912 def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]: 

913 """get a shorter sequence whose entries are still unique 

914 (order sensitive, not minimal sequence) 

915 """ 

916 min_seq_len = min(len(s) for s in seqs) 

917 # cut from the start 

918 for start in range(min_seq_len - 1, -1, -1): 

919 shortened = [s[start:] for s in seqs] 

920 if len(set(shortened)) == len(seqs): 

921 min_seq_len -= start 

922 break 

923 else: 

924 seen: Set[Sequence[str]] = set() 

925 dupes = [s for s in seqs if s in seen or seen.add(s)] 

926 raise ValueError(f"Found duplicate entries {dupes}") 

927 

928 # cut from the end 

929 for end in range(min_seq_len - 1, 1, -1): 

930 shortened = [s[:end] for s in shortened] 

931 if len(set(shortened)) == len(seqs): 

932 break 

933 

934 return shortened 

935 

936 full_tensor_ids = [ 

937 sorted( 

938 p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values() 

939 ) 

940 for input_sample_paths in input_paths 

941 ] 

942 try: 

943 long_sample_ids = [get_common_seq(t) for t in full_tensor_ids] 

944 sample_ids = get_shorter_diff(long_sample_ids) 

945 except ValueError as e: 

946 raise ValueError(f"failed to extract sample ids: {e}") 

947 

948 return sample_ids