Coverage for bioimageio/core/cli.py: 83%

368 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-01 09:51 +0000

1"""bioimageio CLI 

2 

3Note: Some docstrings use a hair space ' ' 

4 to place the added '(default: ...)' on a new line. 

5""" 

6 

7import json 

8import shutil 

9import subprocess 

10import sys 

11from abc import ABC 

12from argparse import RawTextHelpFormatter 

13from difflib import SequenceMatcher 

14from functools import cached_property 

15from io import StringIO 

16from pathlib import Path 

17from pprint import pformat, pprint 

18from typing import ( 

19 Any, 

20 Dict, 

21 Iterable, 

22 List, 

23 Literal, 

24 Mapping, 

25 Optional, 

26 Sequence, 

27 Set, 

28 Tuple, 

29 Type, 

30 Union, 

31) 

32 

33import rich.markdown 

34from loguru import logger 

35from pydantic import AliasChoices, BaseModel, Field, model_validator 

36from pydantic_settings import ( 

37 BaseSettings, 

38 CliPositionalArg, 

39 CliSettingsSource, 

40 CliSubCommand, 

41 JsonConfigSettingsSource, 

42 PydanticBaseSettingsSource, 

43 SettingsConfigDict, 

44 YamlConfigSettingsSource, 

45) 

46from tqdm import tqdm 

47from typing_extensions import assert_never 

48 

49from bioimageio.spec import ( 

50 AnyModelDescr, 

51 InvalidDescr, 

52 ResourceDescr, 

53 load_description, 

54 save_bioimageio_yaml_only, 

55 settings, 

56 update_format, 

57 update_hashes, 

58) 

59from bioimageio.spec._internal.io import is_yaml_value 

60from bioimageio.spec._internal.io_basics import ZipPath 

61from bioimageio.spec._internal.io_utils import open_bioimageio_yaml 

62from bioimageio.spec._internal.types import NotEmpty 

63from bioimageio.spec.dataset import DatasetDescr 

64from bioimageio.spec.model import ModelDescr, v0_4, v0_5 

65from bioimageio.spec.notebook import NotebookDescr 

66from bioimageio.spec.utils import download, ensure_description_is_model, write_yaml 

67 

68from .commands import WeightFormatArgAll, WeightFormatArgAny, package, test 

69from .common import MemberId, SampleId, SupportedWeightsFormat 

70from .digest_spec import get_member_ids, load_sample_for_model 

71from .io import load_dataset_stat, save_dataset_stat, save_sample 

72from .prediction import create_prediction_pipeline 

73from .proc_setup import ( 

74 DatasetMeasure, 

75 Measure, 

76 MeasureValue, 

77 StatsCalculator, 

78 get_required_dataset_measures, 

79) 

80from .sample import Sample 

81from .stat_measures import Stat 

82from .utils import VERSION, compare 

83from .weight_converters._add_weights import add_weights 

84 

# Alternative spellings under which the weight format option is accepted;
# used as `validation_alias` by the commands below.
WEIGHT_FORMAT_ALIASES = AliasChoices(
    *("weight-format", "weights-format", "weight_format", "weights_format")
)

91 

92 

class CmdBase(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True):
    # Marker base class for the CLI (sub)command models; declares no fields.
    ...

95 

96 

class ArgMixin(BaseModel, use_attribute_docstrings=True, cli_implicit_flags=True):
    # Marker base class for reusable argument groups; declares no fields.
    ...

99 

100 

class WithSummaryLogging(ArgMixin):
    # Argument group providing the `summary` option plus a helper that hands
    # it to a description's validation summary.
    summary: Union[
        Literal["display"], Path, Sequence[Union[Literal["display"], Path]]
    ] = Field(
        default="display",
        examples=[
            "display",
            Path("summary.md"),
            Path("bioimageio_summaries/"),
            ["display", Path("summary.md")],
        ],
    )
    """Display the validation summary or save it as JSON, Markdown or HTML.
    The format is chosen based on the suffix: `.json`, `.md`, `.html`.
    If a folder is given (path w/o suffix) the summary is saved in all formats.
    Choose/add `"display"` to render the validation summary to the terminal.
    """

    def log(self, descr: Union[ResourceDescr, InvalidDescr]):
        """Pass the configured `summary` target(s) to `descr.validation_summary.log`."""
        validation_summary = descr.validation_summary
        _ = validation_summary.log(self.summary)

121 

122 

class WithSource(ArgMixin):
    # Argument group providing the positional `source` argument and the
    # description loaded from it.
    source: CliPositionalArg[str]
    """Url/path to a (folder with a) bioimageio.yaml/rdf.yaml file
    or a bioimage.io resource identifier, e.g. 'affable-shark'"""

    @cached_property
    def descr(self):
        """The description loaded from `source` (computed once, then cached)."""
        return load_description(self.source)

    @property
    def descr_id(self) -> str:
        """a more user-friendly description id
        (replacing legacy ids with their nicknames)

        For an invalid description the first truthy value of its `id`, its
        `name` and the given `source` is used.
        """
        descr = self.descr
        if isinstance(descr, InvalidDescr):
            # Look up both attributes with a default: a nested
            # `getattr(descr, "name")` without default is evaluated eagerly and
            # raises AttributeError whenever `name` is missing, even if `id`
            # is available.
            return str(
                getattr(descr, "id", None)
                or getattr(descr, "name", None)
                or self.source
            )

        nickname = None
        if (
            isinstance(descr.config, v0_5.Config)
            and (bio_config := descr.config.bioimageio)
            and bio_config.model_extra is not None
        ):
            nickname = bio_config.model_extra.get("nickname")

        return str(nickname or descr.id or descr.name)

149 

150 

class ValidateFormatCmd(CmdBase, WithSource, WithSummaryLogging):
    """Validate the meta data format of a bioimageio resource."""

    perform_io_checks: bool = Field(
        settings.perform_io_checks, alias="perform-io-checks"
    )
    """Whether or not to perform validations that require downloading remote files.
    Note: Default value is set by `BIOIMAGEIO_PERFORM_IO_CHECKS` environment variable.
    """

    @cached_property
    def descr(self):
        """The description loaded from `source`, honoring `perform_io_checks`."""
        return load_description(self.source, perform_io_checks=self.perform_io_checks)

    def run(self):
        """Log the validation summary, then exit the process.

        The exit code is 0 if the summary status is "valid-format" or "passed"
        and 1 otherwise.
        """
        self.log(self.descr)
        format_ok = self.descr.validation_summary.status in ("valid-format", "passed")
        sys.exit(0 if format_ok else 1)

172 

173 

class TestCmd(CmdBase, WithSource, WithSummaryLogging):
    """Test a bioimageio resource (beyond meta data formatting)."""

    weight_format: WeightFormatArgAll = Field(
        default="all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to limit testing to.

    (only relevant for model resources)"""

    devices: Optional[Union[str, Sequence[str]]] = None
    """Device(s) to use for testing"""

    runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field(
        default="currently-active", alias="runtime-env"
    )
    """The python environment to run the tests in
    - `"currently-active"`: use active Python interpreter
    - `"as-described"`: generate a conda environment YAML file based on the model
    weights description.
    - A path to a conda environment YAML.
    Note: The `bioimageio.core` dependency will be added automatically if not present.
    """

    determinism: Literal["seed_only", "full"] = "seed_only"
    """Modes to improve reproducibility of test outputs."""

    stop_early: bool = Field(
        default=False,
        alias="stop-early",
        validation_alias=AliasChoices("stop-early", "x"),
    )
    """Do not run further subtests after a failed one."""

    def run(self):
        """Call `test` on the loaded description and exit with its return value."""
        # NOTE(review): `stop_early` is accepted as an option but is not
        # forwarded to `test` here -- confirm whether that is intended.
        exit_code = test(
            self.descr,
            weight_format=self.weight_format,
            devices=self.devices,
            summary=self.summary,
            runtime_env=self.runtime_env,
            determinism=self.determinism,
        )
        sys.exit(exit_code)

219 

220 

class PackageCmd(CmdBase, WithSource, WithSummaryLogging):
    """Save a resource's metadata with its associated files."""

    path: CliPositionalArg[Path]
    """The path to write the (zipped) package to.
    If it does not have a `.zip` suffix
    this command will save the package as an unzipped folder instead."""

    weight_format: WeightFormatArgAll = Field(
        default="all",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to include in the package (for model descriptions only)."""

    def run(self):
        """Package the loaded description and exit with the value returned by `package`.

        Raises:
            ValueError: if the loaded description is invalid (its validation
                summary is logged first).
        """
        descr = self.descr
        if isinstance(descr, InvalidDescr):
            self.log(descr)
            raise ValueError(f"Invalid {descr.type} description.")

        exit_code = package(descr, self.path, weight_format=self.weight_format)
        sys.exit(exit_code)

248 

249 

def _get_stat(
    model_descr: AnyModelDescr,
    dataset: Iterable[Sample],
    dataset_length: int,
    stats_path: Path,
) -> Mapping[DatasetMeasure, MeasureValue]:
    """Load or compute the dataset measures required by `model_descr`.

    Args:
        model_descr: model description the required measures are derived from.
        dataset: samples to compute the measures from (only iterated if
            `stats_path` does not exist).
        dataset_length: number of samples, used as the progress bar total.
        stats_path: file to load precomputed measures from if it exists;
            otherwise the computed measures are saved there.

    Returns:
        An empty mapping if no dataset measures are required, otherwise the
        loaded or freshly computed measures.

    Raises:
        ValueError: if `stats_path` exists but lacks a required measure.
    """
    # Determined once; it is reused for the early exit, the check of
    # precomputed values and the computation below.
    req_dataset_meas, _ = get_required_dataset_measures(model_descr)
    if not req_dataset_meas:
        return {}

    if stats_path.exists():
        logger.info("loading precomputed dataset measures from {}", stats_path)
        stat = load_dataset_stat(stats_path)
        for m in req_dataset_meas:
            if m not in stat:
                raise ValueError(f"Missing {m} in {stats_path}")

        return stat

    stats_calc = StatsCalculator(req_dataset_meas)

    for sample in tqdm(
        dataset, total=dataset_length, desc="precomputing dataset stats", unit="sample"
    ):
        stats_calc.update(sample)

    stat = stats_calc.finalize()
    save_dataset_stat(stat, stats_path)

    return stat

282 

283 

class UpdateCmdBase(CmdBase, WithSource, ABC):
    # Shared options and output handling of the `update-*` commands.
    # Subclasses provide the updated description via `updated`.
    output: Union[Literal["display", "stdout"], Path] = "display"
    """Output updated bioimageio.yaml to the terminal or write to a file.
    Notes:
    - `"display"`: Render to the terminal with syntax highlighting.
    - `"stdout"`: Write to sys.stdout without syntax highlighting.
    (More convenient for copying the updated bioimageio.yaml from the terminal.)
    """

    diff: Union[bool, Path] = Field(True, alias="diff")
    """Output a diff of original and updated bioimageio.yaml.
    If a given path has an `.html` extension, a standalone HTML file is written,
    otherwise the diff is saved in unified diff format (pure text).
    """

    exclude_unset: bool = Field(True, alias="exclude-unset")
    """Exclude fields that have not explicitly been set."""

    exclude_defaults: bool = Field(False, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly)."""

    @cached_property
    def updated(self) -> Union[ResourceDescr, InvalidDescr]:
        """The updated description; must be overridden by subclasses."""
        raise NotImplementedError

    def run(self):
        """Serialize `updated`, then output the diff and the updated YAML.

        The diff against the original bioimageio.yaml is written to the `diff`
        path or printed if `diff` is true. The updated YAML goes to the
        `output` path, the rendered terminal or plain stdout. If the update
        produced an invalid description, a warning is logged and its
        validation summary is displayed.
        """
        # Imported explicitly: at module level only `rich.markdown` is
        # imported, so `rich.console` would otherwise be reachable only as a
        # side effect of that import.
        from rich.console import Console

        original_yaml = open_bioimageio_yaml(self.source).unparsed_content
        assert isinstance(original_yaml, str)
        stream = StringIO()

        save_bioimageio_yaml_only(
            self.updated,
            stream,
            exclude_unset=self.exclude_unset,
            exclude_defaults=self.exclude_defaults,
        )
        updated_yaml = stream.getvalue()

        diff = compare(
            original_yaml.split("\n"),
            updated_yaml.split("\n"),
            diff_format=(
                "html"
                if isinstance(self.diff, Path) and self.diff.suffix == ".html"
                else "unified"
            ),
        )

        if isinstance(self.diff, Path):
            _ = self.diff.write_text(diff, encoding="utf-8")
        elif self.diff:
            diff_md = f"## Diff\n\n````````diff\n{diff}\n````````"
            Console().print(rich.markdown.Markdown(diff_md))

        if isinstance(self.output, Path):
            _ = self.output.write_text(updated_yaml, encoding="utf-8")
            logger.info(f"written updated description to {self.output}")
        elif self.output == "display":
            updated_md = f"## Updated bioimageio.yaml\n\n```yaml\n{updated_yaml}\n```"
            Console().print(rich.markdown.Markdown(updated_md))
        elif self.output == "stdout":
            print(updated_yaml)
        else:
            assert_never(self.output)

        if isinstance(self.updated, InvalidDescr):
            logger.warning("Update resulted in invalid description")
            _ = self.updated.validation_summary.display()

353 

354 

class UpdateFormatCmd(UpdateCmdBase):
    """Update the metadata format to the latest format version."""

    exclude_defaults: bool = Field(True, alias="exclude-defaults")
    """Exclude fields that have the default value (even if set explicitly).

    Note:
    The update process sets most unset fields explicitly with their default value.
    """

    perform_io_checks: bool = Field(
        settings.perform_io_checks, alias="perform-io-checks"
    )
    """Whether or not to attempt validation that may require file download.
    If `True` file hash values are added if not present."""

    @cached_property
    def updated(self):
        """The result of `update_format` for `source` (computed once, then cached)."""
        return update_format(
            self.source,
            exclude_defaults=self.exclude_defaults,
            perform_io_checks=self.perform_io_checks,
        )

378 

379 

class UpdateHashesCmd(UpdateCmdBase):
    """Create a bioimageio.yaml description with updated file hashes."""

    @cached_property
    def updated(self):
        # Delegates to `update_hashes`; the result is cached by `cached_property`.
        rehashed = update_hashes(self.source)
        return rehashed

386 

387 

class PredictCmd(CmdBase, WithSource):
    """Run inference on your data with a bioimage.io model."""

    # NOTE(review): per the module docstring some attribute docstrings hold a
    # hair space (U+200A) so that the appended '(default: ...)' starts on a new
    # line. Below it is assumed to sit on the last otherwise blank line of the
    # `inputs`, `outputs` and `example` docstrings and directly before the
    # closing quotes of the `stats` docstring -- confirm against the original.

    inputs: NotEmpty[Sequence[Union[str, NotEmpty[Tuple[str, ...]]]]] = (
        "{input_id}/001.tif",
    )
    """Model input sample paths (for each input tensor)

    The input paths are expected to have shape...
    - (n_samples,) or (n_samples,1) for models expecting a single input tensor
    - (n_samples,) containing the substring '{input_id}', or
    - (n_samples, n_model_inputs) to provide each input tensor path explicitly.

    All substrings that are replaced by metadata from the model description:
    - '{model_id}'
    - '{input_id}'

    Example inputs to process sample 'a' and 'b'
    for a model expecting a 'raw' and a 'mask' input tensor:
    --inputs="[[\\"a_raw.tif\\",\\"a_mask.tif\\"],[\\"b_raw.tif\\",\\"b_mask.tif\\"]]"
    (Note that JSON double quotes need to be escaped.)

    Alternatively a `bioimageio-cli.yaml` (or `bioimageio-cli.json`) file
    may provide the arguments, e.g.:
    ```yaml
    inputs:
    - [a_raw.tif, a_mask.tif]
    - [b_raw.tif, b_mask.tif]
    ```

    `.npy` and any file extension supported by imageio are supported.
    Available formats are listed at
    https://imageio.readthedocs.io/en/stable/formats/index.html#all-formats.
    Some formats have additional dependencies.

     
    """

    outputs: Union[str, NotEmpty[Tuple[str, ...]]] = (
        "outputs_{model_id}/{output_id}/{sample_id}.tif"
    )
    """Model output path pattern (per output tensor)

    All substrings that are replaced:
    - '{model_id}' (from model description)
    - '{output_id}' (from model description)
    - '{sample_id}' (extracted from input paths)

     
    """

    overwrite: bool = False
    """allow overwriting existing output files"""

    blockwise: bool = False
    """process inputs blockwise"""

    stats: Path = Path("dataset_statistics.json")
    """path to dataset statistics
    (will be written if it does not exist,
    but the model requires statistical dataset measures)
     """

    preview: bool = False
    """preview which files would be processed
    and what outputs would be generated."""

    weight_format: WeightFormatArgAny = Field(
        "any",
        alias="weight-format",
        validation_alias=WEIGHT_FORMAT_ALIASES,
    )
    """The weight format to use."""

    example: bool = False
    """generate and run an example

    1. downloads example model inputs
    2. creates a `{model_id}_example` folder
    3. writes input arguments to `{model_id}_example/bioimageio-cli.yaml`
    4. executes a preview dry-run
    5. executes prediction with example input

     
    """

    def _example(self):
        """Set up an example folder for the model and run the CLI on it.

        Copies the model's example inputs to `<descr_id>_example/<input id>/`,
        writes a CLI config file into that folder, runs `bioimageio predict`
        as a subprocess (first with `--preview`, then for real) and prints
        instructions for repeating the prediction. A CLI config YAML file in
        the current working directory is removed for the duration of the
        subprocess calls and restored afterwards.

        Raises:
            ValueError: if the model does not specify any example inputs.
            subprocess.CalledProcessError: if one of the subprocess calls fails.
        """
        model_descr = ensure_description_is_model(self.descr)
        input_ids = get_member_ids(model_descr.inputs)
        example_inputs = (
            model_descr.sample_inputs
            if isinstance(model_descr, v0_4.ModelDescr)
            else [ipt.sample_tensor or ipt.test_tensor for ipt in model_descr.inputs]
        )
        if not example_inputs:
            raise ValueError(f"{self.descr_id} does not specify any example inputs.")

        inputs001: List[str] = []
        example_path = Path(f"{self.descr_id}_example")
        example_path.mkdir(exist_ok=True)

        for t, src in zip(input_ids, example_inputs):
            local = download(src).path
            dst = Path(f"{example_path}/{t}/001{''.join(local.suffixes)}")
            dst.parent.mkdir(parents=True, exist_ok=True)
            inputs001.append(dst.as_posix())
            if isinstance(local, Path):
                shutil.copy(local, dst)
            elif isinstance(local, ZipPath):
                # Write the member's content to the file `dst`.
                # `ZipFile.extract(member, path=dst)` would treat `dst` as a
                # directory and recreate the member's archive path below it.
                _ = dst.write_bytes(local.read_bytes())
            else:
                assert_never(local)

        inputs = [tuple(inputs001)]
        output_pattern = f"{example_path}/outputs/{{output_id}}/{{sample_id}}.tif"

        bioimageio_cli_path = example_path / YAML_FILE
        stats_file = "dataset_statistics.json"
        stats = (example_path / stats_file).as_posix()
        # NOTE(review): the config file stores `stats` relative to the example
        # folder while `inputs`/`outputs` keep the example folder prefix --
        # confirm this is intended for running from within the example folder.
        cli_example_args = dict(
            inputs=inputs,
            outputs=output_pattern,
            stats=stats_file,
            blockwise=self.blockwise,
        )
        assert is_yaml_value(cli_example_args)
        write_yaml(
            cli_example_args,
            bioimageio_cli_path,
        )

        yaml_file_content = None

        # escaped double quotes
        inputs_json = json.dumps(inputs)
        inputs_escaped = inputs_json.replace('"', r"\"")
        source_escaped = self.source.replace('"', r"\"")

        def get_example_command(preview: bool, escape: bool = False):
            """Build the `bioimageio predict` argument list.

            With `escape=True` values are wrapped in double quotes (for
            printing); otherwise they are passed verbatim (for `subprocess`).
            """
            q: str = '"' if escape else ""

            return [
                "bioimageio",
                "predict",
                # --no-preview not supported for py=3.8
                *(["--preview"] if preview else []),
                "--overwrite",
                *(["--blockwise"] if self.blockwise else []),
                f"--stats={q}{stats}{q}",
                f"--inputs={q}{inputs_escaped if escape else inputs_json}{q}",
                f"--outputs={q}{output_pattern}{q}",
                f"{q}{source_escaped if escape else self.source}{q}",
            ]

        if Path(YAML_FILE).exists():
            logger.info(
                "temporarily removing '{}' to execute example prediction", YAML_FILE
            )
            yaml_file_content = Path(YAML_FILE).read_bytes()
            Path(YAML_FILE).unlink()

        try:
            _ = subprocess.run(get_example_command(True), check=True)
            _ = subprocess.run(get_example_command(False), check=True)
        finally:
            # restore the removed config file even if a subprocess call failed
            if yaml_file_content is not None:
                _ = Path(YAML_FILE).write_bytes(yaml_file_content)
                logger.debug("restored '{}'", YAML_FILE)

        print(
            "🎉 Successfully ran example prediction!\n"
            + "To predict the example input using the CLI example config file"
            + f" {example_path/YAML_FILE}, execute `bioimageio predict` from {example_path}:\n"
            + f"$ cd {str(example_path)}\n"
            + f'$ bioimageio predict "{source_escaped}"\n\n'
            + "Alternatively run the following command"
            + " in the current working directory, not the example folder:\n$ "
            + " ".join(get_example_command(False, escape=True))
            + f"\n(note that a local '{JSON_FILE}' or '{YAML_FILE}' may interfere with this)"
        )

    def run(self):
        """Expand the input/output path patterns and run the prediction.

        With `example` set, `_example` is run instead. With `preview` set, only
        the resolved input and output paths are printed.

        Raises:
            ValueError: if the given input/output paths do not match the
                model's inputs/outputs or are not distinct.
            FileExistsError: if an output path exists and `overwrite` is not set.
        """
        if self.example:
            return self._example()

        model_descr = ensure_description_is_model(self.descr)

        input_ids = get_member_ids(model_descr.inputs)
        output_ids = get_member_ids(model_descr.outputs)

        minimum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
            if not isinstance(ipt, v0_5.InputTensorDescr) or not ipt.optional
        )
        maximum_input_ids = tuple(
            str(ipt.id) if isinstance(ipt, v0_5.InputTensorDescr) else str(ipt.name)
            for ipt in model_descr.inputs
        )

        def expand_inputs(i: int, ipt: Union[str, Tuple[str, ...]]) -> Tuple[str, ...]:
            """Resolve the input path(s) of sample number `i`."""
            if isinstance(ipt, str):
                ipts = tuple(
                    ipt.format(model_id=self.descr_id, input_id=t) for t in input_ids
                )
            else:
                # Check for surplus paths before zipping: `zip` stops at the
                # shorter sequence, so surplus paths would be dropped silently.
                if len(ipt) > len(maximum_input_ids):
                    raise ValueError(
                        f"[input sample #{i}] Expected at most {len(maximum_input_ids)} inputs {maximum_input_ids}, got {ipt}"
                    )

                ipts = tuple(
                    p.format(model_id=self.descr_id, input_id=t)
                    for t, p in zip(input_ids, ipt)
                )

            if len(set(ipts)) < len(ipts):
                if len(minimum_input_ids) == len(maximum_input_ids):
                    n = len(minimum_input_ids)
                else:
                    n = f"{len(minimum_input_ids)}-{len(maximum_input_ids)}"

                raise ValueError(
                    f"[input sample #{i}] Include '{{input_id}}' in path pattern or explicitly specify {n} distinct input paths (got {ipt})"
                )

            if len(ipts) < len(minimum_input_ids):
                raise ValueError(
                    f"[input sample #{i}] Expected at least {len(minimum_input_ids)} inputs {minimum_input_ids}, got {ipts}"
                )

            return ipts

        inputs = [expand_inputs(i, ipt) for i, ipt in enumerate(self.inputs, start=1)]

        sample_paths_in = [
            {t: Path(p) for t, p in zip(input_ids, ipts)} for ipts in inputs
        ]

        sample_ids = _get_sample_ids(sample_paths_in)

        def expand_outputs():
            """Resolve the output paths of every sample."""
            if isinstance(self.outputs, str):
                outputs = [
                    tuple(
                        Path(
                            self.outputs.format(
                                model_id=self.descr_id, output_id=t, sample_id=s
                            )
                        )
                        for t in output_ids
                    )
                    for s in sample_ids
                ]
            else:
                # As for the inputs: `zip` would silently drop surplus patterns.
                if len(self.outputs) > len(output_ids):
                    raise ValueError(
                        f"Expected {len(output_ids)} outputs {output_ids}, got {self.outputs}"
                    )

                outputs = [
                    tuple(
                        Path(p.format(model_id=self.descr_id, output_id=t, sample_id=s))
                        for t, p in zip(output_ids, self.outputs)
                    )
                    for s in sample_ids
                ]

            for i, out in enumerate(outputs, start=1):
                if len(set(out)) < len(out):
                    raise ValueError(
                        f"[output sample #{i}] Include '{{output_id}}' in path pattern or explicitly specify {len(output_ids)} distinct output paths (got {out})"
                    )

                if len(out) != len(output_ids):
                    raise ValueError(
                        f"[output sample #{i}] Expected {len(output_ids)} outputs {output_ids}, got {out}"
                    )

            return outputs

        outputs = expand_outputs()

        sample_paths_out = [
            {MemberId(t): Path(p) for t, p in zip(output_ids, out)} for out in outputs
        ]

        if not self.overwrite:
            for sample_paths in sample_paths_out:
                for p in sample_paths.values():
                    if p.exists():
                        raise FileExistsError(
                            f"{p} already exists. use --overwrite to (re-)write outputs anyway."
                        )
        if self.preview:
            print("🛈 bioimageio prediction preview structure:")
            pprint(
                {
                    "{sample_id}": dict(
                        inputs={"{input_id}": "<input path>"},
                        outputs={"{output_id}": "<output path>"},
                    )
                }
            )
            print("🔎 bioimageio prediction preview output:")
            pprint(
                {
                    s: dict(
                        inputs={t: p.as_posix() for t, p in sp_in.items()},
                        outputs={t: p.as_posix() for t, p in sp_out.items()},
                    )
                    for s, sp_in, sp_out in zip(
                        sample_ids, sample_paths_in, sample_paths_out
                    )
                }
            )
            return

        def input_dataset(stat: Stat):
            """Lazily load each input sample, passing on the given `stat`."""
            for s, sp_in in zip(sample_ids, sample_paths_in):
                yield load_sample_for_model(
                    model=model_descr,
                    paths=sp_in,
                    stat=stat,
                    sample_id=s,
                )

        # first pass over the inputs (with empty stat) to obtain dataset measures
        stat: Dict[Measure, MeasureValue] = dict(
            _get_stat(
                model_descr, input_dataset({}), len(sample_ids), self.stats
            ).items()
        )

        pp = create_prediction_pipeline(
            model_descr,
            weight_format=None if self.weight_format == "any" else self.weight_format,
        )
        predict_method = (
            pp.predict_sample_with_blocking
            if self.blockwise
            else pp.predict_sample_without_blocking
        )

        for sample_in, sp_out in tqdm(
            zip(input_dataset(dict(stat)), sample_paths_out),
            total=len(inputs),
            desc=f"predict with {self.descr_id}",
            unit="sample",
        ):
            sample_out = predict_method(sample_in)
            save_sample(sp_out, sample_out)

734 

735 

class AddWeightsCmd(CmdBase, WithSource, WithSummaryLogging):
    output: CliPositionalArg[Path]
    """The path to write the updated model package to."""

    source_format: Optional[SupportedWeightsFormat] = Field(
        default=None, alias="source-format"
    )
    """Exclusively use these weights to convert to other formats."""

    target_format: Optional[SupportedWeightsFormat] = Field(
        default=None, alias="target-format"
    )
    """Exclusively add this weight format."""

    verbose: bool = False
    """Log more (error) output."""

    def run(self):
        # Calls `add_weights` on the loaded model description and logs the
        # validation summary of the result, unless `add_weights` returned None.
        model_descr = ensure_description_is_model(self.descr)
        if isinstance(model_descr, v0_4.ModelDescr):
            raise TypeError(
                f"model format {model_descr.format_version} not supported. Please update the model first."
            )

        with_added_weights = add_weights(
            model_descr,
            output_path=self.output,
            source_format=self.source_format,
            target_format=self.target_format,
            verbose=self.verbose,
        )
        if with_added_weights is not None:
            self.log(with_added_weights)

767 

768 

# Names of the CLI config files; they are passed as `json_file`/`yaml_file`
# to the settings config of `Bioimageio` below.
_CLI_CONFIG_STEM = "bioimageio-cli"
JSON_FILE = f"{_CLI_CONFIG_STEM}.json"
YAML_FILE = f"{_CLI_CONFIG_STEM}.yaml"

771 

772 

class Bioimageio(
    BaseSettings,
    cli_implicit_flags=True,
    cli_parse_args=True,
    cli_prog_name="bioimageio",
    cli_use_class_docs_for_groups=True,
    use_attribute_docstrings=True,
):
    """bioimageio - CLI for bioimage.io resources 🦒"""

    model_config = SettingsConfigDict(
        json_file=JSON_FILE,
        yaml_file=YAML_FILE,
    )

    validate_format: CliSubCommand[ValidateFormatCmd] = Field(alias="validate-format")
    "Check a resource's metadata format"

    test: CliSubCommand[TestCmd]
    "Test a bioimageio resource (beyond meta data formatting)"

    package: CliSubCommand[PackageCmd]
    "Package a resource"

    predict: CliSubCommand[PredictCmd]
    "Predict with a model resource"

    update_format: CliSubCommand[UpdateFormatCmd] = Field(alias="update-format")
    """Update the metadata format"""

    update_hashes: CliSubCommand[UpdateHashesCmd] = Field(alias="update-hashes")
    """Create a bioimageio.yaml description with updated file hashes."""

    add_weights: CliSubCommand[AddWeightsCmd] = Field(alias="add-weights")
    """Add additional weights to the model descriptions converted from available
    formats to improve deployability."""

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        # Returned sources, in this order: CLI arguments, init arguments,
        # YAML config file, JSON config file. The env/dotenv/secret sources
        # passed in are not part of the returned tuple.
        cli: CliSettingsSource[BaseSettings] = CliSettingsSource(
            settings_cls,
            cli_parse_args=True,
            formatter_class=RawTextHelpFormatter,
        )
        logger.info("starting CLI with arguments:\n{}", pformat(sys.argv))
        yaml_source = YamlConfigSettingsSource(settings_cls)
        json_source = JsonConfigSettingsSource(settings_cls)
        return (cli, init_settings, yaml_source, json_source)

    @model_validator(mode="before")
    @classmethod
    def _log(cls, data: Any):
        # Logs the non-None raw input before validation; returns it unchanged.
        given = {k: v for k, v in data.items() if v is not None}
        logger.info("loaded CLI input:\n{}", pformat(given))
        return data

    def run(self):
        # Logs the non-None fields, then runs the first truthy subcommand in
        # the fixed order listed below.
        given = {k: v for k, v in self.model_dump().items() if v is not None}
        logger.info("executing CLI command:\n{}", pformat(given))
        candidates = (
            self.add_weights,
            self.package,
            self.predict,
            self.test,
            self.update_format,
            self.update_hashes,
            self.validate_format,
        )
        cmd = next((c for c in candidates if c), None)
        assert cmd is not None
        cmd.run()

858 

859 

# The version of bioimageio.spec itself. `VERSION` (imported from `.utils`)
# is already reported as the bioimageio.core version and must not be reused
# for the spec line.
from bioimageio.spec import __version__ as _SPEC_VERSION  # noqa: E402

# Append library and format version information to the class docstring of
# `Bioimageio`.
# NOTE(review): the exact whitespace/alignment inside this text could not be
# recovered from the reviewed copy -- confirm against the original file.
assert isinstance(Bioimageio.__doc__, str)
Bioimageio.__doc__ += f"""

library versions:
  bioimageio.core {VERSION}
  bioimageio.spec {_SPEC_VERSION}

spec format versions:
  model RDF {ModelDescr.implemented_format_version}
  dataset RDF {DatasetDescr.implemented_format_version}
  notebook RDF {NotebookDescr.implemented_format_version}

"""

873 

874 

def _get_sample_ids(
    input_paths: Sequence[Mapping[MemberId, Path]],
) -> Sequence[SampleId]:
    """Get sample ids for given input paths, based on the common path per sample.

    For each sample the resolved input paths (without their last suffix) are
    reduced to a common substring. These per-sample strings are then shortened
    as long as they stay unique across samples.

    Args:
        input_paths: for each sample a mapping of input member id to path.

    Returns:
        One sample id per entry in `input_paths`, in the same order.

    Raises:
        ValueError: if no common substring is found for a sample or if two
            samples end up with the same id.
    """

    matcher = SequenceMatcher()

    def get_common_seq(seqs: Sequence[Sequence[str]]) -> Sequence[str]:
        """extract a common sequence from multiple sequences
        (order sensitive; strips whitespace and slashes)
        """
        common = seqs[0]

        for seq in seqs[1:]:
            if not seq:
                continue
            matcher.set_seqs(common, seq)
            # explicit bounds: calling without arguments requires Python >= 3.9
            i, _, size = matcher.find_longest_match(0, len(common), 0, len(seq))
            common = common[i : i + size]

        if isinstance(common, str):
            common = common.strip().strip("/")
        else:
            common = [cs for c in common if (cs := c.strip().strip("/"))]

        if not common:
            raise ValueError(f"failed to find common sequence for {seqs}")

        return common

    def get_shorter_diff(seqs: Sequence[Sequence[str]]) -> List[Sequence[str]]:
        """get a shorter sequence whose entries are still unique
        (order sensitive, not minimal sequence)
        """
        min_seq_len = min(len(s) for s in seqs)
        # cut from the start
        for start in range(min_seq_len - 1, -1, -1):
            shortened = [s[start:] for s in seqs]
            if len(set(shortened)) == len(seqs):
                min_seq_len -= start
                break
        else:
            seen: Set[Sequence[str]] = set()
            dupes = [s for s in seqs if s in seen or seen.add(s)]
            raise ValueError(f"Found duplicate entries {dupes}")

        # cut from the end
        # Only the first cut (to `min_seq_len - 1`) can succeed: if those
        # prefixes collide, every shorter prefix collides as well. Keep the
        # unique `shortened` in that case instead of returning duplicates.
        if min_seq_len > 2:
            candidate = [s[: min_seq_len - 1] for s in shortened]
            if len(set(candidate)) == len(seqs):
                shortened = candidate

        return shortened

    full_tensor_ids = [
        sorted(
            p.resolve().with_suffix("").as_posix() for p in input_sample_paths.values()
        )
        for input_sample_paths in input_paths
    ]
    try:
        long_sample_ids = [get_common_seq(t) for t in full_tensor_ids]
        sample_ids = get_shorter_diff(long_sample_ids)
    except ValueError as e:
        raise ValueError(f"failed to extract sample ids: {e}") from e

    return sample_ids