Coverage for src/backoffice/_summarize.py: 0% (56 statements)
import json
import warnings
from typing import Dict

from loguru import logger
from packaging.version import Version
from tqdm import tqdm

from backoffice.compatibility import (
    TOOL_NAMES,
    CompatibilityScores,
    CompatibilitySummary,
    ToolCompatibilityReport,
    ToolName,
    ToolNameVersioned,
    ToolReportDetails,
)
from backoffice.index import IndexItem, IndexItemVersion, load_index
from backoffice.utils import (
    get_all_tool_report_paths,
    get_summary,
    get_summary_file_path,
)


def summarize_reports():
    index = load_index()
    for item in tqdm(index.items):
        for v in item.versions:
            _summarize(item, v)

    # TODO: Parallelize?
    # with ThreadPoolExecutor() as executor:
    #     futures: list[Future[Any]] = []
    #     for item in index.items:
    #         for v in item.versions:
    #             futures.append(executor.submit(_summarize, item, v))

    #     for _ in tqdm(as_completed(futures), total=len(futures)):
    #         pass
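    # (note: the commented-out variant above would additionally need
    #  `from concurrent.futures import Future, ThreadPoolExecutor, as_completed`
    #  and `from typing import Any`)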


def _summarize(item: IndexItem, v: IndexItemVersion):
    """Combine all tool reports for a given item version into a single summary."""

    initial_summary = get_summary(item.id, v.version)
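
    # collect all available tool reports; track metadata scores reported by bioimageio.core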
    reports: list[ToolCompatibilityReport] = []
    scores: dict[ToolNameVersioned, float] = {}
    metadata_completeness = 0.0
    metadata_format_score = 0.0
    metadata_format_version = Version(
        "0.0.0"
    )  # to track the latest core version with valid format
    for report_path in get_all_tool_report_paths(item.id, v.version):
        tool, tool_version = report_path.stem.split("_", 1)
        tool = tool.lower()
        if tool not in TOOL_NAMES:
            warnings.warn(f"Report {report_path} has unknown tool name '{tool}'.")
            continue
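        # parse and validate the report; fall back to a placeholder "failed" report on any error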
        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            report = ToolCompatibilityReport(
                tool=tool, tool_version=tool_version, **data
            )
        except Exception as e:
            report = ToolCompatibilityReport(
                tool=tool,
                tool_version=tool_version,
                status="failed",
                error=str(e),
                score=0.0,
                details="Failed to parse compatibility report.",
            )

        scores[f"{tool}_{tool_version}"] = report.score
        reports.append(report)
        if report.tool == "bioimageio.core" and isinstance(
            report.details, ToolReportDetails
        ):
            # select the best completeness score among core reports
            metadata_completeness = max(
                metadata_completeness, report.details.metadata_completeness or 0.0
            )
            # determine metadata format score
            # - valid-format for latest core report: 1.0
            # - valid-format for older core report: 0.5
            # - invalid format for all core reports: 0.0
            core_version = Version(tool_version)
            if core_version >= metadata_format_version:
                metadata_format_version = core_version
                if report.details.status in ("passed", "valid-format"):
                    metadata_format_score = 1.0
                else:
                    metadata_format_score = 0.5 if metadata_format_score else 0.0

            elif not metadata_format_score and report.details.status in (
                "passed",
                "valid-format",
            ):
                metadata_format_score = 0.5
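
    # group reports by tool name and, within each tool, by tool version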
    tests: Dict[ToolName, Dict[str, ToolCompatibilityReport]] = {}
    for r in reports:
        tests.setdefault(r.tool, {})[r.tool_version] = r

    compatibility_scores = CompatibilityScores(
        tool_compatibility_version_specific=scores,
        metadata_completeness=metadata_completeness,
        metadata_format=metadata_format_score,
    )
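
    # the item passes if the best aggregated tool compatibility score is at least 0.5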
    compatibility_status = (
        "passed"
        if compatibility_scores.tool_compatibility
        and max(compatibility_scores.tool_compatibility.values()) >= 0.5
        else "failed"
    )
    summary = CompatibilitySummary(
        rdf_content=initial_summary.rdf_content,
        rdf_yaml_sha256=initial_summary.rdf_yaml_sha256,
        status=compatibility_status,
        scores=compatibility_scores,
        tests=tests,
    )
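
    # serialize the combined summary as deterministic (indented, key-sorted) JSON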
    json_dict = summary.model_dump(mode="json")
    with get_summary_file_path(item.id, v.version).open("wt", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=4, sort_keys=True, ensure_ascii=False)
    # TODO: use .model_dump_json once it supports a 'sort_keys' argument for a potential speed gain
    # _ = get_summary_file_path(item.id, v.version).write_text(
    #     summary.model_dump_json(indent=4), encoding="utf-8"
    # )

    logger.info(
        "summarized {} version {} with {} reports, status: {}, metadata completeness: {:.2f}",
        item.id,
        v.version,
        len(reports),
        compatibility_status,
        metadata_completeness,
    )
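

# note: assuming the `backoffice` package is importable (e.g. installed from this
# src/ layout), the module can also be run directly via `python -m backoffice._summarize`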
if __name__ == "__main__":
    summarize_reports()