Coverage for src/backoffice/_summarize.py: 0%
64 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-12-22 02:13 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-12-22 02:13 +0000
1import json
2import warnings
3from typing import Dict
5from loguru import logger
6from packaging.version import Version
7from tqdm import tqdm
9from backoffice.compatibility import (
10 TOOL_NAMES,
11 CompatibilityScores,
12 CompatibilitySummary,
13 ToolCompatibilityReport,
14 ToolName,
15 ToolNameVersioned,
16 ToolReportDetails,
17)
18from backoffice.index import IndexItem, IndexItemVersion, load_index
19from backoffice.utils import (
20 get_all_tool_report_paths,
21 get_summary,
22 get_summary_file_path,
23)
def summarize_reports():
    """Write the compatibility summary for every version of every indexed item.

    Loads the index and calls ``_summarize`` once per item version, showing a
    progress bar that advances once per item.
    """
    collection_index = load_index()
    progress = tqdm(collection_index.items)
    for entry in progress:
        for entry_version in entry.versions:
            _summarize(entry, entry_version)

    # TODO: Parallelize? E.g. submit every `_summarize(item, v)` call to a
    # ThreadPoolExecutor and wrap `as_completed(futures)` in tqdm.
def _summarize(item: IndexItem, v: IndexItemVersion) -> None:
    """Merge all tool reports of one item version into its summary file.

    Every path returned by ``get_all_tool_report_paths`` is expected to have a
    stem of the form ``<tool>_<tool version>``. Each report is loaded into a
    ``ToolCompatibilityReport``; a report that cannot be read or validated is
    recorded as a failed report with score 0.0 instead of aborting the run.
    Reports with a malformed file name or an unknown tool name are skipped
    with a warning. The collected reports and the derived scores are written
    as JSON to the path given by ``get_summary_file_path``.

    Args:
        item: Index entry the reports belong to.
        v: Version of ``item`` to summarize.
    """
    initial_summary = get_summary(item.id, v.version)

    reports: list[ToolCompatibilityReport] = []
    scores: dict[ToolNameVersioned, float] = {}
    metadata_completeness = 0.0
    metadata_format_score = 0.0
    # newest bioimageio.core version seen so far (valid format or not)
    metadata_format_version = Version("0.0.0")

    for report_path in get_all_tool_report_paths(item.id, v.version):
        # A stem without "_" cannot be split into tool and version; skip it
        # instead of letting a tuple-unpacking error abort the whole run.
        tool, separator, tool_version = report_path.stem.partition("_")
        if not separator:
            warnings.warn(
                f"Report {report_path} is not named '<tool>_<version>'; skipping."
            )
            continue

        tool = tool.lower()
        if tool not in TOOL_NAMES:
            warnings.warn(f"Report {report_path} has unknown tool name '{tool}'.")
            continue

        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            # The file name is authoritative for tool name and version; drop
            # the embedded copies so they are not passed twice below.
            if "tool" in data:
                if data["tool"] != tool:
                    warnings.warn(
                        f"Report {report_path} has inconsistent tool name '{data['tool']}' != '{tool}'."
                    )
                del data["tool"]

            if "tool_version" in data:
                if data["tool_version"] != tool_version:
                    warnings.warn(
                        f"Report {report_path} has inconsistent tool version '{data['tool_version']}' != '{tool_version}'."
                    )
                del data["tool_version"]

            report = ToolCompatibilityReport(
                tool=tool, tool_version=tool_version, **data
            )
        except Exception as e:
            # Best effort: keep an unreadable/invalid report as a failed one.
            report = ToolCompatibilityReport(
                tool=tool,
                tool_version=tool_version,
                status="failed",
                error=str(e),
                score=0.0,
                details="Failed to parse compatibility report.",
            )

        scores[f"{tool}_{tool_version}"] = report.score
        reports.append(report)

        if report.tool != "bioimageio.core" or not isinstance(
            report.details, ToolReportDetails
        ):
            continue

        # select the best completeness score among core reports
        metadata_completeness = max(
            metadata_completeness, report.details.metadata_completeness or 0.0
        )

        # determine metadata format score
        # - valid-format for latest core report: 1.0
        # - valid-format for older core report: 0.5
        # - invalid format for all core reports: 0.0
        try:
            core_version = Version(tool_version)
        except ValueError:  # packaging's InvalidVersion subclasses ValueError
            warnings.warn(
                f"Report {report_path} has unparsable tool version '{tool_version}'; "
                "ignoring it for the metadata format score."
            )
            continue

        has_valid_format = report.details.status in ("passed", "valid-format")
        if core_version >= metadata_format_version:
            metadata_format_version = core_version
            if has_valid_format:
                metadata_format_score = 1.0
            else:
                # the newest core rejects the format; an older valid report
                # (non-zero score so far) still counts for 0.5
                metadata_format_score = 0.5 if metadata_format_score else 0.0
        elif has_valid_format and not metadata_format_score:
            metadata_format_score = 0.5

    # group reports as tool name -> tool version -> report
    tests: Dict[ToolName, Dict[str, ToolCompatibilityReport]] = {}
    for r in reports:
        tests.setdefault(r.tool, {})[r.tool_version] = r

    compatibility_scores = CompatibilityScores(
        tool_compatibility_version_specific=scores,
        metadata_completeness=metadata_completeness,
        metadata_format=metadata_format_score,
    )

    # "passed" as soon as any tool reaches a compatibility score of 0.5
    compatibility_status = (
        "passed"
        if compatibility_scores.tool_compatibility
        and max(compatibility_scores.tool_compatibility.values()) >= 0.5
        else "failed"
    )
    summary = CompatibilitySummary(
        rdf_content=initial_summary.rdf_content,
        rdf_yaml_sha256=initial_summary.rdf_yaml_sha256,
        status=compatibility_status,
        scores=compatibility_scores,
        tests=tests,
    )

    json_dict = summary.model_dump(mode="json")
    with get_summary_file_path(item.id, v.version).open("wt", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=4, sort_keys=True, ensure_ascii=False)
    # TODO: use .model_dump_json once it supports 'sort_keys' argument for a potential speed gain
    # _ = get_summary_file_path(item.id, v.version).write_text(
    #     summary.model_dump_json(indent=4), encoding="utf-8"
    # )

    logger.info(
        "summarized {} version {} with {} reports, status: {}, metadata completeness: {:.2f}",
        item.id,
        v.version,
        len(reports),
        compatibility_status,
        metadata_completeness,
    )
# Run summarize_reports() when this module is executed as a script.
if __name__ == "__main__":
    summarize_reports()