Coverage for src/backoffice/_summarize.py: 0%

56 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-11-12 10:26 +0000

1import json 

2import warnings 

3from typing import Dict 

4 

5from loguru import logger 

6from packaging.version import Version 

7from tqdm import tqdm 

8 

9from backoffice.compatibility import ( 

10 TOOL_NAMES, 

11 CompatibilityScores, 

12 CompatibilitySummary, 

13 ToolCompatibilityReport, 

14 ToolName, 

15 ToolNameVersioned, 

16 ToolReportDetails, 

17) 

18from backoffice.index import IndexItem, IndexItemVersion, load_index 

19from backoffice.utils import ( 

20 get_all_tool_report_paths, 

21 get_summary, 

22 get_summary_file_path, 

23) 

24 

25 

def summarize_reports():
    """Write a compatibility summary for every version of every index item.

    Loads the index and calls ``_summarize`` once per (item, version) pair,
    showing a progress bar over the items.
    """
    entries = load_index().items
    for entry in tqdm(entries):
        for entry_version in entry.versions:
            _summarize(entry, entry_version)

    # TODO: Parallelize?
    # with ThreadPoolExecutor() as executor:
    #     futures: list[Future[Any]] = []
    #     for item in index.items:
    #         for v in item.versions:
    #             futures.append(executor.submit(_summarize, item, v))

    #     for _ in tqdm(as_completed(futures), total=len(futures)):
    #         pass

41 

42 

def _summarize(item: IndexItem, v: IndexItemVersion):
    """Conflate all tool compatibility reports for a given item version.

    Every report path returned by ``get_all_tool_report_paths`` is parsed into
    a ``ToolCompatibilityReport``; reports that cannot be read or validated are
    recorded as failed reports with score 0.0 instead of aborting the run.
    From the collected reports this derives the version-specific tool scores,
    the metadata completeness and the metadata format score (the latter two
    from ``bioimageio.core`` reports only), and writes the resulting
    ``CompatibilitySummary`` as sorted, indented JSON to the path given by
    ``get_summary_file_path``.

    Report files are expected to be named ``<tool>_<tool version>``. Files
    without that separator, or with an unknown tool name, are skipped with a
    warning.

    Args:
        item: Index entry whose reports are summarized.
        v: The version of ``item`` to summarize.
    """
    initial_summary = get_summary(item.id, v.version)

    reports: list[ToolCompatibilityReport] = []
    scores: dict[ToolNameVersioned, float] = {}
    metadata_completeness = 0.0
    metadata_format_score = 0.0
    # newest bioimageio.core version encountered so far (valid format or not)
    metadata_format_version = Version("0.0.0")
    for report_path in get_all_tool_report_paths(item.id, v.version):
        # partition never raises, unlike unpacking the result of split("_", 1)
        # which fails with ValueError when the stem contains no underscore.
        tool, separator, tool_version = report_path.stem.partition("_")
        if not separator:
            warnings.warn(
                f"Report {report_path} is not named '<tool>_<version>'; skipping."
            )
            continue

        tool = tool.lower()
        if tool not in TOOL_NAMES:
            warnings.warn(f"Report {report_path} has unknown tool name '{tool}'.")
            continue

        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            report = ToolCompatibilityReport(
                tool=tool, tool_version=tool_version, **data
            )
        except Exception as e:
            # Deliberate best effort: an unreadable or invalid report is kept
            # in the summary as a failed report rather than stopping the run.
            report = ToolCompatibilityReport(
                tool=tool,
                tool_version=tool_version,
                status="failed",
                error=str(e),
                score=0.0,
                details="Failed to parse compatibility report.",
            )

        scores[f"{tool}_{tool_version}"] = report.score
        reports.append(report)
        if report.tool == "bioimageio.core" and isinstance(
            report.details, ToolReportDetails
        ):
            # select the best completeness score among core reports
            metadata_completeness = max(
                metadata_completeness, report.details.metadata_completeness or 0.0
            )
            # determine metadata format score
            # - valid-format for latest core report: 1.0
            # - valid-format for older core report: 0.5
            # - invalid format for all core reports: 0.0
            core_version = Version(tool_version)
            if core_version >= metadata_format_version:
                metadata_format_version = core_version
                if report.details.status in ("passed", "valid-format"):
                    metadata_format_score = 1.0
                else:
                    # The newest report is invalid: keep half credit only if
                    # an older core report had a valid format.
                    metadata_format_score = 0.5 if metadata_format_score else 0.0

            elif not metadata_format_score and report.details.status in (
                "passed",
                "valid-format",
            ):
                # An older core report with valid format earns half credit.
                metadata_format_score = 0.5

    # group reports as tool name -> tool version -> report
    tests: Dict[ToolName, Dict[str, ToolCompatibilityReport]] = {}
    for r in reports:
        tests.setdefault(r.tool, {})[r.tool_version] = r

    compatibility_scores = CompatibilityScores(
        tool_compatibility_version_specific=scores,
        metadata_completeness=metadata_completeness,
        metadata_format=metadata_format_score,
    )

    # "passed" requires at least one tool compatibility score of 0.5 or more
    compatibility_status = (
        "passed"
        if compatibility_scores.tool_compatibility
        and max(compatibility_scores.tool_compatibility.values()) >= 0.5
        else "failed"
    )
    summary = CompatibilitySummary(
        rdf_content=initial_summary.rdf_content,
        rdf_yaml_sha256=initial_summary.rdf_yaml_sha256,
        status=compatibility_status,
        scores=compatibility_scores,
        tests=tests,
    )

    json_dict = summary.model_dump(mode="json")
    with get_summary_file_path(item.id, v.version).open("wt", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=4, sort_keys=True, ensure_ascii=False)
    # TODO: use .model_dump_json once it supports 'sort_keys' argument for a potential speed gain
    # _ = get_summary_file_path(item.id, v.version).write_text(
    #     summary.model_dump_json(indent=4), encoding="utf-8"
    # )

    logger.info(
        "summarized {} version {} with {} reports, status: {}, metadata completeness: {:.2f}",
        item.id,
        v.version,
        len(reports),
        compatibility_status,
        metadata_completeness,
    )

143 

144 

# Script entry point: regenerate all compatibility summaries when this module
# is executed directly rather than imported.
if __name__ == "__main__":
    summarize_reports()
146 summarize_reports()