Coverage for src/backoffice/_summarize.py: 0%

64 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-12-22 02:13 +0000

1import json 

2import warnings 

3from typing import Dict 

4 

5from loguru import logger 

6from packaging.version import Version 

7from tqdm import tqdm 

8 

9from backoffice.compatibility import ( 

10 TOOL_NAMES, 

11 CompatibilityScores, 

12 CompatibilitySummary, 

13 ToolCompatibilityReport, 

14 ToolName, 

15 ToolNameVersioned, 

16 ToolReportDetails, 

17) 

18from backoffice.index import IndexItem, IndexItemVersion, load_index 

19from backoffice.utils import ( 

20 get_all_tool_report_paths, 

21 get_summary, 

22 get_summary_file_path, 

23) 

24 

25 

def summarize_reports():
    """Write a compatibility summary for every version of every index item.

    Loads the index and calls `_summarize` once per (item, version) pair.
    The progress bar wraps the items of the index, so it advances once per
    item rather than once per version.
    """
    for entry in tqdm(load_index().items):
        for version in entry.versions:
            _summarize(entry, version)

    # TODO: Parallelize?
    # Idea: submit each `_summarize(item, v)` call to a ThreadPoolExecutor,
    # collect the futures in a list and drive the progress bar with
    # `tqdm(as_completed(futures), total=len(futures))`.

41 

42 

def _summarize(item: IndexItem, v: IndexItemVersion) -> None:
    """Conflate all tool compatibility reports for a given item version.

    Reads every report file returned by `get_all_tool_report_paths`, collects
    the per-report scores, derives the metadata completeness and metadata
    format scores from the ``bioimageio.core`` reports and writes the
    resulting `CompatibilitySummary` as JSON to the summary file of the item
    version.

    Report files are expected to be named ``<tool>_<tool_version>``. Files that
    do not follow this pattern, or that name an unknown tool, are skipped with
    a warning. A report that cannot be loaded or validated is recorded as a
    failed report with score 0.0 instead of aborting the summary.

    Args:
        item: Index item the version belongs to; only ``item.id`` is used.
        v: Item version to summarize; only ``v.version`` is used.
    """
    initial_summary = get_summary(item.id, v.version)

    reports: list[ToolCompatibilityReport] = []
    scores: dict[ToolNameVersioned, float] = {}
    metadata_completeness = 0.0
    metadata_format_score = 0.0
    # newest bioimageio.core version seen so far (valid format or not)
    metadata_format_version = Version("0.0.0")
    for report_path in get_all_tool_report_paths(item.id, v.version):
        # `partition` never raises: a stem without "_" is reported and skipped
        # instead of crashing the whole run on tuple unpacking.
        tool, separator, tool_version = report_path.stem.partition("_")
        if not separator:
            warnings.warn(
                f"Report {report_path} has no '_' separating tool name and tool version."
            )
            continue

        tool = tool.lower()
        if tool not in TOOL_NAMES:
            warnings.warn(f"Report {report_path} has unknown tool name '{tool}'.")
            continue

        try:
            data = json.loads(report_path.read_text(encoding="utf-8"))
            # The file name is authoritative for tool name and version: warn
            # about a mismatch, then drop the keys so they are not passed
            # twice to the report constructor below.
            if "tool" in data:
                if data["tool"] != tool:
                    warnings.warn(
                        f"Report {report_path} has inconsistent tool name '{data['tool']}' != '{tool}'."
                    )
                del data["tool"]

            if "tool_version" in data:
                if data["tool_version"] != tool_version:
                    warnings.warn(
                        f"Report {report_path} has inconsistent tool version '{data['tool_version']}' != '{tool_version}'."
                    )
                del data["tool_version"]

            report = ToolCompatibilityReport(
                tool=tool, tool_version=tool_version, **data
            )
        except Exception as e:
            # Deliberate best effort: any read/parse/validation problem is
            # kept in the summary as a failed report carrying the error text.
            report = ToolCompatibilityReport(
                tool=tool,
                tool_version=tool_version,
                status="failed",
                error=str(e),
                score=0.0,
                details="Failed to parse compatibility report.",
            )

        scores[f"{tool}_{tool_version}"] = report.score
        reports.append(report)
        if report.tool == "bioimageio.core" and isinstance(
            report.details, ToolReportDetails
        ):
            # select the best completeness score among core reports
            metadata_completeness = max(
                metadata_completeness, report.details.metadata_completeness or 0.0
            )
            # determine metadata format score
            # - valid-format for latest core report: 1.0
            # - valid-format for older core report: 0.5
            # - invalid format for all core reports: 0.0
            has_valid_format = report.details.status in ("passed", "valid-format")
            core_version = Version(tool_version)
            if core_version >= metadata_format_version:
                metadata_format_version = core_version
                if has_valid_format:
                    metadata_format_score = 1.0
                else:
                    # newest report is invalid: an earlier valid report from
                    # an older core version still counts for 0.5
                    metadata_format_score = 0.5 if metadata_format_score else 0.0
            elif not metadata_format_score and has_valid_format:
                metadata_format_score = 0.5

    # group the reports as tool name -> tool version -> report
    tests: Dict[ToolName, Dict[str, ToolCompatibilityReport]] = {}
    for r in reports:
        tests.setdefault(r.tool, {})[r.tool_version] = r

    compatibility_scores = CompatibilityScores(
        tool_compatibility_version_specific=scores,
        metadata_completeness=metadata_completeness,
        metadata_format=metadata_format_score,
    )

    # "passed" as soon as any entry of `tool_compatibility` reaches 0.5;
    # an empty mapping counts as "failed".
    # NOTE(review): `tool_compatibility` is defined on CompatibilityScores,
    # presumably derived from the version specific scores -- confirm.
    compatibility_status = (
        "passed"
        if compatibility_scores.tool_compatibility
        and max(compatibility_scores.tool_compatibility.values()) >= 0.5
        else "failed"
    )
    summary = CompatibilitySummary(
        rdf_content=initial_summary.rdf_content,
        rdf_yaml_sha256=initial_summary.rdf_yaml_sha256,
        status=compatibility_status,
        scores=compatibility_scores,
        tests=tests,
    )

    json_dict = summary.model_dump(mode="json")
    with get_summary_file_path(item.id, v.version).open("wt", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=4, sort_keys=True, ensure_ascii=False)
    # TODO: use .model_dump_json once it supports 'sort_keys' argument for a potential speed gain
    # _ = get_summary_file_path(item.id, v.version).write_text(
    #     summary.model_dump_json(indent=4), encoding="utf-8"
    # )

    logger.info(
        "summarized {} version {} with {} reports, status: {}, metadata completeness: {:.2f}",
        item.id,
        v.version,
        len(reports),
        compatibility_status,
        metadata_completeness,
    )

157 

158 

# Script entry point: summarize all reports when this module is run directly.
if __name__ == "__main__":
    summarize_reports()