Coverage for src/backoffice/_summarize.py

1import json

2import warnings

3from typing import Dict

5from loguru import logger

6from packaging.version import Version

7from tqdm import tqdm

9from backoffice.compatibility import (

10 TOOL_NAMES,

11 CompatibilityScores,

12 CompatibilitySummary,

13 ToolCompatibilityReport,

14 ToolName,

15 ToolNameVersioned,

16 ToolReportDetails,

17)

18from backoffice.index import IndexItem, IndexItemVersion, load_index

19from backoffice.utils import (

20 get_all_tool_report_paths,

21 get_summary,

22 get_summary_file_path,

23)

def summarize_reports():
    """Summarize the tool reports of every version of every item in the index.

    Loads the index and calls `_summarize` once per (item, version) pair,
    showing a progress bar that advances per index item.
    """
    collection = load_index()
    for entry in tqdm(collection.items):
        for entry_version in entry.versions:
            _summarize(entry, entry_version)

    # TODO: Parallelize?
    # with ThreadPoolExecutor() as executor:
    #     futures: list[Future[Any]] = []
    #     for item in index.items:
    #         for v in item.versions:
    #             futures.append(executor.submit(_summarize, item, v))
    #
    #     for _ in tqdm(as_completed(futures), total=len(futures)):
    #         pass

def _load_tool_report(
    report_path, tool: str, tool_version: str
) -> ToolCompatibilityReport:
    """Load one tool report file, falling back to a 'failed' report on any error.

    `tool` and `tool_version` (taken from the file name by the caller) are
    authoritative: matching keys inside the JSON payload are only compared
    against them (a mismatch emits a warning) and then discarded.

    Args:
        report_path: path-like object of the JSON report (must provide `read_text`).
        tool: lower-cased tool name parsed from the file name.
        tool_version: tool version parsed from the file name.

    Returns:
        The parsed report, or a report with status "failed" and score 0.0 whose
        `error` holds the exception message if reading/parsing/validation failed.
    """
    try:
        data = json.loads(report_path.read_text(encoding="utf-8"))
        if "tool" in data:
            if data["tool"] != tool:
                warnings.warn(
                    f"Report {report_path} has inconsistent tool name '{data['tool']}' != '{tool}'."
                )
            del data["tool"]

        if "tool_version" in data:
            if data["tool_version"] != tool_version:
                warnings.warn(
                    f"Report {report_path} has inconsistent tool version '{data['tool_version']}' != '{tool_version}'."
                )
            del data["tool_version"]

        return ToolCompatibilityReport(tool=tool, tool_version=tool_version, **data)
    except Exception as e:
        # Deliberately best-effort: one broken report must not abort the
        # summary; it is recorded as a failed test instead.
        return ToolCompatibilityReport(
            tool=tool,
            tool_version=tool_version,
            status="failed",
            error=str(e),
            score=0.0,
            details="Failed to parse compatibility report.",
        )


def _summarize(item: IndexItem, v: IndexItemVersion):
    """Conflate all tool reports for a given item version into one summary file.

    Every report returned by `get_all_tool_report_paths` is expected to be
    named `<tool>_<tool_version>`. Reports with a malformed name or an unknown
    tool name are skipped with a warning. The resulting `CompatibilitySummary`
    is written as sorted, indented JSON to `get_summary_file_path(...)`.

    Metadata scores are derived from "bioimageio.core" reports only:

    - metadata completeness: best value among all core reports
    - metadata format:
        - valid format for the newest core report: 1.0
        - valid format for an older core report only: 0.5
        - invalid format for all core reports: 0.0

    Args:
        item: index item whose reports are summarized.
        v: the version of `item` to summarize.
    """
    initial_summary = get_summary(item.id, v.version)

    reports: list[ToolCompatibilityReport] = []
    scores: dict[ToolNameVersioned, float] = {}
    metadata_completeness = 0.0
    metadata_format_score = 0.0
    # Newest core version seen so far, whether or not its report had a valid format.
    latest_core_version = Version("0.0.0")
    for report_path in get_all_tool_report_paths(item.id, v.version):
        tool, separator, tool_version = report_path.stem.partition("_")
        if not separator:
            # Without this guard a stray file would raise on unpacking and
            # abort the whole summary run.
            warnings.warn(
                f"Report {report_path} is not named '<tool>_<tool_version>'."
            )
            continue

        tool = tool.lower()
        if tool not in TOOL_NAMES:
            warnings.warn(f"Report {report_path} has unknown tool name '{tool}'.")
            continue

        report = _load_tool_report(report_path, tool, tool_version)

        scores[f"{tool}_{tool_version}"] = report.score
        reports.append(report)

        if not (
            report.tool == "bioimageio.core"
            and isinstance(report.details, ToolReportDetails)
        ):
            continue

        # select the best completeness score among core reports
        metadata_completeness = max(
            metadata_completeness, report.details.metadata_completeness or 0.0
        )

        try:
            core_version = Version(tool_version)
        except ValueError:
            # packaging's InvalidVersion is a ValueError subclass; a version that
            # cannot be ordered cannot take part in the format score.
            warnings.warn(
                f"Report {report_path} has invalid tool version '{tool_version}'."
            )
            continue

        format_is_valid = report.details.status in ("passed", "valid-format")
        if core_version >= latest_core_version:
            latest_core_version = core_version
            if format_is_valid:
                metadata_format_score = 1.0
            else:
                # Newest core report is invalid: keep half credit only if an
                # older core report was already found valid.
                metadata_format_score = 0.5 if metadata_format_score else 0.0
        elif format_is_valid and not metadata_format_score:
            # An older core report is valid while no newer one has been so far.
            metadata_format_score = 0.5

    # Group reports as tool name -> tool version -> report.
    tests: Dict[ToolName, Dict[str, ToolCompatibilityReport]] = {}
    for r in reports:
        tests.setdefault(r.tool, {})[r.tool_version] = r

    compatibility_scores = CompatibilityScores(
        tool_compatibility_version_specific=scores,
        metadata_completeness=metadata_completeness,
        metadata_format=metadata_format_score,
    )

    # "passed" requires at least one tool compatibility score of 0.5 or more.
    compatibility_status = (
        "passed"
        if compatibility_scores.tool_compatibility
        and max(compatibility_scores.tool_compatibility.values()) >= 0.5
        else "failed"
    )
    summary = CompatibilitySummary(
        rdf_content=initial_summary.rdf_content,
        rdf_yaml_sha256=initial_summary.rdf_yaml_sha256,
        status=compatibility_status,
        scores=compatibility_scores,
        tests=tests,
    )

    json_dict = summary.model_dump(mode="json")
    with get_summary_file_path(item.id, v.version).open("wt", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=4, sort_keys=True, ensure_ascii=False)
    # TODO: use .model_dump_json once it supports 'sort_keys' argument for a potential speed gain
    # _ = get_summary_file_path(item.id, v.version).write_text(
    #     summary.model_dump_json(indent=4), encoding="utf-8"
    # )

    logger.info(
        "summarized {} version {} with {} reports, status: {}, metadata completeness: {:.2f}",
        item.id,
        v.version,
        len(reports),
        compatibility_status,
        metadata_completeness,
    )

157

158

if __name__ == "__main__":
    # Script entry point: summarize all reports when this module is run directly.
    summarize_reports()

Coverage for src/backoffice/_summarize.py: 0%

64 statements