Coverage for bioimageio/spec/_internal/url.py: 96%

77 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-11 07:34 +0000

1from contextlib import nullcontext 

2from typing import Any, ClassVar, Optional, Type, Union 

3 

4import httpx 

5import pydantic 

6from loguru import logger 

7from pydantic import RootModel 

8from typing_extensions import Literal, assert_never 

9 

10from . import warning_levels 

11from .field_warning import issue_warning 

12from .root_url import RootHttpUrl 

13from .validation_context import get_validation_context 

14 

15 

def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
    """Validate `url`, beginning with a HEAD request.

    Thin entry point that delegates to `_validate_url_impl` with
    `request_mode="head"` and hands back whatever it returns.
    """
    validated = _validate_url_impl(url, request_mode="head")
    return validated

18 

19 

# URLs listed here are compared by exact string match in `_validate_url_impl`
# and accepted without issuing any HTTP request.
_KNOWN_VALID_URLS = ("https://zenodo.org/records/3446812/files/unet2d_weights.torch",)
"""known valid urls to bypass validation for to avoid sporadic 503 errors in tests etc."""

22 

23 

def _validate_url_impl(
    url: Union[str, pydantic.HttpUrl],
    request_mode: Literal["head", "get_stream", "get"],
    timeout: int = 3,
) -> pydantic.HttpUrl:
    """Check that `url` can be requested and return it as a `pydantic.HttpUrl`.

    URLs already present in the validation context's `known_files`, URLs that
    start with `http(s)://example.com`, and URLs listed in `_KNOWN_VALID_URLS`
    are accepted without any request.

    On an unexpected status code the check escalates: a "head" attempt is
    repeated as "get_stream", which is repeated as "get". Only the final "get"
    attempt reports the unexpected status code through `issue_warning`.

    Args:
        url: The URL to check.
        request_mode: Kind of HTTP request to issue for this attempt.
        timeout: Timeout handed to httpx for the request.

    Returns:
        `url` wrapped in `pydantic.HttpUrl`.

    Raises:
        ValueError: If httpx reports the URL as invalid or aborts because of
            too many redirects. Other request failures are reported through
            `issue_warning` instead of being raised here.
    """
    url = str(url)
    context = get_validation_context()
    if url in context.known_files:
        return pydantic.HttpUrl(url)

    if (
        url.startswith(("http://example.com", "https://example.com"))
        or url in _KNOWN_VALID_URLS
    ):
        return pydantic.HttpUrl(url)

    colab_prefix = "https://colab.research.google.com/"
    colab_github_prefix = colab_prefix + "github/"

    # `val_url` is the address that is actually requested; `url` is what gets
    # reported, cached and returned.
    val_url = url
    if url.startswith(colab_github_prefix):
        # GET requests to colab return 200 even if the source notebook does not exist.
        # We therefore validate the URL of the notebook itself (for GitHub notebooks).
        # Only the leading prefix is swapped; any later occurrence of the same
        # text inside the URL is left untouched.
        val_url = "https://github.com/" + url[len(colab_github_prefix) :]
    elif url.startswith(colab_prefix):
        # TODO: improve validation of non-github colab urls
        issue_warning(
            "colab urls currently pass even if the notebook url was not found. Cannot fully validate {value}",
            value=url,
            severity=warning_levels.INFO,
        )

    try:
        if request_mode in ("head", "get"):
            # `httpx.request` returns a finished response; wrap it so that both
            # request kinds can be consumed by the same `with` statement below.
            request_ctxt = nullcontext(
                httpx.request(
                    request_mode.upper(),
                    val_url,
                    timeout=timeout,
                    follow_redirects=True,
                )
            )
        elif request_mode == "get_stream":
            request_ctxt = httpx.stream(
                "GET", val_url, timeout=timeout, follow_redirects=True
            )
        else:
            assert_never(request_mode)

        with request_ctxt as r:
            status_code = r.status_code
            reason = r.reason_phrase
            location = r.headers.get("location")

    except (
        httpx.InvalidURL,
        httpx.TooManyRedirects,
    ) as e:
        # Chain the httpx error explicitly so the root cause stays attached.
        raise ValueError(f"Invalid URL '{url}': {e}") from e
    except httpx.RequestError as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}\nrequest: {request}",
            value=url,
            msg_context={"error": str(e), "request": e.request},
        )
    except Exception as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}",
            value=url,
            msg_context={"error": str(e)},
        )
    else:
        # NOTE(review): both requests pass `follow_redirects=True`, so a 3xx
        # status presumably only shows up here when httpx did not follow the
        # redirect -- confirm whether these branches are still reachable.
        if status_code == 200:  # ok
            pass
        elif status_code in (302, 303):  # found
            pass
        elif status_code in (301, 308):
            issue_warning(
                "URL redirected ({status_code}): consider updating {value} with new"
                + " location: {location}",
                value=url,
                severity=warning_levels.INFO,
                msg_context={
                    "status_code": status_code,
                    "location": location,
                },
            )
        elif request_mode == "head":
            # Unexpected status for HEAD: retry with a streamed GET.
            return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
        elif request_mode == "get_stream":
            # Unexpected status for the streamed GET: retry with a plain GET.
            return _validate_url_impl(url, request_mode="get", timeout=timeout)
        elif request_mode == "get":
            issue_warning(
                "{status_code}: {reason} ({value})",
                value=url,
                severity=(
                    warning_levels.INFO
                    if status_code == 405  # may be returned due to a captcha
                    else warning_levels.WARNING
                ),
                msg_context={
                    "status_code": status_code,
                    "reason": reason,
                },
            )
        else:
            assert_never(request_mode)

    # Reached after a successful check and also after a failure that was only
    # reported as a warning: remember the URL so later calls return early.
    context.known_files[url] = None
    return pydantic.HttpUrl(url)

135 

136 

class HttpUrl(RootHttpUrl):
    """A URL with the HTTP or HTTPS scheme."""

    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]
    # Result of the last `exists()` check; `None` until a check has been run.
    _exists: Optional[bool] = None

    def _after_validator(self):
        """Run the parent's post-validation step, then probe the URL if IO checks are enabled."""
        validated = super()._after_validator()
        if get_validation_context().perform_io_checks:
            _ = validated.exists()

        return validated

    def exists(self):
        """True if URL is available"""
        if self._exists is not None:
            # A previous call already settled the answer.
            return self._exists

        validation_context = get_validation_context()
        try:
            with validation_context.replace(warning_level=warning_levels.WARNING):
                self._validated = _validate_url(self._validated)
        except Exception as error:
            # Any failure during validation counts as "not available".
            if validation_context.log_warnings:
                logger.info(error)

            available = False
        else:
            available = True

        self._exists = available
        return self._exists

165 

166 return self._exists