Coverage for src / bioimageio / spec / _internal / url.py: 95%

76 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-09 13:16 +0000

1from contextlib import nullcontext 

2from typing import Any, ClassVar, Optional, Type, Union 

3 

4import httpx 

5import pydantic 

6from loguru import logger 

7from pydantic import RootModel 

8from typing_extensions import Literal, assert_never 

9 

10from . import warning_levels 

11from ._settings import settings 

12from .field_warning import issue_warning 

13from .root_url import RootHttpUrl 

14from .validation_context import get_validation_context 

15 

16 

def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
    """Validate *url* with the default strategy.

    Delegates to `_validate_url_impl`, starting in ``"head"`` request mode and
    using ``settings.http_timeout`` as the request timeout.
    """
    default_timeout = settings.http_timeout
    return _validate_url_impl(
        url,
        request_mode="head",
        timeout=default_timeout,
    )

19 

20 

def _validate_url_impl(
    url: Union[str, pydantic.HttpUrl],
    request_mode: Literal["head", "get_stream", "get"],
    timeout: float,
) -> pydantic.HttpUrl:
    """Check that *url* can be requested and return it as a `pydantic.HttpUrl`.

    URLs already present in the validation context's ``known_files`` and URLs
    starting with ``http(s)://example.com`` are returned without any request.
    GitHub-backed colab URLs are checked against the corresponding
    ``https://github.com/`` URL instead.

    If the response status is not one of 200, 301, 302, 303 or 308, the check
    is retried with the next request mode (``"head"`` -> ``"get_stream"`` ->
    ``"get"``); only a failing ``"get"`` results in a warning.

    Args:
        url: The URL to validate.
        request_mode: How to request the URL: a HEAD request, a streamed GET
            request, or a plain GET request.
        timeout: Timeout passed on to httpx.

    Returns:
        *url* wrapped in `pydantic.HttpUrl`.

    Raises:
        ValueError: If httpx reports `httpx.InvalidURL` or
            `httpx.TooManyRedirects`.

    NOTE(review): `issue_warning` presumably raises instead of warning for some
    validation contexts; confirm against its implementation.
    """
    url = str(url)
    context = get_validation_context()
    if url in context.known_files:
        return pydantic.HttpUrl(url)

    val_url = url

    if url.startswith(("http://example.com", "https://example.com")):
        return pydantic.HttpUrl(url)

    colab_prefix = "https://colab.research.google.com/"
    colab_github_prefix = colab_prefix + "github/"
    if url.startswith(colab_github_prefix):
        # GET requests for colab return 200 even if the source notebook does not exist.
        # We therefore validate the URL of the notebook instead (for GitHub notebooks).
        # Only the leading prefix is swapped; later occurrences stay untouched.
        val_url = "https://github.com/" + url[len(colab_github_prefix) :]
    elif url.startswith(colab_prefix):
        # TODO: improve validation of non-github colab urls
        issue_warning(
            "colab urls currently pass even if the notebook url was not found. Cannot fully validate {value}",
            value=url,
            severity=warning_levels.INFO,
        )

    try:
        if request_mode in ("head", "get"):
            # wrap the response so both request kinds can be used in a `with`
            request_ctxt = nullcontext(
                httpx.request(
                    request_mode.upper(),
                    val_url,
                    timeout=timeout,
                    follow_redirects=True,
                )
            )
        elif request_mode == "get_stream":
            request_ctxt = httpx.stream(
                "GET", val_url, timeout=timeout, follow_redirects=True
            )
        else:
            assert_never(request_mode)

        with request_ctxt as r:
            status_code = r.status_code
            reason = r.reason_phrase
            location = r.headers.get("location")

    except (
        httpx.InvalidURL,
        httpx.TooManyRedirects,
    ) as e:
        raise ValueError(f"Invalid URL '{url}': {e}") from e
    except httpx.RequestError as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}\nrequest: {request}",
            value=url,
            msg_context={"error": str(e), "request": e.request},
        )
    except Exception as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}",
            value=url,
            msg_context={"error": str(e)},
        )
    else:
        if status_code in (200, 302, 303):  # ok / found
            pass
        elif status_code in (301, 308):
            issue_warning(
                "URL redirected ({status_code}): consider updating {value} with new"
                + " location: {location}",
                value=url,
                severity=warning_levels.INFO,
                msg_context={
                    "status_code": status_code,
                    "location": location,
                },
            )
        elif request_mode == "head":
            # retry with the next request mode
            return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
        elif request_mode == "get_stream":
            return _validate_url_impl(url, request_mode="get", timeout=timeout)
        elif request_mode == "get":
            issue_warning(
                "{status_code}: {reason} ({value})",
                value=url,
                severity=(
                    warning_levels.INFO
                    if status_code == 405  # may be returned due to a captcha
                    else warning_levels.WARNING
                ),
                msg_context={
                    "status_code": status_code,
                    "reason": reason,
                },
            )
        else:
            assert_never(request_mode)

    # remember the URL so that it is not requested again in this context
    context.known_files[url] = None
    return pydantic.HttpUrl(url)

128 

129 

class HttpUrl(RootHttpUrl):
    """A URL with the HTTP or HTTPS scheme."""

    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]

    # Cached result of `exists()`; None until the URL has been checked.
    _exists: Optional[bool] = None

    def _after_validator(self):
        """Run the parent hook, then check the URL if IO checks are enabled."""
        validated = super()._after_validator()
        if get_validation_context().perform_io_checks:
            _ = validated.exists()

        return validated

    def exists(self):
        """Return True if the URL is available.

        The URL is validated on the first call only; the outcome is stored in
        `_exists` and returned by later calls.
        """
        if self._exists is not None:
            return self._exists

        ctxt = get_validation_context()
        try:
            with ctxt.replace(warning_level=warning_levels.WARNING):
                self._validated = _validate_url(self._validated)
        except Exception as err:
            # Any failure during validation marks the URL as unavailable.
            if ctxt.log_warnings:
                logger.info(err)

            self._exists = False
        else:
            self._exists = True

        return self._exists