Coverage for bioimageio/spec/_internal/url.py: 96%
77 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-11 07:34 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-11 07:34 +0000
1from contextlib import nullcontext
2from typing import Any, ClassVar, Optional, Type, Union
4import httpx
5import pydantic
6from loguru import logger
7from pydantic import RootModel
8from typing_extensions import Literal, assert_never
10from . import warning_levels
11from .field_warning import issue_warning
12from .root_url import RootHttpUrl
13from .validation_context import get_validation_context
def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
    """Validate `url` by delegating to `_validate_url_impl` in "head" request mode.

    Args:
        url: The URL to validate, as a string or a `pydantic.HttpUrl`.

    Returns:
        Whatever `_validate_url_impl` returns for this URL.
    """
    validated = _validate_url_impl(url, request_mode="head")
    return validated
20_KNOWN_VALID_URLS = ("https://zenodo.org/records/3446812/files/unet2d_weights.torch",)
21"""known valid urls to bypass validation for to avoid sporadic 503 errors in tests etc."""
def _validate_url_impl(
    url: Union[str, pydantic.HttpUrl],
    request_mode: Literal["head", "get_stream", "get"],
    timeout: int = 3,
) -> pydantic.HttpUrl:
    """Check that `url` can be requested and return it as a `pydantic.HttpUrl`.

    The request is skipped entirely for URLs already present in the validation
    context's `known_files`, for `example.com` URLs and for `_KNOWN_VALID_URLS`.

    If the response status is not one of the explicitly handled codes, the
    function retries itself with the next request mode
    ("head" -> "get_stream" -> "get"); only the final "get" attempt reports the
    unexpected status through `issue_warning`.

    Args:
        url: The URL to validate.
        request_mode: How to issue the request: a plain HEAD, a streamed GET,
            or a plain GET.
        timeout: Timeout forwarded to httpx for the request.

    Returns:
        `url` wrapped in `pydantic.HttpUrl`. Whenever a request was attempted
        and this function returns normally, `url` has also been recorded in
        `context.known_files`.

    Raises:
        ValueError: If httpx reports `InvalidURL` or `TooManyRedirects`.
    """
    url = str(url)
    context = get_validation_context()
    if url in context.known_files:
        return pydantic.HttpUrl(url)

    # `str.startswith` accepts a tuple of prefixes: one call instead of an `or` chain
    if (
        url.startswith(("http://example.com", "https://example.com"))
        or url in _KNOWN_VALID_URLS
    ):
        return pydantic.HttpUrl(url)

    val_url = url
    if url.startswith("https://colab.research.google.com/github/"):
        # GET requests for colab return 200 even if the source notebook does not exist.
        # We therefore validate the URL of the notebook instead (for GitHub notebooks).
        val_url = url.replace(
            "https://colab.research.google.com/github/", "https://github.com/"
        )
    elif url.startswith("https://colab.research.google.com/"):
        # TODO: improve validation of non-github colab urls
        issue_warning(
            "colab urls currently pass even if the notebook url was not found. Cannot fully validate {value}",
            value=url,
            severity=warning_levels.INFO,
        )

    try:
        if request_mode in ("head", "get"):
            # wrap the response so both branches can share the `with` block below
            request_ctxt = nullcontext(
                httpx.request(
                    request_mode.upper(),
                    val_url,
                    timeout=timeout,
                    follow_redirects=True,
                )
            )
        elif request_mode == "get_stream":
            request_ctxt = httpx.stream(
                "GET", val_url, timeout=timeout, follow_redirects=True
            )
        else:
            assert_never(request_mode)

        with request_ctxt as r:
            status_code = r.status_code
            reason = r.reason_phrase
            location = r.headers.get("location")

    except (
        httpx.InvalidURL,
        httpx.TooManyRedirects,
    ) as e:
        # chain explicitly so the httpx error is kept as the cause
        raise ValueError(f"Invalid URL '{url}': {e}") from e
    except httpx.RequestError as e:
        # `RequestError.request` raises RuntimeError when no request was attached;
        # do not let that mask the original error while reporting it.
        try:
            request = e.request
        except RuntimeError:
            request = None

        issue_warning(
            "Failed to validate URL '{value}': {error}\nrequest: {request}",
            value=url,
            msg_context={"error": str(e), "request": request},
        )
    except Exception as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}",
            value=url,
            msg_context={"error": str(e)},
        )
    else:
        if status_code == 200:  # ok
            pass
        elif status_code in (302, 303):  # found
            pass
        elif status_code in (301, 308):
            issue_warning(
                "URL redirected ({status_code}): consider updating {value} with new"
                + " location: {location}",
                value=url,
                severity=warning_levels.INFO,
                msg_context={
                    "status_code": status_code,
                    "location": location,
                },
            )
        elif request_mode == "head":
            # unexpected status for HEAD: retry with a streamed GET
            return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
        elif request_mode == "get_stream":
            # unexpected status for the streamed GET: retry with a plain GET
            return _validate_url_impl(url, request_mode="get", timeout=timeout)
        elif request_mode == "get":
            issue_warning(
                "{status_code}: {reason} ({value})",
                value=url,
                severity=(
                    warning_levels.INFO
                    if status_code == 405  # may be returned due to a captcha
                    else warning_levels.WARNING
                ),
                msg_context={
                    "status_code": status_code,
                    "reason": reason,
                },
            )
        else:
            assert_never(request_mode)

    # reached on success and also after a reported (non-raising) failure
    context.known_files[url] = None
    return pydantic.HttpUrl(url)
class HttpUrl(RootHttpUrl):
    """A URL with the HTTP or HTTPS scheme."""

    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]
    # cached result of `exists()`; None until the first check has run
    _exists: Optional[bool] = None

    def _after_validator(self):
        """Run the parent validator, then probe the URL if I/O checks are enabled.

        Returns the object produced by the parent `_after_validator`.
        """
        validated = super()._after_validator()
        if get_validation_context().perform_io_checks:
            # result is cached on the instance; the return value is not needed here
            _ = validated.exists()

        return validated

    def exists(self):
        """Return whether validating this URL succeeded, caching the answer.

        The check runs at most once per instance. Any exception raised while
        validating marks the URL as not existing; it is logged at info level
        when the validation context has `log_warnings` set.
        """
        if self._exists is not None:
            return self._exists

        ctxt = get_validation_context()
        try:
            # validate with the warning level set to WARNING for this check only
            with ctxt.replace(warning_level=warning_levels.WARNING):
                self._validated = _validate_url(self._validated)
        except Exception as e:
            if ctxt.log_warnings:
                logger.info(e)

            self._exists = False
        else:
            self._exists = True

        return self._exists