Coverage for src / bioimageio / spec / _internal / url.py: 95%
76 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-09 13:16 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-09 13:16 +0000
1from contextlib import nullcontext
2from typing import Any, ClassVar, Optional, Type, Union
4import httpx
5import pydantic
6from loguru import logger
7from pydantic import RootModel
8from typing_extensions import Literal, assert_never
10from . import warning_levels
11from ._settings import settings
12from .field_warning import issue_warning
13from .root_url import RootHttpUrl
14from .validation_context import get_validation_context
def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
    """Validate `url`, starting with a HEAD request.

    Delegates to `_validate_url_impl` with `request_mode="head"` and the
    timeout taken from `settings.http_timeout`.
    """
    http_timeout = settings.http_timeout
    return _validate_url_impl(url, request_mode="head", timeout=http_timeout)
def _validate_url_impl(
    url: Union[str, pydantic.HttpUrl],
    request_mode: Literal["head", "get_stream", "get"],
    timeout: float,
) -> pydantic.HttpUrl:
    """Check that `url` can be requested and return it as `pydantic.HttpUrl`.

    URLs already present in the validation context's `known_files` and
    `example.com` URLs are returned without issuing a request.
    Otherwise a request is sent using `request_mode`; on an unexpected status
    code the check is retried with the next mode (head -> get_stream -> get)
    and only the final "get" attempt reports the status as a warning.

    Args:
        url: The URL to validate.
        request_mode: "head" and "get" issue a plain request,
            "get_stream" issues a streamed GET.
        timeout: Timeout passed on to httpx.

    Returns:
        `url` wrapped as `pydantic.HttpUrl`.

    Raises:
        ValueError: If httpx reports an invalid URL or too many redirects.
    """
    url = str(url)
    context = get_validation_context()
    if url in context.known_files:
        return pydantic.HttpUrl(url)

    if url.startswith(("http://example.com", "https://example.com")):
        return pydantic.HttpUrl(url)

    colab_prefix = "https://colab.research.google.com/"
    colab_github_prefix = colab_prefix + "github/"
    github_prefix = "https://github.com/"

    val_url = url
    if url.startswith(colab_github_prefix):
        # GET requests for colab return 200 even if the source notebook does
        # not exist. We therefore validate the url to the notebook instead
        # (for github notebooks). Only the leading prefix is swapped.
        val_url = github_prefix + url[len(colab_github_prefix) :]
    elif url.startswith(colab_prefix):
        # TODO: improve validation of non-github colab urls
        issue_warning(
            "colab urls currently pass even if the notebook url was not found. Cannot fully validate {value}",
            value=url,
            severity=warning_levels.INFO,
        )

    try:
        if request_mode in ("head", "get"):
            # wrap the plain response so both modes share the `with` below
            request_ctxt = nullcontext(
                httpx.request(
                    request_mode.upper(),
                    val_url,
                    timeout=timeout,
                    follow_redirects=True,
                )
            )
        elif request_mode == "get_stream":
            request_ctxt = httpx.stream(
                "GET", val_url, timeout=timeout, follow_redirects=True
            )
        else:
            assert_never(request_mode)

        with request_ctxt as r:
            status_code = r.status_code
            reason = r.reason_phrase
            location = r.headers.get("location")

    except (
        httpx.InvalidURL,
        httpx.TooManyRedirects,
    ) as e:
        # chain the httpx error so the original traceback is preserved
        raise ValueError(f"Invalid URL '{url}': {e}") from e
    except httpx.RequestError as e:
        # `.request` raises RuntimeError if no request was attached to the
        # error; do not let that mask the actual request failure.
        try:
            failed_request = e.request
        except RuntimeError:
            failed_request = None

        issue_warning(
            "Failed to validate URL '{value}': {error}\nrequest: {request}",
            value=url,
            msg_context={"error": str(e), "request": failed_request},
        )
    except Exception as e:
        # best effort: any other failure is reported as a warning only
        issue_warning(
            "Failed to validate URL '{value}': {error}",
            value=url,
            msg_context={"error": str(e)},
        )
    else:
        # NOTE(review): requests are sent with follow_redirects=True, so the
        # status seen here is presumably that of the final response - confirm
        # whether the redirect branches below can still be reached.
        if status_code == 200:  # ok
            pass
        elif status_code in (302, 303):  # found
            pass
        elif status_code in (301, 308):
            issue_warning(
                "URL redirected ({status_code}): consider updating {value} with new"
                + " location: {location}",
                value=url,
                severity=warning_levels.INFO,
                msg_context={
                    "status_code": status_code,
                    "location": location,
                },
            )
        elif request_mode == "head":
            # unexpected status: retry with a streamed GET
            return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
        elif request_mode == "get_stream":
            # still unexpected: retry with a plain GET
            return _validate_url_impl(url, request_mode="get", timeout=timeout)
        elif request_mode == "get":
            issue_warning(
                "{status_code}: {reason} ({value})",
                value=url,
                severity=(
                    warning_levels.INFO
                    if status_code == 405  # may be returned due to a captcha
                    else warning_levels.WARNING
                ),
                msg_context={
                    "status_code": status_code,
                    "reason": reason,
                },
            )
        else:
            assert_never(request_mode)

    # remember the URL so it is not requested again in this context; this is
    # also reached after a warning-only failure above.
    context.known_files[url] = None
    return pydantic.HttpUrl(url)
class HttpUrl(RootHttpUrl):
    """A URL with the HTTP or HTTPS scheme."""

    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]
    # cached result of `exists()`; None until the first check has run
    _exists: Optional[bool] = None

    def _after_validator(self):
        """Run the parent validator, then check availability if IO checks are enabled."""
        validated = super()._after_validator()
        if get_validation_context().perform_io_checks:
            _ = validated.exists()

        return validated

    def exists(self):
        """True if URL is available"""
        if self._exists is not None:
            # already checked once; reuse the cached answer
            return self._exists

        ctxt = get_validation_context()
        try:
            # run the check with the context's warning level set to WARNING
            with ctxt.replace(warning_level=warning_levels.WARNING):
                self._validated = _validate_url(self._validated)
        except Exception as e:
            # any failure during validation counts as "not available"
            if ctxt.log_warnings:
                logger.info(e)

            self._exists = False
        else:
            self._exists = True

        return self._exists