Coverage for bioimageio/spec/_internal/url.py: 97%
73 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-02 14:21 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-02 14:21 +0000
1from typing import Any, ClassVar, Optional, Type, Union
3import pydantic
4import requests
5import requests.exceptions
6from loguru import logger
7from pydantic import RootModel
8from typing_extensions import Literal, assert_never
10from .field_warning import issue_warning
11from .root_url import RootHttpUrl
12from .validation_context import get_validation_context
def _validate_url(url: Union[str, pydantic.HttpUrl]) -> pydantic.HttpUrl:
    """Validate `url`, starting the check with a HEAD request.

    Delegates to `_validate_url_impl` with `request_mode="head"` and returns
    whatever that call returns.
    """
    checked_url = _validate_url_impl(url, request_mode="head")
    return checked_url
def _validate_url_impl(
    url: Union[str, pydantic.HttpUrl],
    request_mode: Literal["head", "get_stream", "get"],
    timeout: int = 3,
) -> pydantic.HttpUrl:
    """Check that `url` can be requested and return it as a `pydantic.HttpUrl`.

    URLs already present in the validation context's `known_files`, and URLs
    starting with `http(s)://example.com`, are returned without any request.
    GitHub-backed colab URLs are checked against the corresponding
    `https://github.com/` URL instead; other colab URLs only trigger a warning.

    A response with status 200, 302 or 303 is accepted silently; 301 and 308
    are accepted with a warning that suggests the new location. Any other
    status escalates the request: "head" retries as "get_stream", which retries
    as "get". In "get" mode a 405 only issues a warning and every remaining
    status raises.

    Args:
        url: The URL to validate.
        request_mode: Which request to issue: a HEAD request, a streamed GET,
            or a plain GET.
        timeout: Timeout passed to `requests` for the request.

    Returns:
        The (original) URL as `pydantic.HttpUrl`. The URL is also recorded in
        the validation context's `known_files`.

    Raises:
        ValueError: If `requests` reports one of the explicitly listed
            malformed-URL/response errors, or if the final "get" request
            yields a status that is not accepted.
    """
    url = str(url)
    context = get_validation_context()
    if url in context.known_files:
        with context.replace(perform_io_checks=False):
            return (  # pyright: ignore[reportUnknownVariableType]
                # TODO: remove pyright ignore for pydantic > 2.9
                pydantic.HttpUrl(url)  # pyright: ignore[reportCallIssue]
            )

    val_url = url

    if url.startswith(("http://example.com", "https://example.com")):
        return pydantic.HttpUrl(  # pyright: ignore[reportUnknownVariableType,reportCallIssue]
            url
        )

    if url.startswith("https://colab.research.google.com/github/"):
        # GET requests for colab return 200 even if the source notebook does not exist.
        # We therefore validate the URL of the notebook instead (for GitHub notebooks).
        val_url = url.replace(
            "https://colab.research.google.com/github/", "https://github.com/"
        )
    elif url.startswith("https://colab.research.google.com/"):
        # TODO: improve validation of non-github colab urls
        issue_warning(
            "colab urls currently pass even if the notebook url was not found. Cannot fully validate {value}",
            value=url,
        )

    try:
        if request_mode == "head":
            response = requests.head(val_url, timeout=timeout)
        elif request_mode == "get_stream":
            response = requests.get(val_url, stream=True, timeout=timeout)
        elif request_mode == "get":
            response = requests.get(val_url, stream=False, timeout=timeout)
        else:
            assert_never(request_mode)
    except (
        requests.exceptions.ChunkedEncodingError,
        requests.exceptions.ContentDecodingError,
        requests.exceptions.InvalidHeader,
        requests.exceptions.InvalidJSONError,
        requests.exceptions.InvalidSchema,
        requests.exceptions.InvalidURL,
        requests.exceptions.MissingSchema,
        requests.exceptions.StreamConsumedError,
        requests.exceptions.TooManyRedirects,
        requests.exceptions.UnrewindableBodyError,
        requests.exceptions.URLRequired,
    ) as e:
        # These errors are treated as an invalid URL; keep the original
        # exception attached as the explicit cause.
        raise ValueError(
            f"Invalid URL '{url}': {e}\nrequest: {e.request}\nresponse: {e.response}"
        ) from e
    except requests.RequestException as e:
        # Any other request failure only produces a warning.
        issue_warning(
            "Failed to validate URL '{value}': {error}\nrequest: {request}\nresponse: {response}",
            value=url,
            msg_context={"error": str(e), "response": e.response, "request": e.request},
        )
    except Exception as e:
        issue_warning(
            "Failed to validate URL '{value}': {error}",
            value=url,
            msg_context={"error": str(e)},
        )
    else:
        # Only status, reason and headers are needed below. Close the response
        # right away so that a streamed request does not keep its connection
        # open (also before a retry with another request mode).
        response.close()

        if response.status_code == 200:  # ok
            pass
        elif response.status_code in (302, 303):  # found
            pass
        elif response.status_code in (301, 308):
            issue_warning(
                "URL redirected ({status_code}): consider updating {value} with new"
                + " location: {location}",
                value=url,
                msg_context={
                    "status_code": response.status_code,
                    "location": response.headers.get("location"),
                },
            )
        elif request_mode == "head":
            # HEAD was not accepted: retry with a streamed GET
            return _validate_url_impl(url, request_mode="get_stream", timeout=timeout)
        elif request_mode == "get_stream":
            # streamed GET was not accepted either: retry with a plain GET
            return _validate_url_impl(url, request_mode="get", timeout=timeout)
        elif response.status_code == 405:
            issue_warning(
                "{status_code}: {reason} {value}",
                value=url,
                msg_context={
                    "status_code": response.status_code,
                    "reason": response.reason,
                },
            )
        elif request_mode == "get":
            raise ValueError(f"{response.status_code}: {response.reason} {url}")
        else:
            assert_never(request_mode)

    # Remember the URL so that later validations in this context skip the request.
    context.known_files[url] = None
    return (  # pyright: ignore[reportUnknownVariableType]
        # TODO: remove pyright ignore for pydantic > 2.9
        pydantic.HttpUrl(url)  # pyright: ignore[reportCallIssue]
    )
class HttpUrl(RootHttpUrl):
    """A URL with the HTTP or HTTPS scheme."""

    root_model: ClassVar[Type[RootModel[Any]]] = RootModel[pydantic.HttpUrl]
    # Cached availability: None until checked, then True/False.
    _exists: Optional[bool] = None

    def _after_validator(self):
        """Run the parent validator, then check the URL if IO checks are enabled."""
        instance = super()._after_validator()
        if get_validation_context().perform_io_checks:
            instance._validated = _validate_url(instance._validated)
            instance._exists = True

        return instance

    def exists(self):
        """True if URL is available"""
        if self._exists is not None:
            # reuse the result of an earlier check
            return self._exists

        try:
            self._validated = _validate_url(self._validated)
        except Exception as e:
            # a failed check is logged and remembered as "not available"
            logger.info(e)
            self._exists = False
        else:
            self._exists = True

        return self._exists