from __future__ import annotations

import ipaddress
import os
import re
import typing
from urllib.request import getproxies

from ._types import PrimitiveData

if typing.TYPE_CHECKING:  # pragma: no cover
    from ._urls import URL


def primitive_value_to_str(value: PrimitiveData) -> str:
    """
    Coerce a primitive data type into a string value.

    Note that we prefer JSON-style 'true'/'false' for boolean values here.
    """
    if value is True:
        return "true"
    elif value is False:
        return "false"
    elif value is None:
        return ""
    return str(value)
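

# Illustrative doctest-style usage (a sketch; the outputs follow directly from
# the branches in primitive_value_to_str above):
#
#     >>> primitive_value_to_str(True)
#     'true'
#     >>> primitive_value_to_str(None)
#     ''
#     >>> primitive_value_to_str(3.14)
#     '3.14'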


def get_environment_proxies() -> dict[str, str | None]:
    """Gets proxy information from the environment."""

    # urllib.request.getproxies() falls back on System
    # Registry and Config for proxies on Windows and macOS.
    # We don't want to propagate non-HTTP proxies into
    # our configuration, such as 'TRAVIS_APT_PROXY'.
    proxy_info = getproxies()
    mounts: dict[str, str | None] = {}

    for scheme in ("http", "https", "all"):
        if proxy_info.get(scheme):
            hostname = proxy_info[scheme]
            mounts[f"{scheme}://"] = (
                hostname if "://" in hostname else f"http://{hostname}"
            )

    no_proxy_hosts = [host.strip() for host in proxy_info.get("no", "").split(",")]
    for hostname in no_proxy_hosts:
        # See https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html for details
        # on how names in `NO_PROXY` are handled.
        if hostname == "*":
            # If NO_PROXY=* is used, or if "*" occurs as any one of the
            # comma-separated hostnames, then we should bypass any information
            # from HTTP_PROXY, HTTPS_PROXY, ALL_PROXY, and always ignore
            # proxies.
            return {}
        elif hostname:
            # NO_PROXY=.google.com is mapped to "all://*.google.com",
            # which disables "www.google.com" but not "google.com".
            # NO_PROXY=google.com is mapped to "all://*google.com",
            # which disables "www.google.com" and "google.com"
            # (but not "wwwgoogle.com").
            # NO_PROXY can include domains, IPv6 and IPv4 addresses, and "localhost",
            # e.g. NO_PROXY=example.com,::1,localhost,192.168.0.0/16
            if "://" in hostname:
                mounts[hostname] = None
            elif is_ipv4_hostname(hostname):
                mounts[f"all://{hostname}"] = None
            elif is_ipv6_hostname(hostname):
                mounts[f"all://[{hostname}]"] = None
            elif hostname.lower() == "localhost":
                mounts[f"all://{hostname}"] = None
            else:
                mounts[f"all://*{hostname}"] = None

    return mounts
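

# Illustrative sketch of the resulting proxy mounts (environment-dependent, so
# shown as a comment rather than a strict doctest). With, for example:
#
#     HTTP_PROXY=http://127.0.0.1:3128
#     NO_PROXY=example.com,::1,localhost
#
# the returned mapping is roughly:
#
#     {
#         "http://": "http://127.0.0.1:3128",
#         "all://*example.com": None,
#         "all://[::1]": None,
#         "all://localhost": None,
#     }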


def to_bytes(value: str | bytes, encoding: str = "utf-8") -> bytes:
    return value.encode(encoding) if isinstance(value, str) else value


def to_str(value: str | bytes, encoding: str = "utf-8") -> str:
    return value if isinstance(value, str) else value.decode(encoding)


def to_bytes_or_str(value: str, match_type_of: typing.AnyStr) -> typing.AnyStr:
    return value if isinstance(match_type_of, str) else value.encode()


def unquote(value: str) -> str:
    return value[1:-1] if value[0] == value[-1] == '"' else value
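

# Illustrative doctest-style usage of the small coercion helpers above
# (a sketch; the outputs follow directly from the one-line implementations):
#
#     >>> to_bytes("abc")
#     b'abc'
#     >>> to_str(b"abc")
#     'abc'
#     >>> to_bytes_or_str("abc", match_type_of=b"")
#     b'abc'
#     >>> unquote('"quoted"')
#     'quoted'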


def peek_filelike_length(stream: typing.Any) -> int | None:
    """
    Given a file-like stream object, return its length in number of bytes
    without reading it into memory.
    """
    try:
        # Is it an actual file?
        fd = stream.fileno()
        # Yup, seems to be an actual file.
        length = os.fstat(fd).st_size
    except (AttributeError, OSError):
        # No... Maybe it's something that supports random access, like `io.BytesIO`?
        try:
            # Assuming so, go to end of stream to figure out its length,
            # then put it back in place.
            offset = stream.tell()
            length = stream.seek(0, os.SEEK_END)
            stream.seek(offset)
        except (AttributeError, OSError):
            # Not even that? Sorry, we're doomed...
            return None

    return length
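

# Illustrative doctest-style usage (a sketch): in-memory streams take the
# seek/tell fallback path, while objects with no length information yield None.
#
#     >>> import io
#     >>> peek_filelike_length(io.BytesIO(b"abcde"))
#     5
#     >>> peek_filelike_length(object()) is None
#     True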


class URLPattern:
    """
    A utility class currently used for making lookups against proxy keys...

    # Wildcard matching...
    >>> pattern = URLPattern("all://")
    >>> pattern.matches(httpx.URL("http://example.com"))
    True

    # With scheme matching...
    >>> pattern = URLPattern("https://")
    >>> pattern.matches(httpx.URL("https://example.com"))
    True
    >>> pattern.matches(httpx.URL("http://example.com"))
    False

    # With domain matching...
    >>> pattern = URLPattern("https://example.com")
    >>> pattern.matches(httpx.URL("https://example.com"))
    True
    >>> pattern.matches(httpx.URL("http://example.com"))
    False
    >>> pattern.matches(httpx.URL("https://other.com"))
    False

    # Wildcard scheme, with domain matching...
    >>> pattern = URLPattern("all://example.com")
    >>> pattern.matches(httpx.URL("https://example.com"))
    True
    >>> pattern.matches(httpx.URL("http://example.com"))
    True
    >>> pattern.matches(httpx.URL("https://other.com"))
    False

    # With port matching...
    >>> pattern = URLPattern("https://example.com:1234")
    >>> pattern.matches(httpx.URL("https://example.com:1234"))
    True
    >>> pattern.matches(httpx.URL("https://example.com"))
    False
    """

    def __init__(self, pattern: str) -> None:
        from ._urls import URL

        if pattern and ":" not in pattern:
            raise ValueError(
                f"Proxy keys should use proper URL forms rather "
                f"than plain scheme strings. "
                f'Instead of "{pattern}", use "{pattern}://"'
            )

        url = URL(pattern)
        self.pattern = pattern
        self.scheme = "" if url.scheme == "all" else url.scheme
        self.host = "" if url.host == "*" else url.host
        self.port = url.port
        if not url.host or url.host == "*":
            self.host_regex: typing.Pattern[str] | None = None
        elif url.host.startswith("*."):
            # *.example.com should match "www.example.com", but not "example.com"
            domain = re.escape(url.host[2:])
            self.host_regex = re.compile(f"^.+\\.{domain}$")
        elif url.host.startswith("*"):
            # *example.com should match "www.example.com" and "example.com"
            domain = re.escape(url.host[1:])
            self.host_regex = re.compile(f"^(.+\\.)?{domain}$")
        else:
            # example.com should match "example.com" but not "www.example.com"
            domain = re.escape(url.host)
            self.host_regex = re.compile(f"^{domain}$")

    def matches(self, other: URL) -> bool:
        if self.scheme and self.scheme != other.scheme:
            return False
        if (
            self.host
            and self.host_regex is not None
            and not self.host_regex.match(other.host)
        ):
            return False
        if self.port is not None and self.port != other.port:
            return False
        return True
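
    # Illustrative doctest-style sketch of the wildcard-host behaviour set up
    # in `__init__` (assumes `httpx.URL`, as in the class docstring above):
    #
    #     >>> URLPattern("all://*.example.com").matches(httpx.URL("https://www.example.com"))
    #     True
    #     >>> URLPattern("all://*.example.com").matches(httpx.URL("https://example.com"))
    #     False
    #     >>> URLPattern("all://*example.com").matches(httpx.URL("https://example.com"))
    #     True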

    @property
    def priority(self) -> tuple[int, int, int]:
        """
        The priority allows URLPattern instances to be sortable, so that
        we can match from most specific to least specific.
        """
        # URLs with a port should take priority over URLs without a port.
        port_priority = 0 if self.port is not None else 1
        # Longer hostnames should match first.
        host_priority = -len(self.host)
        # Longer schemes should match first.
        scheme_priority = -len(self.scheme)
        return (port_priority, host_priority, scheme_priority)

    def __hash__(self) -> int:
        return hash(self.pattern)

    def __lt__(self, other: URLPattern) -> bool:
        return self.priority < other.priority

    def __eq__(self, other: typing.Any) -> bool:
        return isinstance(other, URLPattern) and self.pattern == other.pattern
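

# Illustrative doctest-style sketch of how `priority` and `__lt__` order
# patterns from most specific to least specific when sorted:
#
#     >>> patterns = [
#     ...     URLPattern("all://"),
#     ...     URLPattern("http://example.com"),
#     ...     URLPattern("http://example.com:1234"),
#     ... ]
#     >>> [pattern.pattern for pattern in sorted(patterns)]
#     ['http://example.com:1234', 'http://example.com', 'all://']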


def is_ipv4_hostname(hostname: str) -> bool:
    try:
        ipaddress.IPv4Address(hostname.split("/")[0])
    except Exception:
        return False
    return True


def is_ipv6_hostname(hostname: str) -> bool:
    try:
        ipaddress.IPv6Address(hostname.split("/")[0])
    except Exception:
        return False
    return True
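

# Illustrative doctest-style usage (a sketch): the split("/") means values
# with a CIDR-style suffix, as allowed in NO_PROXY, are also recognised.
#
#     >>> is_ipv4_hostname("192.168.0.0/16")
#     True
#     >>> is_ipv6_hostname("::1")
#     True
#     >>> is_ipv4_hostname("example.com")
#     False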