from __future__ import annotations import ipaddress import socket from io import BytesIO from urllib.parse import urljoin, urlparse import httpx from PIL import Image DEFAULT_MAX_BYTES = 52_428_800 # 50 MB _MAX_REDIRECTS = 3 class ImageLoadError(ValueError): pass def _validate_public_url(url: str) -> str: """Raise ImageLoadError if the URL is not a safe public http/https address. Prevents SSRF by rejecting private, loopback, link-local, and reserved IPs. """ parsed = urlparse(url) if parsed.scheme not in ("http", "https"): raise ImageLoadError("Only http and https URLs are allowed") if not parsed.hostname: raise ImageLoadError("URL must include a hostname") hostname = parsed.hostname.strip().lower() if hostname in {"localhost", "127.0.0.1", "::1"}: raise ImageLoadError("Localhost URLs are not allowed") port = parsed.port or (443 if parsed.scheme == "https" else 80) try: resolved = socket.getaddrinfo(hostname, port, type=socket.SOCK_STREAM) except socket.gaierror as exc: raise ImageLoadError(f"Cannot resolve host: {exc}") from exc for entry in resolved: ip = ipaddress.ip_address(entry[4][0]) if ( ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_multicast or ip.is_reserved or ip.is_unspecified ): raise ImageLoadError("URLs resolving to private or reserved addresses are not allowed") return url async def load_image_from_url( url: str, max_bytes: int = DEFAULT_MAX_BYTES, timeout: float = 60.0, ) -> Image.Image: """Fetch an image from a validated public URL and return it as a PIL Image (RGBA).""" validated = _validate_public_url(url) current = validated async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client: for _ in range(_MAX_REDIRECTS + 1): resp = await client.get(current) if 300 <= resp.status_code < 400: location = resp.headers.get("location") if not location: raise ImageLoadError("Redirect missing Location header") current = _validate_public_url(urljoin(current, location)) continue resp.raise_for_status() content_type = (resp.headers.get("content-type") or "").lower() if content_type and not content_type.startswith("image/"): raise ImageLoadError(f"URL does not point to an image: {content_type}") data = resp.content if len(data) > max_bytes: raise ImageLoadError(f"Image exceeds maximum allowed size ({max_bytes} bytes)") return _decode(data) raise ImageLoadError(f"Too many redirects (>{_MAX_REDIRECTS})") def load_image_from_bytes(data: bytes) -> Image.Image: """Decode raw bytes into a PIL Image (RGBA).""" return _decode(data) def _decode(data: bytes) -> Image.Image: try: return Image.open(BytesIO(data)).convert("RGBA") except Exception as exc: raise ImageLoadError(f"Cannot decode image: {exc}") from exc