private const USER_AGENT = 'Skinbase-LinkPreview/1.0 (+https://skinbase.org)';

    /**
     * Blocked IP ranges (SSRF protection).
     *
     * Loopback, private, carrier-grade NAT, link-local, documentation/benchmark,
     * multicast and reserved IPv4 space, plus IPv6 loopback, unique-local and link-local.
     */
    private const BLOCKED_CIDRS = [
        '0.0.0.0/8',
        '10.0.0.0/8',
        '100.64.0.0/10',
        '127.0.0.0/8',
        '169.254.0.0/16',
        '172.16.0.0/12',
        '192.0.0.0/24',
        '192.0.2.0/24',
        '192.168.0.0/16',
        '198.18.0.0/15',
        '198.51.100.0/24',
        '203.0.113.0/24',
        '224.0.0.0/4',
        '240.0.0.0/4',
        '::1/128',
        'fc00::/7',
        'fe80::/10',
    ];

    /**
     * Fetch a remote page and return its link-preview metadata as JSON.
     *
     * Responds with HTTP 422 and an `error` message when the URL is malformed, points at a
     * blocked address (initially or through a redirect), or cannot be fetched.
     *
     * @param Request $request Must carry a `url` input (string, max 2048 chars).
     */
    public function __invoke(Request $request): JsonResponse
    {
        $request->validate([
            'url' => ['required', 'string', 'max:2048'],
        ]);

        $rawUrl = trim((string) $request->input('url'));

        // Must be http(s)
        if (! preg_match('#^https?://#i', $rawUrl)) {
            return response()->json(['error' => 'Invalid URL scheme.'], 422);
        }

        $parsed = parse_url($rawUrl);
        $host = is_array($parsed) ? ($parsed['host'] ?? '') : '';

        // Embedded credentials are rejected: "user@host" forms are a classic way to make
        // parse_url() and the HTTP client disagree about which host is being contacted.
        if ($host === '' || isset($parsed['user']) || isset($parsed['pass'])) {
            return response()->json(['error' => 'Invalid URL.'], 422);
        }

        // Resolve hostname and block private/loopback IPs (SSRF protection)
        if ($this->isBlockedHost($host)) {
            return response()->json(['error' => 'URL not allowed.'], 422);
        }

        // NOTE(review): the client resolves DNS again when connecting, so a rebinding window
        // remains between the check above and the request; pinning the address would close it.
        try {
            $client = new Client([
                'timeout' => self::TIMEOUT,
                'connect_timeout' => 4,
                'allow_redirects' => [
                    'max' => 5,
                    'strict' => false,
                    // Every hop is re-validated; otherwise a public URL could simply
                    // redirect to an internal address and bypass the check above.
                    'on_redirect' => function (
                        \Psr\Http\Message\RequestInterface $from,
                        \Psr\Http\Message\ResponseInterface $redirectResponse,
                        \Psr\Http\Message\UriInterface $target,
                    ): void {
                        if ($this->isBlockedHost($target->getHost())) {
                            throw new TransferException('Redirect target not allowed.');
                        }
                    },
                ],
                // The host check only inspects A records, so never connect over IPv6.
                'force_ip_resolve' => 'v4',
                'headers' => [
                    'User-Agent' => self::USER_AGENT,
                    'Accept' => 'text/html,application/xhtml+xml',
                ],
                'verify' => true,
            ]);

            $response = $client->get($rawUrl);
            $status = $response->getStatusCode();

            if ($status < 200 || $status >= 400) {
                return response()->json(['error' => 'Could not fetch URL.'], 422);
            }

            // Read up to MAX_BYTES – we only need the HTML.
            // NOTE(review): without the `stream` request option the whole response is
            // downloaded before this loop runs; the cap only bounds what we parse.
            $body = '';
            $stream = $response->getBody();
            while (! $stream->eof() && ($remaining = self::MAX_BYTES - strlen($body)) > 0) {
                $body .= $stream->read(min(4096, $remaining));
            }
            $stream->close();
        } catch (TransferException $e) {
            return response()->json(['error' => 'Could not reach URL.'], 422);
        }

        $preview = $this->extractMeta($body, $rawUrl);

        return response()->json($preview);
    }

    /**
     * Extract OG / Twitter / fallback meta tags.
     *
     * @param string $html        Raw (possibly truncated) HTML of the fetched page.
     * @param string $originalUrl URL that was requested; used as fallback and as base for relative images.
     *
     * @return array{url: string, title: ?string, description: ?string, image: ?string, site_name: ?string}
     */
    private function extractMeta(string $html, string $originalUrl): array
    {
        // Limit to roughly the <head> block for speed
        $head = substr($html, 0, 50_000);

        // OG / Twitter meta tags, in both attribute orders (name-then-content, content-then-name)
        preg_match_all(
            '/<meta[^>]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*>/i',
            $head,
            $m1,
            PREG_SET_ORDER,
        );
        preg_match_all(
            '/<meta[^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*>/i',
            $head,
            $m2,
            PREG_SET_ORDER,
        );

        $allMeta = array_merge(
            array_map(static fn (array $r): array => ['key' => strtolower($r[1]), 'value' => $r[2]], $m1),
            array_map(static fn (array $r): array => ['key' => strtolower($r[2]), 'value' => $r[1]], $m2),
        );

        // First occurrence of a key wins.
        $map = [];
        foreach ($allMeta as $entry) {
            $map[$entry['key']] ??= $entry['value'];
        }

        // Canonical URL
        $canonical = $originalUrl;
        if (
            preg_match('/<link[^>]+rel\s*=\s*["\']canonical["\'][^>]+href\s*=\s*["\']([^"\']+)["\'][^>]*>/i', $head, $mc)
            || preg_match('/<link[^>]+href\s*=\s*["\']([^"\']+)["\'][^>]+rel\s*=\s*["\']canonical["\'][^>]*>/i', $head, $mc)
        ) {
            // Only absolute http(s) canonicals are trusted; relative or other-scheme
            // values (e.g. javascript:) fall back to the requested URL.
            if (preg_match('#^https?://#i', $mc[1])) {
                $canonical = $mc[1];
            }
        }

        // Title: entities are decoded exactly once, whichever source it comes from.
        $title = $map['og:title'] ?? $map['twitter:title'] ?? null;
        if ($title) {
            $title = html_entity_decode($title);
        } elseif (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $head, $mt)) {
            $title = trim(html_entity_decode($mt[1]));
        }

        // Description
        $description = $map['og:description'] ?? $map['twitter:description'] ?? $map['description'] ?? null;

        // Image
        $image = $map['og:image'] ?? $map['twitter:image'] ?? $map['twitter:image:src'] ?? null;

        // Resolve relative image URL
        if ($image && ! preg_match('#^https?://#i', $image)) {
            $parts = parse_url($originalUrl) ?: [];
            $scheme = $parts['scheme'] ?? 'https';

            if (str_starts_with($image, '//')) {
                // Protocol-relative: already carries its own host.
                $image = $scheme . ':' . $image;
            } else {
                $port = isset($parts['port']) ? ':' . $parts['port'] : '';
                $base = $scheme . '://' . ($parts['host'] ?? '') . $port;
                $image = $base . '/' . ltrim($image, '/');
            }
        }

        // Site name
        $siteName = $map['og:site_name'] ?? (parse_url($originalUrl, PHP_URL_HOST) ?: null);

        return [
            'url' => $canonical,
            'title' => $title ?: null,
            'description' => $description ? html_entity_decode($description) : null,
            'image' => $image,
            'site_name' => $siteName,
        ];
    }

    /**
     * Resolve a hostname and report whether it must not be contacted.
     *
     * A host is blocked when it is empty, does not resolve to any IPv4 address, or when
     * ANY of its A records falls inside a blocked range.
     */
    private function isBlockedHost(string $host): bool
    {
        if ($host === '') {
            return true;
        }

        $addresses = gethostbynamel($host);
        if ($addresses === false || $addresses === []) {
            return true; // could not resolve
        }

        foreach ($addresses as $address) {
            if ($this->isBlockedIp($address)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Whether the given IP literal is invalid or inside one of BLOCKED_CIDRS.
     */
    private function isBlockedIp(string $ip): bool
    {
        if (! filter_var($ip, FILTER_VALIDATE_IP)) {
            return true; // could not resolve
        }

        foreach (self::BLOCKED_CIDRS as $cidr) {
            if ($this->ipInCidr($ip, $cidr)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Test whether an IP address lies inside a CIDR range of the same address family.
     *
     * @param string $ip   IPv4 or IPv6 literal.
     * @param string $cidr Range such as "10.0.0.0/8" or "fc00::/7"; a missing prefix length
     *                     means a single address (/32 or /128).
     */
    private function ipInCidr(string $ip, string $cidr): bool
    {
        $isV6 = str_contains($cidr, ':');
        $parts = explode('/', $cidr, 2);
        $subnet = $parts[0];
        $maxBits = $isV6 ? 128 : 32;
        $bits = isset($parts[1]) ? max(0, min($maxBits, (int) $parts[1])) : $maxBits;

        // IPv6
        if ($isV6) {
            if (! filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
                return false;
            }

            $ipBin = inet_pton($ip);
            $subnetBin = inet_pton($subnet);
            if ($ipBin === false || $subnetBin === false) {
                return false;
            }

            // Build a byte-string netmask: whole 0xff bytes, one partial byte, zero padding.
            $mask = str_repeat("\xff", intdiv($bits, 8));
            $remain = $bits % 8;
            if ($remain !== 0) {
                // Keep the value within one byte; chr() must not be fed integers above 255.
                $mask .= chr((0xff << (8 - $remain)) & 0xff);
            }
            $mask = str_pad($mask, strlen($subnetBin), "\x00");

            return ($ipBin & $mask) === ($subnetBin & $mask);
        }

        // IPv4
        if (! filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
            return false;
        }

        $ipLong = ip2long($ip);
        $subnetLong = ip2long($subnet);
        if ($ipLong === false || $subnetLong === false) {
            return false;
        }

        $maskLong = $bits === 32 ? -1 : ~((1 << (32 - $bits)) - 1);

        return ($ipLong & $maskLong) === ($subnetLong & $maskLong);
    }
}