private const USER_AGENT = 'Skinbase-LinkPreview/1.0 (+https://skinbase.org)';

/** Blocked IP ranges (SSRF protection). */
private const BLOCKED_CIDRS = [
    '0.0.0.0/8',
    '10.0.0.0/8',
    '100.64.0.0/10',
    '127.0.0.0/8',
    '169.254.0.0/16',
    '172.16.0.0/12',
    '192.0.0.0/24',
    '192.168.0.0/16',
    '198.18.0.0/15',
    '198.51.100.0/24',
    '203.0.113.0/24',
    '240.0.0.0/4',
    '::1/128',
    'fc00::/7',
    'fe80::/10',
];

/**
 * Fetch a remote page and return its link-preview metadata as JSON.
 *
 * The `url` input must be an http(s) URL of at most 2048 characters. Before
 * any request is made, every address the host resolves to (A and AAAA) is
 * checked with isBlockedIp(); the same check is repeated for every redirect
 * hop, so a public URL cannot bounce the fetch onto an internal address.
 *
 * Known limitations:
 *  - The HTTP client resolves the host again on its own, so a DNS answer that
 *    changes between the check and the connection is not covered here.
 *  - The `stream` request option is not set, so MAX_BYTES caps how much of
 *    the body is parsed, not necessarily how much is transferred.
 *
 * @param Request $request Incoming request carrying the `url` field.
 *
 * @return JsonResponse The extractMeta() result on success, or
 *                      `{"error": "..."}` with HTTP 422 on any failure.
 */
public function __invoke(Request $request): JsonResponse
{
    $request->validate([
        'url' => ['required', 'string', 'max:2048'],
    ]);

    $rawUrl = trim((string) $request->input('url'));

    // Must be http(s)
    if (preg_match('#^https?://#i', $rawUrl) !== 1) {
        return response()->json(['error' => 'Invalid URL scheme.'], 422);
    }

    $host = parse_url($rawUrl, PHP_URL_HOST);
    if (! is_string($host) || $host === '') {
        return response()->json(['error' => 'Invalid URL.'], 422);
    }

    // Resolve the host and block private/loopback targets (SSRF protection).
    // A host that resolves to nothing is rejected up front instead of being
    // handed to the HTTP client unchecked.
    $addresses = $this->resolveHostAddresses($host);
    if ($addresses === []) {
        return response()->json(['error' => 'Could not reach URL.'], 422);
    }
    if ($this->anyAddressBlocked($addresses)) {
        return response()->json(['error' => 'URL not allowed.'], 422);
    }

    // Set by the redirect hook so the catch block can report the right error.
    $blockedRedirect = false;

    try {
        $client = new Client([
            'timeout' => self::TIMEOUT,
            'connect_timeout' => 4,
            'allow_redirects' => [
                'max' => 5,
                'strict' => false,
                'protocols' => ['http', 'https'],
                // Re-run the SSRF check on every hop; the initial check only
                // covers the first host.
                'on_redirect' => function (
                    \Psr\Http\Message\RequestInterface $from,
                    \Psr\Http\Message\ResponseInterface $redirect,
                    \Psr\Http\Message\UriInterface $target
                ) use (&$blockedRedirect): void {
                    $hopAddresses = $this->resolveHostAddresses($target->getHost());
                    if ($hopAddresses === [] || $this->anyAddressBlocked($hopAddresses)) {
                        $blockedRedirect = true;

                        throw new TransferException('Redirect target is not allowed.');
                    }
                },
            ],
            // Let the status check below handle 4xx/5xx instead of having
            // the client throw before it is reached.
            'http_errors' => false,
            'headers' => [
                'User-Agent' => self::USER_AGENT,
                'Accept' => 'text/html,application/xhtml+xml',
            ],
            'verify' => true,
        ]);

        $response = $client->get($rawUrl);
        $status = $response->getStatusCode();
        if ($status < 200 || $status >= 400) {
            return response()->json(['error' => 'Could not fetch URL.'], 422);
        }

        // Read at most MAX_BYTES – only the start of the document is needed.
        $body = '';
        $stream = $response->getBody();
        try {
            while (! $stream->eof() && strlen($body) < self::MAX_BYTES) {
                $chunk = $stream->read(min(4096, self::MAX_BYTES - strlen($body)));
                if ($chunk === '') {
                    // No progress and no EOF: stop rather than spin.
                    break;
                }
                $body .= $chunk;
            }
        } finally {
            $stream->close();
        }
    } catch (TransferException $e) {
        $message = $blockedRedirect ? 'URL not allowed.' : 'Could not reach URL.';

        return response()->json(['error' => $message], 422);
    }

    $preview = $this->extractMeta($body, $rawUrl);

    return response()->json($preview);
}

/**
 * Resolve every IPv4 and IPv6 address a host maps to.
 *
 * IP literals (including bracketed IPv6 such as "[::1]") are returned as-is
 * without a lookup. IPv4-mapped IPv6 addresses are unwrapped to plain IPv4.
 *
 * @return list<string> Resolved addresses; empty when nothing resolves.
 */
private function resolveHostAddresses(string $host): array
{
    $literal = trim($host, '[]');
    if (filter_var($literal, FILTER_VALIDATE_IP) !== false) {
        return [self::unwrapMappedIpv4($literal)];
    }

    $ipv4 = gethostbynamel($host);
    $addresses = $ipv4 === false ? [] : $ipv4;

    try {
        $records = dns_get_record($host, DNS_AAAA);
    } catch (\ErrorException $e) {
        // Lookup warnings may be promoted to exceptions by the error
        // handler; treat that as "no AAAA records".
        $records = false;
    }

    if (is_array($records)) {
        foreach ($records as $record) {
            if (isset($record['ipv6'])) {
                $addresses[] = self::unwrapMappedIpv4($record['ipv6']);
            }
        }
    }

    return $addresses;
}

/**
 * Tell whether any of the given addresses is rejected by isBlockedIp().
 *
 * @param list<string> $addresses
 */
private function anyAddressBlocked(array $addresses): bool
{
    foreach ($addresses as $address) {
        if ($this->isBlockedIp($address)) {
            return true;
        }
    }

    return false;
}

/**
 * Convert an IPv4-mapped IPv6 address (::ffff:a.b.c.d) to plain IPv4 so it
 * is matched against the IPv4 ranges; any other address is returned as-is.
 */
private static function unwrapMappedIpv4(string $ip): string
{
    $packed = inet_pton($ip);
    $mappedPrefix = str_repeat("\0", 10) . "\xff\xff";

    if ($packed !== false && strlen($packed) === 16 && substr($packed, 0, 12) === $mappedPrefix) {
        $ipv4 = inet_ntop(substr($packed, 12));

        return $ipv4 === false ? $ip : $ipv4;
    }

    return $ip;
}

/** Extract OG / Twitter / fallback meta tags. */
private function extractMeta(string $html, string $originalUrl): array
{
    // Limit to roughly the block for speed
    $head = substr($html, 0, 50_000);
    $og = [];

    // OG / Twitter meta tags
    preg_match_all(
        '/]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*>/i',
        $head,
        $m1,
        PREG_SET_ORDER,
    );
    preg_match_all(
        '/]*content\s*=\s*["\']([^"\']*)["\'][^>]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*>/i',
        $head,
        $m2,
        PREG_SET_ORDER,
    );
    $allMeta = array_merge(
        array_map(fn ($r) => ['key' => strtolower($r[1]), 'value' => $r[2]], $m1),
        array_map(fn ($r) => ['key' => strtolower($r[2]), 'value' => $r[1]], $m2),
    );
    $map = [];
    foreach ($allMeta as $entry) {
        $map[$entry['key']] ??= $entry['value'];
    }

    // Canonical URL
    $canonical = $originalUrl;
    if (preg_match('/]+rel\s*=\s*["\']canonical["\'][^>]+href\s*=\s*["\']([^"\']+)["\'][^>]*>/i', $head, $mc)) {
        $canonical = $mc[1];
    } elseif (preg_match('/]+href\s*=\s*["\']([^"\']+)["\'][^>]+rel\s*=\s*["\']canonical["\'][^>]*>/i', $head, $mc)) {
        $canonical = $mc[1];
    }

    // Title
    $title = $map['og:title'] ?? $map['twitter:title'] ?? null;
    if (! $title && preg_match('/