extractUrls($content));

        if (empty($urls)) {
            return [];
        }

        $domainService = app(DomainReputationService::class);
        $shortenerDomains = $domainService->shortenerDomains();
        $externalUrls = [];
        $shortenerUrls = [];

        foreach ($urls as $url) {
            $host = $this->extractHost($url);
            if ($host === null) {
                continue;
            }

            // Links to domains explicitly marked as allowed never contribute to a finding.
            if ($domainService->statusForDomain($host) === ModerationDomainStatus::Allowed) {
                continue;
            }

            if ($this->isDomainInList($host, $shortenerDomains)) {
                $shortenerUrls[] = $url;
            }

            // A shortener URL is also counted as an external link.
            $externalUrls[] = $url;
        }

        $findings = [];
        $weights = app('config')->get('content_moderation.weights', []);

        if (count($shortenerUrls) > 0) {
            $findings[] = [
                'rule' => 'shortened_link',
                'score' => $weights['shortened_link'] ?? 30,
                'reason' => 'Contains ' . count($shortenerUrls) . ' shortened URL(s)',
                'links' => $shortenerUrls,
                'domains' => array_map(fn ($u) => $this->extractHost($u), $shortenerUrls),
                'keywords' => [],
            ];
        }

        if (count($externalUrls) > 1) {
            $findings[] = [
                'rule' => 'multiple_links',
                'score' => $weights['multiple_links'] ?? 40,
                'reason' => 'Contains ' . count($externalUrls) . ' external links',
                'links' => $externalUrls,
                'domains' => array_values(array_unique(array_filter(array_map(fn ($u) => $this->extractHost($u), $externalUrls)))),
                'keywords' => [],
            ];
        } elseif (count($externalUrls) === 1) {
            $findings[] = [
                'rule' => 'single_external_link',
                'score' => $weights['single_external_link'] ?? 20,
                'reason' => 'Contains an external link',
                'links' => $externalUrls,
                'domains' => array_values(array_unique(array_filter(array_map(fn ($u) => $this->extractHost($u), $externalUrls)))),
                'keywords' => [],
            ];
        }

        return $findings;
    }

    /**
     * Extract the distinct URLs mentioned in a piece of text.
     *
     * Recognises "http://" / "https://" URLs and bare "www." hosts. A match
     * runs until whitespace or one of < > [ ] " ' ` ) is reached; trailing
     * sentence punctuation (. , ; : ! ?) is then stripped so that
     * "see https://example.com." yields "https://example.com".
     *
     * The result lists scheme-prefixed URLs first and bare "www." URLs second,
     * each group in order of appearance, with exact duplicates removed.
     *
     * @param string $text Text to scan.
     *
     * @return string[]
     */
    public function extractUrls(string $text): array
    {
        // One alternation pass instead of two independent passes: matches cannot
        // overlap, so the "www." inside "https://www.example.com" is no longer
        // reported as a second, separate link.
        $pattern = "#(?:https?://|\\bwww\\.)[^\\s<>\\[\\]\"'`\\)]+#i";

        $found = preg_match_all($pattern . 'u', $text, $matches);
        if ($found === false) {
            // The "u" modifier makes PCRE fail on subjects that are not valid
            // UTF-8; retry byte-wise rather than silently reporting no links.
            $found = preg_match_all($pattern, $text, $matches);
        }

        if ($found === false || $found === 0) {
            return [];
        }

        $schemeUrls = [];
        $bareUrls = [];

        foreach ($matches[0] as $candidate) {
            // Punctuation that ends the surrounding sentence is not part of the link.
            $url = rtrim($candidate, '.,;:!?');

            if (preg_match('#^https?://#i', $url) === 1) {
                $schemeUrls[] = $url;
            } else {
                $bareUrls[] = $url;
            }
        }

        return array_values(array_unique(array_merge($schemeUrls, $bareUrls)));
    }

    /**
     * Resolve the host part of a URL and pass it through the domain normaliser.
     *
     * Input without an http(s) scheme gets "https://" prepended (after removing
     * leading slashes) so that parse_url() can locate the host.
     *
     * @param string $url URL or scheme-less host/path such as "www.example.com/x".
     *
     * @return string|null Result of DomainReputationService::normalizeDomain()
     *                     for the host, or null when parse_url() yields no host.
     */
    public function extractHost(string $url): ?string
    {
        $normalizedUrl = preg_match('#^https?://#i', $url) ? $url : 'https://' . ltrim($url, '/');
        $host = parse_url($normalizedUrl, PHP_URL_HOST);

        // parse_url() returns null for a missing host and false for a malformed URL.
        if (!is_string($host)) {
            return null;
        }

        return app(DomainReputationService::class)->normalizeDomain($host);
    }

    /**
     * Tell whether a host equals, or is a subdomain of, any domain in a list.
     *
     * List entries are lower-cased before comparison; the host is compared as
     * given. NOTE(review): presumably the host is already lower-cased by
     * normalizeDomain() - confirm against DomainReputationService.
     *
     * @param string   $host Host to look up.
     * @param string[] $list Domains to compare against.
     */
    private function isDomainInList(string $host, array $list): bool
    {
        foreach ($list as $entry) {
            $entry = strtolower($entry);

            if ($host === $entry) {
                return true;
            }

            // Check if host is a subdomain of the entry
            if (str_ends_with($host, '.' . $entry)) {
                return true;
            }
        }

        return false;
    }
}