Files
SkinbaseNova/app/Services/Moderation/Rules/LinkPresenceRule.php

119 lines
3.9 KiB
PHP

<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Enums\ModerationDomainStatus;
use App\Services\Moderation\DomainReputationService;
/**
 * Moderation rule that flags content containing links to non-allowed domains.
 *
 * A single analysis can emit up to two findings: one for shortened URLs and
 * one describing whether the content holds a single or several external links.
 */
class LinkPresenceRule implements ModerationRuleInterface
{
    /** Run of characters that may follow a URL prefix; any excluded character ends the match. */
    private const URL_TAIL = '[^\s<>\[\]"\'`\)]+';

    /** Matches URLs carrying an explicit http:// or https:// scheme. */
    private const HTTP_URL_PATTERN = '#https?://' . self::URL_TAIL . '#iu';

    /** Matches scheme-less URLs that start with "www.". */
    private const WWW_URL_PATTERN = '#\bwww\.' . self::URL_TAIL . '#iu';

    /**
     * Inspect the content's URLs and report link-related findings.
     *
     * URLs are taken from $context['extracted_urls'] when that key is set,
     * otherwise they are extracted from $content. URLs whose host cannot be
     * determined, or whose domain status is Allowed, are ignored.
     *
     * @param string $content    Raw content; only scanned when the context carries no URL list.
     * @param string $normalized Normalized content (not used by this rule).
     * @param array  $context    Optional analysis context; 'extracted_urls' is the only key read here.
     *
     * @return array<int, array{rule: string, score: mixed, reason: string, links: array, domains: array, keywords: array}>
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $urls = (array) ($context['extracted_urls'] ?? $this->extractUrls($content));
        if ($urls === []) {
            return [];
        }

        $domainService = app(DomainReputationService::class);
        $knownShorteners = $domainService->shortenerDomains();

        // Hosts are collected next to their URLs so they do not have to be
        // re-parsed when the findings are assembled.
        $externalUrls = [];
        $externalHosts = [];
        $shortenerUrls = [];
        $shortenerHosts = [];

        foreach ($urls as $url) {
            $host = $this->extractHost($url);
            if ($host === null) {
                continue;
            }

            if ($domainService->statusForDomain($host) === ModerationDomainStatus::Allowed) {
                continue;
            }

            // A shortener URL is also counted as an external link below.
            if ($this->isDomainInList($host, $knownShorteners)) {
                $shortenerUrls[] = $url;
                $shortenerHosts[] = $host;
            }

            $externalUrls[] = $url;
            $externalHosts[] = $host;
        }

        $findings = [];
        $weights = app('config')->get('content_moderation.weights', []);

        if ($shortenerUrls !== []) {
            $findings[] = [
                'rule' => 'shortened_link',
                'score' => $weights['shortened_link'] ?? 30,
                'reason' => 'Contains ' . count($shortenerUrls) . ' shortened URL(s)',
                'links' => $shortenerUrls,
                // One entry per link, in the same order as 'links'.
                'domains' => $shortenerHosts,
                'keywords' => [],
            ];
        }

        $externalCount = count($externalUrls);
        if ($externalCount > 0) {
            $externalDomains = array_values(array_unique(array_filter($externalHosts)));

            $findings[] = $externalCount > 1
                ? [
                    'rule' => 'multiple_links',
                    'score' => $weights['multiple_links'] ?? 40,
                    'reason' => 'Contains ' . $externalCount . ' external links',
                    'links' => $externalUrls,
                    'domains' => $externalDomains,
                    'keywords' => [],
                ]
                : [
                    'rule' => 'single_external_link',
                    'score' => $weights['single_external_link'] ?? 20,
                    'reason' => 'Contains an external link',
                    'links' => $externalUrls,
                    'domains' => $externalDomains,
                    'keywords' => [],
                ];
        }

        return $findings;
    }

    /**
     * Extract the distinct URLs found in a piece of text.
     *
     * Scheme-qualified (http/https) URLs are listed first, followed by bare
     * "www." URLs. A "www." match that starts inside a scheme-qualified match
     * is the same link seen twice (e.g. the "www.example.com" part of
     * "https://www.example.com") and is therefore dropped, so one link is
     * never reported as two.
     *
     * @return string[]
     */
    public function extractUrls(string $text): array
    {
        // The patterns use the "u" modifier, which makes preg_match_all fail
        // outright on malformed UTF-8. Scrub first so such input is still scanned.
        if (!mb_check_encoding($text, 'UTF-8')) {
            $text = mb_scrub($text, 'UTF-8');
        }

        preg_match_all(self::HTTP_URL_PATTERN, $text, $httpMatches, PREG_OFFSET_CAPTURE);
        preg_match_all(self::WWW_URL_PATTERN, $text, $wwwMatches, PREG_OFFSET_CAPTURE);

        $urls = [];
        $httpSpans = [];
        foreach ($httpMatches[0] ?? [] as [$url, $offset]) {
            $urls[] = $url;
            // Offsets reported by PCRE are byte offsets, hence strlen().
            $httpSpans[] = [$offset, $offset + strlen($url)];
        }

        foreach ($wwwMatches[0] ?? [] as [$url, $offset]) {
            if (!$this->startsInsideAnySpan($offset, $httpSpans)) {
                $urls[] = $url;
            }
        }

        return array_values(array_unique($urls));
    }

    /**
     * Resolve the normalized host of a URL.
     *
     * URLs without an http/https scheme get "https://" prepended (after
     * stripping leading slashes) so that parse_url() can locate the host.
     *
     * @return string|null Null when parse_url() yields no host for the URL.
     */
    public function extractHost(string $url): ?string
    {
        $normalizedUrl = preg_match('#^https?://#i', $url) ? $url : 'https://' . ltrim($url, '/');
        $host = parse_url($normalizedUrl, PHP_URL_HOST);
        if (!is_string($host)) {
            return null;
        }

        return app(DomainReputationService::class)->normalizeDomain($host);
    }

    /**
     * Tell whether a host equals, or is a subdomain of, any domain in the list.
     *
     * List entries are lower-cased before comparison; the host is compared as given.
     */
    private function isDomainInList(string $host, array $list): bool
    {
        foreach ($list as $entry) {
            $entry = strtolower($entry);

            if ($host === $entry) {
                return true;
            }

            // Check if host is a subdomain of the entry
            if (str_ends_with($host, '.' . $entry)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tell whether a byte offset falls within any of the given [start, end) spans.
     *
     * @param array<int, array{0: int, 1: int}> $spans
     */
    private function startsInsideAnySpan(int $offset, array $spans): bool
    {
        foreach ($spans as [$start, $end]) {
            if ($offset >= $start && $offset < $end) {
                return true;
            }
        }

        return false;
    }
}