119 lines
3.9 KiB
PHP
119 lines
3.9 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Moderation\Rules;
|
|
|
|
use App\Contracts\Moderation\ModerationRuleInterface;
|
|
use App\Enums\ModerationDomainStatus;
|
|
use App\Services\Moderation\DomainReputationService;
|
|
|
|
/**
 * Moderation rule that reports links found in a piece of content.
 *
 * URLs are taken from `$context['extracted_urls']` when present, otherwise they
 * are extracted from the raw content. URLs whose host has the status
 * ModerationDomainStatus::Allowed are ignored; every other URL is counted as
 * external, and URLs whose host is on the shortener list are additionally
 * reported as shortened links.
 */
class LinkPresenceRule implements ModerationRuleInterface
{
    /**
     * Matches an http(s) URL or a bare "www." address, running up to the first
     * whitespace, angle/square bracket, quote, backtick or closing parenthesis.
     *
     * Both forms live in one pattern so that the "www." part of
     * "https://www.example.com" is consumed by the http(s) match and is not
     * reported a second time as a separate URL.
     */
    private const URL_PATTERN = '#(?:https?://|\bwww\.)[^\s<>\[\]"\'`\)]+#iu';

    /**
     * Analyze content for shortened and external links.
     *
     * @param string $content    Raw content; scanned for URLs when the context provides none.
     * @param string $normalized Normalized content (not used by this rule).
     * @param array  $context    Optional data; `extracted_urls` overrides URL extraction.
     *
     * @return array<int, array{rule: string, score: mixed, reason: string, links: array, domains: array, keywords: array}>
     *         Zero, one or two findings: an optional `shortened_link` finding followed by
     *         either a `multiple_links` or a `single_external_link` finding.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $urls = (array) ($context['extracted_urls'] ?? $this->extractUrls($content));

        if ($urls === []) {
            return [];
        }

        $domainService = app(DomainReputationService::class);
        $shortenerDomains = $domainService->shortenerDomains();

        $externalUrls = [];
        $externalHosts = [];
        $shortenerUrls = [];
        $shortenerHosts = [];

        foreach ($urls as $url) {
            // Context-supplied lists are not guaranteed to hold only strings.
            if (!is_string($url)) {
                continue;
            }

            $host = $this->extractHost($url);
            if ($host === null) {
                continue;
            }

            if ($domainService->statusForDomain($host) === ModerationDomainStatus::Allowed) {
                continue;
            }

            if ($this->isDomainInList($host, $shortenerDomains)) {
                $shortenerUrls[] = $url;
                $shortenerHosts[] = $host;
            }

            // A shortened link is still an external link, so it is counted here too.
            $externalUrls[] = $url;
            $externalHosts[] = $host;
        }

        $findings = [];
        $weights = app('config')->get('content_moderation.weights', []);

        $shortenerCount = count($shortenerUrls);
        if ($shortenerCount > 0) {
            $findings[] = $this->buildFinding(
                'shortened_link',
                $weights['shortened_link'] ?? 30,
                'Contains ' . $shortenerCount . ' shortened URL(s)',
                $shortenerUrls,
                $shortenerHosts,
            );
        }

        $externalCount = count($externalUrls);
        if ($externalCount > 1) {
            $findings[] = $this->buildFinding(
                'multiple_links',
                $weights['multiple_links'] ?? 40,
                'Contains ' . $externalCount . ' external links',
                $externalUrls,
                $externalHosts,
            );
        } elseif ($externalCount === 1) {
            $findings[] = $this->buildFinding(
                'single_external_link',
                $weights['single_external_link'] ?? 20,
                'Contains an external link',
                $externalUrls,
                $externalHosts,
            );
        }

        return $findings;
    }

    /**
     * Extract the distinct http(s) and "www." URLs from a text, in order of appearance.
     *
     * @return string[]
     */
    public function extractUrls(string $text): array
    {
        $matches = [];
        preg_match_all(self::URL_PATTERN, $text, $matches);

        // `?? []` covers a failed match (e.g. invalid UTF-8 with the `u` modifier).
        return array_values(array_unique($matches[0] ?? []));
    }

    /**
     * Return the normalized host of a URL, or null when no host can be parsed.
     *
     * URLs without an http(s) scheme (e.g. "www.example.com") are prefixed with
     * "https://" so that parse_url() recognizes the host component.
     */
    public function extractHost(string $url): ?string
    {
        $hasScheme = preg_match('#^https?://#i', $url) === 1;
        $normalizedUrl = $hasScheme ? $url : 'https://' . ltrim($url, '/');

        $host = parse_url($normalizedUrl, PHP_URL_HOST);
        if (!is_string($host)) {
            return null;
        }

        return app(DomainReputationService::class)->normalizeDomain($host);
    }

    /**
     * Assemble one finding entry; hosts are reduced to a unique, re-indexed list
     * with empty values removed.
     *
     * @param string[] $links
     * @param string[] $hosts
     *
     * @return array{rule: string, score: mixed, reason: string, links: array, domains: array, keywords: array}
     */
    private function buildFinding(string $rule, mixed $score, string $reason, array $links, array $hosts): array
    {
        return [
            'rule' => $rule,
            'score' => $score,
            'reason' => $reason,
            'links' => $links,
            'domains' => array_values(array_unique(array_filter($hosts))),
            'keywords' => [],
        ];
    }

    /**
     * Tell whether the host equals a list entry or is a subdomain of one.
     *
     * Entries are lower-cased before comparison; the host is compared as given.
     */
    private function isDomainInList(string $host, array $list): bool
    {
        foreach ($list as $entry) {
            // An empty entry would turn the subdomain check into "ends with a dot".
            if (!is_string($entry) || $entry === '') {
                continue;
            }

            $entry = strtolower($entry);

            if ($host === $entry || str_ends_with($host, '.' . $entry)) {
                return true;
            }
        }

        return false;
    }
}
|