49 lines
1.7 KiB
PHP
49 lines
1.7 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Moderation\Rules;
|
|
|
|
use App\Contracts\Moderation\ModerationRuleInterface;
|
|
|
|
/**
 * Moderation rule that flags text dominated by a small set of repeated words.
 *
 * Two measurements are taken over the words of the normalized text:
 *  - unique ratio:   distinct words / total words
 *  - top word ratio: occurrences of the most frequent word / total words
 *
 * The text is flagged when the unique ratio falls below the configured
 * `max_unique_ratio` OR the top word ratio exceeds the configured
 * `max_single_word_frequency`. Thresholds are read from the
 * `content_moderation.keyword_stuffing` config key.
 */
class KeywordStuffingRule implements ModerationRuleInterface
{
    /**
     * Analyze the normalized text for keyword stuffing.
     *
     * A "word" is a run of Unicode letters/digits longer than one character.
     * Words are counted exactly as they appear in $normalized; no case folding
     * is done here (presumably the caller has already normalized case — verify).
     *
     * @param string $content    Raw content; not used by this rule.
     * @param string $normalized Text that is tokenized and measured.
     * @param array  $context    Extra context; not used by this rule.
     *
     * @return array Empty when the text is too short or within both thresholds;
     *               otherwise a single-element list holding one finding with the
     *               keys `rule`, `score`, `reason`, `links`, `domains`, `keywords`
     *               (`keywords` is the up-to-five most frequent words, as strings).
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $configRepository = app('config');
        $config = $configRepository->get('content_moderation.keyword_stuffing', []);

        // On a PCRE failure (e.g. malformed UTF-8) $matches[0] may be missing;
        // treat that the same as "no words found".
        preg_match_all('/[\p{L}\p{N}]+/u', $normalized, $matches);

        $words = array_values(array_filter(
            $matches[0] ?? [],
            static fn (string $word): bool => mb_strlen($word) > 1
        ));
        $totalWords = count($words);

        // The explicit zero check matters when min_word_count is configured as
        // 0 or less: without it, max() below would receive an empty array and
        // throw a ValueError on PHP 8.
        if ($totalWords === 0 || $totalWords < (int) ($config['min_word_count'] ?? 20)) {
            return [];
        }

        $frequencies = array_count_values($words);
        $uniqueRatio = count($frequencies) / $totalWords;
        $topWordRatio = max($frequencies) / $totalWords;

        $maxUniqueRatio = (float) ($config['max_unique_ratio'] ?? 0.3);
        $maxSingleWordFrequency = (float) ($config['max_single_word_frequency'] ?? 0.25);

        // Varied vocabulary and no single dominant word: nothing to report.
        if ($uniqueRatio >= $maxUniqueRatio && $topWordRatio <= $maxSingleWordFrequency) {
            return [];
        }

        // Most frequent words first, keys (the words) preserved.
        arsort($frequencies);

        // PHP stores all-digit words such as "2024" under integer array keys,
        // so cast back to string to keep `keywords` a homogeneous list of strings.
        $keywords = array_map(
            static fn ($word): string => (string) $word,
            array_slice(array_keys($frequencies), 0, 5)
        );

        return [[
            'rule' => 'keyword_stuffing',
            'score' => $configRepository->get('content_moderation.weights.keyword_stuffing', 20),
            'reason' => sprintf(
                'Likely keyword stuffing (unique ratio %.2f, top word ratio %.2f)',
                $uniqueRatio,
                $topWordRatio
            ),
            'links' => [],
            'domains' => [],
            'keywords' => $keywords,
        ]];
    }
}