Files
SkinbaseNova/app/Services/Moderation/Rules/KeywordStuffingRule.php

49 lines
1.7 KiB
PHP

<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that flags text with an unusually repetitive vocabulary.
 *
 * Two ratios are computed over the words of the normalized text:
 *  - unique ratio:   distinct words / total words
 *  - top word ratio: occurrences of the most frequent word / total words
 *
 * A finding is produced when the unique ratio falls below the configured
 * minimum or the top word ratio exceeds the configured maximum.
 */
class KeywordStuffingRule implements ModerationRuleInterface
{
    /**
     * Analyze text for signs of keyword stuffing.
     *
     * Thresholds are read from the `content_moderation.keyword_stuffing`
     * config array (`min_word_count`, `max_unique_ratio`,
     * `max_single_word_frequency`); the score comes from
     * `content_moderation.weights.keyword_stuffing`.
     *
     * @param string $content    Original content; not read by this rule.
     * @param string $normalized Text that is tokenized and measured.
     *                           NOTE(review): tokens are compared case-sensitively here,
     *                           so this presumably arrives case-folded — confirm with the caller.
     * @param array  $context    Not read by this rule.
     *
     * @return array Empty when nothing is flagged; otherwise a list holding one finding
     *               with the keys rule, score, reason, links, domains and keywords
     *               (up to five most frequent words, always as strings).
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $words = $this->extractWords($normalized);
        $totalWords = count($words);

        $settings = app('config');
        $config = $settings->get('content_moderation.keyword_stuffing', []);
        $minWordCount = (int) ($config['min_word_count'] ?? 20);

        // The explicit zero check matters when min_word_count is configured as 0 or
        // less: without it an empty word list would reach max() below, which throws
        // a ValueError on an empty array.
        if ($totalWords === 0 || $totalWords < $minWordCount) {
            return [];
        }

        $frequencies = array_count_values($words);
        $uniqueRatio = count($frequencies) / $totalWords;
        $topWordRatio = max($frequencies) / $totalWords;

        $maxUniqueRatio = (float) ($config['max_unique_ratio'] ?? 0.3);
        $maxSingleWordFrequency = (float) ($config['max_single_word_frequency'] ?? 0.25);

        // Both checks must pass for the text to be considered clean.
        if ($uniqueRatio >= $maxUniqueRatio && $topWordRatio <= $maxSingleWordFrequency) {
            return [];
        }

        arsort($frequencies);

        // PHP turns purely numeric array keys (e.g. "2024") into integers, so cast
        // the keys back to strings to keep the keyword list homogeneous.
        $keywords = array_map(
            static fn ($keyword): string => (string) $keyword,
            array_slice(array_keys($frequencies), 0, 5)
        );

        return [[
            'rule' => 'keyword_stuffing',
            'score' => $settings->get('content_moderation.weights.keyword_stuffing', 20),
            'reason' => sprintf(
                'Likely keyword stuffing (unique ratio %.2f, top word ratio %.2f)',
                $uniqueRatio,
                $topWordRatio
            ),
            'links' => [],
            'domains' => [],
            'keywords' => $keywords,
        ]];
    }

    /**
     * Split text into runs of Unicode letters/digits, dropping single-character tokens.
     *
     * @return array Re-indexed list of word strings; empty when nothing matched.
     */
    private function extractWords(string $text): array
    {
        preg_match_all('/[\p{L}\p{N}]+/u', $text, $matches);

        return array_values(array_filter(
            $matches[0] ?? [],
            static fn (string $word): bool => mb_strlen($word) > 1
        ));
    }
}