Files
SkinbaseNova/app/Services/Moderation/Rules/RepeatedPhraseRule.php

57 lines
2.0 KiB
PHP

<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that flags text in which the same multi-word phrase occurs several times.
 *
 * The normalized text is split on whitespace and every run of consecutive words
 * (n-gram) between the configured minimum and maximum length is counted using a
 * sliding window, so overlapping occurrences are counted too. Any n-gram seen at
 * least `min_repetitions` times is reported.
 *
 * Settings read from the `content_moderation.repeated_phrase` config entry:
 *  - `min_phrase_length` (default 4): shortest phrase, in words, that is examined.
 *  - `min_repetitions`   (default 3): occurrences required before a phrase is reported.
 *  - `max_phrase_length` (default 8): longest phrase, in words, that is examined.
 *
 * The finding's score is read from `content_moderation.weights.repeated_phrase` (default 25).
 */
class RepeatedPhraseRule implements ModerationRuleInterface
{
    /**
     * Longest phrase, in words, examined when `max_phrase_length` is not configured.
     */
    private const DEFAULT_MAX_PHRASE_LENGTH = 8;

    /**
     * Look for phrases that are repeated within the normalized text.
     *
     * @param string $content    Original content; not used by this rule.
     * @param string $normalized Normalized content that is split into words and analysed.
     * @param array  $context    Extra context; not used by this rule.
     *
     * @return array Empty when nothing is repeated often enough; otherwise a single
     *               finding with the keys `rule`, `score`, `reason`, `links`,
     *               `domains` and `keywords` (the repeated phrases).
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $config = app('config')->get('content_moderation.repeated_phrase', []);
        $weights = app('config')->get('content_moderation.weights', []);

        // Guard against nonsensical settings: a zero-word phrase would make every
        // window the empty string, and fewer than two occurrences is not a repetition.
        $minPhraseLength = max(1, (int) ($config['min_phrase_length'] ?? 4));
        $minRepetitions = max(2, (int) ($config['min_repetitions'] ?? 3));
        $maxPhraseLength = (int) ($config['max_phrase_length'] ?? self::DEFAULT_MAX_PHRASE_LENGTH);

        // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading/trailing whitespace
        // (or an empty string) would otherwise produce and that would skew the counts.
        $words = preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // The split itself failed; there is nothing meaningful to analyse.
            return [];
        }

        // Too short to contain the shortest phrase the required number of times.
        if (count($words) < $minPhraseLength * $minRepetitions) {
            return [];
        }

        $repeatedPhrases = $this->findRepeatedPhrases(
            $words,
            $minPhraseLength,
            $maxPhraseLength,
            $minRepetitions
        );

        if ($repeatedPhrases === []) {
            return [];
        }

        $descriptions = [];
        foreach ($repeatedPhrases as $phrase => $count) {
            $descriptions[] = "\"{$phrase}\" ({$count}x)";
        }

        return [
            [
                'rule' => 'repeated_phrase',
                'score' => $weights['repeated_phrase'] ?? 25,
                'reason' => 'Contains repeated phrases: ' . implode(', ', $descriptions),
                'links' => [],
                'domains' => [],
                // PHP turns integer-like array keys (e.g. "123") into ints; cast back
                // so the keyword list always contains strings.
                'keywords' => array_map(
                    static fn ($phrase): string => (string) $phrase,
                    array_keys($repeatedPhrases)
                ),
            ],
        ];
    }

    /**
     * Count every n-gram of each length in the given range and keep the frequent ones.
     *
     * @param array $words          Words of the text, in order.
     * @param int   $minLength      Shortest n-gram length, in words.
     * @param int   $maxLength      Longest n-gram length, in words.
     * @param int   $minRepetitions Occurrences required for an n-gram to be kept.
     *
     * @return array Map of phrase => occurrence count, shorter phrases first.
     */
    private function findRepeatedPhrases(array $words, int $minLength, int $maxLength, int $minRepetitions): array
    {
        $wordCount = count($words);

        // Phrases longer than half the text are not examined.
        $longest = min($maxLength, intdiv($wordCount, 2));

        $repeated = [];
        for ($length = $minLength; $length <= $longest; $length++) {
            $ngrams = [];
            $lastStart = $wordCount - $length;
            for ($start = 0; $start <= $lastStart; $start++) {
                $ngrams[] = implode(' ', array_slice($words, $start, $length));
            }

            foreach (array_count_values($ngrams) as $phrase => $count) {
                if ($count >= $minRepetitions) {
                    $repeated[$phrase] = $count;
                }
            }
        }

        return $repeated;
    }
}