get('content_moderation.repeated_phrase', []); $minPhraseLength = $config['min_phrase_length'] ?? 4; $minRepetitions = $config['min_repetitions'] ?? 3; $weights = app('config')->get('content_moderation.weights', []); $words = preg_split('/\s+/', $normalized); if (count($words) < $minPhraseLength * $minRepetitions) { return []; } $findings = []; $repeatedPhrases = []; // Check for repeated n-grams of various lengths for ($phraseLen = $minPhraseLength; $phraseLen <= min(8, intdiv(count($words), 2)); $phraseLen++) { $ngrams = []; for ($i = 0; $i <= count($words) - $phraseLen; $i++) { $ngram = implode(' ', array_slice($words, $i, $phraseLen)); $ngrams[$ngram] = ($ngrams[$ngram] ?? 0) + 1; } foreach ($ngrams as $phrase => $count) { if ($count >= $minRepetitions) { $repeatedPhrases[$phrase] = $count; } } } if (!empty($repeatedPhrases)) { $findings[] = [ 'rule' => 'repeated_phrase', 'score' => $weights['repeated_phrase'] ?? 25, 'reason' => 'Contains repeated phrases: ' . implode(', ', array_map( fn ($phrase, $count) => "\"{$phrase}\" ({$count}x)", array_keys($repeatedPhrases), array_values($repeatedPhrases) )), 'links' => [], 'domains' => [], 'keywords' => array_keys($repeatedPhrases), ]; } return $findings; } }