Files
SkinbaseNova/app/Services/Moderation/Rules/UnicodeObfuscationRule.php

50 lines
1.7 KiB
PHP

<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that flags text containing Unicode tricks often used to evade filters.
 *
 * Five independent pattern categories are checked against the raw content:
 *  - a whitespace-delimited token mixing Cyrillic (U+0400–U+04FF) and ASCII Latin letters,
 *  - zero-width characters and the soft hyphen,
 *  - invisible formatting characters (U+2060–U+2064),
 *  - fullwidth Latin letters and digits,
 *  - mathematical alphanumeric symbols (U+1D400–U+1D7FF).
 */
class UnicodeObfuscationRule implements ModerationRuleInterface
{
    /**
     * Inspect the raw content and report at most one finding.
     *
     * The finding's score is the configured `unicode_obfuscation` weight (read from
     * `content_moderation.weights`, default 30) multiplied by the number of pattern
     * categories that matched, so content combining several tricks scores higher.
     *
     * @param string $content    Raw text to inspect.
     * @param string $normalized Not used by this rule.
     * @param array  $context    Not used by this rule.
     *
     * @return array<int, array<string, mixed>> Empty when nothing matched; otherwise a single
     *                                          finding with the keys `rule`, `score`, `reason`,
     *                                          `links`, `domains` and `keywords`.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $weights = app('config')->get('content_moderation.weights', []);

        $matchCount = $this->countMatchedPatterns($this->ensureValidUtf8($content));

        if ($matchCount === 0) {
            return [];
        }

        return [
            [
                'rule' => 'unicode_obfuscation',
                'score' => ($weights['unicode_obfuscation'] ?? 30) * $matchCount,
                'reason' => 'Contains suspicious Unicode characters/obfuscation (' . $matchCount . ' pattern(s) matched)',
                'links' => [],
                'domains' => [],
                'keywords' => [],
            ],
        ];
    }

    /**
     * Count how many obfuscation pattern categories occur at least once in the text.
     *
     * Each category contributes at most 1, regardless of how many times it occurs.
     */
    private function countMatchedPatterns(string $text): int
    {
        $suspiciousPatterns = [
            // Mixed script: Cyrillic and ASCII Latin letters inside the same non-whitespace token.
            '/\b(?=\S*[\x{0400}-\x{04FF}])(?=\S*[a-zA-Z])\S+\b/u',
            // Zero-width space / non-joiner / joiner, BOM, soft hyphen.
            '/[\x{200B}\x{200C}\x{200D}\x{FEFF}\x{00AD}]/u',
            // Invisible formatting characters (word joiner, invisible operators).
            '/[\x{2060}\x{2061}\x{2062}\x{2063}\x{2064}]/u',
            // Fullwidth digits and Latin letters. Fullwidth punctuation is deliberately
            // excluded: characters such as U+FF0C or U+FF01 are ordinary CJK typography
            // and would flag legitimate Chinese/Japanese text.
            '/[\x{FF10}-\x{FF19}\x{FF21}-\x{FF3A}\x{FF41}-\x{FF5A}]/u',
            // Mathematical alphanumeric symbols used as styled text.
            '/[\x{1D400}-\x{1D7FF}]/u',
        ];

        $matched = 0;
        foreach ($suspiciousPatterns as $pattern) {
            // preg_match() yields 1 on a hit, 0 on no hit and false on error; only a
            // real hit may count.
            if (preg_match($pattern, $text) === 1) {
                $matched++;
            }
        }

        return $matched;
    }

    /**
     * Return the text in a form the `/u` patterns can process.
     *
     * With the `u` modifier preg_match() fails (returns false) on malformed UTF-8, which
     * would otherwise let a single stray byte disable every check in this rule. Malformed
     * sequences are therefore scrubbed before matching when mbstring is available.
     */
    private function ensureValidUtf8(string $text): string
    {
        // An empty `/u` pattern matches any well-formed UTF-8 subject and fails on a malformed one.
        if (preg_match('//u', $text) === 1) {
            return $text;
        }

        if (function_exists('mb_scrub')) {
            return mb_scrub($text, 'UTF-8');
        }

        // No scrubber available: fall back to the unmodified text.
        return $text;
    }
}