88 lines
3.6 KiB
PHP
88 lines
3.6 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Moderation\Providers;
|
|
|
|
use App\Contracts\Moderation\ModerationSuggestionProviderInterface;
|
|
use App\Data\Moderation\ModerationResultData;
|
|
use App\Data\Moderation\ModerationSuggestionData;
|
|
|
|
/**
 * Suggestion provider that turns the deterministic moderation result into a
 * label, action, confidence and explanation using fixed, rule-based heuristics.
 *
 * Every suggestion it produces is attributed to the 'heuristic_assist' provider.
 */
class HeuristicModerationSuggestionProvider implements ModerationSuggestionProviderInterface
{
    /**
     * Build a moderation suggestion from the rule hits recorded on $result.
     *
     * Only $result is consulted; $content and $context are accepted to satisfy
     * the provider interface and are not read by this implementation.
     *
     * @param string               $content Content under moderation (unused here).
     * @param ModerationResultData $result  Outcome of the deterministic moderation rules.
     * @param array                $context Additional context (unused here).
     *
     * @return ModerationSuggestionData The heuristic suggestion.
     */
    public function suggest(string $content, ModerationResultData $result, array $context = []): ModerationSuggestionData
    {
        // A score of exactly integer zero short-circuits to a "safe" suggestion;
        // no campaign tags or raw response are attached in this case.
        if ($result->score === 0) {
            return new ModerationSuggestionData(
                provider: 'heuristic_assist',
                suggestedLabel: 'likely_safe',
                suggestedAction: 'mark_safe',
                confidence: 82,
                explanation: 'No suspicious signals were detected by the deterministic moderation rules.',
            );
        }

        [$suggestedLabel, $suggestedAction, $confidenceScore, $explanation, $tags] = $this->classify($result);

        // Surface at most the first three matched domains as a single tag.
        if ($result->matchedDomains !== []) {
            $leadingDomains = array_slice($result->matchedDomains, 0, 3);
            $tags[] = 'domains:' . implode(',', $leadingDomains);
        }

        return new ModerationSuggestionData(
            provider: 'heuristic_assist',
            suggestedLabel: $suggestedLabel,
            suggestedAction: $suggestedAction,
            confidence: $confidenceScore,
            explanation: $explanation,
            // De-duplicate the tags and re-index them into a plain list.
            campaignTags: array_values(array_unique($tags)),
            rawResponse: [
                'rule_hits' => $result->ruleHits,
                'matched_domains' => $result->matchedDomains,
                'matched_keywords' => $result->matchedKeywords,
            ],
        );
    }

    /**
     * Pick the suggestion for a non-zero-score result.
     *
     * Rules are checked in priority order and the first one that applies wins:
     * blocked/blacklisted domain, then high-risk keyword, then duplicate or
     * near-duplicate campaign, and finally a generic "needs review" fallback.
     *
     * @return array<int, mixed> Positional tuple of
     *                           [label, action, confidence, explanation, campaign tags].
     */
    private function classify(ModerationResultData $result): array
    {
        if (isset($result->ruleHits['blocked_domain']) || isset($result->ruleHits['blacklisted_domain'])) {
            return [
                'seo_spam',
                // The action depends on whether the rules already recommended auto-hiding.
                $result->autoHideRecommended ? 'auto_hide_review' : 'confirm_spam',
                94,
                'Blocked-domain activity was detected and strongly correlates with outbound spam campaigns.',
                ['blocked-domain'],
            ];
        }

        if (isset($result->ruleHits['high_risk_keyword'])) {
            return [
                $this->labelFromKeywords($result->matchedKeywords),
                'confirm_spam',
                88,
                'High-risk spam keywords were matched across the content snapshot.',
                ['high-risk-keywords'],
            ];
        }

        if (isset($result->ruleHits['near_duplicate_campaign']) || isset($result->ruleHits['duplicate_comment'])) {
            return [
                'campaign_spam',
                'review_cluster',
                86,
                'The content appears linked to a repeated spam template or campaign cluster.',
                ['duplicate-campaign'],
            ];
        }

        // No specific rule applied: reuse the score as confidence, clamped to 55..84.
        return [
            'needs_review',
            'review',
            max(55, min(84, $result->score)),
            'Multiple suspicious signals were detected, but the content should remain human-reviewed.',
            [],
        ];
    }

    /**
     * Map the matched keywords onto a spam category label.
     *
     * The keywords are joined with spaces and lower-cased, then searched for
     * category markers as plain substrings. Categories are tried in the order
     * listed below and the first one with a hit is returned; 'spam' is the
     * fallback when nothing matches.
     *
     * @param array<int, string> $keywords
     */
    private function labelFromKeywords(array $keywords): string
    {
        $haystack = mb_strtolower(implode(' ', $keywords));

        // Order matters: earlier categories take precedence over later ones.
        $markersByLabel = [
            'casino_spam' => ['casino', 'bet'],
            'adult_spam' => ['adult', 'webcam'],
            'crypto_spam' => ['bitcoin', 'crypto'],
            'pharma_spam' => ['pharma', 'viagra'],
        ];

        foreach ($markersByLabel as $categoryLabel => $markers) {
            foreach ($markers as $marker) {
                if (str_contains($haystack, $marker)) {
                    return $categoryLabel;
                }
            }
        }

        return 'spam';
    }
}