Files
SkinbaseNova/app/Services/Moderation/Providers/HeuristicModerationSuggestionProvider.php

88 lines
3.6 KiB
PHP

<?php
namespace App\Services\Moderation\Providers;
use App\Contracts\Moderation\ModerationSuggestionProviderInterface;
use App\Data\Moderation\ModerationResultData;
use App\Data\Moderation\ModerationSuggestionData;
/**
 * Turns the outcome of the rule-based moderation pass into a suggested label,
 * action and confidence, using only fixed precedence rules over the rule hits
 * recorded on the result.
 */
class HeuristicModerationSuggestionProvider implements ModerationSuggestionProviderInterface
{
    /** Provider identifier stamped on every suggestion produced by this class. */
    private const PROVIDER = 'heuristic_assist';

    /**
     * Keyword fragments per spam label, listed in precedence order: the first
     * label that has a matching fragment wins.
     *
     * @var array<string, array<int, string>>
     */
    private const KEYWORD_LABELS = [
        'casino_spam' => ['casino', 'bet'],
        'adult_spam' => ['adult', 'webcam'],
        'crypto_spam' => ['bitcoin', 'crypto'],
        'pharma_spam' => ['pharma', 'viagra'],
    ];

    /**
     * Build a suggestion from an already-computed moderation result.
     *
     * A score of exactly 0 yields a "likely_safe" suggestion with no campaign
     * tags or raw payload. Any other score is classified by rule-hit precedence
     * and, when matched domains are present, tagged with up to three of them.
     *
     * @param string $content Not read by this implementation.
     * @param ModerationResultData $result Source of the score, rule hits, matched domains and keywords.
     * @param array $context Not read by this implementation.
     */
    public function suggest(string $content, ModerationResultData $result, array $context = []): ModerationSuggestionData
    {
        // Guard clause: nothing fired, so recommend marking the content safe.
        if ($result->score === 0) {
            return new ModerationSuggestionData(
                provider: self::PROVIDER,
                suggestedLabel: 'likely_safe',
                suggestedAction: 'mark_safe',
                confidence: 82,
                explanation: 'No suspicious signals were detected by the deterministic moderation rules.',
            );
        }

        [$verdictLabel, $verdictAction, $verdictConfidence, $verdictReason, $tags] = $this->classify($result);

        // Append at most the first three matched domains as a single tag.
        if ($result->matchedDomains !== []) {
            $leadingDomains = array_slice($result->matchedDomains, 0, 3);
            $tags[] = 'domains:' . implode(',', $leadingDomains);
        }

        $signals = [
            'rule_hits' => $result->ruleHits,
            'matched_domains' => $result->matchedDomains,
            'matched_keywords' => $result->matchedKeywords,
        ];

        return new ModerationSuggestionData(
            provider: self::PROVIDER,
            suggestedLabel: $verdictLabel,
            suggestedAction: $verdictAction,
            confidence: $verdictConfidence,
            explanation: $verdictReason,
            // De-duplicate and reindex so the tags form a plain list.
            campaignTags: array_values(array_unique($tags)),
            rawResponse: $signals,
        );
    }

    /**
     * Pick the verdict for a non-zero score by rule-hit precedence:
     * blocked/blacklisted domain, then high-risk keyword, then duplicate or
     * near-duplicate campaign, then a generic "needs review" fallback.
     *
     * @return array{0: string, 1: string, 2: mixed, 3: string, 4: array<int, string>}
     *         Label, action, confidence, explanation and initial campaign tags.
     */
    private function classify(ModerationResultData $result): array
    {
        // Arms are tried top to bottom; only the winning arm's tuple is evaluated.
        return match (true) {
            isset($result->ruleHits['blocked_domain'])
                || isset($result->ruleHits['blacklisted_domain']) => [
                'seo_spam',
                $result->autoHideRecommended ? 'auto_hide_review' : 'confirm_spam',
                94,
                'Blocked-domain activity was detected and strongly correlates with outbound spam campaigns.',
                ['blocked-domain'],
            ],
            isset($result->ruleHits['high_risk_keyword']) => [
                $this->labelFromKeywords($result->matchedKeywords),
                'confirm_spam',
                88,
                'High-risk spam keywords were matched across the content snapshot.',
                ['high-risk-keywords'],
            ],
            isset($result->ruleHits['near_duplicate_campaign'])
                || isset($result->ruleHits['duplicate_comment']) => [
                'campaign_spam',
                'review_cluster',
                86,
                'The content appears linked to a repeated spam template or campaign cluster.',
                ['duplicate-campaign'],
            ],
            default => [
                'needs_review',
                'review',
                // The raw score is clamped into the 55..84 band.
                max(55, min(84, $result->score)),
                'Multiple suspicious signals were detected, but the content should remain human-reviewed.',
                [],
            ],
        };
    }

    /**
     * Map matched keywords to a spam label by case-insensitive substring
     * search over the space-joined keyword list; falls back to "spam".
     *
     * @param array<int, string> $keywords
     */
    private function labelFromKeywords(array $keywords): string
    {
        $haystack = mb_strtolower(implode(' ', $keywords));

        foreach (self::KEYWORD_LABELS as $spamLabel => $fragments) {
            foreach ($fragments as $fragment) {
                if (str_contains($haystack, $fragment)) {
                    return $spamLabel;
                }
            }
        }

        return 'spam';
    }
}