SkinbaseNova/app/Services/Moderation/Providers/HeuristicModerationSuggestionProvider.php

<?php

namespace App\Services\Moderation\Providers;

use App\Contracts\Moderation\ModerationSuggestionProviderInterface;
use App\Data\Moderation\ModerationResultData;
use App\Data\Moderation\ModerationSuggestionData;

/**
 * Rule-based suggestion provider.
 *
 * Translates an already-computed moderation result (score, rule hits, matched
 * domains and keywords) into a suggested label, action, confidence and
 * explanation. The raw content string and the context array are accepted to
 * satisfy the provider interface but are not read by this implementation.
 */
class HeuristicModerationSuggestionProvider implements ModerationSuggestionProviderInterface
{
    /** Provider identifier stamped on every suggestion built by this class. */
    private const PROVIDER = 'heuristic_assist';

    /**
     * Spam label => keyword fragments that select it.
     *
     * Entries are scanned in declaration order and the first fragment found wins.
     */
    private const KEYWORD_LABELS = [
        'casino_spam' => ['casino', 'bet'],
        'adult_spam' => ['adult', 'webcam'],
        'crypto_spam' => ['bitcoin', 'crypto'],
        'pharma_spam' => ['pharma', 'viagra'],
    ];

    /**
     * Build a moderation suggestion from the given rule-engine result.
     *
     * A score of exactly 0 yields a fixed "likely_safe" suggestion. Otherwise the
     * rule hits are classified by priority, an optional "domains:" tag listing up
     * to three matched domains is appended, and the rule hits, matched domains and
     * matched keywords are attached as the raw response.
     *
     * @param string               $content Not read by this provider.
     * @param ModerationResultData $result  Result whose score, rule hits and matches drive the suggestion.
     * @param array                $context Not read by this provider.
     */
    public function suggest(string $content, ModerationResultData $result, array $context = []): ModerationSuggestionData
    {
        // Zero score: short-circuit with a fixed suggestion that carries no tags or raw payload.
        if ($result->score === 0) {
            return new ModerationSuggestionData(
                provider: self::PROVIDER,
                suggestedLabel: 'likely_safe',
                suggestedAction: 'mark_safe',
                confidence: 82,
                explanation: 'No suspicious signals were detected by the deterministic moderation rules.',
            );
        }

        [$suggestedLabel, $suggestedAction, $certainty, $explanation, $tags] = $this->classify($result);

        if ($result->matchedDomains !== []) {
            // Only the first three matched domains are included in the tag.
            $leadingDomains = array_slice($result->matchedDomains, 0, 3);
            $tags[] = 'domains:' . implode(',', $leadingDomains);
        }

        return new ModerationSuggestionData(
            provider: self::PROVIDER,
            suggestedLabel: $suggestedLabel,
            suggestedAction: $suggestedAction,
            confidence: $certainty,
            explanation: $explanation,
            campaignTags: array_values(array_unique($tags)),
            rawResponse: [
                'rule_hits' => $result->ruleHits,
                'matched_domains' => $result->matchedDomains,
                'matched_keywords' => $result->matchedKeywords,
            ],
        );
    }

    /**
     * Pick label, action, confidence, explanation and initial tags for a non-zero result.
     *
     * Priority order: blocked/blacklisted domain, then high-risk keyword, then
     * duplicate/near-duplicate campaign, then a generic "needs_review" fallback
     * whose confidence is the score clamped to the 55..84 range.
     *
     * @return array{0: string, 1: string, 2: mixed, 3: string, 4: array<int, string>}
     */
    private function classify(ModerationResultData $result): array
    {
        return match (true) {
            $this->hasAnyHit($result, 'blocked_domain', 'blacklisted_domain') => [
                'seo_spam',
                $result->autoHideRecommended ? 'auto_hide_review' : 'confirm_spam',
                94,
                'Blocked-domain activity was detected and strongly correlates with outbound spam campaigns.',
                ['blocked-domain'],
            ],
            $this->hasAnyHit($result, 'high_risk_keyword') => [
                $this->labelFromKeywords($result->matchedKeywords),
                'confirm_spam',
                88,
                'High-risk spam keywords were matched across the content snapshot.',
                ['high-risk-keywords'],
            ],
            $this->hasAnyHit($result, 'near_duplicate_campaign', 'duplicate_comment') => [
                'campaign_spam',
                'review_cluster',
                86,
                'The content appears linked to a repeated spam template or campaign cluster.',
                ['duplicate-campaign'],
            ],
            default => [
                'needs_review',
                'review',
                max(55, min(84, $result->score)),
                'Multiple suspicious signals were detected, but the content should remain human-reviewed.',
                [],
            ],
        };
    }

    /**
     * Report whether at least one of the named rules is set in the result's rule hits.
     */
    private function hasAnyHit(ModerationResultData $result, string ...$rules): bool
    {
        foreach ($rules as $rule) {
            if (isset($result->ruleHits[$rule])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Derive a spam label from the matched keywords.
     *
     * The keywords are joined with spaces and lower-cased, then searched for the
     * known fragments; the generic 'spam' label is returned when none is found.
     *
     * @param array<int, string> $keywords
     */
    private function labelFromKeywords(array $keywords): string
    {
        $haystack = mb_strtolower(implode(' ', $keywords));

        foreach (self::KEYWORD_LABELS as $label => $fragments) {
            foreach ($fragments as $fragment) {
                if (str_contains($haystack, $fragment)) {
                    return $label;
                }
            }
        }

        return 'spam';
    }
}