Implement creator studio and upload updates

2026-04-04 10:12:02 +02:00
parent 1da7d3bf88
commit 0b216b7ecd
15107 changed files with 31206 additions and 626514 deletions
--- a/app/Services/Moderation/Rules/DomainBlacklistRule.php
+++ b/app/Services/Moderation/Rules/DomainBlacklistRule.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Enums\ModerationDomainStatus;
+use App\Services\Moderation\DomainReputationService;
+
+/**
+ * Moderation rule that reports links pointing at domains the reputation
+ * service classifies as blocked or suspicious.
+ */
+class DomainBlacklistRule implements ModerationRuleInterface
+{
+    /**
+     * Inspect the URLs of a piece of content and score the flagged domains.
+     *
+     * @param string $content    Raw content; scanned for URLs when $context carries none.
+     * @param string $normalized Normalized content (not used by this rule).
+     * @param array  $context    Optional 'extracted_urls' list that replaces URL extraction.
+     *
+     * @return array Up to two findings ('blocked_domain' first, then 'suspicious_domain').
+     *               Each score is the configured weight times the number of distinct hosts.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $urlHelper = app(LinkPresenceRule::class);
+        $candidateUrls = (array) ($context['extracted_urls'] ?? $urlHelper->extractUrls($content));
+
+        if ($candidateUrls === []) {
+            return [];
+        }
+
+        $weights = app('config')->get('content_moderation.weights', []);
+        $reputation = app(DomainReputationService::class);
+
+        // Hosts are keyed by themselves so repeated domains collapse while the
+        // first-seen order is preserved.
+        $blockedHosts = [];
+        $suspiciousHosts = [];
+
+        foreach ($candidateUrls as $candidate) {
+            $host = $urlHelper->extractHost($candidate);
+            if ($host === null) {
+                continue;
+            }
+
+            $status = $reputation->statusForDomain($host);
+            if ($status === ModerationDomainStatus::Blocked) {
+                $blockedHosts[$host] = $host;
+                continue;
+            }
+
+            if ($status === ModerationDomainStatus::Suspicious) {
+                $suspiciousHosts[$host] = $host;
+            }
+        }
+
+        // [rule name, per-host weight, reason prefix, matched hosts]
+        $groups = [
+            [
+                'blocked_domain',
+                $weights['blacklisted_domain'] ?? 70,
+                'Contains blocked domain(s): ',
+                array_values($blockedHosts),
+            ],
+            [
+                'suspicious_domain',
+                $weights['suspicious_domain'] ?? 40,
+                'Contains suspicious TLD domain(s): ',
+                array_values($suspiciousHosts),
+            ],
+        ];
+
+        $findings = [];
+        foreach ($groups as [$ruleName, $weight, $reasonPrefix, $hosts]) {
+            if ($hosts === []) {
+                continue;
+            }
+
+            $findings[] = [
+                'rule'     => $ruleName,
+                'score'    => $weight * count($hosts),
+                'reason'   => $reasonPrefix . implode(', ', $hosts),
+                'links'    => $candidateUrls,
+                'domains'  => $hosts,
+                'keywords' => [],
+            ];
+        }
+
+        return $findings;
+    }
+}
--- a/app/Services/Moderation/Rules/DuplicateCommentRule.php
+++ b/app/Services/Moderation/Rules/DuplicateCommentRule.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Enums\ModerationContentType;
+use App\Models\ArtworkComment;
+
+/**
+ * Moderation rule that flags an artwork comment whose text equals the text of
+ * other, non-deleted artwork comments.
+ */
+class DuplicateCommentRule implements ModerationRuleInterface
+{
+    /**
+     * Count stored comments whose lower-cased, trimmed text equals $normalized.
+     *
+     * @param string $content    Raw content (not used by this rule).
+     * @param string $normalized Normalized text compared against the stored comments.
+     * @param array  $context    Needs 'content_type' set to the artwork-comment value and
+     *                           a positive 'content_id'; otherwise the rule is skipped.
+     *
+     * @return array A single 'duplicate_comment' finding, or an empty array.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $contentType = $context['content_type'] ?? null;
+        if ($contentType !== ModerationContentType::ArtworkComment->value) {
+            return [];
+        }
+
+        $commentId = (int) ($context['content_id'] ?? 0);
+        if ($normalized === '' || $commentId <= 0) {
+            return [];
+        }
+
+        // Every other live comment with the same text; the comment under review is excluded.
+        $otherComments = ArtworkComment::query()
+            ->where('id', '!=', $commentId)
+            ->whereNull('deleted_at')
+            ->whereRaw('LOWER(TRIM(COALESCE(raw_content, content))) = ?', [$normalized]);
+
+        $matchCount = $otherComments->count();
+        if ($matchCount < 1) {
+            return [];
+        }
+
+        $finding = [
+            'rule' => 'duplicate_comment',
+            'score' => app('config')->get('content_moderation.weights.duplicate_comment', 35),
+            'reason' => "Matches {$matchCount} existing comment(s) exactly",
+            'links' => [],
+            'domains' => [],
+            'keywords' => [],
+        ];
+
+        return [$finding];
+    }
+}
--- a/app/Services/Moderation/Rules/ExcessivePunctuationRule.php
+++ b/app/Services/Moderation/Rules/ExcessivePunctuationRule.php
@@ -0,0 +1,54 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+
+/**
+ * Moderation rule that flags text dominated by exclamation marks, question
+ * marks, upper-case letters, or long runs of symbols.
+ */
+class ExcessivePunctuationRule implements ModerationRuleInterface
+{
+    /**
+     * Compare punctuation and upper-case ratios of $content with the configured limits.
+     *
+     * @param string $content    Raw content that is measured.
+     * @param string $normalized Normalized content (not used by this rule).
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array A single 'excessive_punctuation' finding, or an empty array when the
+     *               content is shorter than the minimum length or stays within all limits.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $settings = app('config')->get('content_moderation.excessive_punctuation', []);
+        $charCount = mb_strlen($content);
+
+        if ($charCount < (int) ($settings['min_length'] ?? 20)) {
+            return [];
+        }
+
+        // Guard the division in case the configured minimum length lets empty content through.
+        $divisor = max($charCount, 1);
+        $exclamationShare = substr_count($content, '!') / $divisor;
+        $questionShare = substr_count($content, '?') / $divisor;
+        $upperShare = $this->capsRatio($content);
+
+        // Six or more consecutive characters from the symbol set count as a burst.
+        $hasSymbolRun = preg_match('/[!?$%*@#._\-]{6,}/', $content) === 1;
+
+        $withinLimits = $exclamationShare <= (float) ($settings['max_exclamation_ratio'] ?? 0.1)
+            && $questionShare <= (float) ($settings['max_question_ratio'] ?? 0.1)
+            && $upperShare <= (float) ($settings['max_caps_ratio'] ?? 0.7);
+
+        if ($withinLimits && ! $hasSymbolRun) {
+            return [];
+        }
+
+        $finding = [
+            'rule' => 'excessive_punctuation',
+            'score' => app('config')->get('content_moderation.weights.excessive_punctuation', 15),
+            'reason' => 'Contains excessive punctuation, all-caps patterns, or symbol spam',
+            'links' => [],
+            'domains' => [],
+            'keywords' => [],
+        ];
+
+        return [$finding];
+    }
+
+    /**
+     * Share of upper-case letters among all letters of $content.
+     *
+     * @return float Value between 0 and 1; 0.0 when the content has no letters.
+     */
+    private function capsRatio(string $content): float
+    {
+        preg_match_all('/\p{L}/u', $content, $allLetters);
+        $letterTotal = count($allLetters[0] ?? []);
+
+        if ($letterTotal === 0) {
+            return 0.0;
+        }
+
+        preg_match_all('/\p{Lu}/u', $content, $capitals);
+
+        return count($capitals[0] ?? []) / $letterTotal;
+    }
+}
--- a/app/Services/Moderation/Rules/KeywordStuffingRule.php
+++ b/app/Services/Moderation/Rules/KeywordStuffingRule.php
@@ -0,0 +1,49 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+
+/**
+ * Moderation rule that flags text built from a small vocabulary repeated many
+ * times (keyword stuffing).
+ */
+class KeywordStuffingRule implements ModerationRuleInterface
+{
+    /**
+     * Measure vocabulary diversity and the dominance of the most frequent word.
+     *
+     * Words are runs of letters/digits in $normalized that are longer than one
+     * character. The content is flagged when the unique-word ratio drops below
+     * 'max_unique_ratio' or the most frequent word exceeds
+     * 'max_single_word_frequency' of all words.
+     *
+     * @param string $content    Raw content (not used by this rule).
+     * @param string $normalized Normalized content that is tokenized.
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array A single 'keyword_stuffing' finding listing up to five of the most
+     *               frequent words, or an empty array.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        preg_match_all('/[\p{L}\p{N}]+/u', $normalized, $matches);
+
+        $words = array_values(array_filter(
+            $matches[0] ?? [],
+            static fn (string $word): bool => mb_strlen($word) > 1,
+        ));
+        $totalWords = count($words);
+        $config = app('config')->get('content_moderation.keyword_stuffing', []);
+
+        // The explicit zero check matters when min_word_count is configured as 0 or
+        // less: max() on an empty frequency table throws a ValueError.
+        if ($totalWords === 0 || $totalWords < (int) ($config['min_word_count'] ?? 20)) {
+            return [];
+        }
+
+        $frequencies = array_count_values($words);
+        $uniqueRatio = count($frequencies) / $totalWords;
+        $topWordRatio = max($frequencies) / $totalWords;
+
+        $maxUniqueRatio = (float) ($config['max_unique_ratio'] ?? 0.3);
+        $maxSingleWordFrequency = (float) ($config['max_single_word_frequency'] ?? 0.25);
+
+        if ($uniqueRatio >= $maxUniqueRatio && $topWordRatio <= $maxSingleWordFrequency) {
+            return [];
+        }
+
+        arsort($frequencies);
+
+        // Purely numeric words become integer array keys; cast back so that
+        // 'keywords' is always a list of strings.
+        $keywords = array_map(
+            static fn (int|string $word): string => (string) $word,
+            array_slice(array_keys($frequencies), 0, 5),
+        );
+
+        return [[
+            'rule' => 'keyword_stuffing',
+            'score' => app('config')->get('content_moderation.weights.keyword_stuffing', 20),
+            'reason' => sprintf(
+                'Likely keyword stuffing (unique ratio %.2f, top word ratio %.2f)',
+                $uniqueRatio,
+                $topWordRatio
+            ),
+            'links' => [],
+            'domains' => [],
+            'keywords' => $keywords,
+        ]];
+    }
+}
--- a/app/Services/Moderation/Rules/LinkPresenceRule.php
+++ b/app/Services/Moderation/Rules/LinkPresenceRule.php
@@ -0,0 +1,118 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Enums\ModerationDomainStatus;
+use App\Services\Moderation\DomainReputationService;
+
+/**
+ * Moderation rule that flags external links and links through URL shorteners,
+ * and exposes the URL/host extraction helpers used for that.
+ */
+class LinkPresenceRule implements ModerationRuleInterface
+{
+    /**
+     * Score the external and shortened links found in a piece of content.
+     *
+     * URLs without a parsable host and URLs whose domain status is Allowed are
+     * ignored. Every remaining URL counts as external; those on a shortener
+     * domain (or a subdomain of one) are additionally reported as shortened.
+     *
+     * @param string $content    Raw content; scanned for URLs when $context carries none.
+     * @param string $normalized Normalized content (not used by this rule).
+     * @param array  $context    Optional 'extracted_urls' list that replaces URL extraction.
+     *
+     * @return array Zero or more findings: 'shortened_link', followed by either
+     *               'multiple_links' (two or more external URLs) or
+     *               'single_external_link' (exactly one).
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $urls = (array) ($context['extracted_urls'] ?? $this->extractUrls($content));
+
+        if (empty($urls)) {
+            return [];
+        }
+
+        $domainService = app(DomainReputationService::class);
+        $shortenerDomains = $domainService->shortenerDomains();
+
+        $externalUrls = [];
+        $shortenerUrls = [];
+
+        foreach ($urls as $url) {
+            $host = $this->extractHost($url);
+            if ($host === null) {
+                continue;
+            }
+
+            if ($domainService->statusForDomain($host) === ModerationDomainStatus::Allowed) {
+                continue;
+            }
+
+            if ($this->isDomainInList($host, $shortenerDomains)) {
+                $shortenerUrls[] = $url;
+            }
+
+            $externalUrls[] = $url;
+        }
+
+        $findings = [];
+        $weights = app('config')->get('content_moderation.weights', []);
+
+        if (count($shortenerUrls) > 0) {
+            $findings[] = [
+                'rule'     => 'shortened_link',
+                'score'    => $weights['shortened_link'] ?? 30,
+                'reason'   => 'Contains ' . count($shortenerUrls) . ' shortened URL(s)',
+                'links'    => $shortenerUrls,
+                'domains'  => $this->hostsOf($shortenerUrls),
+                'keywords' => [],
+            ];
+        }
+
+        if (count($externalUrls) > 1) {
+            $findings[] = [
+                'rule'     => 'multiple_links',
+                'score'    => $weights['multiple_links'] ?? 40,
+                'reason'   => 'Contains ' . count($externalUrls) . ' external links',
+                'links'    => $externalUrls,
+                'domains'  => $this->hostsOf($externalUrls),
+                'keywords' => [],
+            ];
+        } elseif (count($externalUrls) === 1) {
+            $findings[] = [
+                'rule'     => 'single_external_link',
+                'score'    => $weights['single_external_link'] ?? 20,
+                'reason'   => 'Contains an external link',
+                'links'    => $externalUrls,
+                'domains'  => $this->hostsOf($externalUrls),
+                'keywords' => [],
+            ];
+        }
+
+        return $findings;
+    }
+
+    /**
+     * Find every http(s):// URL and every bare "www." URL in $text.
+     *
+     * Both forms are matched by a single pattern. preg_match_all() never returns
+     * overlapping matches, so "https://www.example.com" is reported once instead
+     * of once as the full URL and again as its "www.example.com" tail (which made
+     * one link count as two). A "www." sequence inside an already matched URL is
+     * therefore not reported separately.
+     *
+     * @return string[] Unique URLs in order of first appearance.
+     */
+    public function extractUrls(string $text): array
+    {
+        preg_match_all("#(?:https?://|\\bwww\\.)[^\\s<>\\[\\]\"'`\\)]+#iu", $text, $matches);
+
+        return array_values(array_unique($matches[0] ?? []));
+    }
+
+    /**
+     * Extract the host of $url and pass it through the reputation service's
+     * domain normalization.
+     *
+     * @param string $url Absolute URL, or a scheme-less one such as "www.example.com/x".
+     *
+     * @return string|null Normalized host, or null when no host can be parsed.
+     */
+    public function extractHost(string $url): ?string
+    {
+        // parse_url() only recognises a host when a scheme is present.
+        $normalizedUrl = preg_match('#^https?://#i', $url) ? $url : 'https://' . ltrim($url, '/');
+        $host = parse_url($normalizedUrl, PHP_URL_HOST);
+        if (!is_string($host)) {
+            return null;
+        }
+
+        return app(DomainReputationService::class)->normalizeDomain($host);
+    }
+
+    /**
+     * Unique, non-empty hosts of the given URLs in first-seen order.
+     *
+     * @param array $urls URLs accepted by extractHost().
+     *
+     * @return string[]
+     */
+    private function hostsOf(array $urls): array
+    {
+        $hosts = array_map(fn ($url) => $this->extractHost($url), $urls);
+
+        return array_values(array_unique(array_filter($hosts)));
+    }
+
+    /**
+     * Tell whether $host equals, or is a subdomain of, any entry in $list.
+     *
+     * @param string $host Host to test; entries are lower-cased before comparison.
+     * @param array  $list Domain names to compare against.
+     */
+    private function isDomainInList(string $host, array $list): bool
+    {
+        foreach ($list as $entry) {
+            $entry = strtolower($entry);
+            if ($host === $entry) {
+                return true;
+            }
+            // Check if host is a subdomain of the entry
+            if (str_ends_with($host, '.' . $entry)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+}
--- a/app/Services/Moderation/Rules/NearDuplicateCampaignRule.php
+++ b/app/Services/Moderation/Rules/NearDuplicateCampaignRule.php
@@ -0,0 +1,28 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Services\Moderation\DuplicateDetectionService;
+
+/**
+ * Moderation rule that flags content the duplicate-detection service reports
+ * as similar to at least two other items.
+ */
+class NearDuplicateCampaignRule implements ModerationRuleInterface
+{
+    /**
+     * Ask the duplicate-detection service how many similar items exist.
+     *
+     * @param string $content    Raw content handed to the detection service.
+     * @param string $normalized Normalized content (not used by this rule).
+     * @param array  $context    Passed through to the service; 'extracted_domains' and
+     *                           'extracted_urls' are echoed in the finding when present.
+     *
+     * @return array A single 'near_duplicate_campaign' finding, or an empty array when
+     *               fewer than two similar items are reported.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $relatedDomains = (array) ($context['extracted_domains'] ?? []);
+
+        $detector = app(DuplicateDetectionService::class);
+        $similarCount = $detector->nearDuplicateCount($content, $context, $relatedDomains);
+
+        if ($similarCount < 2) {
+            return [];
+        }
+
+        $finding = [
+            'rule' => 'near_duplicate_campaign',
+            'score' => app('config')->get('content_moderation.weights.near_duplicate_campaign', 30),
+            'reason' => "Appears to match an existing spam campaign template ({$similarCount} similar item(s))",
+            'links' => (array) ($context['extracted_urls'] ?? []),
+            'domains' => $relatedDomains,
+            'keywords' => [],
+        ];
+
+        return [$finding];
+    }
+}
--- a/app/Services/Moderation/Rules/RegexPatternRule.php
+++ b/app/Services/Moderation/Rules/RegexPatternRule.php
@@ -0,0 +1,38 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Services\Moderation\ModerationRuleRegistryService;
+
+/**
+ * Moderation rule that applies the regex rules supplied by the moderation
+ * rule registry to both the raw and the normalized content.
+ */
+class RegexPatternRule implements ModerationRuleInterface
+{
+    /**
+     * Produce one finding per registry regex that matches the content.
+     *
+     * @param string $content    Raw content, tested first.
+     * @param string $normalized Normalized content, tested when the raw content does not match.
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array One 'regex_pattern' finding per matching rule; the score is the rule's
+     *               own 'weight' when set, otherwise the configured default weight.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $registry = \app(ModerationRuleRegistryService::class);
+        $findings = [];
+
+        foreach ($registry->regexRules() as $definition) {
+            $regex = (string) ($definition['pattern'] ?? '');
+            if ($regex === '') {
+                continue;
+            }
+
+            // The @ silences the warning raised by an invalid stored pattern;
+            // preg_match() then returns false and the rule is treated as not matching.
+            if (@preg_match($regex, $content) !== 1 && @preg_match($regex, $normalized) !== 1) {
+                continue;
+            }
+
+            $defaultWeight = $definition['weight']
+                ?? \app('config')->get('content_moderation.weights.regex_pattern', 30);
+
+            $findings[] = [
+                'rule' => 'regex_pattern',
+                'score' => (int) $defaultWeight,
+                'reason' => 'Matched custom moderation regex rule',
+                'links' => [],
+                'domains' => [],
+                'keywords' => [$regex],
+            ];
+        }
+
+        return $findings;
+    }
+}
--- a/app/Services/Moderation/Rules/RepeatedPhraseRule.php
+++ b/app/Services/Moderation/Rules/RepeatedPhraseRule.php
@@ -0,0 +1,56 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+
+/**
+ * Moderation rule that flags text in which the same multi-word phrase occurs
+ * several times.
+ */
+class RepeatedPhraseRule implements ModerationRuleInterface
+{
+    /**
+     * Count word n-grams of $normalized and report those repeated often enough.
+     *
+     * Phrase lengths from 'min_phrase_length' up to the smaller of 8 and half the
+     * word count are examined; a phrase is reported once it occurs at least
+     * 'min_repetitions' times.
+     *
+     * @param string $content    Raw content (not used by this rule).
+     * @param string $normalized Normalized content, split on whitespace.
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array A single 'repeated_phrase' finding listing every repeated phrase,
+     *               or an empty array.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $settings = app('config')->get('content_moderation.repeated_phrase', []);
+        $minPhraseLength = $settings['min_phrase_length'] ?? 4;
+        $minRepetitions = $settings['min_repetitions'] ?? 3;
+        $weights = app('config')->get('content_moderation.weights', []);
+
+        $tokens = preg_split('/\s+/', $normalized);
+        $tokenCount = count($tokens);
+
+        // Too short to hold the minimum phrase the minimum number of times.
+        if ($tokenCount < $minPhraseLength * $minRepetitions) {
+            return [];
+        }
+
+        $largestSize = min(8, intdiv($tokenCount, 2));
+        $repeated = [];
+
+        for ($size = $minPhraseLength; $size <= $largestSize; $size++) {
+            // Tally every window of $size consecutive words.
+            $tally = [];
+            $lastStart = $tokenCount - $size;
+
+            for ($start = 0; $start <= $lastStart; $start++) {
+                $window = implode(' ', array_slice($tokens, $start, $size));
+                if (isset($tally[$window])) {
+                    $tally[$window]++;
+                } else {
+                    $tally[$window] = 1;
+                }
+            }
+
+            foreach ($tally as $phrase => $occurrences) {
+                if ($occurrences >= $minRepetitions) {
+                    $repeated[$phrase] = $occurrences;
+                }
+            }
+        }
+
+        if ($repeated === []) {
+            return [];
+        }
+
+        $labels = [];
+        foreach ($repeated as $phrase => $occurrences) {
+            $labels[] = "\"{$phrase}\" ({$occurrences}x)";
+        }
+
+        return [[
+            'rule'     => 'repeated_phrase',
+            'score'    => $weights['repeated_phrase'] ?? 25,
+            'reason'   => 'Contains repeated phrases: ' . implode(', ', $labels),
+            'links'    => [],
+            'domains'  => [],
+            'keywords' => array_keys($repeated),
+        ]];
+    }
+}
--- a/app/Services/Moderation/Rules/SuspiciousKeywordRule.php
+++ b/app/Services/Moderation/Rules/SuspiciousKeywordRule.php
@@ -0,0 +1,55 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+use App\Services\Moderation\ModerationRuleRegistryService;
+
+/**
+ * Moderation rule that flags content containing high-risk or suspicious
+ * keywords supplied by the moderation rule registry.
+ */
+class SuspiciousKeywordRule implements ModerationRuleInterface
+{
+    /**
+     * Look for registry keywords inside the normalized content.
+     *
+     * @param string $content    Raw content (not used by this rule).
+     * @param string $normalized Normalized content that is searched.
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array Up to two findings ('high_risk_keyword', then 'suspicious_keyword');
+     *               each score is the configured weight times the number of matched keywords.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $registry = app(ModerationRuleRegistryService::class);
+        $weights = app('config')->get('content_moderation.weights', []);
+        $findings = [];
+
+        $highRiskMatched = $this->matchedPhrases($registry->highRiskKeywords(), $normalized);
+        $suspiciousMatched = $this->matchedPhrases($registry->suspiciousKeywords(), $normalized);
+
+        if (!empty($highRiskMatched)) {
+            $findings[] = [
+                'rule'     => 'high_risk_keyword',
+                'score'    => ($weights['high_risk_keyword'] ?? 40) * count($highRiskMatched),
+                'reason'   => 'Contains high-risk keyword(s): ' . implode(', ', $highRiskMatched),
+                'links'    => [],
+                'domains'  => [],
+                'keywords' => $highRiskMatched,
+            ];
+        }
+
+        if (!empty($suspiciousMatched)) {
+            $findings[] = [
+                'rule'     => 'suspicious_keyword',
+                'score'    => ($weights['suspicious_keyword'] ?? 25) * count($suspiciousMatched),
+                'reason'   => 'Contains suspicious keyword(s): ' . implode(', ', $suspiciousMatched),
+                'links'    => [],
+                'domains'  => [],
+                'keywords' => $suspiciousMatched,
+            ];
+        }
+
+        return $findings;
+    }
+
+    /**
+     * Return the phrases whose lower-cased form occurs in $haystack.
+     *
+     * @param iterable $phrases  Keywords as supplied by the registry.
+     * @param string   $haystack Text to search.
+     *                           NOTE(review): assumed to be lower-cased already,
+     *                           multibyte-aware — confirm against the normalizer.
+     *
+     * @return array Matching phrases in their original spelling and order.
+     */
+    private function matchedPhrases(iterable $phrases, string $haystack): array
+    {
+        $matched = [];
+
+        foreach ($phrases as $phrase) {
+            // mb_strtolower() also folds non-ASCII letters, which the byte-wise
+            // strtolower() leaves untouched.
+            $needle = mb_strtolower($phrase);
+
+            // str_contains() returns true for an empty needle, so a blank keyword
+            // would otherwise flag every piece of content.
+            if ($needle === '') {
+                continue;
+            }
+
+            if (str_contains($haystack, $needle)) {
+                $matched[] = $phrase;
+            }
+        }
+
+        return $matched;
+    }
+}
--- a/app/Services/Moderation/Rules/UnicodeObfuscationRule.php
+++ b/app/Services/Moderation/Rules/UnicodeObfuscationRule.php
@@ -0,0 +1,49 @@
+<?php
+
+namespace App\Services\Moderation\Rules;
+
+use App\Contracts\Moderation\ModerationRuleInterface;
+
+/**
+ * Moderation rule that flags content using Unicode tricks such as mixed
+ * scripts, invisible characters, or lookalike alphabets.
+ */
+class UnicodeObfuscationRule implements ModerationRuleInterface
+{
+    /**
+     * Test the raw content against a fixed set of obfuscation signatures.
+     *
+     * @param string $content    Raw content that is tested.
+     * @param string $normalized Normalized content (not used by this rule).
+     * @param array  $context    Not used by this rule.
+     *
+     * @return array A single 'unicode_obfuscation' finding whose score is the configured
+     *               weight times the number of signatures that matched, or an empty array.
+     */
+    public function analyze(string $content, string $normalized, array $context = []): array
+    {
+        $weights = app('config')->get('content_moderation.weights', []);
+
+        $signatures = [
+            // A whitespace-delimited token holding both a Cyrillic (U+0400–U+04FF)
+            // and an ASCII Latin letter.
+            '/\b(?=\S*[\x{0400}-\x{04FF}])(?=\S*[a-zA-Z])\S+\b/u',
+            // Zero-width space/non-joiner/joiner, BOM, and soft hyphen.
+            '/[\x{200B}\x{200C}\x{200D}\x{FEFF}\x{00AD}]/u',
+            // Word joiner and the invisible operators U+2061–U+2064.
+            '/[\x{2060}\x{2061}\x{2062}\x{2063}\x{2064}]/u',
+            // Fullwidth forms U+FF01–U+FF5E. NOTE(review): this range covers fullwidth
+            // digits and punctuation as well as letters.
+            '/[\x{FF01}-\x{FF5E}]/u',
+            // Mathematical alphanumeric symbols U+1D400–U+1D7FF.
+            '/[\x{1D400}-\x{1D7FF}]/u',
+        ];
+
+        $matchedSignatures = array_filter(
+            $signatures,
+            static fn (string $signature): bool => (bool) preg_match($signature, $content),
+        );
+        $hits = count($matchedSignatures);
+
+        if ($hits === 0) {
+            return [];
+        }
+
+        return [[
+            'rule'     => 'unicode_obfuscation',
+            'score'    => ($weights['unicode_obfuscation'] ?? 30) * $hits,
+            'reason'   => 'Contains suspicious Unicode characters/obfuscation (' . $hits . ' pattern(s) matched)',
+            'links'    => [],
+            'domains'  => [],
+            'keywords' => [],
+        ]];
+    }
+}