Implement creator studio and upload updates

This commit is contained in:
2026-04-04 10:12:02 +02:00
parent 1da7d3bf88
commit 0b216b7ecd
15107 changed files with 31206 additions and 626514 deletions

View File

@@ -0,0 +1,68 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Enums\ModerationDomainStatus;
use App\Services\Moderation\DomainReputationService;
/**
 * Moderation rule that reports links pointing at domains the reputation
 * service classifies as blocked or suspicious.
 */
class DomainBlacklistRule implements ModerationRuleInterface
{
    /**
     * Collect the distinct blocked / suspicious hosts among the content's URLs.
     *
     * URLs are taken from $context['extracted_urls'] when that key is set;
     * otherwise they are extracted from $content through LinkPresenceRule.
     * Each finding's score is the configured weight multiplied by the number
     * of distinct hosts in that category.
     *
     * @param string $content    Raw text, scanned for URLs only when the context has none.
     * @param string $normalized Not used by this rule.
     * @param array  $context    May contain a pre-computed 'extracted_urls' list.
     *
     * @return array List of findings ('blocked_domain' and/or 'suspicious_domain'); empty when nothing matched.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $linkRule = app(LinkPresenceRule::class);
        $urls = (array) ($context['extracted_urls'] ?? $linkRule->extractUrls($content));

        if ($urls === []) {
            return [];
        }

        $weights = app('config')->get('content_moderation.weights', []);
        $reputation = app(DomainReputationService::class);

        // Sort every resolvable host into the bucket matching its reputation.
        $hostsByBucket = ['blocked' => [], 'suspicious' => []];
        foreach ($urls as $url) {
            $host = $linkRule->extractHost($url);
            if ($host === null) {
                continue;
            }

            $bucket = match ($reputation->statusForDomain($host)) {
                ModerationDomainStatus::Blocked => 'blocked',
                ModerationDomainStatus::Suspicious => 'suspicious',
                default => null,
            };

            if ($bucket !== null) {
                $hostsByBucket[$bucket][] = $host;
            }
        }

        // [bucket, rule name, weight key, fallback weight, reason prefix]
        $specs = [
            ['blocked', 'blocked_domain', 'blacklisted_domain', 70, 'Contains blocked domain(s): '],
            ['suspicious', 'suspicious_domain', 'suspicious_domain', 40, 'Contains suspicious TLD domain(s): '],
        ];

        $findings = [];
        foreach ($specs as [$bucket, $ruleName, $weightKey, $fallbackWeight, $reasonPrefix]) {
            $hosts = array_values(array_unique($hostsByBucket[$bucket]));
            if ($hosts === []) {
                continue;
            }

            $findings[] = [
                'rule' => $ruleName,
                'score' => ($weights[$weightKey] ?? $fallbackWeight) * count($hosts),
                'reason' => $reasonPrefix . implode(', ', $hosts),
                'links' => $urls,
                'domains' => $hosts,
                'keywords' => [],
            ];
        }

        return $findings;
    }
}

View File

@@ -0,0 +1,41 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Enums\ModerationContentType;
use App\Models\ArtworkComment;
/**
 * Moderation rule that reports an artwork comment whose normalized text is
 * identical to other, non-deleted artwork comments.
 */
class DuplicateCommentRule implements ModerationRuleInterface
{
    /**
     * Count other comments with the same lower-cased, trimmed text.
     *
     * The rule only applies when $context['content_type'] is the artwork
     * comment type, $context['content_id'] is a positive id and $normalized
     * is not empty.
     *
     * @param string $content    Not used by this rule.
     * @param string $normalized Text compared against LOWER(TRIM(COALESCE(raw_content, content))).
     * @param array  $context    Supplies 'content_type' and 'content_id'.
     *
     * @return array A single 'duplicate_comment' finding, or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $type = $context['content_type'] ?? null;
        if ($type !== ModerationContentType::ArtworkComment->value) {
            return [];
        }

        $commentId = (int) ($context['content_id'] ?? 0);
        $canCompare = $commentId > 0 && $normalized !== '';
        if (! $canCompare) {
            return [];
        }

        // Exclude the comment under review itself and anything soft-deleted.
        $query = ArtworkComment::query()
            ->where('id', '!=', $commentId)
            ->whereNull('deleted_at')
            ->whereRaw('LOWER(TRIM(COALESCE(raw_content, content))) = ?', [$normalized]);

        $matchCount = $query->count();

        if ($matchCount >= 1) {
            return [[
                'rule' => 'duplicate_comment',
                'score' => app('config')->get('content_moderation.weights.duplicate_comment', 35),
                'reason' => "Matches {$matchCount} existing comment(s) exactly",
                'links' => [],
                'domains' => [],
                'keywords' => [],
            ]];
        }

        return [];
    }
}

View File

@@ -0,0 +1,54 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that reports text dominated by "!" / "?" characters,
 * upper-case letters, or a run of six or more symbol characters.
 */
class ExcessivePunctuationRule implements ModerationRuleInterface
{
    /**
     * Compare punctuation and upper-case ratios against the configured limits.
     *
     * Content shorter than the configured 'min_length' (default 20 characters)
     * is never flagged. Otherwise a finding is produced as soon as one of the
     * ratios exceeds its limit or a symbol run is present.
     *
     * @param string $content    Raw text that is measured.
     * @param string $normalized Not used by this rule.
     * @param array  $context    Not used by this rule.
     *
     * @return array A single 'excessive_punctuation' finding, or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $settings = app('config')->get('content_moderation.excessive_punctuation', []);
        $charCount = mb_strlen($content);

        if ($charCount < (int) ($settings['min_length'] ?? 20)) {
            return [];
        }

        $divisor = max($charCount, 1);
        $bangShare = substr_count($content, '!') / $divisor;
        $questionShare = substr_count($content, '?') / $divisor;
        $upperShare = $this->uppercaseShare($content);
        $hasSymbolRun = preg_match('/[!?$%*@#._\-]{6,}/', $content) === 1;

        $withinLimits = $bangShare <= (float) ($settings['max_exclamation_ratio'] ?? 0.1)
            && $questionShare <= (float) ($settings['max_question_ratio'] ?? 0.1)
            && $upperShare <= (float) ($settings['max_caps_ratio'] ?? 0.7);

        if ($withinLimits && ! $hasSymbolRun) {
            return [];
        }

        return [[
            'rule' => 'excessive_punctuation',
            'score' => app('config')->get('content_moderation.weights.excessive_punctuation', 15),
            'reason' => 'Contains excessive punctuation, all-caps patterns, or symbol spam',
            'links' => [],
            'domains' => [],
            'keywords' => [],
        ]];
    }

    /**
     * Share of upper-case letters among all Unicode letters in the text.
     *
     * @return float Value between 0 and 1; 0.0 when the text has no letters.
     */
    private function uppercaseShare(string $text): float
    {
        $tally = static function (string $pattern) use ($text): int {
            preg_match_all($pattern, $text, $found);

            return count($found[0] ?? []);
        };

        $letterTotal = $tally('/\p{L}/u');

        return $letterTotal === 0 ? 0.0 : $tally('/\p{Lu}/u') / $letterTotal;
    }
}

View File

@@ -0,0 +1,49 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that reports text with a very small vocabulary or with a
 * single word that makes up a large share of all words.
 */
class KeywordStuffingRule implements ModerationRuleInterface
{
    /**
     * Measure vocabulary diversity of the normalized text.
     *
     * Words are runs of Unicode letters/digits longer than one character.
     * Texts with fewer words than 'min_word_count' (default 20) are skipped.
     * A finding is produced when the unique-word ratio drops below
     * 'max_unique_ratio' (default 0.3) or the most frequent word exceeds
     * 'max_single_word_frequency' (default 0.25) of all words.
     *
     * @param string $content    Not used by this rule.
     * @param string $normalized Text that is tokenized and counted.
     * @param array  $context    Not used by this rule.
     *
     * @return array A single 'keyword_stuffing' finding (with up to five top words), or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $config = app('config')->get('content_moderation.keyword_stuffing', []);

        preg_match_all('/[\p{L}\p{N}]+/u', $normalized, $matches);
        $words = array_values(array_filter(
            $matches[0] ?? [],
            static fn (string $word): bool => mb_strlen($word) > 1,
        ));
        $totalWords = count($words);

        // The explicit zero check keeps max() below from throwing a ValueError
        // when the configured minimum is 0 (or negative) and no words were found.
        if ($totalWords === 0 || $totalWords < (int) ($config['min_word_count'] ?? 20)) {
            return [];
        }

        $frequencies = array_count_values($words);
        $uniqueRatio = count($frequencies) / $totalWords;
        $topWordRatio = max($frequencies) / $totalWords;

        $maxUniqueRatio = (float) ($config['max_unique_ratio'] ?? 0.3);
        $maxSingleWordFrequency = (float) ($config['max_single_word_frequency'] ?? 0.25);

        if ($uniqueRatio >= $maxUniqueRatio && $topWordRatio <= $maxSingleWordFrequency) {
            return [];
        }

        arsort($frequencies);

        // array_count_values() turns all-digit words into integer keys; cast
        // them back so 'keywords' is always a list of strings.
        $keywords = array_map(strval(...), array_slice(array_keys($frequencies), 0, 5));

        return [[
            'rule' => 'keyword_stuffing',
            'score' => app('config')->get('content_moderation.weights.keyword_stuffing', 20),
            'reason' => sprintf(
                'Likely keyword stuffing (unique ratio %.2f, top word ratio %.2f)',
                $uniqueRatio,
                $topWordRatio
            ),
            'links' => [],
            'domains' => [],
            'keywords' => $keywords,
        ]];
    }
}

View File

@@ -0,0 +1,118 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Enums\ModerationDomainStatus;
use App\Services\Moderation\DomainReputationService;
/**
 * Moderation rule that reports external links and links through URL
 * shorteners, and exposes the URL / host extraction helpers.
 */
class LinkPresenceRule implements ModerationRuleInterface
{
    /**
     * Report shortened links and the presence of one or several external links.
     *
     * URLs come from $context['extracted_urls'] when set, otherwise from
     * extractUrls($content). URLs whose host cannot be determined, or whose
     * domain the reputation service marks as Allowed, are ignored.
     *
     * @param string $content    Raw text, scanned for URLs only when the context has none.
     * @param string $normalized Not used by this rule.
     * @param array  $context    May contain a pre-computed 'extracted_urls' list.
     *
     * @return array Findings: optionally 'shortened_link', plus either 'multiple_links' or 'single_external_link'.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $urls = (array) ($context['extracted_urls'] ?? $this->extractUrls($content));
        if ($urls === []) {
            return [];
        }

        $domainService = app(DomainReputationService::class);
        $shortenerDomains = $domainService->shortenerDomains();

        // Hosts are recorded next to their URLs so each URL is parsed only once.
        $externalUrls = [];
        $externalHosts = [];
        $shortenerUrls = [];
        $shortenerHosts = [];

        foreach ($urls as $url) {
            $host = $this->extractHost($url);
            if ($host === null) {
                continue;
            }

            if ($domainService->statusForDomain($host) === ModerationDomainStatus::Allowed) {
                continue;
            }

            if ($this->isDomainInList($host, $shortenerDomains)) {
                $shortenerUrls[] = $url;
                $shortenerHosts[] = $host;
            }

            $externalUrls[] = $url;
            $externalHosts[] = $host;
        }

        $findings = [];
        $weights = app('config')->get('content_moderation.weights', []);

        if (count($shortenerUrls) > 0) {
            $findings[] = [
                'rule' => 'shortened_link',
                'score' => $weights['shortened_link'] ?? 30,
                'reason' => 'Contains ' . count($shortenerUrls) . ' shortened URL(s)',
                'links' => $shortenerUrls,
                // De-duplicated like the 'domains' of the other findings.
                'domains' => array_values(array_unique($shortenerHosts)),
                'keywords' => [],
            ];
        }

        $externalDomains = array_values(array_unique(array_filter($externalHosts)));

        if (count($externalUrls) > 1) {
            $findings[] = [
                'rule' => 'multiple_links',
                'score' => $weights['multiple_links'] ?? 40,
                'reason' => 'Contains ' . count($externalUrls) . ' external links',
                'links' => $externalUrls,
                'domains' => $externalDomains,
                'keywords' => [],
            ];
        } elseif (count($externalUrls) === 1) {
            $findings[] = [
                'rule' => 'single_external_link',
                'score' => $weights['single_external_link'] ?? 20,
                'reason' => 'Contains an external link',
                'links' => $externalUrls,
                'domains' => $externalDomains,
                'keywords' => [],
            ];
        }

        return $findings;
    }

    /**
     * Extract distinct http(s):// and bare www. URLs from free text.
     *
     * Both forms are matched in a single pass. Matching them separately made
     * "https://www.example.com" also produce a second "www.example.com" hit,
     * so one link was counted as two. The result keeps the previous ordering:
     * scheme-prefixed URLs first, then bare www. URLs.
     *
     * @return string[]
     */
    public function extractUrls(string $text): array
    {
        preg_match_all("#(?:https?://|\\bwww\\.)[^\\s<>\\[\\]\"'`\\)]+#iu", $text, $found);
        $candidates = $found[0] ?? [];

        $hasScheme = static fn (string $url): bool => preg_match('#^https?://#i', $url) === 1;

        $withScheme = array_filter($candidates, $hasScheme);
        $bareWww = array_filter($candidates, static fn (string $url): bool => ! $hasScheme($url));

        return array_values(array_unique(array_merge($withScheme, $bareWww)));
    }

    /**
     * Return the normalized host of a URL, or null when no host can be parsed.
     *
     * URLs without an http(s) scheme are prefixed with "https://" so that
     * parse_url() can recognise the host part.
     */
    public function extractHost(string $url): ?string
    {
        $normalizedUrl = preg_match('#^https?://#i', $url) ? $url : 'https://' . ltrim($url, '/');
        $host = parse_url($normalizedUrl, PHP_URL_HOST);

        if (! is_string($host)) {
            return null;
        }

        return app(DomainReputationService::class)->normalizeDomain($host);
    }

    /**
     * Whether $host equals one of the listed domains or is a subdomain of one.
     */
    private function isDomainInList(string $host, array $list): bool
    {
        foreach ($list as $entry) {
            $entry = strtolower($entry);

            if ($host === $entry || str_ends_with($host, '.' . $entry)) {
                return true;
            }
        }

        return false;
    }
}

View File

@@ -0,0 +1,28 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Services\Moderation\DuplicateDetectionService;
/**
 * Moderation rule that reports content for which the duplicate detection
 * service finds at least two similar items.
 */
class NearDuplicateCampaignRule implements ModerationRuleInterface
{
    /**
     * Ask the duplicate detection service how many near-duplicates exist.
     *
     * @param string $content    Raw text handed to the detection service.
     * @param string $normalized Not used by this rule.
     * @param array  $context    Passed through to the service; 'extracted_domains' and
     *                           'extracted_urls' are echoed back in the finding.
     *
     * @return array A single 'near_duplicate_campaign' finding, or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $domains = (array) ($context['extracted_domains'] ?? []);
        $detector = app(DuplicateDetectionService::class);
        $similarCount = $detector->nearDuplicateCount($content, $context, $domains);

        if ($similarCount >= 2) {
            return [[
                'rule' => 'near_duplicate_campaign',
                'score' => app('config')->get('content_moderation.weights.near_duplicate_campaign', 30),
                'reason' => "Appears to match an existing spam campaign template ({$similarCount} similar item(s))",
                'links' => (array) ($context['extracted_urls'] ?? []),
                'domains' => $domains,
                'keywords' => [],
            ]];
        }

        return [];
    }
}

View File

@@ -0,0 +1,38 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Services\Moderation\ModerationRuleRegistryService;
/**
 * Moderation rule that applies the custom regular expressions supplied by
 * the rule registry.
 */
class RegexPatternRule implements ModerationRuleInterface
{
    /**
     * Produce one finding per registry pattern that matches the raw or the
     * normalized text.
     *
     * Rules without a pattern are skipped. A rule's own 'weight' is used as
     * score when present, otherwise the configured default weight.
     *
     * @param string $content    Raw text, tested first.
     * @param string $normalized Normalized text, tested when the raw text did not match.
     * @param array  $context    Not used by this rule.
     *
     * @return array List of 'regex_pattern' findings; empty when nothing matched.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $hits = [];

        foreach (\app(ModerationRuleRegistryService::class)->regexRules() as $rule) {
            $pattern = (string) ($rule['pattern'] ?? '');

            if ($pattern === '' || ! $this->matchesAny($pattern, $content, $normalized)) {
                continue;
            }

            $hits[] = [
                'rule' => 'regex_pattern',
                'score' => (int) ($rule['weight'] ?? \app('config')->get('content_moderation.weights.regex_pattern', 30)),
                'reason' => 'Matched custom moderation regex rule',
                'links' => [],
                'domains' => [],
                'keywords' => [$pattern],
            ];
        }

        return $hits;
    }

    /**
     * Whether the pattern matches at least one subject, stopping at the first hit.
     *
     * Warnings are suppressed so that a pattern preg_match() rejects counts
     * as "no match" instead of interrupting the analysis.
     */
    private function matchesAny(string $pattern, string ...$subjects): bool
    {
        foreach ($subjects as $subject) {
            if (@preg_match($pattern, $subject) === 1) {
                return true;
            }
        }

        return false;
    }
}

View File

@@ -0,0 +1,56 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that reports word sequences (n-grams) occurring several
 * times in the normalized text.
 */
class RepeatedPhraseRule implements ModerationRuleInterface
{
    /**
     * Look for n-grams of 'min_phrase_length' up to 8 words that occur at
     * least 'min_repetitions' times.
     *
     * Configuration values are coerced to sane integers: a phrase has at
     * least one word and must occur at least twice. Without that, a zero
     * phrase length would count empty n-grams and a repetition threshold of
     * one would flag every n-gram in any text.
     *
     * @param string $content    Not used by this rule.
     * @param string $normalized Text that is split on whitespace and scanned.
     * @param array  $context    Not used by this rule.
     *
     * @return array A single 'repeated_phrase' finding listing every repeated phrase, or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $config = app('config')->get('content_moderation.repeated_phrase', []);
        $minPhraseLength = max(1, (int) ($config['min_phrase_length'] ?? 4));
        $minRepetitions = max(2, (int) ($config['min_repetitions'] ?? 3));

        // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading/trailing
        // whitespace would otherwise add; "?: []" covers a failed split.
        $words = preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $wordCount = count($words);

        if ($wordCount < $minPhraseLength * $minRepetitions) {
            return [];
        }

        $repeatedPhrases = [];
        $maxPhraseLength = min(8, intdiv($wordCount, 2));

        // Slide a window of each phrase length over the words and count identical windows.
        for ($phraseLen = $minPhraseLength; $phraseLen <= $maxPhraseLength; $phraseLen++) {
            $ngrams = [];
            $lastStart = $wordCount - $phraseLen;

            for ($i = 0; $i <= $lastStart; $i++) {
                $ngram = implode(' ', array_slice($words, $i, $phraseLen));
                $ngrams[$ngram] = ($ngrams[$ngram] ?? 0) + 1;
            }

            foreach ($ngrams as $phrase => $count) {
                if ($count >= $minRepetitions) {
                    $repeatedPhrases[$phrase] = $count;
                }
            }
        }

        if ($repeatedPhrases === []) {
            return [];
        }

        $descriptions = [];
        foreach ($repeatedPhrases as $phrase => $count) {
            $descriptions[] = "\"{$phrase}\" ({$count}x)";
        }

        $weights = app('config')->get('content_moderation.weights', []);

        return [[
            'rule' => 'repeated_phrase',
            'score' => $weights['repeated_phrase'] ?? 25,
            'reason' => 'Contains repeated phrases: ' . implode(', ', $descriptions),
            'links' => [],
            'domains' => [],
            // All-digit phrases become integer array keys; report them as strings.
            'keywords' => array_map(strval(...), array_keys($repeatedPhrases)),
        ]];
    }
}

View File

@@ -0,0 +1,55 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
use App\Services\Moderation\ModerationRuleRegistryService;
/**
 * Moderation rule that reports high-risk and suspicious keywords supplied
 * by the rule registry.
 */
class SuspiciousKeywordRule implements ModerationRuleInterface
{
    /**
     * Search the normalized text for every registry keyword.
     *
     * Each finding's score is the configured weight multiplied by the number
     * of matched keywords in that category.
     *
     * @param string $content    Not used by this rule.
     * @param string $normalized Text that is searched.
     * @param array  $context    Not used by this rule.
     *
     * @return array Findings ('high_risk_keyword' and/or 'suspicious_keyword'); empty when nothing matched.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $registry = app(ModerationRuleRegistryService::class);
        $weights = app('config')->get('content_moderation.weights', []);

        $highRiskMatched = $this->matchedPhrases($registry->highRiskKeywords(), $normalized);
        $suspiciousMatched = $this->matchedPhrases($registry->suspiciousKeywords(), $normalized);

        $findings = [];

        if ($highRiskMatched !== []) {
            $findings[] = [
                'rule' => 'high_risk_keyword',
                'score' => ($weights['high_risk_keyword'] ?? 40) * count($highRiskMatched),
                'reason' => 'Contains high-risk keyword(s): ' . implode(', ', $highRiskMatched),
                'links' => [],
                'domains' => [],
                'keywords' => $highRiskMatched,
            ];
        }

        if ($suspiciousMatched !== []) {
            $findings[] = [
                'rule' => 'suspicious_keyword',
                'score' => ($weights['suspicious_keyword'] ?? 25) * count($suspiciousMatched),
                'reason' => 'Contains suspicious keyword(s): ' . implode(', ', $suspiciousMatched),
                'links' => [],
                'domains' => [],
                'keywords' => $suspiciousMatched,
            ];
        }

        return $findings;
    }

    /**
     * Return the phrases that occur in the haystack, in their original order.
     *
     * Matching is case-insensitive for the full Unicode range, so a keyword
     * containing non-ASCII capitals still matches lower-cased text; a
     * byte-wise strtolower() only folds ASCII. Blank phrases are skipped
     * because an empty needle is contained in every string and would flag
     * all content.
     *
     * @param iterable $phrases  Keywords to look for.
     * @param string   $haystack Text to search.
     *
     * @return array Matched phrases exactly as supplied.
     */
    private function matchedPhrases(iterable $phrases, string $haystack): array
    {
        $matched = [];

        foreach ($phrases as $phrase) {
            $needle = (string) $phrase;

            if (trim($needle) === '') {
                continue;
            }

            if (mb_stripos($haystack, $needle, 0, 'UTF-8') !== false) {
                $matched[] = $phrase;
            }
        }

        return $matched;
    }
}

View File

@@ -0,0 +1,49 @@
<?php
namespace App\Services\Moderation\Rules;
use App\Contracts\Moderation\ModerationRuleInterface;
/**
 * Moderation rule that reports Unicode tricks commonly used to slip text
 * past filters: mixed scripts, invisible characters and lookalike letters.
 */
class UnicodeObfuscationRule implements ModerationRuleInterface
{
    /**
     * Test the raw content against a fixed set of obfuscation patterns.
     *
     * The score is the configured weight multiplied by the number of
     * distinct patterns that matched (not the number of characters).
     *
     * @param string $content    Raw text that is inspected.
     * @param string $normalized Not used by this rule.
     * @param array  $context    Not used by this rule.
     *
     * @return array A single 'unicode_obfuscation' finding, or an empty array.
     */
    public function analyze(string $content, string $normalized, array $context = []): array
    {
        $weights = app('config')->get('content_moderation.weights', []);

        // Detect homoglyph / lookalike characters.
        // Common spam tactic: replace Latin chars with Cyrillic, Greek, or special Unicode.
        $suspiciousPatterns = [
            // Mixed script detection: Latin + Cyrillic in the same whitespace-delimited token
            '/\b(?=\S*[\x{0400}-\x{04FF}])(?=\S*[a-zA-Z])\S+\b/u',
            // Zero-width characters and the soft hyphen.
            // NOTE(review): U+200D / U+200C also occur in emoji ZWJ sequences and in
            // Persian/Indic text, so this pattern can match legitimate content — confirm
            // whether that is acceptable.
            '/[\x{200B}\x{200C}\x{200D}\x{FEFF}\x{00AD}]/u',
            // Invisible formatting characters
            '/[\x{2060}\x{2061}\x{2062}\x{2063}\x{2064}]/u',
            // Fullwidth Latin letters A-Z / a-z (used to bypass filters). The wider
            // U+FF01-U+FF5E block is deliberately not used: it also holds fullwidth
            // punctuation and digits that are ordinary in CJK text.
            '/[\x{FF21}-\x{FF3A}\x{FF41}-\x{FF5A}]/u',
            // Mathematical alphanumeric symbols used as text
            '/[\x{1D400}-\x{1D7FF}]/u',
        ];

        $matchCount = 0;
        foreach ($suspiciousPatterns as $pattern) {
            // preg_match() returns false on failure (e.g. malformed UTF-8); only a real match counts.
            if (preg_match($pattern, $content) === 1) {
                $matchCount++;
            }
        }

        if ($matchCount === 0) {
            return [];
        }

        return [[
            'rule' => 'unicode_obfuscation',
            'score' => ($weights['unicode_obfuscation'] ?? 30) * $matchCount,
            'reason' => 'Contains suspicious Unicode characters/obfuscation (' . $matchCount . ' pattern(s) matched)',
            'links' => [],
            'domains' => [],
            'keywords' => [],
        ]];
    }
}