366 lines
14 KiB
PHP
366 lines
14 KiB
PHP
<?php
|
|
|
|
namespace App\Console\Commands;
|
|
|
|
use App\Enums\ModerationContentType;
|
|
use App\Enums\ModerationStatus;
|
|
use App\Services\Moderation\ContentModerationPersistenceService;
|
|
use App\Services\Moderation\ContentModerationProcessingService;
|
|
use App\Services\Moderation\ContentModerationService;
|
|
use App\Services\Moderation\ContentModerationSourceService;
|
|
use Illuminate\Console\Command;
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
/**
 * Artisan command `skinbase:scan-content-moderation`.
 *
 * Walks the source rows of one or more moderation content types, analyzes each
 * row's content snapshot and (unless --dry-run is given) hands the row to the
 * processing service so findings are persisted. A summary table is printed and
 * the same counters are written to the log when the scan ends.
 */
class ScanContentModerationCommand extends Command
{
    /**
     * Number of source rows fetched per chunkById() page.
     */
    private const CHUNK_SIZE = 200;

    /**
     * Accepted `--only` tokens and the content types each one selects.
     *
     * The order of this table is the order in which selected targets are
     * scanned, regardless of the order the tokens were typed in.
     *
     * @var list<array{aliases: list<string>, types: list<ModerationContentType>}>
     */
    private const TARGET_GROUPS = [
        [
            'aliases' => ['comments'],
            'types' => [ModerationContentType::ArtworkComment],
        ],
        [
            'aliases' => ['descriptions'],
            'types' => [ModerationContentType::ArtworkDescription],
        ],
        [
            'aliases' => ['titles', 'artwork_titles'],
            'types' => [ModerationContentType::ArtworkTitle],
        ],
        [
            'aliases' => ['bios', 'user_bios'],
            'types' => [ModerationContentType::UserBio],
        ],
        [
            'aliases' => ['profile-links', 'profile_links'],
            'types' => [ModerationContentType::UserProfileLink],
        ],
        [
            'aliases' => ['collections', 'collection_titles'],
            'types' => [
                ModerationContentType::CollectionTitle,
                ModerationContentType::CollectionDescription,
            ],
        ],
        [
            'aliases' => ['stories', 'story_titles'],
            'types' => [
                ModerationContentType::StoryTitle,
                ModerationContentType::StoryContent,
            ],
        ],
        [
            'aliases' => ['cards', 'card_titles'],
            'types' => [
                ModerationContentType::CardTitle,
                ModerationContentType::CardText,
            ],
        ],
    ];

    protected $signature = 'skinbase:scan-content-moderation
        {--only= : comments, descriptions, titles, bios, profile-links, collections, stories, cards, or a comma-separated list}
        {--limit= : Maximum number of rows to scan}
        {--from-id= : Start scanning at or after this source ID}
        {--status= : Reserved for compatibility with rescan tooling}
        {--force : Re-scan unchanged content}
        {--dry-run : Analyze content without persisting findings}';

    protected $description = 'Scan artwork comments and descriptions for suspicious or spam-like content.';

    public function __construct(
        private readonly ContentModerationService $moderation,
        private readonly ContentModerationPersistenceService $persistence,
        private readonly ContentModerationProcessingService $processing,
        private readonly ContentModerationSourceService $sources,
    ) {
        parent::__construct();
    }

    /**
     * Run the scan over every selected target and report the totals.
     *
     * The --limit budget is shared across targets: once it is used up, the
     * remaining targets are not started.
     *
     * @return int Always self::SUCCESS.
     */
    public function handle(): int
    {
        $targets = $this->targets();
        $limit = max(0, (int) ($this->option('limit') ?? 0));
        $fromId = $this->fromId();

        // null means "unlimited"; otherwise the number of rows still allowed.
        $remaining = $limit > 0 ? $limit : null;

        $counts = [
            'scanned' => 0,
            'flagged' => 0,
            'created' => 0,
            'updated' => 0,
            'skipped' => 0,
            'clean' => 0,
            'auto_hidden' => 0,
        ];

        $this->announceScanStart($targets, $limit);

        foreach ($targets as $target) {
            if ($remaining !== null && $remaining <= 0) {
                $this->comment('Scan limit reached. Stopping before the next content target.');

                break;
            }

            $counts = $this->scanTarget($target, $counts, $remaining);
        }

        $this->table(['Metric', 'Count'], [
            ['Scanned', $counts['scanned']],
            ['Flagged', $counts['flagged']],
            ['Created', $counts['created']],
            ['Updated', $counts['updated']],
            ['Auto-hidden', $counts['auto_hidden']],
            ['Clean', $counts['clean']],
            ['Skipped', $counts['skipped']],
        ]);

        Log::info('Content moderation scan complete.', [
            'targets' => array_map(static fn (ModerationContentType $target): string => $target->value, $targets),
            'limit' => $limit > 0 ? $limit : null,
            'from_id' => $fromId > 0 ? $fromId : null,
            'force' => (bool) $this->option('force'),
            'dry_run' => (bool) $this->option('dry-run'),
            'counts' => $counts,
        ]);

        $this->info('Content moderation scan complete.');

        return self::SUCCESS;
    }

    /**
     * Scan all source rows of a single content type.
     *
     * @param array<string, int> $counts    Running totals accumulated so far.
     * @param int|null           $remaining Rows still allowed by --limit (null = unlimited);
     *                                      decremented in place as rows are consumed.
     *
     * @return array<string, int> The running totals including this target.
     */
    private function scanTarget(ModerationContentType $target, array $counts, ?int &$remaining): array
    {
        $before = $counts;
        $this->info('Scanning ' . $target->label() . ' entries...');

        // A match without a default arm throws UnhandledMatchError for any
        // content type that is not listed here.
        $query = match ($target) {
            ModerationContentType::ArtworkComment,
            ModerationContentType::ArtworkDescription,
            ModerationContentType::ArtworkTitle,
            ModerationContentType::UserBio,
            ModerationContentType::UserProfileLink,
            ModerationContentType::CollectionTitle,
            ModerationContentType::CollectionDescription,
            ModerationContentType::StoryTitle,
            ModerationContentType::StoryContent,
            ModerationContentType::CardTitle,
            ModerationContentType::CardText => $this->sources->queryForType($target),
        };

        $fromId = $this->fromId();
        if ($fromId > 0) {
            $query->where('id', '>=', $fromId);
        }

        // The flags cannot change during the run, so read them once.
        $force = (bool) $this->option('force');
        $dryRun = (bool) $this->option('dry-run');

        $query->chunkById(self::CHUNK_SIZE, function ($rows) use ($target, $force, $dryRun, &$counts, &$remaining): bool {
            foreach ($rows as $row) {
                if ($remaining !== null && $remaining <= 0) {
                    return false;
                }

                $consumed = $this->scanRow($target, $row, $counts, $force, $dryRun);

                if ($consumed && $remaining !== null) {
                    $remaining--;
                }
            }

            // Returning false stops chunkById(); do so as soon as the budget is
            // exhausted instead of fetching one more chunk just to abort on it.
            return $remaining === null || $remaining > 0;
        }, 'id');

        $this->line(sprintf(
            'Finished %s: scanned=%d, flagged=%d, created=%d, updated=%d, auto-hidden=%d, clean=%d, skipped=%d',
            $target->label(),
            $counts['scanned'] - $before['scanned'],
            $counts['flagged'] - $before['flagged'],
            $counts['created'] - $before['created'],
            $counts['updated'] - $before['updated'],
            $counts['auto_hidden'] - $before['auto_hidden'],
            $counts['clean'] - $before['clean'],
            $counts['skipped'] - $before['skipped'],
        ));

        return $counts;
    }

    /**
     * Analyze one source row and, outside dry-run mode, process it.
     *
     * @param mixed              $row    A row yielded by the source query.
     * @param array<string, int> $counts Running totals, updated in place.
     * @param bool               $force  Whether --force was given.
     * @param bool               $dryRun Whether --dry-run was given.
     *
     * @return bool True when the row counts against --limit. Rows with an empty
     *              snapshot are skipped without consuming the limit.
     */
    private function scanRow(ModerationContentType $target, mixed $row, array &$counts, bool $force, bool $dryRun): bool
    {
        $context = $this->sources->buildContext($target, $row);
        $snapshot = (string) ($context['content_snapshot'] ?? '');
        $sourceId = (int) ($context['content_id'] ?? 0);

        if ($snapshot === '') {
            $counts['skipped']++;
            $this->verboseLine($target, $sourceId, 'skipped empty snapshot');

            return false;
        }

        $analysis = $this->moderation->analyze($snapshot, $context);
        $counts['scanned']++;

        // The unchanged-content shortcut applies only to a persisting,
        // non-forced run.
        if (! $force && ! $dryRun && $this->persistence->hasCurrentFinding(
            (string) $context['content_type'],
            $sourceId,
            $analysis->contentHash,
            $analysis->scannerVersion,
        )) {
            $counts['skipped']++;
            $this->verboseLine($target, $sourceId, 'skipped unchanged content');

            return true;
        }

        $flagged = $analysis->status === ModerationStatus::Pending;

        if ($dryRun) {
            if ($flagged) {
                $counts['flagged']++;
                $this->verboseAnalysis($target, $sourceId, $analysis, 'dry-run flagged');
            } else {
                $counts['clean']++;
                $this->verboseLine($target, $sourceId, 'dry-run clean');
            }

            return true;
        }

        $result = $this->processing->process($snapshot, $context, true);

        if (! $flagged) {
            $counts['clean']++;

            if ($result['updated']) {
                $counts['updated']++;
            }

            $this->verboseLine($target, $sourceId, $result['updated'] ? 'clean, existing finding updated' : 'clean');

            return true;
        }

        $counts['flagged']++;

        if ($result['created']) {
            $counts['created']++;
            $outcome = 'flagged, finding created';
        } elseif ($result['updated']) {
            $counts['updated']++;
            $outcome = 'flagged, finding updated';
        } else {
            $outcome = 'flagged';
        }

        if ($result['auto_hidden']) {
            $counts['auto_hidden']++;
            $outcome .= ', auto-hidden';
        }

        $this->verboseAnalysis($target, $sourceId, $analysis, $outcome);

        return true;
    }

    /**
     * Print the scan configuration before any row is touched.
     *
     * @param array<int, ModerationContentType> $targets
     * @param int                               $limit   Normalized --limit (0 = none).
     */
    private function announceScanStart(array $targets, int $limit): void
    {
        $labels = array_map(static fn (ModerationContentType $target): string => $target->label(), $targets);
        $fromId = $this->fromId();

        $this->info('Starting content moderation scan...');
        $this->line('Targets: ' . implode(', ', $labels));
        $this->line('Mode: ' . ($this->option('dry-run') ? 'dry-run' : 'persist findings'));
        $this->line('Force re-scan: ' . ($this->option('force') ? 'yes' : 'no'));
        $this->line('From source ID: ' . ($fromId > 0 ? $fromId : 'start'));
        $this->line('Limit: ' . ($limit > 0 ? (string) $limit : 'none'));

        if ($this->output->isVerbose()) {
            $this->comment('Verbose mode enabled. Use -vv for detailed reasons and matched domains.');
        }
    }

    /**
     * Print a one-line per-row message, only when running with -v or higher.
     */
    private function verboseLine(ModerationContentType $target, int $sourceId, string $message): void
    {
        if (! $this->output->isVerbose()) {
            return;
        }

        $this->line(sprintf('[%s #%d] %s', $target->value, $sourceId, $message));
    }

    /**
     * Print the analysis details for a row, only when running with -v or higher.
     *
     * With -vv the matched domains, matched keywords and reasons are listed too.
     *
     * @param mixed  $analysis Result of ContentModerationService::analyze(); only its
     *                         public properties are read here.
     * @param string $prefix   Outcome text placed right after the row identifier.
     */
    private function verboseAnalysis(ModerationContentType $target, int $sourceId, mixed $analysis, string $prefix): void
    {
        if (! $this->output->isVerbose()) {
            return;
        }

        $message = sprintf(
            '[%s #%d] %s; score=%d; severity=%s; policy=%s; queue=%s',
            $target->value,
            $sourceId,
            $prefix,
            $analysis->score,
            $analysis->severity->value,
            $analysis->policyName ?? 'default',
            $analysis->status->value,
        );

        if ($analysis->priorityScore !== null) {
            $message .= '; priority=' . $analysis->priorityScore;
        }

        if ($analysis->reviewBucket !== null) {
            $message .= '; bucket=' . $analysis->reviewBucket;
        }

        if ($analysis->aiLabel !== null) {
            $message .= '; ai=' . $analysis->aiLabel;

            if ($analysis->aiConfidence !== null) {
                $message .= ' (' . $analysis->aiConfidence . '%)';
            }
        }

        $this->line($message);

        if (! $this->output->isVeryVerbose()) {
            return;
        }

        if ($analysis->matchedDomains !== []) {
            $this->line('  matched domains: ' . implode(', ', $analysis->matchedDomains));
        }

        if ($analysis->matchedKeywords !== []) {
            $this->line('  matched keywords: ' . implode(', ', $analysis->matchedKeywords));
        }

        if ($analysis->reasons !== []) {
            $this->line('  reasons: ' . implode(' | ', $analysis->reasons));
        }
    }

    /**
     * Normalized --from-id option: a non-negative integer, 0 when absent.
     */
    private function fromId(): int
    {
        return max(0, (int) ($this->option('from-id') ?? 0));
    }

    /**
     * Resolve the --only option into the list of content types to scan.
     *
     * - No --only value: artwork comments and artwork descriptions.
     * - Recognized tokens: the matching types, in TARGET_GROUPS order.
     * - A value with no recognized token at all: every content type. This
     *   fallback is kept for backward compatibility, but it is now announced
     *   with a warning because a typo would otherwise silently widen the scan.
     *
     * @return array<int, ModerationContentType>
     */
    private function targets(): array
    {
        $raw = trim((string) ($this->option('only') ?? ''));

        if ($raw === '') {
            return [
                ModerationContentType::ArtworkComment,
                ModerationContentType::ArtworkDescription,
            ];
        }

        $selected = collect(explode(',', $raw))
            ->map(static fn (string $value): string => trim(strtolower($value)))
            ->filter()
            ->values();

        $targets = [];
        $knownAliases = [];

        foreach (self::TARGET_GROUPS as $group) {
            $knownAliases = [...$knownAliases, ...$group['aliases']];

            if ($selected->intersect($group['aliases'])->isNotEmpty()) {
                $targets = [...$targets, ...$group['types']];
            }
        }

        $unknown = $selected->diff($knownAliases)->unique()->values()->all();

        if ($unknown !== []) {
            $this->warn('Ignoring unrecognized --only value(s): ' . implode(', ', $unknown));
        }

        if ($targets !== []) {
            return $targets;
        }

        $this->warn('No recognized --only targets were given; scanning every content type.');

        return array_merge(...array_column(self::TARGET_GROUPS, 'types'));
    }
}