Files
SkinbaseNova/app/Console/Commands/SanitizeContent.php
2026-02-26 21:12:32 +01:00

189 lines
6.9 KiB
PHP

<?php
namespace App\Console\Commands;
use App\Services\ContentSanitizer;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
/**
* php artisan skinbase:sanitize-content
*
* Scans legacy content for unsafe HTML, converts it to Markdown-safe text,
* and populates the raw_content / rendered_content columns on artwork_comments.
*
* Options:
* --dry-run Preview changes without writing
* --chunk=200 Rows per batch
* --table= Limit to one target
* --artwork-id= Limit to a single artwork (filters artwork_comments by artwork_id, artworks by id)
*/
class SanitizeContent extends Command
{
protected $signature = 'skinbase:sanitize-content
{--dry-run : Preview changes without writing to the database}
{--chunk=200 : Number of rows per batch}
{--table= : Limit scan to a single target (artwork_comments|artworks|forum_posts)}
{--artwork-id= : Limit scan to a single artwork ID (skips forum_posts)}';
protected $description = 'Strip unsafe HTML from legacy content and populate sanitized columns.';
/**
* table => [read_col, write_raw_col, write_rendered_col|null]
*
* For artwork_comments we write two columns; for the others we only sanitize in-place.
*/
private const TARGETS = [
'artwork_comments' => [
'read' => 'content',
'write_raw' => 'raw_content',
'write_rendered' => 'rendered_content',
],
'artworks' => [
'read' => 'description',
'write_raw' => 'description',
'write_rendered' => null,
],
'forum_posts' => [
'read' => 'content',
'write_raw' => 'content',
'write_rendered' => null,
],
];
public function handle(): int
{
$dryRun = (bool) $this->option('dry-run');
$chunk = max(1, (int) $this->option('chunk'));
$tableOpt = $this->option('table');
$artworkId = $this->option('artwork-id');
if ($artworkId !== null) {
if (! ctype_digit((string) $artworkId) || (int) $artworkId < 1) {
$this->error("--artwork-id must be a positive integer. Got: {$artworkId}");
return self::FAILURE;
}
$artworkId = (int) $artworkId;
}
$targets = self::TARGETS;
if ($tableOpt) {
if (! isset($targets[$tableOpt])) {
$this->error("Unknown table: {$tableOpt}. Allowed: " . implode(', ', array_keys($targets)));
return self::FAILURE;
}
$targets = [$tableOpt => $targets[$tableOpt]];
}
// --artwork-id removes forum_posts (no artwork FK) and informs the user.
if ($artworkId !== null) {
unset($targets['forum_posts']);
$this->line("Filtering to artwork <info>#{$artworkId}</info> (forum_posts skipped).");
}
if ($dryRun) {
$this->warn('DRY-RUN mode — no changes will be written.');
}
$totalModified = 0;
$totalRows = 0;
foreach ($targets as $table => $def) {
$this->line("Processing <info>{$table}</info>…");
[$modified, $rows] = $this->processTable($table, $def, $chunk, $dryRun, $artworkId);
$totalModified += $modified;
$totalRows += $rows;
$this->line("{$rows} rows scanned, {$modified} modified.");
}
$this->newLine();
$this->info("Summary: {$totalRows} rows, {$totalModified} " . ($dryRun ? 'would be ' : '') . 'modified.');
return self::SUCCESS;
}
private function processTable(
string $table,
array $def,
int $chunk,
bool $dryRun,
?int $artworkId = null
): array {
$totalModified = 0;
$totalRows = 0;
$readCol = $def['read'];
$writeRawCol = $def['write_raw'];
$writeRenderedCol = $def['write_rendered'];
DB::table($table)
->whereNotNull($readCol)
->when($artworkId !== null, function ($q) use ($table, $artworkId) {
// artwork_comments has artwork_id; artworks is filtered by its own PK.
$filterCol = $table === 'artwork_comments' ? 'artwork_id' : 'id';
$q->where($filterCol, $artworkId);
})
->orderBy('id')
->chunk($chunk, function ($rows) use (
$table, $readCol, $writeRawCol, $writeRenderedCol,
$dryRun, &$totalModified, &$totalRows
) {
foreach ($rows as $row) {
$original = $row->$readCol ?? '';
$stripped = ContentSanitizer::stripToPlain($original);
$totalRows++;
// Detect if content had HTML that we need to clean
$hadHtml = $original !== $stripped && preg_match('/<[a-z][^>]*>/i', $original);
if ($writeRawCol === $readCol && ! $hadHtml) {
// Same column, no HTML, skip
continue;
}
$rendered = ContentSanitizer::render($stripped);
$totalModified++;
if ($hadHtml) {
$this->line(" [{$table}#{$row->id}] Stripped HTML from content.");
Log::info("skinbase:sanitize-content stripped HTML from {$table}#{$row->id}");
}
if ($dryRun) {
continue;
}
$update = [$writeRawCol => $stripped];
if ($writeRenderedCol) {
$update[$writeRenderedCol] = $rendered;
}
DB::table($table)->where('id', $row->id)->update($update);
}
// Also populate rendered_content for rows that have raw_content but no rendered_content
if ($writeRenderedCol && ! $dryRun) {
DB::table($table)
->whereNotNull($writeRawCol)
->whereNull($writeRenderedCol)
->orderBy('id')
->chunk(200, function ($missing) use ($table, $writeRawCol, $writeRenderedCol) {
foreach ($missing as $row) {
$rendered = ContentSanitizer::render($row->$writeRawCol ?? '');
DB::table($table)->where('id', $row->id)->update([
$writeRenderedCol => $rendered,
]);
}
});
}
});
return [$totalModified, $totalRows];
}
}