189 lines
6.9 KiB
PHP
189 lines
6.9 KiB
PHP
<?php
|
|
|
|
namespace App\Console\Commands;
|
|
|
|
use App\Services\ContentSanitizer;
|
|
use Illuminate\Console\Command;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
/**
 * php artisan skinbase:sanitize-content
 *
 * Scans legacy content for unsafe HTML, converts it to Markdown-safe text,
 * and populates the raw_content / rendered_content columns on artwork_comments.
 *
 * Options:
 *   --dry-run       Preview changes without writing
 *   --chunk=200     Rows per batch
 *   --table=        Limit to one target
 *   --artwork-id=   Limit to a single artwork (filters artwork_comments by artwork_id, artworks by id)
 *
 * Combining --artwork-id with --table=forum_posts is rejected, because
 * forum_posts cannot be filtered by artwork.
 */
class SanitizeContent extends Command
{
    protected $signature = 'skinbase:sanitize-content
                            {--dry-run : Preview changes without writing to the database}
                            {--chunk=200 : Number of rows per batch}
                            {--table= : Limit scan to a single target (artwork_comments|artworks|forum_posts)}
                            {--artwork-id= : Limit scan to a single artwork ID (skips forum_posts)}';

    protected $description = 'Strip unsafe HTML from legacy content and populate sanitized columns.';

    /**
     * table => [read_col, write_raw_col, write_rendered_col|null]
     *
     * For artwork_comments we write two columns; for the others we only sanitize in-place.
     */
    private const TARGETS = [
        'artwork_comments' => [
            'read'           => 'content',
            'write_raw'      => 'raw_content',
            'write_rendered' => 'rendered_content',
        ],
        'artworks' => [
            'read'           => 'description',
            'write_raw'      => 'description',
            'write_rendered' => null,
        ],
        'forum_posts' => [
            'read'           => 'content',
            'write_raw'      => 'content',
            'write_rendered' => null,
        ],
    ];

    /**
     * Validate the options, resolve the list of target tables and process each one.
     *
     * @return int self::SUCCESS when all targets were processed, self::FAILURE on invalid options.
     */
    public function handle(): int
    {
        $dryRun    = (bool) $this->option('dry-run');
        $chunk     = max(1, (int) $this->option('chunk'));
        $tableOpt  = $this->option('table');
        $artworkId = $this->option('artwork-id');

        if ($artworkId !== null) {
            if (! ctype_digit((string) $artworkId) || (int) $artworkId < 1) {
                $this->error("--artwork-id must be a positive integer. Got: {$artworkId}");

                return self::FAILURE;
            }

            $artworkId = (int) $artworkId;
        }

        $targets = self::TARGETS;

        if ($tableOpt !== null && $tableOpt !== '') {
            if (! isset($targets[$tableOpt])) {
                $this->error("Unknown table: {$tableOpt}. Allowed: " . implode(', ', array_keys($targets)));

                return self::FAILURE;
            }

            $targets = [$tableOpt => $targets[$tableOpt]];
        }

        // --artwork-id removes forum_posts (no artwork FK) and informs the user.
        if ($artworkId !== null) {
            $forumSkipped = isset($targets['forum_posts']);
            unset($targets['forum_posts']);

            if ($targets === []) {
                // Only reachable with --table=forum_posts: nothing could be processed,
                // so report a usage error instead of a misleading "0 rows" success.
                $this->error('--artwork-id cannot be combined with --table=forum_posts (forum_posts has no artwork reference).');

                return self::FAILURE;
            }

            $this->line(
                "Filtering to artwork <info>#{$artworkId}</info>"
                . ($forumSkipped ? ' (forum_posts skipped).' : '.')
            );
        }

        if ($dryRun) {
            $this->warn('DRY-RUN mode — no changes will be written.');
        }

        $totalModified = 0;
        $totalRows     = 0;

        foreach ($targets as $table => $def) {
            $this->line("Processing <info>{$table}</info>…");

            [$modified, $rows] = $this->processTable($table, $def, $chunk, $dryRun, $artworkId);

            $totalModified += $modified;
            $totalRows     += $rows;

            $this->line("  → {$rows} rows scanned, {$modified} modified.");
        }

        $this->newLine();
        $this->info("Summary: {$totalRows} rows, {$totalModified} " . ($dryRun ? 'would be ' : '') . 'modified.');

        return self::SUCCESS;
    }

    /**
     * Sanitize one target table in batches.
     *
     * Every row whose read column is non-null is scanned. A row counts as modified when
     * HTML was stripped from it, or when the target writes to a column other than the one
     * it reads from (so the raw/rendered columns get populated even for clean content).
     * In dry-run mode rows are counted and reported but nothing is written.
     *
     * @param string                                                                $table     Key of self::TARGETS.
     * @param array{read: string, write_raw: string, write_rendered: string|null}   $def       Column mapping for the table.
     * @param int                                                                   $chunk     Rows per batch (>= 1).
     * @param bool                                                                  $dryRun    When true, no UPDATE is issued.
     * @param int|null                                                              $artworkId Optional artwork filter.
     *
     * @return array{0: int, 1: int} [modified rows, scanned rows]
     */
    private function processTable(
        string $table,
        array $def,
        int $chunk,
        bool $dryRun,
        ?int $artworkId = null
    ): array {
        $totalModified = 0;
        $totalRows     = 0;

        $readCol          = $def['read'];
        $writeRawCol      = $def['write_raw'];
        $writeRenderedCol = $def['write_rendered'];

        // chunkById (keyset pagination on "id") is used instead of orderBy + chunk because
        // rows are updated while iterating; offset-based paging can skip rows in that case.
        $this->scopedQuery($table, $artworkId)
            ->whereNotNull($readCol)
            ->chunkById($chunk, function ($rows) use (
                $table,
                $readCol,
                $writeRawCol,
                $writeRenderedCol,
                $dryRun,
                &$totalModified,
                &$totalRows
            ) {
                foreach ($rows as $row) {
                    $totalRows++;

                    $original = $row->$readCol ?? '';
                    $stripped = ContentSanitizer::stripToPlain($original);

                    // Detect if content had HTML that we need to clean
                    $hadHtml = $original !== $stripped
                        && preg_match('/<[a-z][^>]*>/i', $original) === 1;

                    if ($writeRawCol === $readCol && ! $hadHtml) {
                        // Same column, no HTML, skip
                        continue;
                    }

                    $totalModified++;

                    if ($hadHtml) {
                        $this->line("  [{$table}#{$row->id}] Stripped HTML from content.");
                        Log::info("skinbase:sanitize-content stripped HTML from {$table}#{$row->id}");
                    }

                    if ($dryRun) {
                        continue;
                    }

                    $update = [$writeRawCol => $stripped];

                    if ($writeRenderedCol !== null) {
                        // Rendered only when it is actually persisted.
                        $update[$writeRenderedCol] = ContentSanitizer::render($stripped);
                    }

                    DB::table($table)->where('id', $row->id)->update($update);
                }
            });

        // Also populate rendered_content for rows that have raw_content but no rendered_content.
        // Runs once after the main pass (not once per batch) and honours the artwork filter.
        if ($writeRenderedCol !== null && ! $dryRun) {
            $this->backfillRendered($table, $writeRawCol, $writeRenderedCol, $chunk, $artworkId);
        }

        return [$totalModified, $totalRows];
    }

    /**
     * Base query for a target table, optionally restricted to a single artwork.
     *
     * artwork_comments is filtered by its artwork_id column; any other table is
     * filtered by its own primary key.
     */
    private function scopedQuery(string $table, ?int $artworkId): \Illuminate\Database\Query\Builder
    {
        $query = DB::table($table);

        if ($artworkId !== null) {
            $filterCol = $table === 'artwork_comments' ? 'artwork_id' : 'id';
            $query->where($filterCol, $artworkId);
        }

        return $query;
    }

    /**
     * Fill the rendered column for rows that have raw content but no rendered content.
     *
     * Uses chunkById because each update removes the row from the whereNull() result set;
     * offset-based chunking would skip rows as the set shrinks.
     */
    private function backfillRendered(
        string $table,
        string $rawCol,
        string $renderedCol,
        int $chunk,
        ?int $artworkId
    ): void {
        $this->scopedQuery($table, $artworkId)
            ->whereNotNull($rawCol)
            ->whereNull($renderedCol)
            ->chunkById($chunk, static function ($missing) use ($table, $rawCol, $renderedCol) {
                foreach ($missing as $row) {
                    DB::table($table)->where('id', $row->id)->update([
                        $renderedCol => ContentSanitizer::render($row->$rawCol ?? ''),
                    ]);
                }
            });
    }
}
|