Files
SkinbaseNova/app/Console/Commands/IndexArtworkVectorsCommand.php

251 lines
9.2 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Console\Commands;
use App\Models\Artwork;
use App\Services\Vision\VectorService;
use Carbon\CarbonImmutable;
use InvalidArgumentException;
use Illuminate\Console\Command;
final class IndexArtworkVectorsCommand extends Command
{
protected $signature = 'artworks:vectors-index
{--order=updated-desc : Ordering mode: updated-desc or id-asc}
{--start-id=0 : Start from this artwork id (inclusive)}
{--after-id=0 : Resume after this artwork id}
{--after-updated-at= : Resume updated-desc mode after this ISO-8601 timestamp}
{--batch=100 : Batch size per iteration}
{--limit=0 : Maximum artworks to process in this run}
{--embedded-only : Re-upsert only artworks that already have local embeddings}
{--public-only : Index only public, approved, published artworks}
{--dry-run : Preview requests without sending them}';
protected $description = 'Send artwork image URLs to the vector gateway for indexing';
public function handle(VectorService $vectors): int
{
$dryRun = (bool) $this->option('dry-run');
if (! $dryRun && ! $vectors->isConfigured()) {
$this->error('Vision vector gateway is not configured. Set VISION_VECTOR_GATEWAY_URL and VISION_VECTOR_GATEWAY_API_KEY.');
return self::FAILURE;
}
$order = $this->normalizeOrder((string) $this->option('order'));
$startId = max(0, (int) $this->option('start-id'));
$afterId = max(0, (int) $this->option('after-id'));
try {
$afterUpdatedAt = $this->resolveAfterUpdatedAt($order, (string) $this->option('after-updated-at'));
} catch (InvalidArgumentException $exception) {
$this->error($exception->getMessage());
return self::INVALID;
}
$batch = max(1, min((int) $this->option('batch'), 1000));
$limit = max(0, (int) $this->option('limit'));
$publicOnly = (bool) $this->option('public-only');
$nextId = $startId > 0 ? $startId : max(1, $afterId + 1);
$embeddedOnly = (bool) $this->option('embedded-only');
$processed = 0;
$indexed = 0;
$skipped = 0;
$failed = 0;
$lastId = $afterId;
$nextUpdatedAt = $afterUpdatedAt;
if ($startId > 0 && $afterId > 0) {
$this->warn(sprintf(
'Both --start-id=%d and --after-id=%d were provided. Using --start-id and ignoring --after-id.',
$startId,
$afterId
));
}
if ($order === 'updated-desc' && ($startId > 0 || $afterId > 0) && $afterUpdatedAt === null) {
$this->warn('The --start-id/--after-id options are legacy id-asc cursors. They are ignored unless --order=id-asc is used, or unless --after-updated-at is also provided for updated-desc mode.');
}
$this->info(sprintf(
'Starting vector index: order=%s start_id=%d after_id=%d after_updated_at=%s next_id=%d batch=%d limit=%s embedded_only=%s public_only=%s dry_run=%s',
$order,
$startId,
$afterId,
$afterUpdatedAt?->toIso8601String() ?? 'none',
$nextId,
$batch,
$limit > 0 ? (string) $limit : 'all',
$embeddedOnly ? 'yes' : 'no',
$publicOnly ? 'yes' : 'no',
$dryRun ? 'yes' : 'no'
));
while (true) {
$remaining = $limit > 0 ? max(0, $limit - $processed) : $batch;
if ($limit > 0 && $remaining === 0) {
break;
}
$take = $limit > 0 ? min($batch, $remaining) : $batch;
$query = Artwork::query()
->with(['categories' => fn ($categories) => $categories->with('contentType')->orderBy('sort_order')->orderBy('name')])
->whereNotNull('hash');
if ($order === 'updated-desc') {
$query->orderByDesc('updated_at')
->orderByDesc('id')
->limit($take);
if ($nextUpdatedAt !== null) {
$query->where(function ($cursorQuery) use ($nextUpdatedAt, $afterId): void {
$cursorQuery->where('updated_at', '<', $nextUpdatedAt)
->orWhere(function ($sameTimestampQuery) use ($nextUpdatedAt, $afterId): void {
$sameTimestampQuery->where('updated_at', '=', $nextUpdatedAt)
->where('id', '<', $afterId);
});
});
}
} else {
$query->where('id', '>=', $nextId)
->orderBy('id')
->limit($take);
}
if ($embeddedOnly) {
$query->whereHas('embeddings');
}
if ($publicOnly) {
$query->public()->published();
}
$artworks = $query->get();
if ($artworks->isEmpty()) {
$this->line('No more artworks matched the current query window.');
break;
}
$this->line(sprintf(
'Fetched batch: count=%d first_id=%d last_id=%d first_updated_at=%s last_updated_at=%s',
$artworks->count(),
(int) $artworks->first()->id,
(int) $artworks->last()->id,
optional($artworks->first()->updated_at)->toIso8601String() ?? 'null',
optional($artworks->last()->updated_at)->toIso8601String() ?? 'null'
));
foreach ($artworks as $artwork) {
$processed++;
$lastId = (int) $artwork->id;
if ($order === 'updated-desc') {
$nextUpdatedAt = $artwork->updated_at !== null
? CarbonImmutable::instance($artwork->updated_at)
: null;
$afterId = $lastId;
} else {
$nextId = $lastId + 1;
}
try {
$payload = $vectors->payloadForArtwork($artwork);
} catch (\Throwable $e) {
$skipped++;
$this->warn("Skipped artwork {$artwork->id}: {$e->getMessage()}");
continue;
}
$this->line(sprintf(
'Processing artwork=%d hash=%s thumb_ext=%s url=%s metadata=%s',
(int) $artwork->id,
(string) ($artwork->hash ?? ''),
(string) ($artwork->thumb_ext ?? ''),
$payload['url'],
$this->json($payload['metadata'])
));
if ($dryRun) {
$indexed++;
$this->line(sprintf(
'[dry] artwork=%d indexed=%d/%d',
(int) $artwork->id,
$indexed,
$processed
));
continue;
}
try {
$vectors->upsertArtwork($artwork);
$indexed++;
$this->info(sprintf(
'Indexed artwork %d successfully. totals: processed=%d indexed=%d skipped=%d failed=%d',
(int) $artwork->id,
$processed,
$indexed,
$skipped,
$failed
));
} catch (\Throwable $e) {
$failed++;
$this->warn("Failed artwork {$artwork->id}: {$e->getMessage()}");
}
}
}
$this->info(sprintf(
'Vector index finished. processed=%d indexed=%d skipped=%d failed=%d last_id=%d next_id=%d next_updated_at=%s',
$processed,
$indexed,
$skipped,
$failed,
$lastId,
$nextId,
$nextUpdatedAt?->toIso8601String() ?? 'none'
));
return $failed > 0 ? self::FAILURE : self::SUCCESS;
}
private function normalizeOrder(string $order): string
{
$normalized = strtolower(trim($order));
return match ($normalized) {
'updated-desc', 'updated', 'latest', 'latest-updated' => 'updated-desc',
'id-asc', 'id', 'legacy' => 'id-asc',
default => 'updated-desc',
};
}
private function resolveAfterUpdatedAt(string $order, string $afterUpdatedAt): ?CarbonImmutable
{
if ($order !== 'updated-desc') {
return null;
}
$value = trim($afterUpdatedAt);
if ($value === '') {
return null;
}
try {
return CarbonImmutable::parse($value);
} catch (\Throwable) {
throw new InvalidArgumentException(sprintf('Invalid --after-updated-at value [%s]. Use an ISO-8601 timestamp.', $afterUpdatedAt));
}
}
/**
* @param array<string, string> $payload
*/
private function json(array $payload): string
{
$json = json_encode($payload, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
return is_string($json) ? $json : '{}';
}
}