287 lines
10 KiB
PHP
287 lines
10 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Jobs;
|
|
|
|
use App\Models\Artwork;
|
|
use App\Models\RecArtworkRec;
|
|
use Illuminate\Bus\Queueable;
|
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
|
use Illuminate\Foundation\Bus\Dispatchable;
|
|
use Illuminate\Queue\InteractsWithQueue;
|
|
use Illuminate\Queue\SerializesModels;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
/**
 * Compute hybrid similarity by blending tag, behavior, and optionally visual scores.
 *
 * Spec §7.4 — runs nightly.
 *
 * For every public, published artwork (or only the one passed to the constructor) the job:
 *  1. merges the candidate IDs stored in the `similar_tags`, `similar_behavior` and — when the
 *     vector feature is enabled — `similar_visual` recommendation rows;
 *  2. converts each list position into a rank score and blends the scores with the configured weights;
 *  3. enforces diversity (max items per author, min distinct categories in the top block);
 *  4. stores the top `recommendations.similarity.result_limit` IDs (default 30) as `similar_hybrid`.
 */
final class RecComputeSimilarHybridJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Number of leading results in which category diversity is enforced. */
    private const TOP_BLOCK_SIZE = 12;

    public int $tries = 2;

    public int $timeout = 900;

    /**
     * @param int|null $artworkId Restrict the run to a single artwork; null processes all eligible artworks.
     * @param int      $batchSize Number of artworks loaded per chunk.
     */
    public function __construct(
        private readonly ?int $artworkId = null,
        private readonly int $batchSize = 200,
    ) {
        $queue = (string) config('recommendations.queue', 'default');

        if ($queue !== '') {
            $this->onQueue($queue);
        }
    }

    /**
     * Read the similarity configuration and process the eligible artworks chunk by chunk.
     *
     * A failure on one artwork is logged and does not abort the rest of the run.
     */
    public function handle(): void
    {
        $modelVersion = (string) config('recommendations.similarity.model_version', 'sim_v1');
        $vectorEnabled = (bool) config('recommendations.similarity.vector_enabled', false);
        $resultLimit = (int) config('recommendations.similarity.result_limit', 30);
        $maxPerAuthor = (int) config('recommendations.similarity.max_per_author', 2);
        $minCatsTop12 = (int) config('recommendations.similarity.min_categories_top12', 2);

        $weights = $vectorEnabled
            ? (array) config('recommendations.similarity.weights_with_vector')
            : (array) config('recommendations.similarity.weights_without_vector');

        $query = Artwork::query()->public()->published()->select('id', 'user_id');

        if ($this->artworkId !== null) {
            $query->where('id', $this->artworkId);
        }

        // Guard against a non-positive batch size, which cannot describe a valid chunk.
        $chunkSize = max(1, $this->batchSize);

        $query->chunkById($chunkSize, function ($artworks) use (
            $modelVersion,
            $vectorEnabled,
            $resultLimit,
            $maxPerAuthor,
            $minCatsTop12,
            $weights,
        ): void {
            foreach ($artworks as $artwork) {
                try {
                    $this->processArtwork(
                        $artwork,
                        $modelVersion,
                        $vectorEnabled,
                        $resultLimit,
                        $maxPerAuthor,
                        $minCatsTop12,
                        $weights,
                    );
                } catch (\Throwable $e) {
                    // Best effort: keep going, but pass the throwable along so the stack trace is not lost.
                    Log::warning(
                        "[RecComputeSimilarHybrid] Failed for artwork {$artwork->id}: {$e->getMessage()}",
                        ['artwork_id' => $artwork->id, 'exception' => $e],
                    );
                }
            }
        });
    }

    /**
     * Build and persist the hybrid recommendation list for one artwork.
     *
     * @param Artwork              $artwork       Source artwork (only `id` is read here).
     * @param string               $modelVersion  Model version used to read the sub-lists and to key the result.
     * @param bool                 $vectorEnabled Whether the `similar_visual` list takes part in the blend.
     * @param int                  $resultLimit   Maximum number of IDs to store.
     * @param int                  $maxPerAuthor  Maximum number of results by the same author.
     * @param int                  $minCatsTop12  Minimum number of distinct categories wanted in the top block.
     * @param array<string, mixed> $weights       Blend weights keyed by `visual`, `tag`, `behavior`, `category`.
     */
    private function processArtwork(
        Artwork $artwork,
        string $modelVersion,
        bool $vectorEnabled,
        int $resultLimit,
        int $maxPerAuthor,
        int $minCatsTop12,
        array $weights,
    ): void {
        $sourceId = (int) $artwork->id;

        // ── Collect sub-lists ──────────────────────────────────────────────────
        $tagIds = $this->fetchRecIds($sourceId, 'similar_tags', $modelVersion);
        $behIds = $this->fetchRecIds($sourceId, 'similar_behavior', $modelVersion);
        $vecIds = $vectorEnabled
            ? $this->fetchRecIds($sourceId, 'similar_visual', $modelVersion)
            : [];

        // Merge all candidate IDs; an artwork must never be recommended as similar to itself.
        $allIds = array_values(array_diff(
            array_unique(array_merge($tagIds, $behIds, $vecIds)),
            [$sourceId],
        ));

        if ($allIds === []) {
            return;
        }

        // ── Build normalized score maps ────────────────────────────────────────
        $tagScoreMap = $this->rankToScore($tagIds);
        $behScoreMap = $this->rankToScore($behIds);
        $vecScoreMap = $this->rankToScore($vecIds);

        // Candidate metadata; rows that are not currently visible are dropped by the filters below.
        $metaRows = DB::table('artworks')
            ->whereIn('id', $allIds)
            ->where('is_public', true)
            ->where('is_approved', true)
            ->whereNotNull('published_at')
            ->where('published_at', '<=', now())
            ->whereNull('deleted_at')
            ->select('id', 'user_id')
            ->get()
            ->keyBy('id');

        $catMap = DB::table('artwork_category')
            ->whereIn('artwork_id', $allIds)
            ->select('artwork_id', 'category_id')
            ->get()
            ->groupBy('artwork_id');

        // Source artwork categories, flipped into a set for O(1) membership checks.
        $srcCatIds = DB::table('artwork_category')
            ->where('artwork_id', $sourceId)
            ->pluck('category_id')
            ->all();
        $srcCatSet = array_flip($srcCatIds);

        // Blend weights are loop-invariant; the fallbacks depend on whether the visual signal is used.
        $visualWeight = $vectorEnabled ? ($weights['visual'] ?? 0.45) : 0.0;
        $tagWeight = $weights['tag'] ?? ($vectorEnabled ? 0.25 : 0.55);
        $behaviorWeight = $weights['behavior'] ?? ($vectorEnabled ? 0.20 : 0.35);
        $categoryWeight = $weights['category'] ?? 0.10;

        // ── Compute hybrid score ───────────────────────────────────────────────
        $scored = [];
        foreach ($allIds as $candidateId) {
            if (! $metaRows->has($candidateId)) {
                continue;
            }

            $meta = $metaRows->get($candidateId);
            $candidateCats = $catMap->get($candidateId, collect())->pluck('category_id')->all();

            // Category overlap is binary: 1.0 as soon as one category is shared with the source.
            $catScore = 0.0;
            foreach ($candidateCats as $catId) {
                if (isset($srcCatSet[$catId])) {
                    $catScore = 1.0;
                    break;
                }
            }

            $score = $visualWeight * ($vecScoreMap[$candidateId] ?? 0.0)
                + $tagWeight * ($tagScoreMap[$candidateId] ?? 0.0)
                + $behaviorWeight * ($behScoreMap[$candidateId] ?? 0.0)
                + $categoryWeight * $catScore;

            $scored[] = [
                'artwork_id' => $candidateId,
                'user_id' => (int) $meta->user_id,
                'cat_ids' => $candidateCats,
                'score' => $score,
            ];
        }

        // Highest score first; usort is stable on PHP 8, so ties keep their merge order.
        usort($scored, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

        $final = $this->selectDiverse($scored, $resultLimit, $maxPerAuthor, $minCatsTop12);
        $finalIds = array_column($final, 'artwork_id');

        // NOTE(review): when nothing survives, a previously stored hybrid row is left untouched —
        // confirm that keeping the stale row is intended.
        if ($finalIds === []) {
            return;
        }

        RecArtworkRec::query()->updateOrCreate(
            [
                'artwork_id' => $sourceId,
                'rec_type' => 'similar_hybrid',
                'model_version' => $modelVersion,
            ],
            [
                'recs' => $finalIds,
                'computed_at' => now(),
            ],
        );
    }

    /**
     * Load one stored recommendation list and normalise it to a de-duplicated list of integer IDs.
     *
     * @return list<int> IDs in stored order (first occurrence wins); empty when no row exists.
     */
    private function fetchRecIds(int $artworkId, string $recType, string $modelVersion): array
    {
        $rec = RecArtworkRec::query()
            ->where('artwork_id', $artworkId)
            ->where('rec_type', $recType)
            ->where('model_version', $modelVersion)
            ->first();

        $ids = [];
        foreach ($rec?->recs ?? [] as $id) {
            if (! is_numeric($id)) {
                continue;
            }

            // Keyed by ID so a repeated entry keeps its first (best) position.
            $ids[(int) $id] ??= (int) $id;
        }

        return array_values($ids);
    }

    /**
     * Pick the final results from score-ordered candidates while enforcing diversity.
     *
     * First pass: walk the candidates in order, skipping any author who already has
     * `$maxPerAuthor` accepted items, until `$resultLimit` items are collected.
     * Second pass (spec §6): if the top block is full but holds fewer than `$minCatsTop12`
     * distinct categories, promote the best remaining candidates that add a new category —
     * still respecting the author cap — into the last slots of the top block.
     *
     * @param list<array{artwork_id: int, user_id: int, cat_ids: array, score: float|int}> $scored
     *
     * @return list<array{artwork_id: int, user_id: int, cat_ids: array, score: float|int}>
     */
    private function selectDiverse(array $scored, int $resultLimit, int $maxPerAuthor, int $minCatsTop12): array
    {
        $authorCounts = [];
        $final = [];
        $catsInTop = [];

        foreach ($scored as $item) {
            $authorId = $item['user_id'];

            if (($authorCounts[$authorId] ?? 0) >= $maxPerAuthor) {
                continue;
            }

            $authorCounts[$authorId] = ($authorCounts[$authorId] ?? 0) + 1;
            $final[] = $item;

            if (count($final) <= self::TOP_BLOCK_SIZE) {
                foreach ($item['cat_ids'] as $catId) {
                    $catsInTop[$catId] = true;
                }
            }

            if (count($final) >= $resultLimit) {
                break;
            }
        }

        if (count($catsInTop) >= $minCatsTop12 || count($final) < self::TOP_BLOCK_SIZE) {
            return $final;
        }

        $selected = array_flip(array_column($final, 'artwork_id'));
        $promoted = [];

        foreach ($scored as $item) {
            if (isset($selected[$item['artwork_id']])) {
                continue;
            }

            // Candidates rejected by the author cap must not re-enter through promotion.
            $authorId = $item['user_id'];
            if (($authorCounts[$authorId] ?? 0) >= $maxPerAuthor) {
                continue;
            }

            $newCats = array_diff($item['cat_ids'], array_keys($catsInTop));
            if ($newCats === []) {
                continue;
            }

            // Record the new categories so the next promoted item has to add a different one.
            foreach ($newCats as $catId) {
                $catsInTop[$catId] = true;
            }

            $authorCounts[$authorId] = ($authorCounts[$authorId] ?? 0) + 1;
            $promoted[] = $item;

            if (count($catsInTop) >= $minCatsTop12) {
                break;
            }
        }

        if ($promoted === []) {
            return $final;
        }

        // Insert so that every promoted item ends inside the top block (one item → position 12).
        $insertAt = max(0, self::TOP_BLOCK_SIZE - count($promoted));
        $merged = array_merge(
            array_slice($final, 0, $insertAt),
            $promoted,
            array_slice($final, $insertAt),
        );

        return array_slice($merged, 0, $resultLimit);
    }

    /**
     * Convert a ranked list of IDs into a score map (1.0 at rank 0, decaying linearly).
     *
     * The item at position `r` of `n` items scores `1 - r / n`. Keys of the input are ignored,
     * and an ID that appears more than once keeps the score of its first (best) position.
     *
     * @param array<array-key, int|string> $ids
     *
     * @return array<int, float>
     */
    private function rankToScore(array $ids): array
    {
        $ids = array_values($ids);
        $total = count($ids);
        $map = [];

        // With an empty list the loop body never runs, so $total is never used as a divisor.
        foreach ($ids as $rank => $id) {
            $map[(int) $id] ??= 1.0 - ($rank / $total);
        }

        return $map;
    }
}
|