Files
SkinbaseNova/app/Jobs/RecBuildItemPairsFromFavouritesJob.php

225 lines
6.9 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Jobs;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\DB;
/**
 * Build item-item co-occurrence pairs from user favourites.
 *
 * Spec §7.1 — runs hourly or every few hours.
 *
 * On every run the `rec_item_pairs` table is emptied and rebuilt: users are
 * processed in batches, each user's most recent N favourites are turned into
 * unordered artwork pairs, and every pair's weight is increased by
 * `co-occurrences / sqrt(likes(a) * likes(b))`.
 *
 * Safety: the number of favourites considered per user is capped
 * (`recommendations.similarity.user_favourites_cap`) so the quadratic pair
 * generation stays bounded.
 *
 * NOTE(review): the delete and the rebuild are not wrapped in a transaction,
 * so readers can observe an empty or partially filled table while the job
 * runs — confirm this is acceptable for consumers of `rec_item_pairs`.
 */
final class RecBuildItemPairsFromFavouritesJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    public int $tries = 2;

    public int $timeout = 600;

    /** @var array<int, int> artwork_id => total favourite count */
    private array $artworkLikeCounts = [];

    /**
     * @param int $userBatchSize Number of distinct users processed per batch;
     *                           values below 1 are treated as 1 when the job runs.
     */
    public function __construct(
        private readonly int $userBatchSize = 500,
    ) {
        $queue = (string) config('recommendations.queue', 'default');
        if ($queue !== '') {
            $this->onQueue($queue);
        }
    }

    /**
     * Rebuild every pair weight from the current contents of `artwork_favourites`.
     */
    public function handle(): void
    {
        $favCap = (int) config('recommendations.similarity.user_favourites_cap', 50);

        // Pre-compute per-artwork total favourite counts for cosine normalization.
        // Depending on the database driver COUNT(*) may come back as a numeric
        // string, so normalise to int to match the property's documented shape.
        $this->artworkLikeCounts = DB::table('artwork_favourites')
            ->select('artwork_id', DB::raw('COUNT(*) as cnt'))
            ->groupBy('artwork_id')
            ->pluck('cnt', 'artwork_id')
            ->map(static fn ($total): int => (int) $total)
            ->all();

        // Rebuild weights from scratch to avoid cross-run accumulation.
        DB::table('rec_item_pairs')->delete();

        // Keyset pagination on user_id instead of offset paging: favourites are
        // written while this job runs, and with OFFSET a concurrent insert or
        // delete shifts the pages so a user could be skipped or counted twice —
        // and a double-counted user would silently inflate the additive weights.
        DB::table('artwork_favourites')
            ->select('user_id')
            ->groupBy('user_id')
            ->chunkById(
                max(1, $this->userBatchSize),
                function ($userRows) use ($favCap): void {
                    $userIds = [];
                    foreach ($userRows as $row) {
                        $userIds[] = (int) $row->user_id;
                    }

                    $this->flushPairCountChunk($this->pairCountsForUsers($userIds, $favCap));
                },
                'user_id',
            );
    }

    /**
     * Collect pairs from a single user's last N favourites.
     *
     * @param int $userId User whose favourites are read.
     * @param int $cap    Maximum number of most recent favourites to consider.
     *
     * @return list<array{0: int, 1: int}> Pairs as [smaller id, larger id].
     */
    public function pairsForUser(int $userId, int $cap): array
    {
        $artworkIds = DB::table('artwork_favourites')
            ->where('user_id', $userId)
            ->orderByDesc('created_at')
            ->limit($cap)
            ->pluck('artwork_id')
            ->map(static fn ($id): int => (int) $id)
            ->all();

        return $this->pairsForArtworkIds($artworkIds);
    }

    /**
     * Collect chunk-local pair counts using one capped favourites query for the chunk.
     *
     * Favourites are ranked per user (newest first, ties broken by artwork id)
     * with a window function, and only rows ranked within $cap are kept.
     *
     * @param list<int> $userIds Users belonging to the current batch.
     * @param int       $cap     Maximum number of most recent favourites per user.
     *
     * @return array<string, int> key = "a:b", value = number of users in the
     *                            batch whose capped favourites contain both artworks.
     */
    private function pairCountsForUsers(array $userIds, int $cap): array
    {
        if ($userIds === []) {
            return [];
        }

        $rankedFavourites = DB::query()
            ->fromSub(
                DB::table('artwork_favourites')
                    ->selectRaw('user_id, artwork_id, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC, artwork_id DESC) as favourite_rank')
                    ->whereIn('user_id', $userIds),
                'ranked_favourites'
            )
            ->where('favourite_rank', '<=', $cap)
            ->orderBy('user_id')
            ->orderBy('favourite_rank')
            ->get(['user_id', 'artwork_id']);

        $artworksByUser = [];
        foreach ($rankedFavourites as $row) {
            $artworksByUser[(int) $row->user_id][] = (int) $row->artwork_id;
        }

        $pairCounts = [];
        foreach ($artworksByUser as $artworkIds) {
            foreach ($this->pairsForArtworkIds($artworkIds) as [$a, $b]) {
                $key = $this->pairKey($a, $b);
                $pairCounts[$key] = ($pairCounts[$key] ?? 0) + 1;
            }
        }

        return $pairCounts;
    }

    /**
     * Produce every unordered pair of distinct artwork ids from the given list.
     *
     * @param list<int> $artworkIds
     *
     * @return list<array{0: int, 1: int}> Pairs as [smaller id, larger id].
     */
    private function pairsForArtworkIds(array $artworkIds): array
    {
        // A repeated artwork id would otherwise yield a self-pair (a, a) and
        // count its pairs with other artworks more than once.
        $artworkIds = array_values(array_unique($artworkIds));

        $count = count($artworkIds);
        if ($count < 2) {
            return [];
        }

        $pairs = [];

        // No cap is applied here: callers bound the input length beforehand.
        // With the default cap of 50 the worst case is C(50,2) = 1225 pairs.
        for ($i = 0; $i < $count - 1; $i++) {
            for ($j = $i + 1; $j < $count; $j++) {
                $pairs[] = [
                    min($artworkIds[$i], $artworkIds[$j]),
                    max($artworkIds[$i], $artworkIds[$j]),
                ];
            }
        }

        return $pairs;
    }

    /**
     * Upsert one chunk of pair counts into rec_item_pairs.
     *
     * Existing weights are read first and the normalised delta is added in PHP,
     * because the same pair can be produced by users from different batches.
     *
     * @param array<string, int> $pairCounts key = "a:b", value = chunk-local co-occurrence count
     */
    private function flushPairCountChunk(array $pairCounts): void
    {
        if ($pairCounts === []) {
            return;
        }

        $now = now();

        foreach (array_chunk($pairCounts, 500, preserve_keys: true) as $chunk) {
            $pairIds = [];
            $aIds = [];
            $bIds = [];
            foreach (array_keys($chunk) as $key) {
                [$a, $b] = $this->pairIdsFromKey($key);
                $pairIds[$key] = [$a, $b];
                $aIds[] = $a;
                $bIds[] = $b;
            }

            // The two whereIn filters match a superset (any "a" of this chunk
            // combined with any "b" of this chunk). Rows are keyed by pair and
            // only this chunk's keys are looked up below, so extras are ignored.
            $existingWeights = DB::table('rec_item_pairs')
                ->whereIn('a_artwork_id', array_values(array_unique($aIds)))
                ->whereIn('b_artwork_id', array_values(array_unique($bIds)))
                ->get(['a_artwork_id', 'b_artwork_id', 'weight'])
                ->mapWithKeys(fn ($row): array => [
                    $this->pairKey((int) $row->a_artwork_id, (int) $row->b_artwork_id) => (float) $row->weight,
                ])
                ->all();

            $rows = [];
            foreach ($chunk as $key => $count) {
                [$a, $b] = $pairIds[$key];

                // Fall back to 1 when an artwork has no entry in the pre-computed
                // counts, which also keeps the divisor non-zero.
                $likesA = $this->artworkLikeCounts[$a] ?? 1;
                $likesB = $this->artworkLikeCounts[$b] ?? 1;
                $deltaWeight = $count / sqrt($likesA * $likesB);

                $rows[] = [
                    'a_artwork_id' => $a,
                    'b_artwork_id' => $b,
                    'weight' => ($existingWeights[$key] ?? 0.0) + $deltaWeight,
                    'updated_at' => $now,
                ];
            }

            DB::table('rec_item_pairs')->upsert(
                $rows,
                ['a_artwork_id', 'b_artwork_id'],
                ['weight', 'updated_at'],
            );
        }
    }

    /**
     * Build the "a:b" array key used to identify a pair.
     */
    private function pairKey(int $a, int $b): string
    {
        return $a . ':' . $b;
    }

    /**
     * Split an "a:b" key back into its two artwork ids.
     *
     * @return array{0: int, 1: int}
     */
    private function pairIdsFromKey(string $key): array
    {
        [$a, $b] = explode(':', $key, 2);

        return [(int) $a, (int) $b];
    }
}