Files
SkinbaseNova/app/Services/Recommendations/PersonalizedFeedService.php
2026-02-14 15:14:12 +01:00

568 lines
20 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Services\Recommendations;
use App\Jobs\RegenerateUserRecommendationCacheJob;
use App\Models\Artwork;
use App\Models\UserInterestProfile;
use App\Models\UserRecommendationCache;
use Carbon\CarbonImmutable;
use Illuminate\Support\Arr;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
final class PersonalizedFeedService
{
/**
 * Serve one page of the personalized feed for a user.
 *
 * Reads the user's cached recommendation row for the resolved algorithm version and pages
 * through its items. A row that is missing or not fresh queues a regeneration job; whatever
 * items the row holds are still served. When the cache yields no items, a cold-start list is
 * built inline and "-fallback" is appended to the reported cache status.
 *
 * @param int         $userId      User whose cache row is looked up.
 * @param int         $limit       Requested page size, clamped to the range 1..50.
 * @param string|null $cursor      Cursor from a previous response's meta; null or '' starts at offset 0.
 * @param string|null $algoVersion Explicit algorithm version; null or '' defers to configuration.
 *
 * @return array{data: array<int, array<string, mixed>>, meta: array<string, mixed>}
 */
public function getFeed(int $userId, int $limit = 24, ?string $cursor = null, ?string $algoVersion = null): array
{
    $pageSize = min(50, max(1, $limit));
    $version = $this->resolveAlgoVersion($algoVersion, $userId);
    $weights = $this->resolveRankingWeights($version);
    $startAt = $this->decodeCursorToOffset($cursor);

    $cacheRow = UserRecommendationCache::query()
        ->where('user_id', $userId)
        ->where('algo_version', $version)
        ->first();

    $feedItems = $this->extractCacheItems($cacheRow);
    $fresh = $cacheRow !== null
        && $cacheRow->expires_at !== null
        && $cacheRow->expires_at->isFuture();

    $status = match (true) {
        $cacheRow === null => 'miss',
        ! $fresh => 'stale',
        default => 'hit',
    };

    // Anything other than a fresh hit triggers a background rebuild of the cache row.
    if ($status !== 'hit') {
        RegenerateUserRecommendationCacheJob::dispatch($userId, $version)
            ->onQueue((string) config('discovery.queue', 'default'));
    }

    // Nothing usable cached yet: answer from the cold-start list instead of an empty page.
    if ($feedItems === []) {
        $feedItems = $this->buildColdStartRecommendations($version, 240, 'fallback');
        $status .= '-fallback';
    }

    return $this->buildFeedPageResponse(
        items: $feedItems,
        offset: $startAt,
        limit: $pageSize,
        algoVersion: $version,
        weightVersion: (string) $weights['version'],
        cacheStatus: $status,
        generatedAt: $cacheRow?->generated_at?->toIso8601String()
    );
}
/**
 * Rebuild the recommendation list for a user and upsert it into the cache table.
 *
 * The row is keyed by (user_id, algo_version). Its expiry is the current time plus
 * `discovery.cache_ttl_minutes` (at least one minute).
 *
 * @param int         $userId      User whose cache row is written.
 * @param string|null $algoVersion Explicit algorithm version; null or '' defers to configuration.
 */
public function regenerateCacheForUser(int $userId, ?string $algoVersion = null): void
{
    $version = $this->resolveAlgoVersion($algoVersion, $userId);
    $cacheVersion = (string) config('discovery.cache_version', 'cache-v1');
    $ttl = max(1, (int) config('discovery.cache_ttl_minutes', 60));

    $recommendations = $this->buildRecommendations($userId, $version, 240);

    $builtAt = now();
    $validUntil = now()->addMinutes($ttl);

    $rowKey = [
        'user_id' => $userId,
        'algo_version' => $version,
    ];

    $rowValues = [
        'cache_version' => $cacheVersion,
        'recommendations_json' => [
            'items' => $recommendations,
            'algo_version' => $version,
            'weight_version' => (string) $this->resolveRankingWeights($version)['version'],
            'generated_at' => $builtAt->toIso8601String(),
        ],
        'generated_at' => $builtAt,
        'expires_at' => $validUntil,
    ];

    UserRecommendationCache::query()->updateOrCreate($rowKey, $rowValues);
}
/**
 * Build the ranked candidate list for a user.
 *
 * Loads the user's interest profile for the configured profile version and the given
 * algorithm version, scores artworks from it, and merges the result with the cold-start
 * list. When the profile produces no candidates the cold-start list is returned on its own
 * (source "cold_start"). Otherwise each artwork keeps the highest score seen across both
 * lists, together with the source of that score, and the merged list is sorted by score
 * and passed through the diversity guard.
 *
 * NOTE(review): fallback scores start at 1.0 (see buildColdStartRecommendations) while
 * personalized scores are a blend with normalized weights, so popular fallback items may
 * outrank personalized ones in the merged list — confirm the two scales are meant to be
 * compared directly.
 *
 * @param int    $userId      User whose interest profile is read.
 * @param string $algoVersion Algorithm version used for the profile lookup, weights and similarities.
 * @param int    $maxItems    Maximum number of items returned.
 *
 * @return array<int, array{artwork_id:int,score:float,source:string}>
 */
public function buildRecommendations(int $userId, string $algoVersion, int $maxItems = 240): array
{
    $profileVersion = (string) config('discovery.profile_version', 'profile-v1');
    $profile = UserInterestProfile::query()
        ->where('user_id', $userId)
        ->where('profile_version', $profileVersion)
        ->where('algo_version', $algoVersion)
        ->first();

    $normalized = (array) ($profile?->normalized_scores_json ?? []);
    $personalized = $this->buildProfileBasedRecommendations($normalized, $maxItems, $algoVersion);
    if ($personalized === []) {
        return $this->buildColdStartRecommendations($algoVersion, $maxItems, 'cold_start');
    }

    $fallback = $this->buildColdStartRecommendations($algoVersion, $maxItems, 'fallback');

    $combined = [];
    foreach (array_merge($personalized, $fallback) as $item) {
        $artworkId = (int) ($item['artwork_id'] ?? 0);
        if ($artworkId <= 0) {
            continue;
        }

        // Read the score once, with the same default on both the insert and the compare
        // path, so an item without a score counts as 0.0 instead of raising a warning.
        $score = (float) ($item['score'] ?? 0.0);

        if (! isset($combined[$artworkId])) {
            $combined[$artworkId] = [
                'artwork_id' => $artworkId,
                'score' => $score,
                'source' => (string) ($item['source'] ?? 'mixed'),
            ];
            continue;
        }

        // Same artwork seen again: keep whichever entry scored higher, with its source.
        if ($score > $combined[$artworkId]['score']) {
            $combined[$artworkId]['score'] = $score;
            $combined[$artworkId]['source'] = (string) ($item['source'] ?? $combined[$artworkId]['source']);
        }
    }

    $candidates = array_values($combined);
    usort($candidates, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

    return $this->applyDiversityGuard($candidates, $algoVersion, $maxItems);
}
/**
 * Score recently published artworks from the categories a user has affinity for.
 *
 * Only profile keys of the form "category:<id>" with a numeric, strictly positive score
 * are used. Each candidate row is scored as
 *
 *   w1 * affinity + w2 * recency + w3 * popularity + w4 * novelty
 *
 * with the weight set resolved for $algoVersion. An artwork listed under several matching
 * categories keeps its best score. The result is sorted by score and passed through the
 * diversity guard.
 *
 * @param array<string, mixed> $normalizedScores Profile scores keyed by feature name.
 * @param int                  $maxItems         Maximum number of items returned.
 * @param string               $algoVersion      Algorithm version for weights and similarity lookups.
 *
 * @return array<int, array{artwork_id:int,score:float,source:string}>
 */
private function buildProfileBasedRecommendations(array $normalizedScores, int $maxItems, string $algoVersion): array
{
    $weightSet = $this->resolveRankingWeights($algoVersion);
    $w1 = (float) $weightSet['w1'];
    $w2 = (float) $weightSet['w2'];
    $w3 = (float) $weightSet['w3'];
    $w4 = (float) $weightSet['w4'];

    $prefix = 'category:';
    $categoryAffinities = [];
    foreach ($normalizedScores as $key => $score) {
        $key = (string) $key;
        if (! is_numeric($score) || ! str_starts_with($key, $prefix)) {
            continue;
        }

        // Strip only the leading prefix; anything after it must be the numeric category id.
        $categoryId = (int) substr($key, strlen($prefix));
        if ($categoryId <= 0) {
            continue;
        }

        $categoryAffinities[$categoryId] = (float) $score;
    }

    // Rows for categories without positive affinity are never scored, so keep them out of
    // the query: otherwise they consume part of the LIMIT budget for nothing.
    $categoryAffinities = array_filter(
        $categoryAffinities,
        static fn (float $affinity): bool => $affinity > 0.0
    );

    if ($categoryAffinities === []) {
        return [];
    }

    // One reference instant for both the publication filter and every age computation.
    $now = now();

    $rows = DB::table('artworks')
        ->join('artwork_category', 'artwork_category.artwork_id', '=', 'artworks.id')
        ->leftJoin('artwork_stats', 'artwork_stats.artwork_id', '=', 'artworks.id')
        ->whereIn('artwork_category.category_id', array_keys($categoryAffinities))
        ->whereNull('artworks.deleted_at')
        ->where('artworks.is_public', true)
        ->where('artworks.is_approved', true)
        ->whereNotNull('artworks.published_at')
        ->where('artworks.published_at', '<=', $now)
        ->orderByDesc('artworks.published_at')
        ->limit(max(200, $maxItems * 8))
        ->get([
            'artworks.id',
            'artworks.published_at',
            'artwork_category.category_id',
            DB::raw('COALESCE(artwork_stats.views, 0) as views'),
        ]);

    $scored = [];
    foreach ($rows as $row) {
        $artworkId = (int) $row->id;
        $affinity = (float) ($categoryAffinities[(int) $row->category_id] ?? 0.0);
        if ($affinity <= 0.0) {
            continue;
        }

        $publishedAt = CarbonImmutable::parse((string) $row->published_at);
        $ageDays = max(0.0, (float) $publishedAt->diffInSeconds($now) / 86400);

        $recency = exp(-$ageDays / 30.0);
        $popularity = log(1 + max(0, (int) $row->views)) / 10.0;
        // Novelty is the complement of popularity, with popularity capped at 1.0 for this term only.
        $novelty = max(0.0, 1.0 - min(1.0, $popularity));

        // Phase 8B blend with versioned weights (manual tuning, no auto-tuning yet).
        $score = ($w1 * $affinity) + ($w2 * $recency) + ($w3 * $popularity) + ($w4 * $novelty);

        // The join yields one row per (artwork, category); keep the best-scoring one.
        if (! isset($scored[$artworkId]) || $score > $scored[$artworkId]['score']) {
            $scored[$artworkId] = [
                'artwork_id' => $artworkId,
                'score' => $score,
                'source' => 'personalized',
            ];
        }
    }

    $candidates = array_values($scored);
    usort($candidates, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

    return $this->applyDiversityGuard($candidates, $algoVersion, $maxItems);
}
/**
 * Build a non-personalized candidate list from popular artworks and their neighbours.
 *
 * Popular artworks (ordered by views, then downloads, then publication date) receive scores
 * that start at 1.0 and drop by 0.003 per position. The twelve most popular ones seed a
 * lookup in `artwork_similarities`; the similar artworks found receive scores that start at
 * 0.75 and drop by 0.002 per position. Both groups are sorted together by score and passed
 * through the diversity guard.
 *
 * @param string $algoVersion Algorithm version used for the similarity lookups.
 * @param int    $maxItems    Maximum number of items returned.
 * @param string $sourceLabel Value written to each item's "source" field.
 *
 * @return array<int, array{artwork_id:int,score:float,source:string}>
 */
private function buildColdStartRecommendations(string $algoVersion, int $maxItems, string $sourceLabel = 'cold_start'): array
{
    $toInt = static fn (mixed $id): int => (int) $id;

    $popularQuery = DB::table('artworks')
        ->leftJoin('artwork_stats', 'artwork_stats.artwork_id', '=', 'artworks.id')
        ->whereNull('artworks.deleted_at')
        ->where('artworks.is_public', true)
        ->where('artworks.is_approved', true)
        ->whereNotNull('artworks.published_at')
        ->where('artworks.published_at', '<=', now())
        ->orderByDesc('artwork_stats.views')
        ->orderByDesc('artwork_stats.downloads')
        ->orderByDesc('artworks.published_at')
        ->limit(max(40, $maxItems));

    $popular = $popularQuery->pluck('artworks.id')->map($toInt)->all();

    // The top of the popularity list seeds the similarity lookup.
    $seeds = array_slice($popular, 0, 12);

    $similar = [];
    if ($seeds !== []) {
        $similar = DB::table('artwork_similarities')
            ->where('algo_version', $algoVersion)
            ->whereIn('artwork_id', $seeds)
            ->orderBy('rank')
            ->orderByDesc('score')
            ->limit(max(80, $maxItems * 2))
            ->pluck('similar_artwork_id')
            ->map($toInt)
            ->all();
    }

    $ranked = [];

    // Popular items: linear decay from 1.0, floored at 0.
    foreach ($popular as $position => $id) {
        $ranked[] = [
            'artwork_id' => $id,
            'score' => max(0.0, 1.0 - ($position * 0.003)),
            'source' => $sourceLabel,
        ];
    }

    // Similar items: linear decay from 0.75, floored at 0.
    foreach ($similar as $position => $id) {
        $ranked[] = [
            'artwork_id' => $id,
            'score' => max(0.0, 0.75 - ($position * 0.002)),
            'source' => $sourceLabel,
        ];
    }

    usort($ranked, static fn (array $left, array $right): int => $right['score'] <=> $left['score']);

    return $this->applyDiversityGuard($ranked, $algoVersion, $maxItems);
}
/**
 * Drop duplicate and near-duplicate artworks from an ordered candidate list.
 *
 * Candidates are processed in the order given. The first occurrence of an artwork id wins,
 * and a candidate is skipped when `artwork_similarities` links it (score >= 0.97, same
 * algorithm version, either direction) to an artwork that was already selected. Selection
 * stops once $maxItems items have been kept; scores are rounded to six decimals.
 *
 * @param array<int, array{artwork_id:int,score:float,source:string}> $candidates Ordered candidates, best first.
 * @param string $algoVersion Algorithm version of the similarity rows consulted.
 * @param int    $maxItems    Maximum number of items returned; values <= 0 yield an empty list.
 *
 * @return array<int, array{artwork_id:int,score:float,source:string}>
 */
private function applyDiversityGuard(array $candidates, string $algoVersion, int $maxItems): array
{
    if ($candidates === [] || $maxItems <= 0) {
        return [];
    }

    // Collapse repeated artwork ids, keeping the first (highest-placed) occurrence.
    $unique = [];
    foreach ($candidates as $candidate) {
        $artworkId = (int) ($candidate['artwork_id'] ?? 0);
        if ($artworkId <= 0 || isset($unique[$artworkId])) {
            continue;
        }

        $unique[$artworkId] = [
            'artwork_id' => $artworkId,
            'score' => (float) ($candidate['score'] ?? 0.0),
            'source' => (string) ($candidate['source'] ?? 'mixed'),
        ];
    }

    if ($unique === []) {
        return [];
    }

    $candidateIds = array_keys($unique);

    // Symmetric map: $adjacency[a][b] and $adjacency[b][a] are both set for each pair.
    $adjacency = [];
    if (count($candidateIds) > 1) {
        // A near-duplicate pair needs two distinct candidates, so a single id needs no lookup.
        $nearDuplicatePairs = DB::table('artwork_similarities')
            ->where('algo_version', $algoVersion)
            ->where('score', '>=', 0.97)
            ->whereIn('artwork_id', $candidateIds)
            ->whereIn('similar_artwork_id', $candidateIds)
            ->get(['artwork_id', 'similar_artwork_id']);

        foreach ($nearDuplicatePairs as $pair) {
            $left = (int) $pair->artwork_id;
            $right = (int) $pair->similar_artwork_id;
            if ($left === $right) {
                continue;
            }

            $adjacency[$left][$right] = true;
            $adjacency[$right][$left] = true;
        }
    }

    $selected = [];
    $selectedSet = [];
    foreach ($unique as $id => $candidate) {
        // Only this candidate's own neighbours can conflict, so intersect them with the
        // selected set instead of scanning every selected item.
        if (isset($adjacency[$id]) && array_intersect_key($adjacency[$id], $selectedSet) !== []) {
            continue;
        }

        $selected[] = [
            'artwork_id' => $id,
            'score' => round($candidate['score'], 6),
            'source' => $candidate['source'],
        ];
        $selectedSet[$id] = true;

        if (count($selected) >= $maxItems) {
            break;
        }
    }

    return $selected;
}
/**
 * Cut one page out of the candidate list and turn it into the feed response payload.
 *
 * Candidates whose artwork is not returned by the public()/published() scoped query are
 * left out of "data", so a page may hold fewer than $limit entries. Paging metadata is
 * computed against the full candidate list, not the hydrated page.
 *
 * @param array<int, array{artwork_id:int,score:float,source:string}> $items Full ordered candidate list.
 *
 * @return array{data: array<int, array<string, mixed>>, meta: array<string, mixed>}
 */
private function buildFeedPageResponse(
    array $items,
    int $offset,
    int $limit,
    string $algoVersion,
    string $weightVersion,
    string $cacheStatus,
    ?string $generatedAt
): array {
    $start = max(0, $offset);
    $page = array_slice($items, $start, $limit);

    // Distinct artwork ids on this page, in first-seen order.
    $idSet = [];
    foreach ($page as $entry) {
        $idSet[(int) ($entry['artwork_id'] ?? 0)] = true;
    }
    $ids = array_keys($idSet);

    /** @var Collection<int, Artwork> $artworksById */
    $artworksById = Artwork::query()
        ->with(['user:id,name'])
        ->whereIn('id', $ids)
        ->public()
        ->published()
        ->get()
        ->keyBy('id');

    // Walk the page (not the query result) so the ranking order is preserved.
    $data = [];
    foreach ($page as $entry) {
        $artwork = $artworksById->get((int) ($entry['artwork_id'] ?? 0));
        if ($artwork !== null) {
            $data[] = [
                'id' => $artwork->id,
                'slug' => $artwork->slug,
                'title' => $artwork->title,
                'thumbnail_url' => $artwork->thumb_url,
                'author' => $artwork->user?->name,
                'score' => (float) ($entry['score'] ?? 0.0),
                'source' => (string) ($entry['source'] ?? 'mixed'),
            ];
        }
    }

    $total = count($items);
    $following = $start + $limit;
    $nextCursor = $following < $total ? $this->encodeOffsetToCursor($following) : null;

    $meta = [
        'algo_version' => $algoVersion,
        'weight_version' => $weightVersion,
        'cursor' => $this->encodeOffsetToCursor($start),
        'next_cursor' => $nextCursor,
        'limit' => $limit,
        'cache_status' => $cacheStatus,
        'generated_at' => $generatedAt,
        'total_candidates' => $total,
    ];

    return [
        'data' => $data,
        'meta' => $meta,
    ];
}
/**
 * Decide which algorithm version applies to a request.
 *
 * Precedence: an explicit non-empty $algoVersion, then `discovery.rollout.force_algo_version`,
 * then the percentage rollout. The rollout only applies when it is enabled, a positive user
 * id is given, and the candidate version is non-empty and differs from the baseline; users
 * are assigned to one of 100 buckets by the CRC32 of their id, and buckets below the active
 * gate's percentage get the candidate version.
 *
 * @param string|null $algoVersion Explicit version requested by the caller, if any.
 * @param int|null    $userId      User id used for bucket assignment.
 */
private function resolveAlgoVersion(?string $algoVersion = null, ?int $userId = null): string
{
    if ($algoVersion !== null && $algoVersion !== '') {
        return $algoVersion;
    }

    $forced = trim((string) config('discovery.rollout.force_algo_version', ''));
    if ($forced !== '') {
        return $forced;
    }

    $default = (string) config('discovery.algo_version', 'clip-cosine-v1');
    $rolloutActive = (bool) config('discovery.rollout.enabled', false);
    $hasUser = $userId !== null && $userId > 0;
    if (! ($rolloutActive && $hasUser)) {
        return $default;
    }

    $baseline = (string) config('discovery.rollout.baseline_algo_version', $default);
    $candidate = (string) config('discovery.rollout.candidate_algo_version', $default);
    if ($candidate === '' || $candidate === $baseline) {
        return $baseline;
    }

    $gateName = (string) config('discovery.rollout.active_gate', 'g10');
    $allGates = (array) config('discovery.rollout.gates', []);
    $activeGate = (array) ($allGates[$gateName] ?? []);

    // Clamp to 0..100 so the two boundary cases below can be exact comparisons.
    $percentage = min(100, max(0, (int) ($activeGate['percentage'] ?? 0)));
    if ($percentage === 0) {
        return $baseline;
    }
    if ($percentage === 100) {
        return $candidate;
    }

    // The same user id always yields the same bucket.
    $bucket = abs((int) crc32((string) $userId)) % 100;
    if ($bucket < $percentage) {
        return $candidate;
    }

    return $baseline;
}
/**
 * Resolve the ranking weight set for an algorithm version.
 *
 * `discovery.ranking.default_weights` is overlaid with the entry for $algoVersion from
 * `discovery.ranking.algo_weight_sets`. Missing weights fall back to 0.65 / 0.20 / 0.10 /
 * 0.05, negative weights are raised to 0, and the four weights are rescaled to sum to 1
 * unless they are all zero.
 *
 * @return array{version:string,w1:float,w2:float,w3:float,w4:float}
 */
public function resolveRankingWeights(string $algoVersion): array
{
    $baseSet = (array) config('discovery.ranking.default_weights', []);
    $perAlgo = (array) config('discovery.ranking.algo_weight_sets', []);
    $merged = array_merge($baseSet, (array) ($perAlgo[$algoVersion] ?? []));

    $builtIn = ['w1' => 0.65, 'w2' => 0.20, 'w3' => 0.10, 'w4' => 0.05];

    $weights = ['version' => (string) ($merged['version'] ?? 'rank-w-v1')];
    $total = 0.0;
    foreach ($builtIn as $name => $fallback) {
        $weights[$name] = max(0.0, (float) ($merged[$name] ?? $fallback));
        $total += $weights[$name];
    }

    // Rescale so the weights sum to 1; an all-zero set is left untouched.
    if ($total > 0.0) {
        foreach (array_keys($builtIn) as $name) {
            $weights[$name] /= $total;
        }
    }

    return $weights;
}
/**
 * Decode a pagination cursor into a non-negative offset.
 *
 * A cursor is URL-safe base64 (padding optional) of a JSON object with an "offset" entry.
 * The cursor is client-supplied, so anything that does not decode to a usable offset —
 * invalid base64, non-object JSON, a missing, negative, non-numeric or out-of-range value —
 * yields 0 rather than an error.
 *
 * @param string|null $cursor Cursor taken from a previous response, or null/'' for the first page.
 */
private function decodeCursorToOffset(?string $cursor): int
{
    if ($cursor === null || $cursor === '') {
        return 0;
    }

    $decoded = base64_decode(strtr($cursor, '-_', '+/'), true);
    if ($decoded === false) {
        return 0;
    }

    $payload = json_decode($decoded, true);
    if (! is_array($payload)) {
        return 0;
    }

    $offset = $payload['offset'] ?? 0;

    if (is_int($offset)) {
        return max(0, $offset);
    }

    // Digit-only strings are accepted for leniency; the cast saturates at PHP_INT_MAX.
    if (is_string($offset) && preg_match('/\A\d+\z/', $offset) === 1) {
        return (int) $offset;
    }

    // JSON numbers too large for an int arrive as floats; only cast values that fit,
    // because converting an out-of-range float to int is undefined.
    if (is_float($offset) && $offset >= 0.0 && $offset < (float) PHP_INT_MAX) {
        return (int) $offset;
    }

    // Arrays, booleans and other junk must not be coerced into a non-zero offset.
    return 0;
}
/**
 * Encode an offset as a pagination cursor.
 *
 * The cursor is the JSON object {"offset": n} in URL-safe base64 with the trailing "="
 * padding removed. Negative offsets are encoded as 0; an empty string is returned if the
 * JSON encoding fails.
 */
private function encodeOffsetToCursor(int $offset): string
{
    $json = json_encode(['offset' => max(0, $offset)]);
    if (! is_string($json)) {
        return '';
    }

    $urlSafe = str_replace(['+', '/'], ['-', '_'], base64_encode($json));

    return rtrim($urlSafe, '=');
}
/**
 * Read the recommendation items out of a cache row in a normalized shape.
 *
 * Returns an empty list when there is no row or its payload has no "items" array. Entries
 * that are not arrays or lack a positive artwork id are skipped; a missing score becomes
 * 0.0 and a missing source becomes "mixed".
 *
 * @return array<int, array{artwork_id:int,score:float,source:string}>
 */
private function extractCacheItems(?UserRecommendationCache $cache): array
{
    $payload = $cache === null ? [] : (array) ($cache->recommendations_json ?? []);

    $storedItems = $payload['items'] ?? null;
    if (! is_array($storedItems)) {
        return [];
    }

    $normalized = [];
    foreach ($storedItems as $entry) {
        $artworkId = is_array($entry) ? (int) ($entry['artwork_id'] ?? 0) : 0;
        if ($artworkId <= 0) {
            continue;
        }

        $normalized[] = [
            'artwork_id' => $artworkId,
            'score' => (float) ($entry['score'] ?? 0.0),
            'source' => (string) ($entry['source'] ?? 'mixed'),
        ];
    }

    return $normalized;
}
}