250 lines
9.0 KiB
PHP
250 lines
9.0 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Services\Studio;
|
|
|
|
use App\Models\Category;
|
|
use App\Models\ContentType;
|
|
use Illuminate\Support\Collection;
|
|
use Illuminate\Support\Str;
|
|
|
|
/**
 * Suggests a content type and a category for a piece of content by scoring
 * free-text signals against content type / category slugs and names, plus a
 * small built-in keyword list per content type.
 */
final class StudioAiCategoryMapper
{
    /** Words shorter than this are ignored for per-word (partial) keyword matching. */
    private const MIN_TOKEN_LENGTH = 3;

    /** Confidence reported for a pick that is not present in the scored rows (fallback pick). */
    private const FALLBACK_CONFIDENCE = 0.55;

    /** Maximum number of runner-up categories returned as alternatives. */
    private const MAX_ALTERNATIVES = 3;

    /**
     * Pick the best-scoring content type and category for the given signals.
     *
     * When nothing scores above zero, the content type / category of
     * $currentCategory (if given) is used as a fallback.
     *
     * @param array<int, string> $signals Free-text inputs; non-string and blank entries are ignored.
     * @return array{content_type: array<string, mixed>|null, category: array<string, mixed>|null}
     */
    public function map(array $signals, ?Category $currentCategory = null): array
    {
        $words = $this->extractWords($signals);

        // Phrase matching runs against every normalized word in its original order, so
        // short keywords such as "ui" and multi-word phrases containing short or repeated
        // words can still be found. Only per-word partial matching uses the filtered tokens.
        $haystack = ' ' . implode(' ', $words) . ' ';
        $tokenVariants = $this->tokenVariants($this->tokenize($words));

        $contentTypes = ContentType::query()->with(['rootCategories.children'])->get();

        $contentTypeScores = $contentTypes
            ->map(fn (ContentType $contentType): array => $this->scoreContentType($contentType, $tokenVariants, $haystack))
            ->filter(fn (array $row): bool => $row['score'] > 0)
            ->sortByDesc('score')
            ->values();

        $selectedContentType = $contentTypeScores->first()['model'] ?? null;
        if (! $selectedContentType) {
            $selectedContentType = $currentCategory?->contentType;
        }

        // Ids are cast explicitly: under strict_types a non-int id would otherwise
        // raise a TypeError when passed to the int-typed helpers below.
        $selectedContentTypeId = $selectedContentType ? (int) $selectedContentType->id : null;

        $categoryScores = $this->scoreCategories($contentTypes, $tokenVariants, $haystack, $selectedContentTypeId);

        $selectedCategory = $categoryScores->first()['model'] ?? null;
        if (! $selectedCategory) {
            $selectedCategory = $currentCategory;
        }

        $contentTypePayload = null;
        if ($selectedContentType) {
            $contentTypePayload = $this->serializeContentType(
                $selectedContentType,
                $this->confidenceForModel($contentTypeScores, (int) $selectedContentTypeId),
            );
        }

        $categoryPayload = null;
        if ($selectedCategory) {
            $selectedCategoryId = (int) $selectedCategory->id;

            // Runner-up categories, best first, excluding the selected one.
            $alternatives = $categoryScores
                ->reject(fn (array $row): bool => (int) $row['model']->id === $selectedCategoryId)
                ->take(self::MAX_ALTERNATIVES)
                ->map(fn (array $row): array => $this->serializeCategory($row['model'], $row['confidence']))
                ->all();

            $categoryPayload = $this->serializeCategory(
                $selectedCategory,
                $this->confidenceForModel($categoryScores, $selectedCategoryId),
                $alternatives,
            );
        }

        return [
            'content_type' => $contentTypePayload,
            'category' => $categoryPayload,
        ];
    }

    /**
     * Score one content type using its slug, its name and its built-in keywords.
     *
     * @param array<int|string, true> $tokenVariants Lookup set produced by tokenVariants().
     * @return array<string, mixed> Row with keys model, score and confidence.
     */
    private function scoreContentType(ContentType $contentType, array $tokenVariants, string $haystack): array
    {
        $keywords = array_merge(
            [$contentType->slug, $contentType->name],
            $this->keywordsForContentType((string) $contentType->slug),
        );
        $score = $this->keywordScore($keywords, $tokenVariants, $haystack);

        return [
            'model' => $contentType,
            'score' => $score,
            'confidence' => $this->normalizeConfidence($score),
        ];
    }

    /**
     * Score every root category and its direct children, optionally restricted
     * to a single content type. Only rows with a positive score are returned,
     * ordered by score, highest first.
     *
     * @param array<int|string, true> $tokenVariants Lookup set produced by tokenVariants().
     * @return Collection<int, array{model: Category, score: int, confidence: float}>
     */
    private function scoreCategories(Collection $contentTypes, array $tokenVariants, string $haystack, ?int $contentTypeId = null): Collection
    {
        return $contentTypes
            ->filter(fn (ContentType $contentType): bool => $contentTypeId === null || (int) $contentType->id === $contentTypeId)
            ->flatMap(function (ContentType $contentType) use ($tokenVariants, $haystack): array {
                $rows = [];

                foreach ($contentType->rootCategories as $rootCategory) {
                    $rows[] = $this->scoreCategory($rootCategory, $tokenVariants, $haystack);

                    foreach ($rootCategory->children as $childCategory) {
                        $rows[] = $this->scoreCategory($childCategory, $tokenVariants, $haystack);
                    }
                }

                return $rows;
            })
            ->filter(fn (array $row): bool => $row['score'] > 0)
            ->sortByDesc('score')
            ->values();
    }

    /**
     * Score one category using its own slug/name and those of its parent.
     *
     * NOTE(review): `parent` is not part of the eager-load list used in map(), so
     * this presumably lazy-loads it per child category - confirm and eager load if so.
     *
     * @param array<int|string, true> $tokenVariants Lookup set produced by tokenVariants().
     * @return array{model: Category, score: int, confidence: float}
     */
    private function scoreCategory(Category $category, array $tokenVariants, string $haystack): array
    {
        $keywords = array_filter([
            $category->slug,
            $category->name,
            $category->parent?->slug,
            $category->parent?->name,
        ]);
        $score = $this->keywordScore($keywords, $tokenVariants, $haystack);

        return [
            'model' => $category,
            'score' => $score,
            'confidence' => $this->normalizeConfidence($score),
        ];
    }

    /**
     * Lower-case a string, replace every run of characters other than a-z, 0-9,
     * whitespace and "-" with a space, turn hyphens into spaces and collapse whitespace.
     *
     * Non-ASCII letters are therefore dropped by the current pattern.
     */
    private function normalizeText(string $value): string
    {
        return Str::of($value)
            ->lower()
            ->replaceMatches('/[^a-z0-9\s\-]+/', ' ')
            ->replace('-', ' ')
            ->squish()
            ->value();
    }

    /**
     * Split the signals into normalized words, keeping order, duplicates and
     * short words. Non-string and blank signals are skipped.
     *
     * @param array<int, string> $signals
     * @return array<int, string>
     */
    private function extractWords(array $signals): array
    {
        $words = [];

        foreach ($signals as $signal) {
            if (! is_string($signal)) {
                continue;
            }

            $normalized = $this->normalizeText($signal);
            if ($normalized === '') {
                continue;
            }

            foreach (explode(' ', $normalized) as $word) {
                if ($word !== '') {
                    $words[] = $word;
                }
            }
        }

        return $words;
    }

    /**
     * Reduce normalized words to the unique tokens used for partial matching:
     * words of at least MIN_TOKEN_LENGTH characters, first occurrence order.
     *
     * @param array<int, string> $words
     * @return array<int, string>
     */
    private function tokenize(array $words): array
    {
        $longEnough = array_filter(
            $words,
            static fn (string $word): bool => strlen($word) >= self::MIN_TOKEN_LENGTH,
        );

        return array_values(array_unique($longEnough));
    }

    /**
     * Build a lookup set holding each token plus its naive singular and plural form.
     * Computed once per map() call instead of once per scored keyword list.
     *
     * @param array<int, string> $tokens
     * @return array<int|string, true>
     */
    private function tokenVariants(array $tokens): array
    {
        $variants = [];

        foreach ($tokens as $token) {
            foreach ([$token, $this->singularize($token), $this->pluralize($token)] as $variant) {
                if ($variant !== '') {
                    $variants[$variant] = true;
                }
            }
        }

        return $variants;
    }

    /**
     * Score a keyword list against the signals.
     *
     * A keyword found as a whole phrase in the haystack scores 4 when it has
     * several words and 3 when it is a single word. Otherwise each of its words
     * that matches a token variant scores 1.
     *
     * @param array<int, string> $keywords
     * @param array<int|string, true> $tokenVariants Lookup set produced by tokenVariants().
     */
    private function keywordScore(array $keywords, array $tokenVariants, string $haystack): int
    {
        $score = 0;

        foreach ($keywords as $keyword) {
            $normalized = $this->normalizeText((string) $keyword);
            if ($normalized === '') {
                continue;
            }

            // The haystack is space-padded, so this only matches whole words / phrases.
            if (str_contains($haystack, ' ' . $normalized . ' ')) {
                $score += str_contains($normalized, ' ') ? 4 : 3;

                continue;
            }

            foreach (explode(' ', $normalized) as $part) {
                if (isset($tokenVariants[$part])) {
                    $score += 1;
                }
            }
        }

        return $score;
    }

    /**
     * Built-in keywords for a content type slug; unknown slugs get the generic artwork list.
     *
     * @return array<int, string>
     */
    private function keywordsForContentType(string $slug): array
    {
        return match ($slug) {
            'skins' => ['skin', 'winamp', 'theme', 'interface skin'],
            'wallpapers' => ['wallpaper', 'background', 'desktop', 'lockscreen'],
            'photography' => ['photo', 'photograph', 'photography', 'portrait', 'macro', 'nature', 'camera'],
            'members' => ['profile', 'avatar', 'member'],
            default => ['artwork', 'illustration', 'digital art', 'painting', 'concept art', 'screenshot', 'ui', 'game'],
        };
    }

    /**
     * Convert a raw score to a confidence: 0.0 for non-positive scores,
     * otherwise 0.45 + 0.08 per point, rounded to two decimals and capped at 0.99.
     */
    private function normalizeConfidence(int $score): float
    {
        if ($score <= 0) {
            return 0.0;
        }

        return min(0.99, round(0.45 + ($score * 0.08), 2));
    }

    /**
     * Naive singular form: drop exactly one trailing "s".
     *
     * Words ending in "ss" (e.g. "glass") are returned unchanged. rtrim() is
     * deliberately not used because its second argument is a character mask and
     * would strip every trailing "s".
     */
    private function singularize(string $value): string
    {
        if (! str_ends_with($value, 's') || str_ends_with($value, 'ss')) {
            return $value;
        }

        return substr($value, 0, -1);
    }

    /**
     * Naive plural form: append "s" unless the word already ends in "s".
     */
    private function pluralize(string $value): string
    {
        return str_ends_with($value, 's') ? $value : $value . 's';
    }

    /**
     * Confidence of the scored row belonging to the given model id, or
     * FALLBACK_CONFIDENCE when the model is not among the scored rows.
     */
    private function confidenceForModel(Collection $scores, int $modelId): float
    {
        $row = $scores->first(fn (array $item): bool => (int) $item['model']->id === $modelId);

        return (float) ($row['confidence'] ?? self::FALLBACK_CONFIDENCE);
    }

    /**
     * @return array<string, mixed>
     */
    private function serializeContentType(ContentType $contentType, float $confidence): array
    {
        return [
            'id' => (int) $contentType->id,
            'value' => (string) $contentType->slug,
            'label' => (string) $contentType->name,
            'confidence' => $confidence,
        ];
    }

    /**
     * Serialize a category; root_category_id is the parent's id when the
     * category has a parent, otherwise its own id.
     *
     * @param array<int, array<string, mixed>> $alternatives
     * @return array<string, mixed>
     */
    private function serializeCategory(Category $category, float $confidence, array $alternatives = []): array
    {
        $rootCategory = $category->parent ?: $category;

        return [
            'id' => (int) $category->id,
            'value' => (string) $category->slug,
            'label' => (string) $category->name,
            'confidence' => $confidence,
            'content_type_id' => (int) $category->content_type_id,
            'root_category_id' => (int) $rootCategory->id,
            'sub_category_id' => $category->parent_id ? (int) $category->id : null,
            'alternatives' => array_values($alternatives),
        ];
    }
}