optimizations
This commit is contained in:
250
app/Services/Studio/StudioAiCategoryMapper.php
Normal file
250
app/Services/Studio/StudioAiCategoryMapper.php
Normal file
@@ -0,0 +1,250 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Services\Studio;
|
||||
|
||||
use App\Models\Category;
|
||||
use App\Models\ContentType;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
final class StudioAiCategoryMapper
|
||||
{
|
||||
/**
 * Suggest a content type and a category for a set of free-text signals.
 *
 * All content types are loaded together with their root categories and those
 * categories' children, then scored against the tokenized signals. The content
 * type with the highest positive score is selected; when none scores, the
 * content type of $currentCategory is used. Categories are scored within the
 * selected content type (or across every content type when none is selected),
 * and $currentCategory is the fallback when no category scores. Up to three
 * other scoring categories are returned as alternatives.
 *
 * @param array<int, string> $signals         Free-text hints to match against.
 * @param Category|null      $currentCategory Existing category used as a fallback.
 * @return array{content_type: array<string, mixed>|null, category: array<string, mixed>|null}
 */
public function map(array $signals, ?Category $currentCategory = null): array
{
    $tokens = $this->tokenize($signals);
    // Space-padded so whole words/phrases can be matched with a plain substring search.
    $haystack = ' ' . implode(' ', $tokens) . ' ';

    $contentTypes = ContentType::query()->with(['rootCategories.children'])->get();

    $contentTypeScores = $contentTypes
        ->map(fn (ContentType $contentType): array => $this->scoreContentType($contentType, $tokens, $haystack))
        ->filter(fn (array $row): bool => $row['score'] > 0)
        ->sortByDesc('score')
        ->values();

    // Best-scoring content type, otherwise whatever the current category belongs to.
    $selectedContentType = $contentTypeScores->first()['model'] ?? $currentCategory?->contentType;

    // Cast ids once: with strict_types a non-int key would raise a TypeError when
    // handed to the int-typed helpers, and the rest of this class casts ids too.
    $selectedContentTypeId = $selectedContentType !== null ? (int) $selectedContentType->id : null;

    $categoryScores = $this->scoreCategories($contentTypes, $tokens, $haystack, $selectedContentTypeId);

    // NOTE(review): the fallback category is not checked against the selected
    // content type — confirm callers tolerate a category from another content type.
    $selectedCategory = $categoryScores->first()['model'] ?? $currentCategory;

    $contentTypePayload = null;
    if ($selectedContentType !== null) {
        $contentTypePayload = $this->serializeContentType(
            $selectedContentType,
            $this->confidenceForModel($contentTypeScores, (int) $selectedContentTypeId),
        );
    }

    $categoryPayload = null;
    if ($selectedCategory !== null) {
        $selectedCategoryId = (int) $selectedCategory->id;

        // Next best candidates, excluding the one that was picked.
        $alternatives = $categoryScores
            ->reject(fn (array $row): bool => (int) $row['model']->id === $selectedCategoryId)
            ->take(3)
            ->map(fn (array $row): array => $this->serializeCategory($row['model'], $row['confidence']))
            ->all();

        $categoryPayload = $this->serializeCategory(
            $selectedCategory,
            $this->confidenceForModel($categoryScores, $selectedCategoryId),
            $alternatives,
        );
    }

    return [
        'content_type' => $contentTypePayload,
        'category' => $categoryPayload,
    ];
}
|
||||
|
||||
/**
 * Build the score row for a single content type.
 *
 * The keyword list is the content type's slug and name followed by the
 * slug-specific keywords from keywordsForContentType().
 *
 * @param array<int, string> $tokens
 * @return array<string, mixed> Row with the keys 'model', 'score' and 'confidence'.
 */
private function scoreContentType(ContentType $contentType, array $tokens, string $haystack): array
{
    $candidateKeywords = [
        $contentType->slug,
        $contentType->name,
        ...$this->keywordsForContentType($contentType->slug),
    ];

    $points = $this->keywordScore($candidateKeywords, $tokens, $haystack);
    $confidence = $this->normalizeConfidence($points);

    return [
        'model' => $contentType,
        'score' => $points,
        'confidence' => $confidence,
    ];
}
|
||||
|
||||
/**
 * Score every root category and child category of the given content types.
 *
 * When $contentTypeId is given, only that content type's categories are
 * considered. Each category is matched on its own slug and name plus its
 * parent's slug and name. Rows with a score of zero are dropped and the rest
 * are ordered by descending score.
 *
 * @param array<int, string> $tokens
 * @return Collection<int, array{model: Category, score: int, confidence: float}>
 */
private function scoreCategories(Collection $contentTypes, array $tokens, string $haystack, ?int $contentTypeId = null): Collection
{
    // Turns one category into its score row.
    // NOTE(review): reading ->parent may lazy-load per category if that
    // relation is not already loaded on the model — confirm.
    $toRow = function (Category $category) use ($tokens, $haystack): array {
        $keywords = array_filter([
            $category->slug,
            $category->name,
            $category->parent?->slug,
            $category->parent?->name,
        ]);

        $points = $this->keywordScore($keywords, $tokens, $haystack);

        return [
            'model' => $category,
            'score' => $points,
            'confidence' => $this->normalizeConfidence($points),
        ];
    };

    $belongsToRequestedType = fn (ContentType $contentType): bool => $contentTypeId === null
        || (int) $contentType->id === (int) $contentTypeId;

    $rows = $contentTypes
        ->filter($belongsToRequestedType)
        ->flatMap(function (ContentType $contentType) use ($toRow): array {
            // Flatten the two-level tree: each root followed by its children.
            $flattened = [];

            foreach ($contentType->rootCategories as $root) {
                $flattened[] = $root;

                foreach ($root->children as $child) {
                    $flattened[] = $child;
                }
            }

            return array_map($toRow, $flattened);
        });

    return $rows
        ->filter(fn (array $row): bool => $row['score'] > 0)
        ->sortByDesc('score')
        ->values();
}
|
||||
|
||||
/**
 * Break the raw signals into a de-duplicated list of lower-case word tokens.
 *
 * Non-string and blank signals are skipped. Characters other than a-z, 0-9,
 * whitespace and hyphens become spaces, hyphens become spaces too, and words
 * shorter than three bytes are discarded.
 *
 * @param array<int, string> $signals
 * @return array<int, string>
 */
private function tokenize(array $signals): array
{
    $words = [];

    foreach ($signals as $signal) {
        if (! is_string($signal) || trim($signal) === '') {
            continue;
        }

        $normalized = Str::of($signal)
            ->lower()
            ->replaceMatches('/[^a-z0-9\s\-]+/', ' ')
            ->replace('-', ' ')
            ->squish()
            ->value();

        if ($normalized === '') {
            continue;
        }

        foreach (explode(' ', $normalized) as $word) {
            // Very short words carry too little meaning to be useful tokens.
            if (strlen($word) >= 3) {
                $words[] = $word;
            }
        }
    }

    // De-duplication is left to the collection so its comparison rules still apply.
    return Collection::make($words)
        ->unique()
        ->values()
        ->all();
}
|
||||
|
||||
/**
 * Score how well a list of keywords matches the tokenized signals.
 *
 * Each keyword is normalized the same way tokens are (lower-cased, anything
 * other than a-z, 0-9 and whitespace turned into spaces). A keyword found as a
 * whole phrase in $haystack adds 4 points when it has several words and 3 when
 * it is a single word. Otherwise every word of the keyword that equals a token,
 * or a token's singular/plural variant, adds 1 point.
 *
 * @param array<int, string> $keywords
 * @param array<int, string> $tokens
 */
private function keywordScore(array $keywords, array $tokens, string $haystack): int
{
    // Lookup set of every token together with its singular and plural form.
    $knownForms = [];

    foreach ($tokens as $token) {
        $forms = array_unique([$token, $this->singularize($token), $this->pluralize($token)]);

        foreach ($forms as $form) {
            if ($form !== '') {
                $knownForms[$form] = true;
            }
        }
    }

    $total = 0;

    foreach ($keywords as $keyword) {
        $phrase = Str::of((string) $keyword)
            ->lower()
            ->replaceMatches('/[^a-z0-9\s\-]+/', ' ')
            ->replace('-', ' ')
            ->squish()
            ->value();

        if ($phrase === '') {
            continue;
        }

        // Whole-phrase hit: the surrounding spaces keep it on word boundaries.
        if (str_contains($haystack, ' ' . $phrase . ' ')) {
            if (str_contains($phrase, ' ')) {
                $total += 4;
            } else {
                $total += 3;
            }

            continue;
        }

        // No phrase hit: fall back to crediting individual words.
        foreach (explode(' ', $phrase) as $word) {
            if ($word !== '' && isset($knownForms[$word])) {
                $total++;
            }
        }
    }

    return $total;
}
|
||||
|
||||
/**
 * Extra matching keywords for a content type, looked up by its slug.
 *
 * Slugs without a dedicated entry get the generic artwork keyword list.
 *
 * @return array<int, string>
 */
private function keywordsForContentType(string $slug): array
{
    $generic = ['artwork', 'illustration', 'digital art', 'painting', 'concept art', 'screenshot', 'ui', 'game'];

    $bySlug = [
        'skins' => ['skin', 'winamp', 'theme', 'interface skin'],
        'wallpapers' => ['wallpaper', 'background', 'desktop', 'lockscreen'],
        'photography' => ['photo', 'photograph', 'photography', 'portrait', 'macro', 'nature', 'camera'],
        'members' => ['profile', 'avatar', 'member'],
    ];

    return $bySlug[$slug] ?? $generic;
}
|
||||
|
||||
/**
 * Convert a raw keyword score into a confidence value.
 *
 * Non-positive scores give 0.0. Positive scores give 0.45 plus 0.08 per point,
 * rounded to two decimals and capped at 0.99.
 */
private function normalizeConfidence(int $score): float
{
    if ($score < 1) {
        return 0.0;
    }

    $scaled = round(0.45 + ($score * 0.08), 2);

    return $scaled > 0.99 ? 0.99 : $scaled;
}
|
||||
|
||||
/**
 * Naive singular form of a word: drop one trailing "s".
 *
 * Words that do not end in "s" are returned unchanged, and so are words ending
 * in "ss" ("glass", "css"), which are not treated as plurals.
 */
private function singularize(string $value): string
{
    if (! str_ends_with($value, 's') || str_ends_with($value, 'ss')) {
        return $value;
    }

    // substr() removes exactly one character; rtrim($value, 's') would strip
    // every trailing "s".
    return substr($value, 0, -1);
}
|
||||
|
||||
/**
 * Naive plural form of a word: append "s" unless the word already ends in one.
 */
private function pluralize(string $value): string
{
    if (str_ends_with($value, 's')) {
        return $value;
    }

    return $value . 's';
}
|
||||
|
||||
/**
 * Look up the confidence recorded for a model in a collection of score rows.
 *
 * Returns 0.55 when no row belongs to the given model id, or when the matching
 * row carries no confidence value.
 */
private function confidenceForModel(Collection $scores, int $modelId): float
{
    $belongsToModel = static fn (array $entry): bool => (int) $entry['model']->id === $modelId;

    $match = $scores->first($belongsToModel);

    if ($match === null) {
        return 0.55;
    }

    return (float) ($match['confidence'] ?? 0.55);
}
|
||||
|
||||
/**
 * Convert a content type and its confidence into the array payload returned by map().
 *
 * @return array<string, mixed> Keys: id, value (slug), label (name), confidence.
 */
private function serializeContentType(ContentType $contentType, float $confidence): array
{
    $id = (int) $contentType->id;
    $slug = (string) $contentType->slug;
    $name = (string) $contentType->name;

    return [
        'id' => $id,
        'value' => $slug,
        'label' => $name,
        'confidence' => $confidence,
    ];
}
|
||||
|
||||
/**
 * Convert a category, its confidence and optional alternatives into an array payload.
 *
 * The root category is the category's parent when it has one, otherwise the
 * category itself. 'sub_category_id' is only filled in when the category has
 * a parent_id.
 *
 * @param array<int, array<string, mixed>> $alternatives Already-serialized alternative categories.
 * @return array<string, mixed>
 */
private function serializeCategory(Category $category, float $confidence, array $alternatives = []): array
{
    $root = $category->parent ?: $category;

    $subCategoryId = null;
    if ($category->parent_id) {
        $subCategoryId = (int) $category->id;
    }

    $payload = [
        'id' => (int) $category->id,
        'value' => (string) $category->slug,
        'label' => (string) $category->name,
        'confidence' => $confidence,
        'content_type_id' => (int) $category->content_type_id,
        'root_category_id' => (int) $root->id,
        'sub_category_id' => $subCategoryId,
    ];

    // Re-index so the alternatives always serialize as a plain list.
    $payload['alternatives'] = array_values($alternatives);

    return $payload;
}
|
||||
}
|
||||
Reference in New Issue
Block a user