89 lines
3.7 KiB
PHP
89 lines
3.7 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Moderation;
|
|
|
|
use App\Data\Moderation\ModerationResultData;
|
|
use App\Models\ContentModerationCluster;
|
|
use App\Models\ContentModerationFinding;
|
|
|
|
class ModerationClusterService
{
    /**
     * Derive a campaign key, a capped cluster score and a reason label for a moderation result.
     *
     * The material hashed into the campaign key is chosen in priority order:
     *  1. matched domains and matched keywords together ("domain_keyword_cta"),
     *  2. matched domains plus the (normalized) content hash ("domain_fingerprint"),
     *  3. the suggestion's campaign tags ("suggested_cluster"),
     *  4. the result's group key, falling back to its content hashes ("normalized_content").
     *
     * Only the first three domains / keywords feed the key. The score is the result's own
     * score plus 8 per matched domain and 4 per matched keyword, capped at 100.
     * $content and $context are part of the signature but are not read by this method.
     *
     * @param array<string, mixed> $context
     * @param array<string, mixed> $suggestion
     * @return array{campaign_key:string,cluster_score:int,cluster_reason:string}
     */
    public function classify(string $content, ModerationResultData $result, array $context = [], array $suggestion = []): array
    {
        // Drop falsy entries and reindex so slicing below works on clean lists.
        $domains = array_values(array_filter($result->matchedDomains));
        $keywords = array_values(array_filter($result->matchedKeywords));

        $hasDomains = $domains !== [];
        $hasKeywords = $keywords !== [];
        $leadingDomains = implode(',', array_slice($domains, 0, 3));

        // Each arm yields [reason label, string that gets hashed into the key].
        [$reason, $fingerprint] = match (true) {
            $hasDomains && $hasKeywords => [
                'domain_keyword_cta',
                implode('|', [$leadingDomains, implode(',', array_slice($keywords, 0, 3))]),
            ],
            $hasDomains => [
                'domain_fingerprint',
                $leadingDomains . '|' . ($result->contentHashNormalized ?? $result->contentHash),
            ],
            ! empty($suggestion['campaign_tags']) => [
                'suggested_cluster',
                implode('|', (array) $suggestion['campaign_tags']),
            ],
            default => [
                'normalized_content',
                (string) ($result->groupKey ?? $result->contentHashNormalized ?? $result->contentHash),
            ],
        };

        $domainBonus = count($domains) * 8;
        $keywordBonus = count($keywords) * 4;

        return [
            'campaign_key' => 'campaign:' . sha1($fingerprint),
            'cluster_score' => min(100, $result->score + $domainBonus + $keywordBonus),
            'cluster_reason' => $reason,
        ];
    }

    /**
     * Refresh the cluster row for the finding's campaign and re-prioritise its findings.
     *
     * Does nothing when the finding has no campaign key. Otherwise it loads every finding
     * sharing that key, upserts the matching ContentModerationCluster with aggregate
     * counters and a short summary (at most 8 domains and 8 keywords), and, when the
     * campaign holds more than one finding, bulk-updates their priority score.
     */
    public function syncFinding(ContentModerationFinding $finding): void
    {
        $campaignKey = $finding->campaign_key;

        if (! $campaignKey) {
            return;
        }

        $query = ContentModerationFinding::query()->where('campaign_key', $campaignKey);
        $findings = $query->get([
            'id',
            'user_id',
            'matched_domains_json',
            'matched_keywords_json',
            'review_bucket',
            'cluster_score',
            'created_at',
        ]);

        // Flatten one array-like column across all findings into distinct, truthy values.
        $distinctFrom = static fn (string $column) => $findings
            ->flatMap(static fn (ContentModerationFinding $item): array => (array) $item->{$column})
            ->filter()
            ->unique();

        $domains = $distinctFrom('matched_domains_json')->values();
        $keywords = $distinctFrom('matched_keywords_json')->take(8)->values();

        $clusterSize = $findings->count();
        $distinctUsers = $findings->pluck('user_id')->filter()->unique()->count();

        $attributes = [
            'cluster_reason' => $finding->cluster_reason,
            'review_bucket' => $finding->review_bucket,
            // Use ->value when the attribute exposes one, otherwise fall back to a string cast.
            'escalation_status' => $finding->escalation_status?->value ?? (string) $finding->escalation_status,
            'cluster_score' => (int) ($findings->max('cluster_score') ?? $finding->cluster_score ?? 0),
            'findings_count' => $clusterSize,
            'unique_users_count' => $distinctUsers,
            // Counts every distinct domain; the summary below only keeps the first 8.
            'unique_domains_count' => $domains->count(),
            'latest_finding_at' => $findings->max('created_at') ?: now(),
            'summary_json' => [
                'domains' => $domains->take(8)->all(),
                'keywords' => $keywords->all(),
            ],
        ];

        ContentModerationCluster::query()->updateOrCreate(['campaign_key' => $campaignKey], $attributes);

        if ($clusterSize <= 1) {
            return;
        }

        // Boost grows by 3 per additional finding, capped at 25.
        // NOTE(review): this writes the SAME value (this finding's priority + boost) to every
        // finding in the campaign, replacing their individual priorities - confirm intended.
        $boost = min(25, ($clusterSize - 1) * 3);
        $query->update(['priority_score' => $finding->priority_score + $boost]);
    }
}