$domains */
    public function buildGroupKey(string $content, array $domains = []): string
    {
        // Signature part: the first 12 whitespace-separated tokens of the campaign text.
        $template = $this->campaignText($content);
        $tokens = preg_split('/\s+/u', $template, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $signature = implode(' ', array_slice($tokens, 0, 12));

        // Domain part: at most the first two distinct domains, in the order given.
        $domainPart = implode('|', array_slice(array_values(array_unique($domains)), 0, 2));

        return hash('sha256', $domainPart . '::' . $signature);
    }

    /**
     * Counts recent stored items whose campaign text is a near-duplicate of $content.
     *
     * Up to 80 of the most recent candidates (by id) of the same content type are
     * compared, excluding the item identified by the context's content id. A candidate
     * counts as a match when either:
     *  - its similar_text() percentage against the campaign text reaches the configured
     *    `content_moderation.duplicate_detection.near_duplicate_similarity` threshold
     *    (default 84), lowered by 4 points when the candidate belongs to the same artwork; or
     *  - it mentions the first given domain (case-insensitive) and the similarity of the
     *    two texts with `[link]` placeholders removed is at least 72.
     *
     * Returns 0 when the campaign text is empty or the content type is not recognised.
     *
     * @param array $context Reads the keys 'content_type', 'content_id' and 'artwork_id'.
     * @param array $domains Only the first element is used, and only if it is a non-empty string.
     */
    public function nearDuplicateCount(string $content, array $context = [], array $domains = []): int
    {
        $type = (string) ($context['content_type'] ?? '');
        $contentId = (int) ($context['content_id'] ?? 0);
        $artworkId = (int) ($context['artwork_id'] ?? 0);

        $signature = $this->campaignText($content);
        if ($signature === '') {
            return 0;
        }

        $candidates = match ($type) {
            ModerationContentType::ArtworkComment->value => ArtworkComment::query()
                ->where('id', '!=', $contentId)
                ->whereNull('deleted_at')
                ->latest('id')
                ->limit(80)
                ->get(['id', 'artwork_id', 'raw_content', 'content']),
            ModerationContentType::ArtworkDescription->value => Artwork::query()
                ->where('id', '!=', $contentId)
                ->whereNotNull('description')
                ->latest('id')
                ->limit(80)
                ->get(['id', 'description']),
            default => \collect(),
        };

        // Nothing to compare against: skip the config lookup entirely.
        if ($candidates->isEmpty()) {
            return 0;
        }

        // Loop-invariant values, computed once instead of per candidate.
        $threshold = (float) \app('config')
            ->get('content_moderation.duplicate_detection.near_duplicate_similarity', 84);
        $sameArtworkAllowance = 4;
        $linklessThreshold = 72;

        // Take the first domain regardless of array keys (the array may have been
        // filtered/deduplicated without reindexing). An empty string is rejected
        // because str_contains() treats an empty needle as always present, which
        // would apply the looser link-stripped threshold to every candidate.
        $firstDomain = array_values($domains)[0] ?? null;
        $domainNeedle = is_string($firstDomain) && $firstDomain !== ''
            ? mb_strtolower($firstDomain)
            : null;
        $linklessSignature = $domainNeedle !== null ? $this->stripLinks($signature) : '';

        $matches = 0;

        foreach ($candidates as $candidate) {
            // Comments carry the artwork they belong to; description candidates are
            // artworks themselves, so their own id is the artwork id.
            [$candidateText, $candidateArtworkId] = match ($type) {
                ModerationContentType::ArtworkComment->value => [
                    (string) ($candidate->raw_content ?: $candidate->content),
                    (int) ($candidate->artwork_id ?? 0),
                ],
                ModerationContentType::ArtworkDescription->value => [
                    (string) ($candidate->description ?? ''),
                    (int) ($candidate->id ?? 0),
                ],
                default => ['', 0],
            };

            if ($candidateText === '') {
                continue;
            }

            $candidateSignature = $this->campaignText($candidateText);
            similar_text($signature, $candidateSignature, $similarity);

            $sameArtworkPenalty = $artworkId > 0 && $candidateArtworkId === $artworkId
                ? $sameArtworkAllowance
                : 0;

            if ($similarity >= $threshold - $sameArtworkPenalty) {
                $matches++;

                continue;
            }

            if ($domainNeedle === null || !str_contains(mb_strtolower($candidateText), $domainNeedle)) {
                continue;
            }

            // Same domain promoted: compare again with link placeholders removed,
            // using the looser fixed threshold.
            similar_text($linklessSignature, $this->stripLinks($candidateSignature), $linklessSimilarity);

            if ($linklessSimilarity >= $linklessThreshold) {
                $matches++;
            }
        }

        return $matches;
    }

    /**
     * Removes every literal `[link]` placeholder from $text and trims the result.
     */
    private function stripLinks(string $text): string
    {
        return trim(str_replace('[link]', '', $text));
    }
}