Implement creator studio and upload updates
This commit is contained in:
106
app/Services/Moderation/DuplicateDetectionService.php
Normal file
106
app/Services/Moderation/DuplicateDetectionService.php
Normal file
@@ -0,0 +1,106 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services\Moderation;
|
||||
|
||||
use App\Enums\ModerationContentType;
|
||||
use App\Models\Artwork;
|
||||
use App\Models\ArtworkComment;
|
||||
|
||||
/**
 * Normalises user-submitted text and counts how many recent comments or
 * artwork descriptions look like near-duplicates of it.
 */
class DuplicateDetectionService
{
    /** Number of most recent rows (by id) loaded as comparison candidates. */
    private const CANDIDATE_LIMIT = 80;

    /** Number of leading normalised tokens that make up a group-key signature. */
    private const GROUP_KEY_TOKEN_LIMIT = 12;

    /** Number of distinct domains folded into a group key. */
    private const GROUP_KEY_DOMAIN_LIMIT = 2;

    /** Fallback similarity percentage used when the config key is not set. */
    private const DEFAULT_SIMILARITY_THRESHOLD = 84;

    /** Percentage points subtracted from the threshold for candidates on the same artwork. */
    private const SAME_ARTWORK_THRESHOLD_DISCOUNT = 4;

    /** Similarity percentage required once link placeholders are removed from both texts. */
    private const LINKLESS_SIMILARITY_THRESHOLD = 72;

    /**
     * Reduce free text to a normalised form suitable for comparison.
     *
     * The text is lower-cased, `http(s)://…` URLs and `www.…` hosts are replaced
     * with the placeholder `[link]`, every run of characters that is not a
     * letter, number, whitespace or square bracket becomes a single space, and
     * whitespace is finally collapsed and trimmed.
     *
     * @return string The normalised text; empty when nothing comparable remains.
     */
    public function campaignText(string $content): string
    {
        $text = mb_strtolower($content);
        $text = preg_replace('/https?:\/\/\S+/iu', ' [link] ', $text);
        $text = preg_replace('/www\.\S+/iu', ' [link] ', (string) $text);
        $text = preg_replace('/[^\p{L}\p{N}\s\[\]]+/u', ' ', (string) $text);
        $text = preg_replace('/\s+/u', ' ', trim((string) $text));

        // preg_replace() yields null on failure; the casts turn that into ''.
        return (string) $text;
    }

    /**
     * Build a hash that groups contents sharing the same opening words and domains.
     *
     * The hashed string is "<domains>::<signature>", where the signature is the
     * first 12 tokens of the normalised content and the domain part is the first
     * two distinct domains, in the order given, joined by "|". Domains are used
     * exactly as passed (no case folding or sorting).
     *
     * @param array<int, string> $domains
     *
     * @return string Lower-case hexadecimal SHA-256 digest.
     */
    public function buildGroupKey(string $content, array $domains = []): string
    {
        $template = $this->campaignText($content);
        $tokens = preg_split('/\s+/u', $template, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $signature = implode(' ', array_slice($tokens, 0, self::GROUP_KEY_TOKEN_LIMIT));
        $domainPart = implode(
            '|',
            array_slice(array_values(array_unique($domains)), 0, self::GROUP_KEY_DOMAIN_LIMIT),
        );

        return hash('sha256', $domainPart . '::' . $signature);
    }

    /**
     * Count recent contents of the same type that closely resemble $content.
     *
     * A candidate counts as a match when either
     *  - its normalised text reaches the configured similarity percentage
     *    (`content_moderation.duplicate_detection.near_duplicate_similarity`,
     *    default 84, lowered by 4 points for candidates on the same artwork), or
     *  - its raw text contains the first entry of $domains and, with `[link]`
     *    placeholders removed from both sides, the similarity is at least 72.
     *
     * Recognised $context keys: `content_type` (a ModerationContentType value),
     * `content_id` (row excluded from the candidates) and `artwork_id`.
     *
     * @param array<string, mixed> $context
     * @param array<int, string> $domains
     *
     * @return int Number of matching candidates; 0 when the content normalises
     *             to an empty string or the content type is not handled.
     */
    public function nearDuplicateCount(string $content, array $context = [], array $domains = []): int
    {
        $type = (string) ($context['content_type'] ?? '');
        $contentId = (int) ($context['content_id'] ?? 0);
        $artworkId = (int) ($context['artwork_id'] ?? 0);

        $signature = $this->campaignText($content);

        if ($signature === '') {
            return 0;
        }

        // Loop invariants: the lead domain and the link-free incoming signature.
        $topDomain = $this->topDomain($domains);
        $linklessSignature = $topDomain === null ? '' : $this->stripLinks($signature);

        // Resolved on first use so the container is untouched when there are no candidates.
        $threshold = null;
        $matches = 0;

        foreach ($this->recentCandidates($type, $contentId) as $candidate) {
            $candidateText = $this->candidateText($type, $candidate);

            if ($candidateText === '') {
                continue;
            }

            $candidateSignature = $this->campaignText($candidateText);
            similar_text($signature, $candidateSignature, $similarity);

            $threshold ??= $this->similarityThreshold();

            // Comments expose artwork_id; artwork rows are identified by their own id.
            $isSameArtwork = $artworkId > 0
                && (int) ($candidate->artwork_id ?? $candidate->id ?? 0) === $artworkId;
            $required = $isSameArtwork
                ? $threshold - self::SAME_ARTWORK_THRESHOLD_DISCOUNT
                : $threshold;

            if ($similarity >= $required) {
                $matches++;

                continue;
            }

            if ($topDomain === null || !str_contains(mb_strtolower($candidateText), $topDomain)) {
                continue;
            }

            similar_text($linklessSignature, $this->stripLinks($candidateSignature), $linklessSimilarity);

            if ($linklessSimilarity >= self::LINKLESS_SIMILARITY_THRESHOLD) {
                $matches++;
            }
        }

        return $matches;
    }

    /**
     * Load the most recent rows of the given content type, excluding $contentId.
     *
     * @return iterable<int, object> Empty for content types that are not handled.
     */
    private function recentCandidates(string $type, int $contentId): iterable
    {
        return match ($type) {
            ModerationContentType::ArtworkComment->value => ArtworkComment::query()
                ->where('id', '!=', $contentId)
                ->whereNull('deleted_at')
                ->latest('id')
                ->limit(self::CANDIDATE_LIMIT)
                ->get(['id', 'artwork_id', 'raw_content', 'content']),
            ModerationContentType::ArtworkDescription->value => Artwork::query()
                ->where('id', '!=', $contentId)
                ->whereNotNull('description')
                ->latest('id')
                ->limit(self::CANDIDATE_LIMIT)
                ->get(['id', 'description']),
            default => [],
        };
    }

    /**
     * Extract the comparable text from a candidate row of the given content type.
     */
    private function candidateText(string $type, object $candidate): string
    {
        return match ($type) {
            // Truthiness check on purpose: an empty raw_content falls back to content.
            ModerationContentType::ArtworkComment->value => (string) ($candidate->raw_content ?: $candidate->content),
            ModerationContentType::ArtworkDescription->value => (string) ($candidate->description ?? ''),
            default => '',
        };
    }

    /**
     * Read the near-duplicate similarity percentage from configuration.
     */
    private function similarityThreshold(): float
    {
        return (float) \app('config')->get(
            'content_moderation.duplicate_detection.near_duplicate_similarity',
            self::DEFAULT_SIMILARITY_THRESHOLD,
        );
    }

    /**
     * Return the first domain in lower case, or null when there is none.
     *
     * The first element is taken in iteration order so that arrays with
     * non-sequential keys still work, and an empty string is rejected because
     * str_contains() reports every text as containing ''.
     *
     * @param array<int, string> $domains
     */
    private function topDomain(array $domains): ?string
    {
        $first = array_values($domains)[0] ?? null;

        if (!is_string($first) || $first === '') {
            return null;
        }

        return mb_strtolower($first);
    }

    /**
     * Remove `[link]` placeholders from normalised text.
     *
     * Whitespace is collapsed afterwards so that a removed placeholder does not
     * leave a double space behind and distort the similarity score.
     */
    private function stripLinks(string $text): string
    {
        $withoutLinks = str_replace('[link]', '', $text);

        return trim((string) preg_replace('/\s+/u', ' ', $withoutLinks));
    }
}
|
||||
Reference in New Issue
Block a user