168 lines
5.2 KiB
PHP
168 lines
5.2 KiB
PHP
<?php
|
||
|
||
namespace App\Services;
|
||
|
||
/**
 * Centralized mapping from legacy GIF smiley codes to Unicode emoji.
 *
 * Usage:
 *   $result = LegacySmileyMapper::convert($text);
 *   $map    = LegacySmileyMapper::getMap();
 */
class LegacySmileyMapper
{
    /**
     * The canonical smiley-code → emoji map.
     * Keys must be unique; variants are listed via aliases.
     *
     * @var array<string, string>
     */
    private static array $map = [
        // Core
        ':beer' => '🍺',
        ':clap' => '👏',
        ':coffee' => '☕',
        ':cry' => '😢',
        ':lol' => '😂',
        ':love' => '❤️',
        ':HB' => '🎂',
        ':wow' => '😮',
        // Extended legacy codes
        ':smile' => '😊',
        ':grin' => '😁',
        ':wink' => '😉',
        ':tongue' => '😛',
        ':cool' => '😎',
        ':angry' => '😠',
        ':sad' => '😞',
        ':laugh' => '😆',
        ':hug' => '🤗',
        ':thumb' => '👍',
        ':thumbs' => '👍',
        ':thumbsup' => '👍',
        ':fire' => '🔥',
        ':star' => '⭐',
        ':heart' => '❤️',
        ':broken' => '💔',
        ':music' => '🎵',
        ':note' => '🎶',
        ':art' => '🎨',
        ':camera' => '📷',
        ':gift' => '🎁',
        ':cake' => '🎂',
        ':wave' => '👋',
        ':ok' => '👌',
        ':pray' => '🙏',
        ':think' => '🤔',
        ':eyes' => '👀',
        ':rainbow' => '🌈',
        ':sun' => '☀️',
        ':moon' => '🌙',
        ':party' => '🎉',
        ':bomb' => '💣',
        ':skull' => '💀',
        ':alien' => '👽',
        ':robot' => '🤖',
        ':poop' => '💩',
        ':money' => '💰',
        ':bulb' => '💡',
        ':check' => '✅',
        ':x' => '❌',
        ':warning' => '⚠️',
        ':question' => '❓',
        ':exclamation' => '❗',
        ':100' => '💯',
    ];

    /**
     * Convert all legacy smiley codes in $text to Unicode emoji.
     *
     * A code is replaced only when it stands alone: it must be preceded by
     * whitespace or the start of a line, and followed by whitespace, the end
     * of a line, or one of the punctuation characters . , ! ? ;
     * Matching is case-sensitive.
     *
     * If the regex engine fails (for example because $text is not valid
     * UTF-8), the input is returned unchanged.
     *
     * @param string $text Text that may contain legacy smiley codes.
     *
     * @return string The text with every standalone code replaced by its emoji.
     */
    public static function convert(string $text): string
    {
        if ($text === '') {
            return $text;
        }

        // One pass over the text with every code as an alternative. At most
        // one code can match at a given position (the lookahead rejects a
        // shorter code that is merely a prefix of a longer one), so the result
        // is the same as replacing the codes one at a time.
        $converted = preg_replace_callback(
            self::codePattern(),
            static fn (array $match): string => self::$map[$match[0]],
            $text
        );

        // null signals a PCRE failure; keep the caller's text rather than
        // losing it.
        return $converted ?? $text;
    }

    /**
     * Returns all codes that are present in the given text (for reporting).
     *
     * Uses the same standalone-code rule as convert(). Each code is reported
     * at most once, in the order in which the codes appear in the map.
     *
     * @param string $text Text to scan.
     *
     * @return string[] List of map keys found in $text; empty when none match
     *                  or when the regex engine fails.
     */
    public static function detect(string $text): array
    {
        if (!preg_match_all(self::codePattern(), $text, $matches)) {
            return [];
        }

        // Flip to a lookup table so each map key is an O(1) membership test.
        $present = array_flip($matches[0]);

        return array_values(array_filter(
            array_keys(self::$map),
            static fn (string $code): bool => isset($present[$code])
        ));
    }

    /**
     * Collapse consecutive runs of the same emoji that exceed $maxRun repetitions.
     *
     * Transforms e.g. "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺" (8×) → "🍺 🍺 🍺 🍺 🍺 ×8"
     * so that spam/flood content is stored compactly and rendered readably.
     *
     * Both whitespace-separated ("🍺 🍺 🍺") and run-together ("🍺🍺🍺") forms
     * are collapsed; the collapsed output is always single-space separated.
     * An emoji "unit" is one code point from U+1F000–U+1FFFF, U+2600–U+27EF
     * or U+2B00–U+2BFF (the last block contains ⭐, which the map produces),
     * optionally followed by a variation selector. Other text is not touched.
     *
     * @param string $text   Text to compact.
     * @param int    $maxRun Maximum number of identical emoji to keep (default 5).
     *                       Values below 1 are treated as 1.
     *
     * @return string The compacted text, or $text unchanged if the regex engine fails.
     */
    public static function collapseFlood(string $text, int $maxRun = 5): string
    {
        if ($text === '') {
            return $text;
        }

        $limit = max(1, $maxRun);

        // Group 1 captures one emoji unit; the second group demands at least
        // $limit further copies of it (optionally separated by spaces/tabs),
        // i.e. a run strictly longer than $limit. The \1 backreference
        // compares the exact same code point sequence each time.
        $pattern = '/([\x{1F000}-\x{1FFFF}\x{2600}-\x{27EF}\x{2B00}-\x{2BFF}][\x{FE0E}\x{FE0F}]?)'
            . '(?:[ \t]*\1){' . $limit . ',}/u';

        $collapsed = preg_replace_callback(
            $pattern,
            static function (array $m) use ($limit): string {
                $unit = $m[1];
                // The match consists solely of copies of $unit and blanks, so
                // a byte-wise substring count equals the run length.
                $count = substr_count($m[0], $unit);

                return str_repeat($unit . ' ', $limit - 1) . $unit . ' ×' . $count;
            },
            $text
        );

        return $collapsed ?? $text;
    }

    /**
     * Get the full mapping array.
     *
     * @return array<string, string> Smiley code => emoji.
     */
    public static function getMap(): array
    {
        return self::$map;
    }

    /**
     * Build (once per request) the regex matching any mapped code in
     * standalone position; shared by convert() and detect().
     *
     * @return string A delimited PCRE pattern with the `u` and `m` modifiers.
     */
    private static function codePattern(): string
    {
        static $pattern = null;

        if ($pattern === null) {
            $alternatives = array_map(
                static fn (string $code): string => preg_quote($code, '/'),
                array_keys(self::$map)
            );

            $pattern = '/(?<=\s|^)(?:' . implode('|', $alternatives) . ')(?=\s|$|[.,!?;])/um';
        }

        return $pattern;
    }
}
|