175 lines
4.6 KiB
PHP
175 lines
4.6 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Services;
|
|
|
|
use DOMDocument;
|
|
use DOMElement;
|
|
use DOMNode;
|
|
|
|
/**
 * Sanitizes homepage announcement content before it is stored or rendered.
 *
 * HTML is reduced to a small allow-list of formatting tags; links are restricted
 * to same-site paths, in-page fragments and https:// URLs. Custom URLs are
 * restricted to same-site paths and https:// URLs.
 */
class HomepageAnnouncementSanitizer
{
    /** Elements kept in the output. Any other element is unwrapped: the tag is dropped, its (cleaned) children stay. */
    private const ALLOWED_TAGS = [
        'p', 'br', 'strong', 'b', 'em', 'i', 'a', 'ul', 'ol', 'li', 'h2', 'h3', 'blockquote',
    ];

    /** Attributes kept per tag; tags not listed here keep no attributes at all. */
    private const ALLOWED_ATTRS = [
        'a' => ['href', 'title', 'target', 'rel'],
    ];

    /** Elements removed together with everything inside them. */
    private const REMOVED_TAGS = ['script', 'style', 'iframe'];

    /** mbstring conversion map covering every code point from U+0080 upwards. */
    private const NON_ASCII_CONVMAP = [0x80, 0x10FFFF, 0, 0xFFFFFF];

    /**
     * Reduces an HTML fragment to the allow-listed tags and attributes.
     *
     * @param string|null $html Raw HTML fragment (UTF-8).
     *
     * @return string Sanitized fragment, trimmed; '' for null/blank input or when parsing fails.
     */
    public function sanitizeHtml(?string $html): string
    {
        if ($html === null || trim($html) === '') {
            return '';
        }

        // Non-ASCII characters are turned into numeric entities so the parser
        // cannot misinterpret the input encoding.
        $encodedHtml = mb_encode_numericentity($html, self::NON_ASCII_CONVMAP, 'UTF-8');

        $document = new DOMDocument('1.0', 'UTF-8');

        // The libxml error mode is process-global: remember it and put it back
        // so this call does not silence libxml errors for unrelated code.
        $previousErrorMode = libxml_use_internal_errors(true);
        try {
            $loaded = $document->loadHTML(
                '<?xml encoding="UTF-8"><html><body>' . $encodedHtml . '</body></html>',
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        if ($loaded === false) {
            return '';
        }

        $body = $document->getElementsByTagName('body')->item(0);
        if (! $body instanceof DOMNode) {
            return '';
        }

        $this->cleanNode($body);

        $innerHtml = '';
        foreach ($body->childNodes as $child) {
            $fragment = $document->saveHTML($child);
            if ($fragment !== false) {
                $innerHtml .= $fragment;
            }
        }

        return trim($this->decodeNonAsciiEntities($innerHtml));
    }

    /**
     * Returns the trimmed URL when it is acceptable, otherwise null.
     *
     * @param string|null $url Candidate URL.
     *
     * @return string|null Trimmed URL, or null when it is empty or rejected by isSafeCustomUrl().
     */
    public function sanitizeCustomUrl(?string $url): ?string
    {
        $url = trim((string) $url);

        if ($url === '' || ! $this->isSafeCustomUrl($url)) {
            return null;
        }

        return $url;
    }

    /**
     * Tells whether a custom URL is a same-site path or an https:// URL.
     *
     * An empty (or null) URL counts as safe. Protocol-relative URLs ("//host",
     * "/\host") and URLs containing control characters are rejected.
     *
     * @param string|null $url Candidate URL.
     */
    public function isSafeCustomUrl(?string $url): bool
    {
        $url = trim((string) $url);
        if ($url === '') {
            return true;
        }

        if ($this->containsControlCharacters($url)) {
            return false;
        }

        $lower = strtolower($url);
        if (
            str_starts_with($lower, 'javascript:')
            || str_contains($lower, 'onerror=')
            || str_contains($lower, 'onclick=')
        ) {
            return false;
        }

        return $this->isSiteRelativePath($url) || str_starts_with($lower, 'https://');
    }

    /**
     * Sanitizes the children of $node in place, depth-first.
     *
     * Comments and processing instructions are removed, REMOVED_TAGS elements
     * are removed with their content, non-allow-listed elements and anchors
     * with an unusable href are unwrapped, and remaining elements lose every
     * attribute that is not allow-listed for their tag.
     */
    private function cleanNode(DOMNode $node): void
    {
        // Mutations are deferred until after the loop because childNodes is a live list.
        $toRemove = [];
        $toUnwrap = [];

        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_COMMENT_NODE || $child->nodeType === XML_PI_NODE) {
                $toRemove[] = $child;

                continue;
            }

            if (! $child instanceof DOMElement) {
                continue;
            }

            $tag = strtolower($child->nodeName);

            if (in_array($tag, self::REMOVED_TAGS, true)) {
                $toRemove[] = $child;

                continue;
            }

            // Clean the subtree first, so that children promoted by an unwrap
            // below have already been sanitized.
            $this->cleanNode($child);

            if (! in_array($tag, self::ALLOWED_TAGS, true)) {
                $toUnwrap[] = $child;

                continue;
            }

            $this->stripDisallowedAttributes($child, $tag);

            if ($tag === 'a' && ! $this->normalizeAnchor($child)) {
                $toUnwrap[] = $child;
            }
        }

        foreach ($toRemove as $removed) {
            $node->removeChild($removed);
        }

        foreach ($toUnwrap as $element) {
            while ($element->firstChild) {
                $node->insertBefore($element->firstChild, $element);
            }

            $node->removeChild($element);
        }
    }

    /**
     * Removes every attribute of $element that is not allow-listed for $tag.
     */
    private function stripDisallowedAttributes(DOMElement $element, string $tag): void
    {
        $allowedAttrs = self::ALLOWED_ATTRS[$tag] ?? [];

        // Collect names first: removing while iterating the attribute map would skip entries.
        $attrsToRemove = [];
        foreach ($element->attributes as $attribute) {
            if (! in_array($attribute->nodeName, $allowedAttrs, true)) {
                $attrsToRemove[] = $attribute->nodeName;
            }
        }

        foreach ($attrsToRemove as $attributeName) {
            $element->removeAttribute($attributeName);
        }
    }

    /**
     * Validates an anchor's href and sets its rel/target attributes.
     *
     * https:// links get rel="noopener noreferrer" and target="_blank"; other
     * accepted links lose both attributes.
     *
     * @return bool False when the href is empty or unsafe and the anchor should be unwrapped.
     */
    private function normalizeAnchor(DOMElement $anchor): bool
    {
        $href = trim($anchor->getAttribute('href'));
        if ($href === '' || ! $this->isSafeAnchorHref($href)) {
            return false;
        }

        if (str_starts_with(strtolower($href), 'https://')) {
            $anchor->setAttribute('rel', 'noopener noreferrer');
            $anchor->setAttribute('target', '_blank');
        } else {
            $anchor->removeAttribute('target');
            $anchor->removeAttribute('rel');
        }

        return true;
    }

    /**
     * Tells whether an anchor href is a same-site path, an in-page fragment or an https:// URL.
     */
    private function isSafeAnchorHref(string $href): bool
    {
        $href = trim($href);
        $lower = strtolower($href);

        if ($this->containsControlCharacters($href)) {
            return false;
        }

        if (str_starts_with($lower, 'javascript:') || str_starts_with($lower, 'data:')) {
            return false;
        }

        if (str_starts_with($href, '#') || $this->isSiteRelativePath($href)) {
            return true;
        }

        return str_starts_with($lower, 'https://');
    }

    /**
     * True for paths starting with a single "/".
     *
     * "//host" and "/\host" are excluded: browsers resolve them against
     * another host, which would sidestep the https-only rule.
     */
    private function isSiteRelativePath(string $url): bool
    {
        return str_starts_with($url, '/')
            && ! str_starts_with($url, '//')
            && ! str_starts_with($url, '/\\');
    }

    /**
     * True when the URL contains an ASCII control character (tab, newline, NUL, DEL, ...).
     *
     * Browsers drop tabs and newlines while parsing URLs, so such characters
     * could be used to disguise a prefix the other checks rely on.
     */
    private function containsControlCharacters(string $url): bool
    {
        return preg_match('/[\x00-\x1F\x7F]/', $url) === 1;
    }

    /**
     * Decodes character references that expand to non-ASCII text only.
     *
     * References whose expansion contains any ASCII byte (&lt;, &gt;, &amp;,
     * &quot;, &#60;, ...) are left encoded: decoding them in serialized markup
     * would turn escaped text back into live tags or break out of attribute
     * values.
     */
    private function decodeNonAsciiEntities(string $html): string
    {
        $decoded = preg_replace_callback(
            '/&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/',
            static function (array $match): string {
                $expansion = html_entity_decode($match[0], ENT_QUOTES | ENT_HTML5, 'UTF-8');

                // Unknown references come back unchanged and therefore also stay as they are.
                return preg_match('/[\x00-\x7F]/', $expansion) === 1 ? $match[0] : $expansion;
            },
            $html
        );

        // On a PCRE failure fall back to the still-encoded (and therefore safe) markup.
        return $decoded ?? $html;
    }
}