fixed sanitizer and academy

This commit is contained in:
2026-06-05 16:53:20 +02:00
parent 15870ddb1f
commit f89ee937c0
29 changed files with 2444 additions and 1039 deletions

View File

@@ -37,11 +37,22 @@ class ContentSanitizer
'p', 'br', 'strong', 'em', 'code', 'pre',
'a', 'ul', 'ol', 'li', 'blockquote', 'del',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
// Image and embed-related tags used by the rich editor
'figure', 'figcaption', 'img', 'picture', 'source', 'iframe',
// Basic structural/inline helpers sometimes produced by embeds
'div', 'span'
];
/**
 * Per-tag attribute allowlist, keyed by tag name.
 *
 * The attribute filter looks a tag up with `self::ALLOWED_ATTRS[$tag] ?? []`,
 * so a tag that has no entry here gets an empty list from this map.
 *
 * NOTE(review): the filtering loop visible in this class also lets every
 * `data-*` attribute and `class` through before consulting this map, so the
 * `class` / `data-*` entries below look redundant with that rule — confirm
 * which of the two is meant to be authoritative.
 */
private const ALLOWED_ATTRS = [
// Hyperlinks.
'a' => ['href', 'title', 'rel', 'target'],
// Images: `src` and each `srcset` candidate URL are checked with isSafeUrl() by the sanitizer.
// NOTE(review): `style` is allowed here; no validation of its value is visible in this view.
'img' => ['src', 'srcset', 'sizes', 'alt', 'title', 'loading', 'decoding', 'width', 'height', 'style', 'class', 'data-width'],
// <source> elements: URL values are checked with isSafeUrl() by the sanitizer.
'source' => ['srcset', 'src', 'type', 'media', 'sizes'],
// Figure wrappers carrying embed-related data-* attributes.
'figure' => ['class', 'data-rich-image', 'data-platform', 'data-video-embed', 'data-social-embed', 'data-artwork-embed'],
'figcaption' => ['class'],
// Iframes: `src` is checked with isSafeUrl() by the sanitizer.
'iframe' => ['src', 'title', 'loading', 'frameborder', 'allow', 'allowfullscreen', 'referrerpolicy'],
// Generic containers/inline wrappers.
'div' => ['class', 'data-href', 'data-show-text'],
'span' => ['class'],
];
private static ?MarkdownConverter $converter = null;
@@ -261,14 +272,82 @@ class ContentSanitizer
$allowedAttrs = self::ALLOWED_ATTRS[$tag] ?? [];
$attrsToRemove = [];
foreach ($child->attributes as $attr) {
if (! in_array($attr->nodeName, $allowedAttrs, true)) {
$attrsToRemove[] = $attr->nodeName;
$name = $attr->nodeName;
// Allow data-* attributes and class on allowed tags
if (str_starts_with($name, 'data-') || $name === 'class') {
continue;
}
if (! in_array($name, $allowedAttrs, true)) {
$attrsToRemove[] = $name;
}
}
foreach ($attrsToRemove as $attrName) {
$child->removeAttribute($attrName);
}
// Validate URL-like attributes for image/source/iframe
if ($tag === 'img') {
$src = $child->getAttribute('src');
if ($src && ! static::isSafeUrl($src)) {
$toUnwrap[] = $child;
continue;
}
// Validate srcset: ensure each URL is safe; if not, remove the attribute
$srcset = $child->getAttribute('srcset');
if ($srcset) {
$parts = array_map('trim', explode(',', $srcset));
$valid = true;
foreach ($parts as $part) {
if ($part === '') {
continue;
}
// Each part: "url [descriptor]"
$pieces = preg_split('/\s+/', $part);
$url = $pieces[0] ?? '';
if ($url !== '' && ! static::isSafeUrl($url)) {
$valid = false;
break;
}
}
if (! $valid) {
$child->removeAttribute('srcset');
}
}
}
if ($tag === 'source') {
$src = $child->getAttribute('src') ?: $child->getAttribute('srcset');
if ($src) {
// For srcset allow comma-separated list; validate each
$values = array_map('trim', explode(',', $src));
$valid = true;
foreach ($values as $v) {
if ($v === '') continue;
$pieces = preg_split('/\s+/', $v);
$url = $pieces[0] ?? '';
if ($url !== '' && ! static::isSafeUrl($url)) {
$valid = false;
break;
}
}
if (! $valid) {
$toUnwrap[] = $child;
continue;
}
}
}
if ($tag === 'iframe') {
$src = $child->getAttribute('src');
if ($src && ! static::isSafeUrl($src)) {
$toUnwrap[] = $child;
continue;
}
}
// Force external links to be safe
if ($tag === 'a') {
if (! $allowLinks) {