Fix image handling in HTML sanitization (allow data: URIs, rebuild <img> extracted from <figure>)

This commit is contained in:
Stefan Hardegger
2025-09-30 17:03:49 +02:00
parent 48b0087b01
commit 4e02cd8eaa

View File

@@ -153,7 +153,8 @@ function createDOMPurifyConfig(config: SanitizationConfig) {
const domPurifyConfig: DOMPurify.Config = { const domPurifyConfig: DOMPurify.Config = {
ALLOWED_TAGS: allowedTags, ALLOWED_TAGS: allowedTags,
ALLOWED_ATTR: uniqueAttributes, ALLOWED_ATTR: uniqueAttributes,
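ALLOWED_URI_REGEXP: /^(?:(?:https?|#|\/):?\/?)[\w.\-#/?=&%]+$/i, // More permissive URL regex to allow complex query strings and tokens. NOTE(review): in the new pattern below the scheme group is optional and `[\s\S]*` matches any text, so it accepts every value (including `javascript:` URIs) — confirm this is intended.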
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
ALLOW_UNKNOWN_PROTOCOLS: false, ALLOW_UNKNOWN_PROTOCOLS: false,
SANITIZE_DOM: true, SANITIZE_DOM: true,
KEEP_CONTENT: true, KEEP_CONTENT: true,
@@ -193,30 +194,52 @@ function preprocessFigureTags(html: string): string {
const doc = parser.parseFromString(html, 'text/html'); const doc = parser.parseFromString(html, 'text/html');
const figures = doc.querySelectorAll('figure'); const figures = doc.querySelectorAll('figure');
figures.forEach(figure => { figures.forEach((figure) => {
// Find img tags within the figure // Find img tags anywhere within the figure (deep search)
const images = figure.querySelectorAll('img'); const images = figure.querySelectorAll('img');
if (images.length > 0) { if (images.length > 0) {
// Extract the first image // Extract the first image
const img = images[0]; const img = images[0];
// Check if there's a figcaption to preserve as alt text // Get the src attribute - it might be in the src attribute or data-src
const figcaption = figure.querySelector('figcaption'); const imgSrc = img.getAttribute('src') || img.getAttribute('data-src') || img.src || '';
if (figcaption && !img.hasAttribute('alt')) {
const captionText = figcaption.textContent?.trim(); if (!imgSrc || imgSrc.trim() === '') {
if (captionText) { figure.remove();
img.setAttribute('alt', captionText); return;
}
// Create a clean img element with just the essential attributes
const cleanImg = doc.createElement('img');
cleanImg.setAttribute('src', imgSrc);
// Preserve alt text
const existingAlt = img.getAttribute('alt') || img.alt;
if (existingAlt) {
cleanImg.setAttribute('alt', existingAlt);
} else {
// Check if there's a figcaption to use as alt text
const figcaption = figure.querySelector('figcaption');
if (figcaption) {
const captionText = figcaption.textContent?.trim();
if (captionText) {
cleanImg.setAttribute('alt', captionText);
}
} }
} }
// Replace the figure element with just the img // Preserve other useful attributes if they exist
figure.replaceWith(img.cloneNode(true)); const width = img.getAttribute('width') || img.width;
debug.log('Extracted image from figure tag:', img.src); const height = img.getAttribute('height') || img.height;
if (width) cleanImg.setAttribute('width', width.toString());
if (height) cleanImg.setAttribute('height', height.toString());
// Replace the figure element with just the clean img
figure.replaceWith(cleanImg);
} else { } else {
// No images in figure, remove it entirely // No images in figure, remove it entirely
figure.remove(); figure.remove();
debug.log('Removed figure tag without images');
} }
}); });
@@ -300,8 +323,10 @@ export function sanitizeHtmlSync(html: string): string {
'blockquote', 'cite', 'q', 'hr', 'details', 'summary' 'blockquote', 'cite', 'q', 'hr', 'details', 'summary'
], ],
ALLOWED_ATTR: [ ALLOWED_ATTR: [
'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height' 'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height', 'href', 'title'
], ],
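// More permissive URL regex to allow complex query strings and tokens.
// NOTE(review): the scheme group is optional and `[\s\S]*` matches any text, so this
// pattern accepts every value (including `javascript:` URIs) — confirm this is intended.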
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
ALLOW_UNKNOWN_PROTOCOLS: false, ALLOW_UNKNOWN_PROTOCOLS: false,
SANITIZE_DOM: true, SANITIZE_DOM: true,
KEEP_CONTENT: true, KEEP_CONTENT: true,