fix image
This commit is contained in:
@@ -153,7 +153,8 @@ function createDOMPurifyConfig(config: SanitizationConfig) {
|
||||
const domPurifyConfig: DOMPurify.Config = {
|
||||
ALLOWED_TAGS: allowedTags,
|
||||
ALLOWED_ATTR: uniqueAttributes,
|
||||
ALLOWED_URI_REGEXP: /^(?:(?:https?|#|\/):?\/?)[\w.\-#/?=&%]+$/i,
|
||||
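// More permissive URL regex to allow complex query strings and tokens.
// NOTE(review): the scheme group below is optional and [\s\S]* matches any string,
// so this pattern accepts every URI (including javascript:) — it should be tightened
// to an explicit scheme allow-list plus relative URLs.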
|
||||
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
|
||||
ALLOW_UNKNOWN_PROTOCOLS: false,
|
||||
SANITIZE_DOM: true,
|
||||
KEEP_CONTENT: true,
|
||||
@@ -193,30 +194,52 @@ function preprocessFigureTags(html: string): string {
|
||||
const doc = parser.parseFromString(html, 'text/html');
|
||||
const figures = doc.querySelectorAll('figure');
|
||||
|
||||
figures.forEach(figure => {
|
||||
// Find img tags within the figure
|
||||
figures.forEach((figure) => {
|
||||
// Find img tags anywhere within the figure (deep search)
|
||||
const images = figure.querySelectorAll('img');
|
||||
|
||||
if (images.length > 0) {
|
||||
// Extract the first image
|
||||
const img = images[0];
|
||||
|
||||
// Check if there's a figcaption to preserve as alt text
|
||||
// Get the src attribute - it might be in the src attribute or data-src
|
||||
const imgSrc = img.getAttribute('src') || img.getAttribute('data-src') || img.src || '';
|
||||
|
||||
if (!imgSrc || imgSrc.trim() === '') {
|
||||
figure.remove();
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a clean img element with just the essential attributes
|
||||
const cleanImg = doc.createElement('img');
|
||||
cleanImg.setAttribute('src', imgSrc);
|
||||
|
||||
// Preserve alt text
|
||||
const existingAlt = img.getAttribute('alt') || img.alt;
|
||||
if (existingAlt) {
|
||||
cleanImg.setAttribute('alt', existingAlt);
|
||||
} else {
|
||||
// Check if there's a figcaption to use as alt text
|
||||
const figcaption = figure.querySelector('figcaption');
|
||||
if (figcaption && !img.hasAttribute('alt')) {
|
||||
if (figcaption) {
|
||||
const captionText = figcaption.textContent?.trim();
|
||||
if (captionText) {
|
||||
img.setAttribute('alt', captionText);
|
||||
cleanImg.setAttribute('alt', captionText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the figure element with just the img
|
||||
figure.replaceWith(img.cloneNode(true));
|
||||
debug.log('Extracted image from figure tag:', img.src);
|
||||
// Preserve other useful attributes if they exist
|
||||
const width = img.getAttribute('width') || img.width;
|
||||
const height = img.getAttribute('height') || img.height;
|
||||
if (width) cleanImg.setAttribute('width', width.toString());
|
||||
if (height) cleanImg.setAttribute('height', height.toString());
|
||||
|
||||
// Replace the figure element with just the clean img
|
||||
figure.replaceWith(cleanImg);
|
||||
} else {
|
||||
// No images in figure, remove it entirely
|
||||
figure.remove();
|
||||
debug.log('Removed figure tag without images');
|
||||
}
|
||||
});
|
||||
|
||||
@@ -300,8 +323,10 @@ export function sanitizeHtmlSync(html: string): string {
|
||||
'blockquote', 'cite', 'q', 'hr', 'details', 'summary'
|
||||
],
|
||||
ALLOWED_ATTR: [
|
||||
'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height'
|
||||
'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height', 'href', 'title'
|
||||
],
|
||||
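// More permissive URL regex to allow complex query strings and tokens.
// NOTE(review): the scheme group below is optional and [\s\S]* matches any string,
// so this pattern accepts every URI (including javascript:) — it should be tightened
// to an explicit scheme allow-list plus relative URLs.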
|
||||
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
|
||||
ALLOW_UNKNOWN_PROTOCOLS: false,
|
||||
SANITIZE_DOM: true,
|
||||
KEEP_CONTENT: true,
|
||||
|
||||
Reference in New Issue
Block a user