fix image
This commit is contained in:
@@ -153,7 +153,8 @@ function createDOMPurifyConfig(config: SanitizationConfig) {
|
|||||||
const domPurifyConfig: DOMPurify.Config = {
|
const domPurifyConfig: DOMPurify.Config = {
|
||||||
ALLOWED_TAGS: allowedTags,
|
ALLOWED_TAGS: allowedTags,
|
||||||
ALLOWED_ATTR: uniqueAttributes,
|
ALLOWED_ATTR: uniqueAttributes,
|
||||||
ALLOWED_URI_REGEXP: /^(?:(?:https?|#|\/):?\/?)[\w.\-#/?=&%]+$/i,
|
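// More permissive URL regex to allow complex query strings and tokens. NOTE(review): the scheme group is optional and is followed by [\s\S]*, so this pattern matches every string (including other schemes such as javascript:) - confirm this is intended.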
|
||||||
|
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
|
||||||
ALLOW_UNKNOWN_PROTOCOLS: false,
|
ALLOW_UNKNOWN_PROTOCOLS: false,
|
||||||
SANITIZE_DOM: true,
|
SANITIZE_DOM: true,
|
||||||
KEEP_CONTENT: true,
|
KEEP_CONTENT: true,
|
||||||
@@ -193,30 +194,52 @@ function preprocessFigureTags(html: string): string {
|
|||||||
const doc = parser.parseFromString(html, 'text/html');
|
const doc = parser.parseFromString(html, 'text/html');
|
||||||
const figures = doc.querySelectorAll('figure');
|
const figures = doc.querySelectorAll('figure');
|
||||||
|
|
||||||
figures.forEach(figure => {
|
figures.forEach((figure) => {
|
||||||
// Find img tags within the figure
|
// Find img tags anywhere within the figure (deep search)
|
||||||
const images = figure.querySelectorAll('img');
|
const images = figure.querySelectorAll('img');
|
||||||
|
|
||||||
if (images.length > 0) {
|
if (images.length > 0) {
|
||||||
// Extract the first image
|
// Extract the first image
|
||||||
const img = images[0];
|
const img = images[0];
|
||||||
|
|
||||||
// Check if there's a figcaption to preserve as alt text
|
// Get the image URL - it may be in the src attribute or in data-src
|
||||||
const figcaption = figure.querySelector('figcaption');
|
const imgSrc = img.getAttribute('src') || img.getAttribute('data-src') || img.src || '';
|
||||||
if (figcaption && !img.hasAttribute('alt')) {
|
|
||||||
const captionText = figcaption.textContent?.trim();
|
if (!imgSrc || imgSrc.trim() === '') {
|
||||||
if (captionText) {
|
figure.remove();
|
||||||
img.setAttribute('alt', captionText);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a clean img element with just the essential attributes
|
||||||
|
const cleanImg = doc.createElement('img');
|
||||||
|
cleanImg.setAttribute('src', imgSrc);
|
||||||
|
|
||||||
|
// Preserve alt text
|
||||||
|
const existingAlt = img.getAttribute('alt') || img.alt;
|
||||||
|
if (existingAlt) {
|
||||||
|
cleanImg.setAttribute('alt', existingAlt);
|
||||||
|
} else {
|
||||||
|
// Check if there's a figcaption to use as alt text
|
||||||
|
const figcaption = figure.querySelector('figcaption');
|
||||||
|
if (figcaption) {
|
||||||
|
const captionText = figcaption.textContent?.trim();
|
||||||
|
if (captionText) {
|
||||||
|
cleanImg.setAttribute('alt', captionText);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace the figure element with just the img
|
// Preserve the width and height attributes if they exist
|
||||||
figure.replaceWith(img.cloneNode(true));
|
const width = img.getAttribute('width') || img.width;
|
||||||
debug.log('Extracted image from figure tag:', img.src);
|
const height = img.getAttribute('height') || img.height;
|
||||||
|
if (width) cleanImg.setAttribute('width', width.toString());
|
||||||
|
if (height) cleanImg.setAttribute('height', height.toString());
|
||||||
|
|
||||||
|
// Replace the figure element with just the clean img
|
||||||
|
figure.replaceWith(cleanImg);
|
||||||
} else {
|
} else {
|
||||||
// No images in figure, remove it entirely
|
// No images in figure, remove it entirely
|
||||||
figure.remove();
|
figure.remove();
|
||||||
debug.log('Removed figure tag without images');
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -300,8 +323,10 @@ export function sanitizeHtmlSync(html: string): string {
|
|||||||
'blockquote', 'cite', 'q', 'hr', 'details', 'summary'
|
'blockquote', 'cite', 'q', 'hr', 'details', 'summary'
|
||||||
],
|
],
|
||||||
ALLOWED_ATTR: [
|
ALLOWED_ATTR: [
|
||||||
'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height'
|
'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height', 'href', 'title'
|
||||||
],
|
],
|
||||||
|
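// More permissive URL regex to allow complex query strings and tokens. NOTE(review): the scheme group is optional and is followed by [\s\S]*, so this pattern matches every string (including other schemes such as javascript:) - confirm this is intended.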
|
||||||
|
ALLOWED_URI_REGEXP: /^(?:(?:https?|data|#|\/):)?[\s\S]*$/i,
|
||||||
ALLOW_UNKNOWN_PROTOCOLS: false,
|
ALLOW_UNKNOWN_PROTOCOLS: false,
|
||||||
SANITIZE_DOM: true,
|
SANITIZE_DOM: true,
|
||||||
KEEP_CONTENT: true,
|
KEEP_CONTENT: true,
|
||||||
|
|||||||
Reference in New Issue
Block a user