Fix embedded images on DeviantArt

This commit is contained in:
Stefan Hardegger
2025-09-30 16:18:05 +02:00
parent c291559366
commit 48b0087b01
2 changed files with 112 additions and 8 deletions

View File

@@ -180,6 +180,53 @@ function createDOMPurifyConfig(config: SanitizationConfig) {
return domPurifyConfig;
}
/**
 * Preprocess HTML before sanitization by unwrapping images from `<figure>` tags.
 *
 * Each `<figure>` that contains at least one `<img>` is replaced by a copy of
 * its first image. The text of a `<figcaption>` is carried over as the image's
 * `alt` text when the image has no non-blank `alt` of its own. Figures that
 * contain no image are removed entirely.
 *
 * Input that contains no `<figure>` element is returned exactly as given, so
 * the markup is neither parsed nor re-serialized when there is nothing to do.
 *
 * @param html - Raw HTML to preprocess.
 * @returns The HTML with figures unwrapped, or the original string when there
 *   is nothing to change or when preprocessing throws.
 */
function preprocessFigureTags(html: string): string {
  if (!html || html.trim() === '') {
    return html;
  }

  // Cheap pre-check: skip the DOM parse when no figure tag can be present.
  if (!/<figure\b/i.test(html)) {
    return html;
  }

  try {
    const doc = new DOMParser().parseFromString(html, 'text/html');
    const figures = doc.querySelectorAll('figure');

    // The pre-check can match look-alikes such as `<figure-x>`; avoid
    // re-serializing the document when no real figure element exists.
    if (figures.length === 0) {
      return html;
    }

    figures.forEach(figure => {
      // Only the first image inside the figure is kept.
      const img = figure.querySelector('img');

      if (!img) {
        // No images in figure, remove it entirely
        figure.remove();
        debug.log('Removed figure tag without images');
        return;
      }

      // Preserve the caption as alt text. A blank alt is treated like a
      // missing one, because the caption would otherwise be lost once the
      // figure is replaced by the bare image.
      const existingAlt = (img.getAttribute('alt') ?? '').trim();
      if (existingAlt === '') {
        const captionText = figure.querySelector('figcaption')?.textContent?.trim();
        if (captionText) {
          img.setAttribute('alt', captionText);
        }
      }

      // Replace the figure element with just the img
      figure.replaceWith(img.cloneNode(true));
      debug.log('Extracted image from figure tag:', img.src);
    });

    return doc.body.innerHTML;
  } catch (error) {
    // Best effort: the sanitizer still runs on the untouched input.
    console.warn('Failed to preprocess figure tags, returning original HTML:', error);
    return html;
  }
}
/**
* Sanitize HTML content using shared configuration from backend
*/
@@ -189,12 +236,15 @@ export async function sanitizeHtml(html: string): Promise<string> {
}
try {
// Preprocess to extract images from figure tags
const preprocessed = preprocessFigureTags(html);
const config = await fetchSanitizationConfig();
const domPurifyConfig = createDOMPurifyConfig(config);
// Configure DOMPurify with our settings
const cleanHtml = DOMPurify.sanitize(html, domPurifyConfig as any);
// Configure DOMPurify with our settings
const cleanHtml = DOMPurify.sanitize(preprocessed, domPurifyConfig as any);
return cleanHtml.toString();
} catch (error) {
console.error('Error during HTML sanitization:', error);
@@ -212,10 +262,13 @@ export function sanitizeHtmlSync(html: string): string {
return '';
}
// Preprocess to extract images from figure tags
const preprocessed = preprocessFigureTags(html);
// If we have cached config, use it
if (cachedConfig) {
const domPurifyConfig = createDOMPurifyConfig(cachedConfig);
return DOMPurify.sanitize(html, domPurifyConfig as any).toString();
return DOMPurify.sanitize(preprocessed, domPurifyConfig as any).toString();
}
// If we don't have cached config but there's an ongoing request, wait for it
@@ -270,8 +323,8 @@ export function sanitizeHtmlSync(html: string): string {
}
}
});
return DOMPurify.sanitize(html, fallbackConfig as any).toString();
return DOMPurify.sanitize(preprocessed, fallbackConfig as any).toString();
}
/**