Files
storycove/frontend/src/lib/sanitization.ts
2025-09-16 14:58:50 +02:00

289 lines
9.5 KiB
TypeScript

import DOMPurify from 'dompurify';
import { configApi } from './api';
/**
 * Shape of the HTML sanitization rules used by this module. The rules are
 * either returned by `configApi.getHtmlSanitizationConfig()` or taken from the
 * built-in fallback when that request fails.
 */
interface SanitizationConfig {
/** Tag names that may survive sanitization (handed to DOMPurify as ALLOWED_TAGS). */
allowedTags: string[];
/** Permitted attribute names keyed by tag name; flattened into one global list for DOMPurify. */
allowedAttributes: Record<string, string[]>;
/** CSS property names that are kept inside `style` attribute values. */
allowedCssProperties: string[];
/** Attributes to subtract from `allowedAttributes`, keyed by tag name (deprecated, kept for backward compatibility). */
removedAttributes?: Record<string, string[]>;
/** Permitted URL protocols/prefixes, keyed by tag name and then by attribute name. */
allowedProtocols?: Record<string, Record<string, string[]>>;
/** Free-text label for this configuration (the fallback uses 'Fallback sanitization configuration'). */
description: string;
}
// Last configuration that was resolved (backend response or fallback);
// null until the first load completes or after clearSanitizationCache().
let cachedConfig: SanitizationConfig | null = null;
// Request currently in flight, shared by concurrent callers; null when no request is pending.
let configPromise: Promise<SanitizationConfig> | null = null;
/**
 * Reduce an inline `style` value to the declarations whose property name is
 * on the allow-list.
 *
 * The value is split on `;`, empty pieces are ignored, and pieces without a
 * `:` are dropped. Property names are compared after trimming and
 * lower-casing; the surviving declarations keep their original text and are
 * re-joined with `'; '`.
 *
 * @param styleValue raw content of a `style` attribute
 * @param allowedProperties lower-case CSS property names to keep
 * @returns the filtered declaration list, or `''` when nothing survives
 */
function filterCssProperties(styleValue: string, allowedProperties: string[]): string {
  const kept: string[] = [];
  let seen = 0;

  for (const rawPiece of styleValue.split(';')) {
    const piece = rawPiece.trim();
    if (piece === '') {
      continue;
    }
    seen += 1;

    const separatorAt = piece.indexOf(':');
    if (separatorAt < 0) {
      // Not a "property: value" pair; dropped without a per-property log line.
      continue;
    }

    const name = piece.slice(0, separatorAt).trim().toLowerCase();
    if (allowedProperties.includes(name)) {
      kept.push(piece);
    } else {
      console.log(`CSS property "${name}" was filtered out (not in allowed list)`);
    }
  }

  const filtered = kept.join('; ');
  if (seen !== kept.length) {
    // Summarise only when at least one declaration was removed.
    console.log(`CSS filtering: ${seen} -> ${kept.length} properties`);
    console.log('Original:', styleValue);
    console.log('Filtered:', filtered);
  }
  return filtered;
}
/**
 * Resolve the sanitization configuration.
 *
 * Resolution order: the cached configuration, then a request that is already
 * in flight, then a new call to `configApi.getHtmlSanitizationConfig()`.
 * When that call fails the error is logged and a built-in fallback
 * configuration is cached and returned instead, so the returned promise
 * resolves in both cases.
 */
async function fetchSanitizationConfig(): Promise<SanitizationConfig> {
  const alreadyAvailable = cachedConfig || configPromise;
  if (alreadyAvailable) {
    return alreadyAvailable;
  }

  // Built lazily: only needed when the backend request fails.
  const buildFallbackConfig = (): SanitizationConfig => {
    const attributesByTag: Record<string, string[]> = {};
    for (const tag of ['p', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']) {
      attributesByTag[tag] = ['class', 'style'];
    }
    attributesByTag['a'] = ['class'];
    attributesByTag['img'] = ['src', 'alt', 'width', 'height', 'class', 'style'];
    attributesByTag['table'] = ['class'];
    attributesByTag['td'] = ['class', 'colspan', 'rowspan'];
    attributesByTag['th'] = ['class', 'colspan', 'rowspan'];

    return {
      allowedTags: [
        'p', 'br', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'b', 'strong', 'i', 'em', 'u', 's', 'strike', 'del', 'ins',
        'sup', 'sub', 'small', 'big', 'mark', 'pre', 'code',
        'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'img',
        'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'caption',
        'blockquote', 'cite', 'q', 'hr',
      ],
      allowedAttributes: attributesByTag,
      allowedCssProperties: [
        'color', 'background-color', 'font-size', 'font-weight',
        'font-style', 'text-align', 'text-decoration', 'margin',
        'padding', 'text-indent', 'line-height',
      ],
      allowedProtocols: {
        a: { href: ['http', 'https', '#', '/'] },
        img: { src: ['http', 'https', 'data', '/'] },
      },
      description: 'Fallback sanitization configuration',
    };
  };

  configPromise = configApi.getHtmlSanitizationConfig()
    .then(loaded => {
      cachedConfig = loaded;
      configPromise = null;
      return loaded;
    })
    .catch(reason => {
      console.error('Failed to fetch sanitization config, using fallback:', reason);
      configPromise = null;
      const substitute = buildFallbackConfig();
      // The fallback is cached too, so later calls do not hit the backend again.
      cachedConfig = substitute;
      return substitute;
    });

  return configPromise;
}
/**
 * Build the expression passed to DOMPurify as `ALLOWED_URI_REGEXP` from the
 * configured protocol lists.
 *
 * DOMPurify tests this expression against the value of every allowed
 * attribute that is not on its URI-safe list, not only `href`/`src`. The
 * expression therefore follows the shape of DOMPurify's default: an allowed
 * scheme followed by `:`, or a value that does not start with a letter
 * (`#frag`, `/path`, `100`), or a value whose leading word is not followed by
 * `:` (relative paths, keywords). Values using any other scheme are rejected.
 *
 * @param allowedProtocols tag -> attribute -> protocol list from the config
 * @returns a case-insensitive allow-list expression
 */
function buildAllowedUriRegExp(
  allowedProtocols: Record<string, Record<string, string[]>> | undefined
): RegExp {
  // Never whitelisted through configuration. The expression is global rather
  // than per tag, so allowing `data` for <img> here would also allow it on
  // links. NOTE(review): DOMPurify is expected to keep `data:` URIs on media
  // tags such as <img> by itself - confirm against the DOMPurify version in use.
  const neverAllowed = new Set(['javascript', 'vbscript', 'data']);

  const byTag: Record<string, Record<string, string[]>> = allowedProtocols ?? {};
  const schemes = new Set<string>();
  for (const byAttribute of Object.values(byTag)) {
    for (const protocols of Object.values(byAttribute)) {
      for (const protocol of protocols) {
        const scheme = protocol.trim().toLowerCase().replace(/:$/, '');
        // Entries such as '#' or '/' describe fragment / relative URLs, not
        // schemes; those are covered by the non-letter alternative below.
        if (/^[a-z][a-z0-9+.-]*$/.test(scheme) && !neverAllowed.has(scheme)) {
          schemes.add(scheme);
        }
      }
    }
  }
  if (schemes.size === 0) {
    // Same baseline the previous hard-coded expression allowed.
    schemes.add('http');
    schemes.add('https');
  }

  const alternation = Array.from(schemes, scheme => scheme.replace(/[+.]/g, '\\$&')).join('|');
  return new RegExp(`^(?:(?:${alternation}):|[^a-z]|[a-z+.\\-]+(?:[^a-z+.\\-:]|$))`, 'i');
}

/**
 * Create a DOMPurify configuration from the shared sanitization config and
 * install the CSS-property filter hook.
 *
 * Side effect: all hooks registered on the shared DOMPurify instance are
 * removed and a single `afterSanitizeAttributes` hook is installed that
 * filters `style` values against `config.allowedCssProperties`.
 *
 * @param config sanitization rules (backend-provided or fallback)
 * @returns the configuration object to pass to `DOMPurify.sanitize`
 */
function createDOMPurifyConfig(config: SanitizationConfig) {
  const allowedAttributes: Record<string, string[]> = { ...config.allowedAttributes };

  // Remove attributes that should be stripped (deprecated, kept for backward compatibility).
  const removedAttributes: Record<string, string[]> = config.removedAttributes ?? {};
  for (const [tag, attributesToRemove] of Object.entries(removedAttributes)) {
    const current = allowedAttributes[tag];
    if (current) {
      allowedAttributes[tag] = current.filter(attr => !attributesToRemove.includes(attr));
    }
  }

  // DOMPurify expects ALLOWED_ATTR as one global list, so the tag-specific
  // lists are flattened and de-duplicated.
  const uniqueAttributes = Array.from(new Set(Object.values(allowedAttributes).flat()));

  const domPurifyConfig: DOMPurify.Config = {
    ALLOWED_TAGS: config.allowedTags,
    ALLOWED_ATTR: uniqueAttributes,
    // Derived from config.allowedProtocols. The previous fixed expression
    // ignored the configuration and rejected plain attribute values such as
    // width="100" as well as URLs containing ':' (ports) or other common characters.
    ALLOWED_URI_REGEXP: buildAllowedUriRegExp(config.allowedProtocols),
    ALLOW_UNKNOWN_PROTOCOLS: false,
    SANITIZE_DOM: true,
    KEEP_CONTENT: true,
    ALLOW_DATA_ATTR: false,
  };

  // Clear any existing hooks and add CSS property filtering.
  DOMPurify.removeAllHooks();
  DOMPurify.addHook('afterSanitizeAttributes', function (node) {
    // Only process elements with style attributes.
    if (node.hasAttribute && node.hasAttribute('style')) {
      const styleValue = node.getAttribute('style');
      if (styleValue) {
        const filteredStyle = filterCssProperties(styleValue, config.allowedCssProperties);
        if (filteredStyle) {
          node.setAttribute('style', filteredStyle);
        } else {
          // Nothing allowed is left, so drop the attribute entirely.
          node.removeAttribute('style');
        }
      }
    }
  });

  return domPurifyConfig;
}
/**
 * Sanitize an HTML string with the shared sanitization configuration.
 *
 * Empty or whitespace-only input yields `''`. If building the configuration
 * or sanitizing throws, the error is logged and the input is sanitized with
 * DOMPurify's default settings instead.
 *
 * @param html untrusted HTML markup
 * @returns the sanitized markup
 */
export async function sanitizeHtml(html: string): Promise<string> {
  if (!html || html.trim().length === 0) {
    return '';
  }

  try {
    const purifyOptions = createDOMPurifyConfig(await fetchSanitizationConfig());
    return DOMPurify.sanitize(html, purifyOptions as any).toString();
  } catch (failure) {
    console.error('Error during HTML sanitization:', failure);
    // Last resort: DOMPurify with its built-in defaults.
    const defaultCleaned = DOMPurify.sanitize(html);
    return defaultCleaned.toString();
  }
}
/**
 * Synchronous variant of HTML sanitization for call sites that cannot await.
 *
 * Uses the cached configuration when one exists. Otherwise it sanitizes with
 * a built-in allow-list; it does not wait for a pending request. When no
 * request is pending it starts one so that later calls can use the real
 * configuration.
 *
 * @param html untrusted HTML markup
 * @returns the sanitized markup, or `''` for empty / whitespace-only input
 */
export function sanitizeHtmlSync(html: string): string {
  if (!html || html.trim().length === 0) {
    return '';
  }

  if (cachedConfig) {
    return DOMPurify.sanitize(html, createDOMPurifyConfig(cachedConfig) as any).toString();
  }

  if (!configPromise) {
    // Nothing cached and nothing in flight: start a load for later calls.
    console.warn('No cached sanitization config available, triggering load for future use');
    fetchSanitizationConfig().catch(error => {
      console.error('Failed to load sanitization config:', error);
    });
  } else {
    // A request is pending, but this function cannot wait for it.
    console.log('Sanitization config loading in progress, using fallback for now');
  }

  console.log('Using fallback sanitization configuration with formatting support');

  const fallbackAllowedCssProperties = [
    'color', 'font-size', 'font-weight',
    'font-style', 'text-align', 'text-decoration', 'margin',
    'padding', 'text-indent', 'line-height',
  ];
  const fallbackTags = [
    'p', 'br', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'b', 'strong', 'i', 'em', 'u', 's', 'strike', 'del', 'ins',
    'sup', 'sub', 'small', 'big', 'mark', 'pre', 'code', 'kbd', 'samp', 'var',
    'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'img',
    'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'caption', 'colgroup', 'col',
    'blockquote', 'cite', 'q', 'hr', 'details', 'summary',
  ];
  const fallbackAttributes = [
    'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height',
  ];

  const fallbackConfig: DOMPurify.Config = {
    ALLOWED_TAGS: fallbackTags,
    ALLOWED_ATTR: fallbackAttributes,
    ALLOW_UNKNOWN_PROTOCOLS: false,
    SANITIZE_DOM: true,
    KEEP_CONTENT: true,
    ALLOW_DATA_ATTR: false,
  };

  // Replace whatever hooks are installed with the fallback CSS filter.
  DOMPurify.removeAllHooks();
  DOMPurify.addHook('afterSanitizeAttributes', (node) => {
    if (!(node.hasAttribute && node.hasAttribute('style'))) {
      return;
    }
    const inlineStyle = node.getAttribute('style');
    if (!inlineStyle) {
      return;
    }
    const keptStyle = filterCssProperties(inlineStyle, fallbackAllowedCssProperties);
    if (keptStyle) {
      node.setAttribute('style', keptStyle);
    } else {
      node.removeAttribute('style');
    }
  });

  return DOMPurify.sanitize(html, fallbackConfig as any).toString();
}
/**
 * Start loading the sanitization configuration ahead of first use
 * (intended to be called early in the app lifecycle).
 *
 * @returns the promise produced by `fetchSanitizationConfig()`
 */
export function preloadSanitizationConfig(): Promise<SanitizationConfig> {
  const pendingConfig = fetchSanitizationConfig();
  return pendingConfig;
}
/**
 * Forget the cached configuration and any tracked in-flight request, so the
 * next lookup fetches again (useful for tests or after a config update).
 */
export function clearSanitizationCache(): void {
  configPromise = null;
  cachedConfig = null;
}