// File viewer metadata (not part of the module): 289 lines, 9.5 KiB, TypeScript.
import DOMPurify from 'dompurify';
|
|
import { configApi } from './api';
|
|
|
|
/**
 * Shape of the HTML sanitization rules used by this module, either as
 * returned by `configApi.getHtmlSanitizationConfig()` or as the built-in
 * fallback.
 */
interface SanitizationConfig {
  /** Element names that survive sanitization (passed to DOMPurify as `ALLOWED_TAGS`). */
  allowedTags: string[];

  /**
   * Attribute allow-list keyed by tag name. This module flattens all lists
   * into one global `ALLOWED_ATTR` array, so the per-tag grouping is not
   * enforced by DOMPurify itself.
   */
  allowedAttributes: Record<string, string[]>;

  /** CSS property names that may remain inside a `style` attribute. */
  allowedCssProperties: string[];

  /**
   * Deprecated: attributes to subtract from `allowedAttributes`, keyed by tag
   * name. Still honoured for backward compatibility.
   */
  removedAttributes?: Record<string, string[]>;

  /** URL schemes/prefixes keyed by tag name, then by attribute name. */
  allowedProtocols?: Record<string, Record<string, string[]>>;

  /** Free-text label for the configuration. */
  description: string;
}
|
|
|
|
// Last configuration resolved (from the backend or the fallback); null until
// the first load completes or after the cache is cleared.
let cachedConfig: SanitizationConfig | null = null;

// In-flight configuration request, shared by concurrent callers; null when
// no request is pending.
let configPromise: Promise<SanitizationConfig> | null = null;
|
|
|
|
/**
 * Keep only the allow-listed declarations of an inline `style` attribute value.
 *
 * The value is split on `;` and each declaration is kept when its property
 * name (the text before the first `:`) appears in `allowedProperties`.
 * Matching is case-insensitive and ignores surrounding whitespace on both
 * sides, so an allow-list entry such as `Color` still matches `color: red`.
 * Declarations without a `:`, with an empty property name, or with an empty
 * value are dropped.
 *
 * Only property names are checked; values are passed through untouched.
 * Because the split is a plain `;` split, a value that itself contains `;`
 * (for example inside a quoted string or `url(...)`) is cut at that point.
 *
 * @param styleValue Raw `style` attribute value, e.g. `"color: red; position: fixed"`.
 * @param allowedProperties CSS property names that may be kept.
 * @returns The surviving declarations joined with `"; "`, or `''` when none survive.
 */
function filterCssProperties(styleValue: string, allowedProperties: string[]): string {
  // Normalise the allow-list once so lookups are O(1) and not sensitive to
  // the casing or padding used by whoever authored the configuration.
  const allowed = new Set(allowedProperties.map(name => name.trim().toLowerCase()));

  const declarations = styleValue
    .split(';')
    .map(decl => decl.trim())
    .filter(decl => decl.length > 0);

  const filteredDeclarations = declarations.filter(declaration => {
    const colonIndex = declaration.indexOf(':');
    if (colonIndex === -1) return false;

    const property = declaration.substring(0, colonIndex).trim().toLowerCase();
    const value = declaration.substring(colonIndex + 1).trim();

    // `color:` and `: red` are not usable declarations; drop them instead of
    // echoing them back into the attribute.
    if (property === '' || value === '') return false;

    const isAllowed = allowed.has(property);
    if (!isAllowed) {
      console.log(`CSS property "${property}" was filtered out (not in allowed list)`);
    }
    return isAllowed;
  });

  const result = filteredDeclarations.join('; ');

  if (declarations.length !== filteredDeclarations.length) {
    console.log(`CSS filtering: ${declarations.length} -> ${filteredDeclarations.length} properties`);
    console.log('Original:', styleValue);
    console.log('Filtered:', result);
  }

  return result;
}
|
|
|
|
/**
 * Resolve the sanitization configuration, asking the backend only when
 * nothing is cached and no request is already pending.
 *
 * - A cached configuration is returned as-is.
 * - A pending request is shared with the caller rather than duplicated.
 * - Otherwise `configApi.getHtmlSanitizationConfig()` is called; its result
 *   is cached on success.
 * - On failure the error is logged and a built-in fallback configuration is
 *   cached and returned, so the returned promise resolves rather than rejects
 *   when the request fails. Because the fallback is cached, later calls keep
 *   returning it until the cache is cleared.
 *
 * @returns The backend configuration, or the fallback when the request fails.
 */
async function fetchSanitizationConfig(): Promise<SanitizationConfig> {
  if (cachedConfig) {
    return cachedConfig;
  }
  if (configPromise) {
    return configPromise;
  }

  // Success path: remember the backend answer and release the pending slot.
  const onLoaded = (loaded: SanitizationConfig): SanitizationConfig => {
    cachedConfig = loaded;
    configPromise = null;
    return loaded;
  };

  // Failure path: log, release the pending slot, and fall back to a fresh
  // built-in configuration which is cached like a real one.
  const onFailed = (error: unknown): SanitizationConfig => {
    console.error('Failed to fetch sanitization config, using fallback:', error);
    configPromise = null;

    const fallbackConfig: SanitizationConfig = {
      allowedTags: [
        'p', 'br', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'b', 'strong', 'i', 'em', 'u', 's', 'strike', 'del', 'ins',
        'sup', 'sub', 'small', 'big', 'mark', 'pre', 'code',
        'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'img',
        'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'caption',
        'blockquote', 'cite', 'q', 'hr',
      ],
      allowedAttributes: {
        p: ['class', 'style'],
        div: ['class', 'style'],
        span: ['class', 'style'],
        h1: ['class', 'style'],
        h2: ['class', 'style'],
        h3: ['class', 'style'],
        h4: ['class', 'style'],
        h5: ['class', 'style'],
        h6: ['class', 'style'],
        a: ['class'],
        img: ['src', 'alt', 'width', 'height', 'class', 'style'],
        table: ['class'],
        td: ['class', 'colspan', 'rowspan'],
        th: ['class', 'colspan', 'rowspan'],
      },
      allowedCssProperties: [
        'color', 'background-color', 'font-size', 'font-weight',
        'font-style', 'text-align', 'text-decoration', 'margin',
        'padding', 'text-indent', 'line-height',
      ],
      allowedProtocols: {
        a: {
          href: ['http', 'https', '#', '/'],
        },
        img: {
          src: ['http', 'https', 'data', '/'],
        },
      },
      description: 'Fallback sanitization configuration',
    };

    cachedConfig = fallbackConfig;
    return fallbackConfig;
  };

  configPromise = configApi.getHtmlSanitizationConfig().then(onLoaded).catch(onFailed);

  return configPromise;
}
|
|
|
|
/**
 * Translate a sanitization config into DOMPurify options and (re)install the
 * hook that filters inline CSS.
 *
 * Side effect: every call removes ALL hooks currently registered on the
 * shared `DOMPurify` instance and registers a single
 * `afterSanitizeAttributes` hook bound to `config.allowedCssProperties`.
 *
 * @param config Rules to apply; it is read but not modified.
 * @returns Options to pass as the second argument of `DOMPurify.sanitize`.
 */
function createDOMPurifyConfig(config: SanitizationConfig) {
  // Work on a shallow copy; entries are replaced (never mutated in place)
  // below, so the caller's config is left untouched.
  const allowedAttributes: Record<string, string[]> = { ...config.allowedAttributes };

  // Deprecated `removedAttributes` is still honoured for backward
  // compatibility: subtract the listed attributes per tag.
  if (config.removedAttributes) {
    for (const [tag, attributesToRemove] of Object.entries(config.removedAttributes)) {
      const current = allowedAttributes[tag];
      if (current) {
        allowedAttributes[tag] = current.filter(attr => !attributesToRemove.includes(attr));
      }
    }
  }

  // ALLOWED_ATTR is a single global list, so the per-tag lists are flattened
  // and de-duplicated; an attribute allowed on one tag is allowed on all.
  const uniqueAttributes = Array.from(new Set(Object.values(allowedAttributes).flat()));

  // NOTE(review): `config.allowedProtocols` is not applied here. The URI
  // pattern below is fixed (http/https, `#` and `/` prefixes only), so schemes
  // listed in the config, such as `data`, have no effect. The previous code
  // collected them into a local that was never read. Confirm whether the
  // pattern should be derived from the config before changing it, since
  // ALLOWED_URI_REGEXP is global and widening it affects every URL attribute.
  const domPurifyConfig: DOMPurify.Config = {
    ALLOWED_TAGS: config.allowedTags,
    ALLOWED_ATTR: uniqueAttributes,
    ALLOWED_URI_REGEXP: /^(?:(?:https?|#|\/):?\/?)[\w.\-#/?=&%]+$/i,
    ALLOW_UNKNOWN_PROTOCOLS: false,
    SANITIZE_DOM: true,
    KEEP_CONTENT: true,
    ALLOW_DATA_ATTR: false,
  };

  // Replace any previously registered hooks with one CSS filter for this config.
  DOMPurify.removeAllHooks();
  DOMPurify.addHook('afterSanitizeAttributes', node => {
    // Some node types do not expose hasAttribute; skip those.
    if (!node.hasAttribute || !node.hasAttribute('style')) {
      return;
    }

    const styleValue = node.getAttribute('style');
    if (!styleValue) {
      return;
    }

    const filteredStyle = filterCssProperties(styleValue, config.allowedCssProperties);
    if (filteredStyle) {
      node.setAttribute('style', filteredStyle);
    } else {
      // Nothing allowed remains: drop the attribute rather than leave it empty.
      node.removeAttribute('style');
    }
  });

  return domPurifyConfig;
}
|
|
|
|
/**
 * Sanitize an HTML string with the shared configuration, loading that
 * configuration first if necessary.
 *
 * Empty or whitespace-only input yields `''` without loading anything. If
 * loading the configuration, building the DOMPurify options, or sanitizing
 * throws, the error is logged and the input is sanitized with DOMPurify's
 * default options instead.
 *
 * @param html Untrusted HTML.
 * @returns The sanitized HTML string.
 */
export async function sanitizeHtml(html: string): Promise<string> {
  const isBlank = !html || html.trim() === '';
  if (isBlank) {
    return '';
  }

  let sanitized: string;
  try {
    const purifyOptions = createDOMPurifyConfig(await fetchSanitizationConfig());
    sanitized = DOMPurify.sanitize(html, purifyOptions as any).toString();
  } catch (error) {
    console.error('Error during HTML sanitization:', error);
    // Last resort: DOMPurify with its default options.
    sanitized = DOMPurify.sanitize(html).toString();
  }
  return sanitized;
}
|
|
|
|
/**
 * Synchronous variant of HTML sanitization for call sites that cannot await.
 *
 * Uses the cached configuration when one exists. Otherwise it sanitizes with
 * a built-in fallback allow-list and, unless a load is already in flight,
 * starts one in the background so later calls can use the real configuration.
 * It never waits for that load.
 *
 * Side effect (fallback path): removes all hooks registered on the shared
 * `DOMPurify` instance and installs a CSS-filtering hook for the fallback
 * property list.
 *
 * @param html Untrusted HTML.
 * @returns The sanitized HTML, or `''` for empty or whitespace-only input.
 */
export function sanitizeHtmlSync(html: string): string {
  if (!html || html.trim() === '') {
    return '';
  }

  // Preferred path: a configuration is already available.
  if (cachedConfig) {
    return DOMPurify.sanitize(html, createDOMPurifyConfig(cachedConfig) as any).toString();
  }

  if (!configPromise) {
    // Nothing cached and nothing loading: start a load for future calls.
    console.warn('No cached sanitization config available, triggering load for future use');
    fetchSanitizationConfig().catch(error => {
      console.error('Failed to load sanitization config:', error);
    });
  } else {
    // A load is already running; this call proceeds with the fallback.
    console.log('Sanitization config loading in progress, using fallback for now');
  }

  console.log('Using fallback sanitization configuration with formatting support');

  const fallbackAllowedCssProperties = [
    'color', 'font-size', 'font-weight',
    'font-style', 'text-align', 'text-decoration', 'margin',
    'padding', 'text-indent', 'line-height',
  ];

  const fallbackConfig: DOMPurify.Config = {
    ALLOWED_TAGS: [
      'p', 'br', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'b', 'strong', 'i', 'em', 'u', 's', 'strike', 'del', 'ins',
      'sup', 'sub', 'small', 'big', 'mark', 'pre', 'code', 'kbd', 'samp', 'var',
      'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'a', 'img',
      'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'caption', 'colgroup', 'col',
      'blockquote', 'cite', 'q', 'hr', 'details', 'summary',
    ],
    ALLOWED_ATTR: [
      'class', 'style', 'colspan', 'rowspan', 'src', 'alt', 'width', 'height',
    ],
    ALLOW_UNKNOWN_PROTOCOLS: false,
    SANITIZE_DOM: true,
    KEEP_CONTENT: true,
    ALLOW_DATA_ATTR: false,
  };

  // Swap in a CSS filter bound to the fallback property list.
  DOMPurify.removeAllHooks();
  DOMPurify.addHook('afterSanitizeAttributes', node => {
    if (!node.hasAttribute || !node.hasAttribute('style')) {
      return;
    }

    const styleValue = node.getAttribute('style');
    if (!styleValue) {
      return;
    }

    const filteredStyle = filterCssProperties(styleValue, fallbackAllowedCssProperties);
    if (filteredStyle) {
      node.setAttribute('style', filteredStyle);
    } else {
      node.removeAttribute('style');
    }
  });

  return DOMPurify.sanitize(html, fallbackConfig as any).toString();
}
|
|
|
|
/**
 * Start loading the sanitization configuration ahead of first use; intended
 * to be called early in the app lifecycle.
 *
 * @returns The same promise `fetchSanitizationConfig()` produces.
 */
export function preloadSanitizationConfig(): Promise<SanitizationConfig> {
  const pendingConfig = fetchSanitizationConfig();
  return pendingConfig;
}
|
|
|
|
/**
 * Forget the cached configuration and any reference to a pending request, so
 * the next lookup asks the backend again (useful for tests or after a
 * configuration change).
 */
export function clearSanitizationCache(): void {
  configPromise = null;
  cachedConfig = null;
}