fix formatting loss

This commit is contained in:
Stefan Hardegger
2026-02-23 11:55:40 +01:00
parent 5b1c11ff47
commit 5a1a453798

View File

@@ -70,6 +70,33 @@ const htmlToSlate = (html: string): Descendant[] => {
const nodes: Descendant[] = []; const nodes: Descendant[] = [];
/**
 * Flattens the inline content of a block element into a list of text leaves.
 *
 * Every non-empty text node found anywhere below `element` becomes one leaf.
 * The leaf carries the formatting marks implied by the tags wrapping it:
 *   - <strong>, <b>          -> bold
 *   - <em>, <i>              -> italic
 *   - <u>                    -> underline
 *   - <s>, <del>, <strike>   -> strikethrough
 * Other tags contribute no mark but are still descended into. Nodes that are
 * neither text nor element nodes are ignored.
 *
 * @param element Block element whose descendants should be converted.
 * @returns The leaves in document order, or a single empty-text leaf when no
 *          non-empty text node was found.
 */
const parseInlineChildren = (element: Element): CustomText[] => {
  const leaves: CustomText[] = [];

  // Depth-first walk; `inherited` holds the marks collected from ancestors.
  const walk = (current: Node, inherited: Partial<CustomText>): void => {
    switch (current.nodeType) {
      case Node.TEXT_NODE: {
        const content = current.textContent;
        // Skip null/empty text so no zero-length leaves are produced here.
        if (content) {
          leaves.push({ text: content, ...inherited });
        }
        return;
      }
      case Node.ELEMENT_NODE: {
        const tagged = current as Element;
        // Copy so marks added by this tag never leak to sibling subtrees.
        const active = { ...inherited };
        switch (tagged.tagName.toLowerCase()) {
          case 'strong':
          case 'b':
            active.bold = true;
            break;
          case 'em':
          case 'i':
            active.italic = true;
            break;
          case 'u':
            active.underline = true;
            break;
          case 's':
          case 'del':
          case 'strike':
            active.strikethrough = true;
            break;
          default:
            break;
        }
        for (const child of Array.from(tagged.childNodes)) {
          walk(child, active);
        }
        return;
      }
      default:
        return;
    }
  };

  for (const child of Array.from(element.childNodes)) {
    walk(child, {});
  }

  if (leaves.length === 0) {
    return [{ text: '' }];
  }
  return leaves;
};
// Process all nodes in document order to maintain sequence // Process all nodes in document order to maintain sequence
const processChildNodes = (parentNode: Node): Descendant[] => { const processChildNodes = (parentNode: Node): Descendant[] => {
const results: Descendant[] = []; const results: Descendant[] = [];
@@ -82,19 +109,19 @@ const htmlToSlate = (html: string): Descendant[] => {
case 'h1': case 'h1':
results.push({ results.push({
type: 'heading-one', type: 'heading-one',
children: [{ text: element.textContent || '' }] children: parseInlineChildren(element)
}); });
break; break;
case 'h2': case 'h2':
results.push({ results.push({
type: 'heading-two', type: 'heading-two',
children: [{ text: element.textContent || '' }] children: parseInlineChildren(element)
}); });
break; break;
case 'h3': case 'h3':
results.push({ results.push({
type: 'heading-three', type: 'heading-three',
children: [{ text: element.textContent || '' }] children: parseInlineChildren(element)
}); });
break; break;
case 'blockquote': case 'blockquote':
@@ -122,23 +149,26 @@ const htmlToSlate = (html: string): Descendant[] => {
}); });
break; break;
} }
case 'p': case 'p': {
case 'div': {
// Check if this paragraph contains mixed content (text + images) // Check if this paragraph contains mixed content (text + images)
if (element.querySelector('img')) { if (element.querySelector('img')) {
// Process mixed content - handle both text and images in order // Process mixed content - handle both text and images in order
results.push(...processChildNodes(element)); results.push(...processChildNodes(element));
} else { } else {
const text = element.textContent || ''; const inlineChildren = parseInlineChildren(element);
if (text.trim()) { if (inlineChildren.some(c => c.text.trim())) {
results.push({ results.push({ type: 'paragraph', children: inlineChildren });
type: 'paragraph',
children: [{ text }]
});
} }
} }
break; break;
} }
case 'div': {
// Always recurse into divs: they may wrap headings or other block elements.
// Using textContent here would flatten everything into a single paragraph
// and silently drop any headings nested inside.
results.push(...processChildNodes(element));
break;
}
case 'br': case 'br':
// Handle line breaks by creating empty paragraphs // Handle line breaks by creating empty paragraphs
results.push({ results.push({
@@ -194,32 +224,54 @@ const htmlToSlate = (html: string): Descendant[] => {
const slateToHtml = (nodes: Descendant[]): string => { const slateToHtml = (nodes: Descendant[]): string => {
const htmlParts: string[] = []; const htmlParts: string[] = [];
/**
 * Renders one text leaf as an HTML fragment.
 *
 * The leaf's text is HTML-escaped (`&`, `<`, `>`) so literal markup characters
 * cannot break the surrounding document. The escaped text is then wrapped in
 * one tag per active mark, innermost to outermost:
 * <strong>, <em>, <u>, <s>.
 *
 * @param leaf Text leaf with optional bold/italic/underline/strikethrough marks.
 * @returns The escaped text wrapped in the tags for its active marks.
 */
const serializeLeaf = (leaf: CustomText): string => {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
  };
  // One pass over the text; equivalent to escaping '&' first, then '<' and '>'.
  const escaped = leaf.text.replace(/[&<>]/g, ch => entities[ch]);

  // Ordered innermost -> outermost; each active mark wraps the markup built so far.
  const wrappers: Array<[unknown, string]> = [
    [leaf.bold, 'strong'],
    [leaf.italic, 'em'],
    [leaf.underline, 'u'],
    [leaf.strikethrough, 's'],
  ];

  return wrappers.reduce(
    (markup, [enabled, tag]) => (enabled ? `<${tag}>${markup}</${tag}>` : markup),
    escaped,
  );
};
nodes.forEach(node => { nodes.forEach(node => {
if (SlateElement.isElement(node)) { if (SlateElement.isElement(node)) {
const element = node as CustomElement; const element = node as CustomElement;
const text = SlateNode.string(node);
switch (element.type) { switch (element.type) {
case 'heading-one': case 'heading-one': {
htmlParts.push(`<h1>${text}</h1>`); const inner = element.children.map(serializeLeaf).join('');
htmlParts.push(`<h1>${inner}</h1>`);
break; break;
case 'heading-two': }
htmlParts.push(`<h2>${text}</h2>`); case 'heading-two': {
const inner = element.children.map(serializeLeaf).join('');
htmlParts.push(`<h2>${inner}</h2>`);
break; break;
case 'heading-three': }
htmlParts.push(`<h3>${text}</h3>`); case 'heading-three': {
const inner = element.children.map(serializeLeaf).join('');
htmlParts.push(`<h3>${inner}</h3>`);
break; break;
case 'image': }
case 'image': {
const attrs: string[] = []; const attrs: string[] = [];
if (element.src) attrs.push(`src="${element.src}"`); if (element.src) attrs.push(`src="${element.src}"`);
if (element.alt) attrs.push(`alt="${element.alt}"`); if (element.alt) attrs.push(`alt="${element.alt}"`);
if (element.caption) attrs.push(`title="${element.caption}"`); if (element.caption) attrs.push(`title="${element.caption}"`);
htmlParts.push(`<img ${attrs.join(' ')} />`); htmlParts.push(`<img ${attrs.join(' ')} />`);
break; break;
}
case 'paragraph': case 'paragraph':
default: default: {
htmlParts.push(text ? `<p>${text}</p>` : '<p></p>'); const inner = element.children.map(serializeLeaf).join('');
htmlParts.push(inner ? `<p>${inner}</p>` : '<p></p>');
break; break;
}
} }
} }
}); });