Improve Imports

This commit is contained in:
Stefan Hardegger
2026-03-21 15:36:54 +01:00
parent 5a1a453798
commit 1dae7b1737
2 changed files with 23 additions and 1 deletions

View File

@@ -212,7 +212,9 @@ public class PDFImportService {
for (String para : paragraphs) {
String trimmed = para.trim();
if (!trimmed.isEmpty() && !isLikelyHeaderFooter(trimmed)) {
htmlContent.append("<p>").append(escapeHtml(trimmed)).append("</p>\n");
// Normalize soft line wraps (single newlines from PDF layout) to spaces
String normalized = trimmed.replaceAll("[ \t]*\n[ \t]*", " ").replaceAll(" {2,}", " ").trim();
htmlContent.append("<p>").append(escapeHtml(normalized)).append("</p>\n");
}
}
}

View File

@@ -89,6 +89,26 @@ const htmlToSlate = (html: string): Descendant[] => {
if (tag === 'em' || tag === 'i') newMarks.italic = true;
if (tag === 'u') newMarks.underline = true;
if (tag === 's' || tag === 'del' || tag === 'strike') newMarks.strikethrough = true;
// Also detect bold/italic from inline CSS styles (e.g. pasted web content)
const style = el.getAttribute('style');
if (style) {
const fwMatch = style.match(/font-weight\s*:\s*([^;]+)/i);
if (fwMatch) {
const fw = fwMatch[1].trim().toLowerCase();
if (fw === 'bold' || fw === 'bolder' || Number(fw) >= 600) {
newMarks.bold = true;
}
}
const fsMatch = style.match(/font-style\s*:\s*([^;]+)/i);
if (fsMatch) {
const fs = fsMatch[1].trim().toLowerCase();
if (fs === 'italic' || fs === 'oblique') {
newMarks.italic = true;
}
}
}
el.childNodes.forEach(child => processNode(child, newMarks));
}
};