richtext replacement

This commit is contained in:
Stefan Hardegger
2025-09-21 10:10:04 +02:00
parent aae8f8926b
commit b1dbd85346
28 changed files with 3337 additions and 10558 deletions

View File

@@ -0,0 +1,274 @@
/**
* Conversion utilities between HTML and Portable Text
* Maintains compatibility with existing sanitization strategy
*/
import type { PortableTextBlock } from '@portabletext/types';
import type { CustomPortableTextBlock } from './schema';
import { createTextBlock, createImageBlock } from './schema';
import { sanitizeHtmlSync } from '../sanitization';
/**
 * Convert an HTML string into Portable Text blocks.
 *
 * The markup is sanitized with `sanitizeHtmlSync`, parsed with `DOMParser`
 * (so a DOM environment is required) and then walked in document order:
 *
 * - A block element without nested block content becomes one text block whose
 *   text is the element's `textContent`.
 * - A block element that wraps other blocks or images is treated as a
 *   container; only its children produce blocks, so nested markup such as
 *   `<div><p>a</p></div>` is not emitted twice.
 * - Text and inline elements that sit outside a leaf block are collected and
 *   emitted as their own paragraph instead of being dropped.
 * - `<img>` becomes an image block; `<br>` only ends the current run of
 *   loose text.
 *
 * Inline formatting (bold, links, ...) is not converted to marks; only the
 * plain text is kept.
 *
 * @param html - Raw HTML. Empty or whitespace-only input yields a single
 *   empty text block.
 * @returns The converted blocks; always contains at least one block.
 */
export function htmlToPortableText(html: string): CustomPortableTextBlock[] {
  if (!html || html.trim() === '') {
    return [createTextBlock()];
  }

  // First sanitize the HTML using the existing strategy.
  const sanitizedHtml = sanitizeHtmlSync(html);
  const doc = new DOMParser().parseFromString(sanitizedHtml, 'text/html');
  const blocks: CustomPortableTextBlock[] = [];

  // Text found outside of a leaf block (directly in <body>, or between the
  // nested blocks of a container) is buffered here until a block boundary.
  let pendingText = '';
  const flushPendingText = (style: string): void => {
    const text = pendingText.trim();
    pendingText = '';
    if (text) {
      blocks.push(createTextBlock(text, style));
    }
  };

  // True when the element wraps further block-level content. <br> is ignored
  // so that a paragraph containing only line breaks still counts as a leaf.
  const hasNestedBlock = (element: Element): boolean =>
    Array.from(element.querySelectorAll('*')).some(
      descendant => descendant.tagName !== 'BR' && isBlockElement(descendant.tagName)
    );

  const visit = (parent: Node, style: string): void => {
    for (const child of Array.from(parent.childNodes)) {
      if (child.nodeType === Node.TEXT_NODE) {
        pendingText += child.textContent ?? '';
        continue;
      }
      if (child.nodeType !== Node.ELEMENT_NODE) {
        continue;
      }

      const element = child as Element;
      const tagName = element.tagName.toUpperCase();

      if (!isBlockElement(tagName)) {
        if (hasNestedBlock(element)) {
          // Inline wrapper around block content (e.g. <a><img></a>).
          visit(element, style);
        } else {
          pendingText += element.textContent ?? '';
        }
        continue;
      }

      // Every block-level element ends the current run of loose text.
      flushPendingText(style);

      if (tagName === 'IMG') {
        const img = element as HTMLImageElement;
        blocks.push(createImageBlock(img.src, img.alt, img.title || undefined));
        continue;
      }
      if (tagName === 'BR') {
        continue;
      }

      // Tags that map to 'normal' inherit the style of their container, so a
      // <p> inside a <blockquote> is still emitted as a quote.
      const ownStyle = getBlockStyle(tagName);
      const effectiveStyle = ownStyle !== 'normal' ? ownStyle : style;

      if (hasNestedBlock(element)) {
        visit(element, effectiveStyle);
        flushPendingText(effectiveStyle);
      } else {
        blocks.push(createTextBlock(element.textContent || '', effectiveStyle));
      }
    }
  };

  visit(doc.body, 'normal');
  flushPendingText('normal');

  // If no blocks were created, return empty content.
  if (blocks.length === 0) {
    return [createTextBlock()];
  }
  return blocks;
}
/**
 * Convert Portable Text blocks to an HTML string.
 *
 * Text blocks become `<p>`, `<h1>`-`<h6>` or `<blockquote>` elements (see
 * `getHtmlTag`); image blocks become `<img>` elements with the caption stored
 * in the `title` attribute. Span text is plain text in Portable Text, so it
 * is HTML-escaped before being placed into the markup. Blocks of any other
 * type are skipped. The joined result is passed through `sanitizeHtmlSync`.
 *
 * @param blocks - Blocks to serialize; `null`/`undefined`/empty yields `''`.
 * @returns Sanitized HTML with one element per line.
 */
export function portableTextToHtml(blocks: CustomPortableTextBlock[]): string {
  if (!blocks || blocks.length === 0) {
    return '';
  }

  const htmlParts: string[] = [];
  for (const block of blocks) {
    if (block._type === 'block') {
      const portableBlock = block as PortableTextBlock;
      // A missing style is treated as 'normal' for both the tag choice and
      // the empty-paragraph check below.
      const style = portableBlock.style || 'normal';
      const tag = getHtmlTag(style);
      const text = extractTextFromBlock(portableBlock);
      // Empty normal paragraphs are dropped; empty headings/quotes are kept.
      if (text.trim() || style !== 'normal') {
        htmlParts.push(`<${tag}>${escapeHtml(text)}</${tag}>`);
      }
    } else if (block._type === 'image') {
      // Only the fields that are serialized are read from the image block.
      const imgBlock = block as { src?: string; alt?: string; caption?: string };
      const alt = imgBlock.alt ? ` alt="${escapeHtml(imgBlock.alt)}"` : '';
      const title = imgBlock.caption ? ` title="${escapeHtml(imgBlock.caption)}"` : '';
      htmlParts.push(`<img src="${escapeHtml(imgBlock.src ?? '')}"${alt}${title} />`);
    }
  }

  // Apply final sanitization to ensure security.
  return sanitizeHtmlSync(htmlParts.join('\n'));
}
/**
 * Concatenate the text of every span child of a Portable Text block.
 * Children of any other type contribute nothing; a block without children
 * yields an empty string.
 */
function extractTextFromBlock(block: PortableTextBlock): string {
  const { children } = block;
  if (!children) {
    return '';
  }

  let combined = '';
  for (const child of children) {
    if (child._type === 'span') {
      combined += child.text || '';
    }
  }
  return combined;
}
/**
 * Report whether a tag name (matched case-insensitively) belongs to the set
 * of elements this module treats as block-level.
 */
function isBlockElement(tagName: string): boolean {
  switch (tagName.toUpperCase()) {
    case 'P':
    case 'DIV':
    case 'H1':
    case 'H2':
    case 'H3':
    case 'H4':
    case 'H5':
    case 'H6':
    case 'BLOCKQUOTE':
    case 'UL':
    case 'OL':
    case 'LI':
    case 'IMG':
    case 'BR':
      return true;
    default:
      return false;
  }
}
/**
 * Map an HTML tag name (case-insensitive) to a Portable Text block style.
 * Headings map to `h1`-`h6`, `blockquote` to `blockquote`, and every other
 * tag (including `p` and `div`) to `normal`.
 */
function getBlockStyle(tagName: string): string {
  const upperTag = tagName.toUpperCase();

  if (upperTag === 'BLOCKQUOTE') {
    return 'blockquote';
  }
  if (/^H[1-6]$/.test(upperTag)) {
    return upperTag.toLowerCase();
  }
  return 'normal';
}
/**
 * Map a Portable Text block style to the HTML tag used to render it.
 *
 * Heading and blockquote styles use the tag of the same name; `normal` and
 * any unrecognised style fall back to `p`. The lookup uses a `Set` rather
 * than a plain object so that style names matching inherited object members
 * (for example `constructor` or `__proto__`) cannot return a non-string.
 *
 * @param style - Block style name, e.g. `normal`, `h2`, `blockquote`.
 * @returns The tag name without angle brackets.
 */
function getHtmlTag(style: string): string {
  const sameNameTags = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote']);
  return sameNameTags.has(style) ? style : 'p';
}
/**
 * Escape a string for safe use in HTML text and in quoted attribute values.
 *
 * Replaces `&`, `<`, `>`, both quote characters and the non-breaking space
 * with entities. Quotes must be escaped because the result is interpolated
 * into double-quoted attributes (`alt="..."`, `title="..."`, `src="..."`).
 * The implementation is pure string processing, so it does not need a DOM.
 *
 * @param text - Plain text; `null`/`undefined` are treated as an empty string.
 * @returns The escaped text.
 */
function escapeHtml(text: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
    '\u00a0': '&nbsp;',
  };
  return String(text ?? '').replace(/[&<>"'\u00a0]/g, char => entities[char] ?? char);
}
/**
 * Regex-based HTML to Portable Text conversion that works without a DOM.
 *
 * The sanitized markup is split on `p`, `div`, `h1`-`h6`, `blockquote` and
 * `img` tags. Text between those tags is collected into text blocks styled
 * after the most recent opening tag; `<img>` tags with a `src` become image
 * blocks. Inline tags inside the text are stripped and common character
 * references are decoded so that blocks hold plain text.
 *
 * This is a basic implementation: nested block structure and inline marks
 * are not preserved, and unquoted attribute values are not recognised.
 *
 * @param html - Raw HTML. Empty or whitespace-only input yields a single
 *   empty text block.
 * @returns The converted blocks; always contains at least one block.
 */
export function parseHtmlToBlocks(html: string): CustomPortableTextBlock[] {
  if (!html || html.trim() === '') {
    return [createTextBlock()];
  }

  // Sanitize first.
  const sanitizedHtml = sanitizeHtmlSync(html);
  const blocks: CustomPortableTextBlock[] = [];

  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: '\u00a0',
  };

  // Decode the common named references and numeric references in one pass,
  // so that "&amp;lt;" becomes "&lt;" and is not decoded a second time.
  const decodeEntities = (value: string): string =>
    value.replace(
      /&(#x[0-9a-f]+|#\d+|amp|lt|gt|quot|apos|nbsp);/gi,
      (entity: string, name: string) => {
        const key = name.toLowerCase();
        if (key.startsWith('#')) {
          const codePoint = key.startsWith('#x')
            ? parseInt(key.slice(2), 16)
            : parseInt(key.slice(1), 10);
          return codePoint > 0 && codePoint <= 0x10ffff
            ? String.fromCodePoint(codePoint)
            : entity;
        }
        return namedEntities[key] ?? entity;
      }
    );

  // Read a quoted attribute; the closing quote must match the opening one so
  // that values such as alt="it's" are not cut short.
  const readAttribute = (tag: string, name: string): string | undefined => {
    const match = tag.match(new RegExp(`\\s${name}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, 'i'));
    const value = match?.[2];
    return value ? decodeEntities(value) : undefined;
  };

  // The word boundary keeps tags such as <pre> from being mistaken for <p>.
  const parts = sanitizedHtml
    .split(/(<\/?(?:p|div|h[1-6]|blockquote|img)\b[^>]*>)/i)
    .filter(part => part.trim().length > 0);

  const openingTag = /^<(h[1-6]|p|div|blockquote)\b[^>]*>$/i;
  const closingTag = /^<\/(?:p|div|h[1-6]|blockquote|img)\b[^>]*>$/i;
  const imageTag = /^<img\b[^>]*>$/i;

  let currentText = '';
  let currentStyle = 'normal';

  // Emit the collected text (if any) as a block in the current style.
  const flushText = (): void => {
    const text = currentText.trim();
    currentText = '';
    if (text) {
      blocks.push(createTextBlock(text, currentStyle));
    }
  };

  for (const part of parts) {
    const opening = part.match(openingTag);
    if (opening) {
      // Text collected before this tag belongs to the previous block.
      flushText();
      currentStyle = getBlockStyle(opening[1]);
    } else if (imageTag.test(part)) {
      // Flush first so text and image keep their document order.
      flushText();
      const src = readAttribute(part, 'src');
      if (src) {
        blocks.push(createImageBlock(src, readAttribute(part, 'alt'), readAttribute(part, 'title')));
      }
    } else if (closingTag.test(part)) {
      flushText();
      currentStyle = 'normal';
    } else {
      // Text content, possibly containing inline tags: keep only the text.
      currentText += decodeEntities(part.replace(/<[^>]*>/g, ''));
    }
  }

  // Handle remaining text.
  flushText();

  // If no blocks were created, return an empty block.
  if (blocks.length === 0) {
    return [createTextBlock()];
  }
  return blocks;
}
/**
 * Generate a random key of exactly nine lowercase base-36 characters.
 *
 * `Math.random().toString(36)` can produce fewer than nine digits after the
 * "0." prefix (and none at all for 0), so digits are accumulated until the
 * key is long enough. Keys are random, not guaranteed unique, and must not
 * be used where cryptographic randomness is required.
 */
function generateKey(): string {
  const keyLength = 9;
  let key = '';
  while (key.length < keyLength) {
    // slice(2) drops the leading "0." of the base-36 representation.
    key += Math.random().toString(36).slice(2);
  }
  return key.slice(0, keyLength);
}

View File

@@ -0,0 +1,97 @@
/**
* Portable Text Editor Schema Definition
* Defines the structure and capabilities of the editor
*/
import { defineSchema } from '@portabletext/editor';
import type { SchemaDefinition } from '@portabletext/editor';
// Schema handed to the Portable Text editor via `defineSchema`: the inline
// decorators, block styles, list kinds, annotations and block objects that
// content may use.
export const editorSchema: SchemaDefinition = defineSchema({
  // Inline formatting toggles.
  decorators: [
    { name: 'strong' },
    { name: 'em' },
    { name: 'underline' },
    { name: 'strike' },
    { name: 'code' },
  ],

  // Paragraph-level styles.
  styles: [
    { name: 'normal' },
    { name: 'h1' },
    { name: 'h2' },
    { name: 'h3' },
    { name: 'h4' },
    { name: 'h5' },
    { name: 'h6' },
    { name: 'blockquote' },
  ],

  // Supported list kinds.
  lists: [{ name: 'bullet' }, { name: 'number' }],

  // Inline annotations carrying data; a link stores its target in `href`.
  annotations: [
    {
      name: 'link',
      type: 'object',
      fields: [{ name: 'href', type: 'string' }],
    },
  ],

  // Non-text blocks that can appear between paragraphs.
  blockObjects: [
    {
      name: 'image',
      type: 'object',
      fields: [
        { name: 'src', type: 'string' },
        { name: 'alt', type: 'string' },
        { name: 'caption', type: 'string' },
        { name: 'width', type: 'number' },
        { name: 'height', type: 'number' },
      ],
    },
    {
      name: 'codeBlock',
      type: 'object',
      fields: [
        { name: 'code', type: 'string' },
        { name: 'language', type: 'string' },
      ],
    },
  ],
});
// Type exports for use in components.
// Alias for the static type of `editorSchema`, which is declared above with
// the `SchemaDefinition` annotation.
export type EditorSchema = typeof editorSchema;

View File

@@ -0,0 +1,169 @@
/**
* Portable Text schema definition matching current RichTextEditor functionality
*/
import type {
PortableTextBlock,
ArbitraryTypedObject,
PortableTextMarkDefinition,
PortableTextSpan
} from '@portabletext/types';
// Define custom marks (inline formatting).
// Each interface below extends PortableTextMarkDefinition and pins `_type`
// to one literal, so the five mark kinds can be told apart by `_type`.

// Mark definition tagged 'strong'.
export interface StrongMark extends PortableTextMarkDefinition {
_type: 'strong';
}
// Mark definition tagged 'em'.
export interface EmMark extends PortableTextMarkDefinition {
_type: 'em';
}
// Mark definition tagged 'underline'.
export interface UnderlineMark extends PortableTextMarkDefinition {
_type: 'underline';
}
// Mark definition tagged 'strike'.
export interface StrikeMark extends PortableTextMarkDefinition {
_type: 'strike';
}
// Mark definition tagged 'code'.
export interface CodeMark extends PortableTextMarkDefinition {
_type: 'code';
}
// Custom block types for images (future enhancement).
// An image is a non-text block identified by `_type: 'image'`; only `src` is
// required.
export interface ImageBlock extends ArbitraryTypedObject {
_type: 'image';
// Image URL.
src: string;
// Alternative text.
alt?: string;
// Caption text.
caption?: string;
// NOTE(review): presumably set while the image is still being processed or
// uploaded — confirm against the code that sets it.
isProcessing?: boolean;
// NOTE(review): presumably the source URL before processing — confirm.
originalUrl?: string;
}
// Schema configuration: the block styles, list kinds, inline marks and
// custom block types available in Portable Text content.
export const portableTextSchema = {
  // Paragraph-level styles: normal text, the six heading levels, and quotes.
  styles: [
    { title: 'Normal', value: 'normal' },
    ...Array.from({ length: 6 }, (_, index) => ({
      title: `Heading ${index + 1}`,
      value: `h${index + 1}`,
    })),
    { title: 'Quote', value: 'blockquote' },
  ],

  // Supported list kinds.
  lists: [
    { title: 'Bullet', value: 'bullet' },
    { title: 'Number', value: 'number' },
  ],

  // Inline formatting.
  marks: {
    // Simple on/off formatting.
    decorators: [
      { title: 'Strong', value: 'strong' },
      { title: 'Emphasis', value: 'em' },
      { title: 'Underline', value: 'underline' },
      { title: 'Strike', value: 'strike' },
      { title: 'Code', value: 'code' },
    ],

    // Marks that carry data; a link stores its target in `href`.
    annotations: [
      {
        title: 'URL',
        name: 'link',
        type: 'object',
        fields: [{ title: 'URL', name: 'href', type: 'url' }],
      },
    ],
  },

  // Non-text blocks.
  blockTypes: [
    {
      title: 'Image',
      name: 'image',
      type: 'object',
      fields: [
        { name: 'src', type: 'string', title: 'Image URL' },
        { name: 'alt', type: 'string', title: 'Alt Text' },
        { name: 'caption', type: 'string', title: 'Caption' },
        { name: 'isProcessing', type: 'boolean', title: 'Processing' },
        { name: 'originalUrl', type: 'string', title: 'Original URL' },
      ],
    },
  ],
};
// Type definitions for our Portable Text content.

// Any block that may appear in a document: a standard Portable Text block or
// a custom image block.
export type CustomPortableTextBlock = PortableTextBlock | ImageBlock;
// Union of the five custom mark definitions declared in this file.
export type CustomMarkDefinition =
| StrongMark
| EmMark
| UnderlineMark
| StrikeMark
| CodeMark;
// A Portable Text span with an optional list of mark names/keys.
export type CustomPortableTextSpan = PortableTextSpan & {
marks?: string[];
};
/**
 * Build a text block that holds a single unformatted span.
 *
 * @param text - Span text; defaults to an empty string.
 * @param style - Block style; defaults to `normal`.
 * @returns A block with freshly generated keys, no mark definitions and no
 *   marks on its span.
 */
export function createTextBlock(
  text: string = '',
  style: string = 'normal'
): PortableTextBlock {
  const blockKey = generateKey();
  const span: PortableTextSpan = {
    _type: 'span',
    _key: generateKey(),
    text,
    marks: [],
  };

  return {
    _type: 'block',
    _key: blockKey,
    style,
    markDefs: [],
    children: [span],
  };
}
/**
 * Build an image block with a freshly generated key.
 *
 * Optional arguments that are omitted are stored as `undefined` properties
 * on the returned object.
 *
 * @param src - Image URL.
 * @param alt - Alternative text.
 * @param caption - Caption text.
 * @param isProcessing - Stored unchanged on the block.
 * @param originalUrl - Stored unchanged on the block.
 */
export function createImageBlock(
  src: string,
  alt?: string,
  caption?: string,
  isProcessing?: boolean,
  originalUrl?: string
): ImageBlock {
  const image: ImageBlock = {
    _type: 'image',
    _key: generateKey(),
    src,
    alt,
    caption,
    isProcessing,
    originalUrl,
  };
  return image;
}
/**
 * Generate a random key of exactly nine lowercase base-36 characters.
 *
 * `Math.random().toString(36)` can produce fewer than nine digits after the
 * "0." prefix (and none at all for 0), so digits are accumulated until the
 * key is long enough. Keys are random, not guaranteed unique, and must not
 * be used where cryptographic randomness is required.
 */
function generateKey(): string {
  const keyLength = 9;
  let key = '';
  while (key.length < keyLength) {
    // slice(2) drops the leading "0." of the base-36 representation.
    key += Math.random().toString(36).slice(2);
  }
  return key.slice(0, keyLength);
}
// Default empty content: one empty paragraph in the 'normal' style.
// The array is created once when the module loads, so every importer
// receives the same array instance and the same block keys.
export const emptyPortableTextContent: CustomPortableTextBlock[] = [createTextBlock()];

View File

@@ -0,0 +1,32 @@
/**
* Progress tracking utilities for bulk operations
*/
// A single progress event recorded for a session (see sendProgressUpdate).
// Only `type`, `current`, `total` and `message` are required; the remaining
// fields are optional details whose producers are not visible in this file.
export interface ProgressUpdate {
// Event kind.
type: 'progress' | 'completed' | 'error';
// NOTE(review): presumably the number of items handled so far — confirm.
current: number;
// NOTE(review): presumably the total number of items — confirm.
total: number;
// Status text for this event.
message: string;
url?: string;
title?: string;
author?: string;
wordCount?: number;
totalWordCount?: number;
// NOTE(review): presumably populated when type is 'error' — confirm.
error?: string;
// The fields below are untyped (`any`); their shape is defined by the
// routes that send them.
combinedStory?: any;
results?: any[];
summary?: any;
hasImages?: boolean;
imageWarnings?: string[];
}
// Process-wide, in-memory progress log keyed by session id; each entry is the
// ordered list of updates sent for that session. In production this should
// live in Redis or a database instead.
export const progressStore: Map<string, ProgressUpdate[]> = new Map();
/**
 * Append a progress update to a session's log in `progressStore`, creating
 * the log on the first update for that session. Intended for use by other
 * routes.
 *
 * @param sessionId - Key identifying the session.
 * @param update - The update to record.
 */
export function sendProgressUpdate(sessionId: string, update: ProgressUpdate) {
  let sessionUpdates = progressStore.get(sessionId);
  if (sessionUpdates === undefined) {
    sessionUpdates = [];
    progressStore.set(sessionId, sessionUpdates);
  }
  sessionUpdates.push(update);
}

View File

@@ -129,8 +129,7 @@ export async function cleanHtml(html: string): Promise<string> {
const cheerio = await import('cheerio');
const $ = cheerio.load(html, {
// Preserve self-closing tags like <br>
xmlMode: false,
decodeEntities: false
xmlMode: false
});
// Remove dangerous elements

View File

@@ -182,7 +182,7 @@ export function extractLinkText(
$: cheerio.CheerioAPI,
config: LinkTextStrategy
): string {
let searchScope: cheerio.Cheerio<cheerio.AnyNode>;
let searchScope: any;
if (config.searchWithin) {
searchScope = $(config.searchWithin);
@@ -196,7 +196,7 @@ export function extractLinkText(
config.nearText.forEach(text => {
if (foundText) return; // Already found
searchScope.find('*').each((_, elem) => {
searchScope.find('*').each((_: any, elem: any) => {
const $elem = $(elem);
const elemText = $elem.text().toLowerCase();