274
frontend/src/lib/portabletext/conversion.ts
Normal file
274
frontend/src/lib/portabletext/conversion.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
/**
|
||||
* Conversion utilities between HTML and Portable Text
|
||||
* Maintains compatibility with existing sanitization strategy
|
||||
*/
|
||||
|
||||
import type { PortableTextBlock } from '@portabletext/types';
|
||||
import type { CustomPortableTextBlock } from './schema';
|
||||
import { createTextBlock, createImageBlock } from './schema';
|
||||
import { sanitizeHtmlSync } from '../sanitization';
|
||||
|
||||
/**
 * Convert an HTML string into Portable Text blocks.
 *
 * The input is first passed through `sanitizeHtmlSync`, then parsed with
 * `DOMParser` (so a DOM environment is required) and walked recursively:
 *
 * - Text found directly inside a block-level element (see `isBlockElement`)
 *   becomes one text block whose style comes from `getBlockStyle`.
 * - Container elements that hold other block elements (e.g. `UL > LI`,
 *   `BLOCKQUOTE > P`) are descended into instead of being flattened, so their
 *   text is emitted exactly once. A nested `normal` element inherits the
 *   nearest non-normal ancestor style.
 * - Text and inline elements that sit outside any block element are collected
 *   into a `normal` block instead of being discarded.
 * - `<img>` becomes an image block; `<br>` becomes a newline in the text.
 *
 * Inline formatting (marks) and list metadata are not converted; only the
 * plain text is kept. Whitespace-only blocks are skipped, matching
 * `parseHtmlToBlocks`.
 *
 * @param html - Raw HTML; may be empty.
 * @returns At least one block. Empty or contentless input yields a single
 *   empty text block.
 */
export function htmlToPortableText(html: string): CustomPortableTextBlock[] {
  if (!html || html.trim() === '') {
    return [createTextBlock()];
  }

  // Sanitize before parsing so only markup allowed by the existing strategy is converted.
  const sanitizedHtml = sanitizeHtmlSync(html);
  const doc = new DOMParser().parseFromString(sanitizedHtml, 'text/html');

  const blocks: CustomPortableTextBlock[] = [];

  // Text collected since the last block boundary, and the style it will get.
  let pendingText = '';
  let pendingStyle = 'normal';

  const flushPendingText = (): void => {
    const text = pendingText.trim();
    if (text) {
      blocks.push(createTextBlock(text, pendingStyle));
    }
    pendingText = '';
  };

  const visit = (node: Node, inheritedStyle: string): void => {
    if (node.nodeType === Node.TEXT_NODE) {
      pendingText += node.textContent ?? '';
      // The style only changes at block boundaries, all of which flush first,
      // so every piece of pending text shares the same style.
      pendingStyle = inheritedStyle;
      return;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return;
    }

    const element = node as Element;
    const tagName = element.tagName.toUpperCase();

    if (tagName === 'IMG') {
      const img = element as HTMLImageElement;
      // Emit preceding text first so document order is preserved.
      flushPendingText();
      blocks.push(createImageBlock(img.src, img.alt, img.title || undefined));
      return;
    }

    if (tagName === 'BR') {
      // A line break stays inside the current block as a newline.
      pendingText += '\n';
      return;
    }

    const children = Array.from(element.childNodes);

    if (!isBlockElement(tagName)) {
      // Inline element: its content belongs to the surrounding block.
      for (const child of children) {
        visit(child, inheritedStyle);
      }
      return;
    }

    // Block element: close whatever came before, process the content, close again.
    flushPendingText();
    const ownStyle = getBlockStyle(tagName);
    const childStyle = ownStyle === 'normal' ? inheritedStyle : ownStyle;
    for (const child of children) {
      visit(child, childStyle);
    }
    flushPendingText();
  };

  for (const child of Array.from(doc.body.childNodes)) {
    visit(child, 'normal');
  }
  flushPendingText();

  // Always hand back something editable.
  return blocks.length > 0 ? blocks : [createTextBlock()];
}
|
||||
|
||||
/**
 * Convert Portable Text blocks into an HTML string.
 *
 * Text blocks are rendered as a single element chosen by `getHtmlTag`; image
 * blocks are rendered as `<img>` with `alt` and, when a caption is present,
 * a `title` attribute. The joined markup is passed through `sanitizeHtmlSync`
 * before being returned.
 *
 * Span text is treated as plain text: it is HTML-escaped before being placed
 * in the element, and newlines are rendered as `<br />`. Marks are not
 * rendered.
 *
 * @param blocks - Blocks to serialise; `null`/`undefined`/empty yields `''`.
 * @returns Sanitized HTML, one element per line.
 */
export function portableTextToHtml(blocks: CustomPortableTextBlock[]): string {
  if (!blocks || blocks.length === 0) {
    return '';
  }

  // Attribute values are wrapped in double quotes below, so make sure a quote
  // inside the value can never terminate the attribute, regardless of which
  // characters escapeHtml itself covers.
  const escapeAttribute = (value: string): string =>
    escapeHtml(value).replace(/"/g, '&quot;');

  const htmlParts: string[] = [];

  for (const block of blocks) {
    if (block._type === 'block') {
      const portableBlock = block as PortableTextBlock;
      // Resolve the default once so the emptiness check and the tag agree.
      const style = portableBlock.style || 'normal';
      const text = extractTextFromBlock(portableBlock);

      // Empty normal paragraphs are dropped; empty headings/quotes are kept.
      if (text.trim() || style !== 'normal') {
        const tag = getHtmlTag(style);
        const content = escapeHtml(text).replace(/\r?\n/g, '<br />');
        htmlParts.push(`<${tag}>${content}</${tag}>`);
      }
    } else if (block._type === 'image') {
      const image = block as { src?: string; alt?: string; caption?: string };
      if (!image.src) {
        // An image without a source cannot be rendered meaningfully.
        continue;
      }
      const alt = image.alt ? ` alt="${escapeAttribute(image.alt)}"` : '';
      const title = image.caption ? ` title="${escapeAttribute(image.caption)}"` : '';
      htmlParts.push(`<img src="${escapeAttribute(image.src)}"${alt}${title} />`);
    }
  }

  // Final sanitization pass over the generated markup.
  return sanitizeHtmlSync(htmlParts.join('\n'));
}
|
||||
|
||||
/**
 * Concatenate the text of every `span` child of a block.
 *
 * Children whose `_type` is not `'span'` contribute nothing, as do spans
 * without text. A block with no `children` yields an empty string.
 */
function extractTextFromBlock(block: PortableTextBlock): string {
  const children = block.children;
  if (!children) {
    return '';
  }

  let combined = '';
  for (const child of children) {
    if (child._type === 'span') {
      combined += child.text || '';
    }
  }
  return combined;
}
|
||||
|
||||
/**
 * Tags that start a new Portable Text block during HTML conversion.
 * Built once at module load instead of on every lookup.
 */
const BLOCK_LEVEL_TAGS: ReadonlySet<string> = new Set([
  'P', 'DIV', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
  'BLOCKQUOTE', 'UL', 'OL', 'LI', 'IMG', 'BR',
]);

/**
 * Determine whether an HTML tag is treated as a block-level element.
 *
 * @param tagName - Tag name in any letter case.
 * @returns `true` when the upper-cased name is in the block-level tag set.
 */
function isBlockElement(tagName: string): boolean {
  return BLOCK_LEVEL_TAGS.has(tagName.toUpperCase());
}
|
||||
|
||||
/**
 * Upper-case HTML tag name -> Portable Text block style.
 * Built once at module load; a Map avoids any accidental match on
 * inherited object properties.
 */
const STYLE_BY_TAG: ReadonlyMap<string, string> = new Map([
  ['P', 'normal'],
  ['DIV', 'normal'],
  ['H1', 'h1'],
  ['H2', 'h2'],
  ['H3', 'h3'],
  ['H4', 'h4'],
  ['H5', 'h5'],
  ['H6', 'h6'],
  ['BLOCKQUOTE', 'blockquote'],
]);

/**
 * Get the Portable Text block style for an HTML tag.
 *
 * @param tagName - Tag name in any letter case.
 * @returns The mapped style, or `'normal'` for any tag without a mapping.
 */
function getBlockStyle(tagName: string): string {
  return STYLE_BY_TAG.get(tagName.toUpperCase()) || 'normal';
}
|
||||
|
||||
/**
 * Portable Text block style -> HTML tag name.
 *
 * A Map is used instead of an object literal so that style values which
 * happen to name inherited object members (`constructor`, `toString`,
 * `__proto__`, ...) are not resolved to a non-string value.
 */
const TAG_BY_STYLE: ReadonlyMap<string, string> = new Map([
  ['normal', 'p'],
  ['h1', 'h1'],
  ['h2', 'h2'],
  ['h3', 'h3'],
  ['h4', 'h4'],
  ['h5', 'h5'],
  ['h6', 'h6'],
  ['blockquote', 'blockquote'],
]);

/**
 * Get the HTML tag used to render a Portable Text block style.
 *
 * @param style - Block style; matched case-sensitively.
 * @returns The mapped tag, or `'p'` for any unknown style.
 */
function getHtmlTag(style: string): string {
  return TAG_BY_STYLE.get(style) || 'p';
}
|
||||
|
||||
/**
 * Escape text for safe inclusion in HTML element content and in quoted
 * attribute values.
 *
 * Escapes `&`, `<`, `>`, `"` and `'`. Quotes are included because callers
 * place the result inside `"..."` attributes, where an unescaped quote would
 * end the attribute. Implemented with plain string replacement, so it does
 * not need a `document`.
 *
 * @param text - Text to escape; `null`/`undefined` are treated as `''`.
 * @returns The escaped text.
 */
function escapeHtml(text: string): string {
  return String(text ?? '').replace(/[&<>"']/g, (character) => {
    switch (character) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      case '"':
        return '&quot;';
      default:
        return '&#39;';
    }
  });
}
|
||||
|
||||
/**
 * Regex-based HTML to Portable Text conversion that does not need a DOM.
 *
 * The input is sanitized with `sanitizeHtmlSync`, then split on the tags
 * `p`, `div`, `h1`-`h6`, `blockquote` and `img`. Text between those tags
 * becomes text blocks (style from `getBlockStyle`), and each `<img>` with a
 * `src` becomes an image block, in document order.
 *
 * Block text is plain text: remaining inline tags are removed, `<br>` becomes
 * a newline and the basic character entities are decoded. Whitespace-only
 * text produces no block.
 *
 * Limitations: this is a flat scan, so a nested block element replaces the
 * style of its parent (e.g. `<blockquote><p>` yields a `normal` block), and
 * inline formatting is not converted to marks.
 *
 * @param html - Raw HTML; may be empty.
 * @returns At least one block. Empty or contentless input yields a single
 *   empty text block.
 */
export function parseHtmlToBlocks(html: string): CustomPortableTextBlock[] {
  if (!html || html.trim() === '') {
    return [createTextBlock()];
  }

  // Sanitize first
  const sanitizedHtml = sanitizeHtmlSync(html);

  const blocks: CustomPortableTextBlock[] = [];

  // Decode the basic entities; `&amp;` goes last so `&amp;lt;` stays `&lt;`.
  const decodeEntities = (value: string): string =>
    value
      .replace(/&nbsp;/g, '\u00a0')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&apos;/g, "'")
      .replace(/&amp;/g, '&');

  // Read a quoted attribute. The closing quote must match the opening one,
  // so a value like alt="Bob's cat" is returned in full.
  const readAttribute = (tag: string, name: string): string | undefined => {
    const pattern = new RegExp(`\\s${name}\\s*=\\s*(["'])([\\s\\S]*?)\\1`, 'i');
    const value = pattern.exec(tag)?.[2];
    return value ? decodeEntities(value) : undefined;
  };

  let currentText = '';
  let currentStyle = 'normal';

  // Turn the text collected so far into a block (if it has any content).
  const flushText = (): void => {
    const text = decodeEntities(
      currentText.replace(/<br\s*\/?>/gi, '\n').replace(/<[^>]*>/g, '')
    ).trim();
    if (text) {
      blocks.push(createTextBlock(text, currentStyle));
    }
    currentText = '';
  };

  // The lookahead keeps `<p` from matching `<pre>`, `<img` from matching
  // `<image>`, and so on. Because the pattern has exactly one capture group,
  // the result alternates: even indexes are text, odd indexes are tags.
  const parts = sanitizedHtml.split(
    /(<\/?(?:p|div|h[1-6]|blockquote|img)(?=[\s/>])[^>]*>)/i
  );

  for (let index = 0; index < parts.length; index++) {
    const part = parts[index] ?? '';

    if (index % 2 === 0) {
      // Text content (may still contain inline tags; removed on flush).
      currentText += part;
      continue;
    }

    // Every recognised tag is a boundary: emit the text that precedes it.
    flushText();

    if (part.startsWith('</')) {
      // End tag - following text is outside this block.
      currentStyle = 'normal';
      continue;
    }

    const tagName = (/^<([a-z0-9]+)/i.exec(part)?.[1] ?? '').toLowerCase();
    if (tagName === 'img') {
      const src = readAttribute(part, 'src');
      if (src) {
        blocks.push(
          createImageBlock(src, readAttribute(part, 'alt'), readAttribute(part, 'title'))
        );
      }
    } else {
      // Start of block element
      currentStyle = getBlockStyle(tagName);
    }
  }

  // Handle remaining text
  flushText();

  // If no blocks created, return empty block
  return blocks.length > 0 ? blocks : [createTextBlock()];
}
|
||||
|
||||
/**
 * Generate a random 9-character key made of `0-9` and `a-z`.
 *
 * Each character is drawn independently, so the key always has the full
 * length. Slicing `Math.random().toString(36)` can return fewer characters,
 * or none at all, when the random value has a short base-36 expansion.
 *
 * Uses `Math.random`, so keys are not cryptographically secure and
 * uniqueness is probabilistic.
 */
function generateKey(): string {
  const alphabet = '0123456789abcdefghijklmnopqrstuvwxyz';
  let key = '';
  for (let i = 0; i < 9; i++) {
    key += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return key;
}
|
||||
97
frontend/src/lib/portabletext/editorSchema.ts
Normal file
97
frontend/src/lib/portabletext/editorSchema.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Portable Text Editor Schema Definition
|
||||
* Defines the structure and capabilities of the editor
|
||||
*/
|
||||
|
||||
import { defineSchema } from '@portabletext/editor';
|
||||
import type { SchemaDefinition } from '@portabletext/editor';
|
||||
|
||||
export const editorSchema: SchemaDefinition = defineSchema({
  // Inline formatting toggles offered by the editor.
  decorators: [
    { name: 'strong' },
    { name: 'em' },
    { name: 'underline' },
    { name: 'strike' },
    { name: 'code' },
  ],

  // Paragraph-level styles.
  styles: [
    { name: 'normal' },
    { name: 'h1' },
    { name: 'h2' },
    { name: 'h3' },
    { name: 'h4' },
    { name: 'h5' },
    { name: 'h6' },
    { name: 'blockquote' },
  ],

  // Supported list kinds.
  lists: [{ name: 'bullet' }, { name: 'number' }],

  // Annotations: marks that carry data (here, a link with its target).
  annotations: [
    {
      name: 'link',
      type: 'object',
      fields: [{ name: 'href', type: 'string' }],
    },
  ],

  // Non-text blocks that can be placed between text blocks.
  blockObjects: [
    {
      name: 'image',
      type: 'object',
      fields: [
        { name: 'src', type: 'string' },
        { name: 'alt', type: 'string' },
        { name: 'caption', type: 'string' },
        { name: 'width', type: 'number' },
        { name: 'height', type: 'number' },
      ],
    },
    {
      name: 'codeBlock',
      type: 'object',
      fields: [
        { name: 'code', type: 'string' },
        { name: 'language', type: 'string' },
      ],
    },
  ],
});

/** Type of the schema object above, exported for use in components. */
export type EditorSchema = typeof editorSchema;
|
||||
169
frontend/src/lib/portabletext/schema.ts
Normal file
169
frontend/src/lib/portabletext/schema.ts
Normal file
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Portable Text schema definition matching current RichTextEditor functionality
|
||||
*/
|
||||
|
||||
import type {
|
||||
PortableTextBlock,
|
||||
ArbitraryTypedObject,
|
||||
PortableTextMarkDefinition,
|
||||
PortableTextSpan
|
||||
} from '@portabletext/types';
|
||||
|
||||
// Mark definitions for inline formatting. Each one only pins the `_type`
// discriminant on top of PortableTextMarkDefinition.
// NOTE(review): in Portable Text, simple decorators are usually plain strings
// in a span's `marks` array rather than entries in `markDefs` — confirm these
// interfaces are actually needed by consumers.

/** Bold text. */
export interface StrongMark extends PortableTextMarkDefinition {
  _type: 'strong';
}

/** Emphasised (italic) text. */
export interface EmMark extends PortableTextMarkDefinition {
  _type: 'em';
}

/** Underlined text. */
export interface UnderlineMark extends PortableTextMarkDefinition {
  _type: 'underline';
}

/** Struck-through text. */
export interface StrikeMark extends PortableTextMarkDefinition {
  _type: 'strike';
}

/** Inline code. */
export interface CodeMark extends PortableTextMarkDefinition {
  _type: 'code';
}
|
||||
|
||||
/**
 * Custom block object representing an image (marked in the original as a
 * future enhancement).
 */
export interface ImageBlock extends ArbitraryTypedObject {
  /** Discriminant identifying this block as an image. */
  _type: 'image';
  /** Image URL. */
  src: string;
  /** Alternative text. */
  alt?: string;
  /** Caption text. */
  caption?: string;
  /** Processing flag — presumably set while the image is being handled; confirm with callers. */
  isProcessing?: boolean;
  /** Original URL — presumably the source location before processing; confirm with callers. */
  originalUrl?: string;
}
|
||||
|
||||
/**
 * Schema configuration: the block styles, list types, marks and custom block
 * types available, each with a human-readable title.
 */
export const portableTextSchema = {
  // Paragraph and heading styles.
  styles: [
    { title: 'Normal', value: 'normal' },
    { title: 'Heading 1', value: 'h1' },
    { title: 'Heading 2', value: 'h2' },
    { title: 'Heading 3', value: 'h3' },
    { title: 'Heading 4', value: 'h4' },
    { title: 'Heading 5', value: 'h5' },
    { title: 'Heading 6', value: 'h6' },
    { title: 'Quote', value: 'blockquote' },
  ],

  // Supported list kinds.
  lists: [
    { title: 'Bullet', value: 'bullet' },
    { title: 'Number', value: 'number' },
  ],

  // Inline formatting.
  marks: {
    // Simple on/off formatting.
    decorators: [
      { title: 'Strong', value: 'strong' },
      { title: 'Emphasis', value: 'em' },
      { title: 'Underline', value: 'underline' },
      { title: 'Strike', value: 'strike' },
      { title: 'Code', value: 'code' },
    ],
    // Marks that carry data, such as a link target.
    annotations: [
      {
        title: 'URL',
        name: 'link',
        type: 'object',
        fields: [{ title: 'URL', name: 'href', type: 'url' }],
      },
    ],
  },

  // Non-text block types.
  blockTypes: [
    {
      title: 'Image',
      name: 'image',
      type: 'object',
      fields: [
        { name: 'src', type: 'string', title: 'Image URL' },
        { name: 'alt', type: 'string', title: 'Alt Text' },
        { name: 'caption', type: 'string', title: 'Caption' },
        { name: 'isProcessing', type: 'boolean', title: 'Processing' },
        { name: 'originalUrl', type: 'string', title: 'Original URL' },
      ],
    },
  ],
};
|
||||
|
||||
/** Any block that may appear in our content: a standard text block or an image block. */
export type CustomPortableTextBlock = PortableTextBlock | ImageBlock;

/** Union of the inline-formatting mark definitions declared in this module. */
export type CustomMarkDefinition =
  | StrongMark
  | EmMark
  | UnderlineMark
  | StrikeMark
  | CodeMark;

/** A text span whose `marks` is an optional list of strings. */
export type CustomPortableTextSpan = PortableTextSpan & { marks?: string[] };
|
||||
|
||||
/**
 * Build a text block containing a single unmarked span.
 *
 * @param text - Span text; defaults to an empty string.
 * @param style - Block style; defaults to `'normal'`.
 * @returns A new block with freshly generated keys for the block and its span.
 */
export function createTextBlock(
  text: string = '',
  style: string = 'normal'
): PortableTextBlock {
  // Keys are drawn in the same order as before: block first, then span.
  const blockKey = generateKey();
  const spanKey = generateKey();

  const span = { _type: 'span', _key: spanKey, text, marks: [] };

  return {
    _type: 'block',
    _key: blockKey,
    style,
    markDefs: [],
    children: [span],
  };
}
|
||||
|
||||
/**
 * Build an image block with a freshly generated key.
 *
 * Optional arguments that are omitted are stored as `undefined` properties.
 *
 * @param src - Image URL.
 * @param alt - Alternative text.
 * @param caption - Caption text.
 * @param isProcessing - Processing flag, stored as given.
 * @param originalUrl - Original URL, stored as given.
 * @returns The new image block.
 */
export function createImageBlock(
  src: string,
  alt?: string,
  caption?: string,
  isProcessing?: boolean,
  originalUrl?: string
): ImageBlock {
  const image: ImageBlock = {
    _type: 'image',
    _key: generateKey(),
    src,
    alt,
    caption,
    isProcessing,
    originalUrl,
  };
  return image;
}
|
||||
|
||||
/**
 * Generate a random 9-character key made of `0-9` and `a-z`.
 *
 * Each character is drawn independently, so the key always has the full
 * length. Slicing `Math.random().toString(36)` can return fewer characters,
 * or none at all, when the random value has a short base-36 expansion.
 *
 * The alphabet is kept inside the function because this helper is reached
 * during module evaluation (via `emptyPortableTextContent`); a module-level
 * constant declared here would not be initialised yet at that point.
 *
 * Uses `Math.random`, so keys are not cryptographically secure and
 * uniqueness is probabilistic.
 */
function generateKey(): string {
  const alphabet = '0123456789abcdefghijklmnopqrstuvwxyz';
  let key = '';
  for (let i = 0; i < 9; i++) {
    key += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return key;
}
|
||||
|
||||
/**
 * Default empty content: a single empty `normal` text block.
 *
 * This is one shared array created at module load; copy it before mutating.
 */
export const emptyPortableTextContent: CustomPortableTextBlock[] = [createTextBlock()];
|
||||
32
frontend/src/lib/progress.ts
Normal file
32
frontend/src/lib/progress.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
/**
|
||||
* Progress tracking utilities for bulk operations
|
||||
*/
|
||||
|
||||
/**
 * One progress event for a bulk operation.
 *
 * Only `type`, `current`, `total` and `message` are required. The meaning of
 * the optional fields is inferred from their names — confirm with the
 * producers of these events.
 */
export interface ProgressUpdate {
  /** Event kind. */
  type: 'progress' | 'completed' | 'error';
  /** Presumably the number of items handled so far. */
  current: number;
  /** Presumably the total number of items. */
  total: number;
  /** Human-readable status text. */
  message: string;

  // Optional details about the item this event refers to.
  url?: string;
  title?: string;
  author?: string;
  wordCount?: number;
  totalWordCount?: number;

  /** Error description. */
  error?: string;

  // Untyped result payloads; their shapes are not defined here.
  combinedStory?: any;
  results?: any[];
  summary?: any;

  // Image-related information.
  hasImages?: boolean;
  imageWarnings?: string[];
}
|
||||
|
||||
/**
 * Process-wide, in-memory progress log: session id -> updates in the order
 * they were sent. The original note recommends Redis or a database for
 * production use.
 */
export const progressStore: Map<string, ProgressUpdate[]> = new Map();
|
||||
|
||||
/**
 * Append a progress update to a session's log in `progressStore`, creating
 * the log on first use. Intended to be called by other routes.
 *
 * @param sessionId - Key identifying the session.
 * @param update - Update to record.
 */
export function sendProgressUpdate(sessionId: string, update: ProgressUpdate) {
  const history = progressStore.get(sessionId);
  if (history !== undefined) {
    history.push(update);
    return;
  }
  // First update for this session: start its log with this entry.
  progressStore.set(sessionId, [update]);
}
|
||||
@@ -129,8 +129,7 @@ export async function cleanHtml(html: string): Promise<string> {
|
||||
const cheerio = await import('cheerio');
|
||||
const $ = cheerio.load(html, {
|
||||
// Preserve self-closing tags like <br>
|
||||
xmlMode: false,
|
||||
decodeEntities: false
|
||||
xmlMode: false
|
||||
});
|
||||
|
||||
// Remove dangerous elements
|
||||
|
||||
@@ -182,7 +182,7 @@ export function extractLinkText(
|
||||
$: cheerio.CheerioAPI,
|
||||
config: LinkTextStrategy
|
||||
): string {
|
||||
let searchScope: cheerio.Cheerio<cheerio.AnyNode>;
|
||||
let searchScope: any;
|
||||
|
||||
if (config.searchWithin) {
|
||||
searchScope = $(config.searchWithin);
|
||||
@@ -196,7 +196,7 @@ export function extractLinkText(
|
||||
config.nearText.forEach(text => {
|
||||
if (foundText) return; // Already found
|
||||
|
||||
searchScope.find('*').each((_, elem) => {
|
||||
searchScope.find('*').each((_: any, elem: any) => {
|
||||
const $elem = $(elem);
|
||||
const elemText = $elem.text().toLowerCase();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user