embedded image finishing

This commit is contained in:
Stefan Hardegger
2025-09-17 10:28:35 +02:00
parent e5596b5a17
commit c0b3ae3b72
8 changed files with 740 additions and 20 deletions

View File

@@ -188,29 +188,47 @@ async function processCombinedMode(
// Check content size to prevent response size issues
const combinedContentString = combinedContent.join('\n');
const contentSizeInMB = new Blob([combinedContentString]).size / (1024 * 1024);
console.log(`Combined content size: ${contentSizeInMB.toFixed(2)} MB`);
console.log(`Combined content character length: ${combinedContentString.length}`);
console.log(`Combined content parts count: ${combinedContent.length}`);
// Handle content truncation if needed
let finalContent = contentSizeInMB > 10 ?
combinedContentString.substring(0, Math.floor(combinedContentString.length * (10 / contentSizeInMB))) + '\n\n<!-- Content truncated due to size limit -->' :
combinedContentString;
let finalSummary = contentSizeInMB > 10 ? baseSummary + ' (Content truncated due to size limit)' : baseSummary;
// Check if combined content has images and mark for processing
const hasImages = /<img[^>]+src=['"'][^'"']*['"][^>]*>/i.test(finalContent);
if (hasImages) {
finalSummary += ' (Contains embedded images - will be processed after story creation)';
console.log(`Combined story contains embedded images - will need processing after creation`);
}
// Return the combined story data via progress update
const combinedStory = {
title: baseTitle,
author: baseAuthor,
content: contentSizeInMB > 10 ?
combinedContentString.substring(0, Math.floor(combinedContentString.length * (10 / contentSizeInMB))) + '\n\n<!-- Content truncated due to size limit -->' :
combinedContentString,
summary: contentSizeInMB > 10 ? baseSummary + ' (Content truncated due to size limit)' : baseSummary,
content: finalContent,
summary: finalSummary,
sourceUrl: baseSourceUrl,
tags: Array.from(combinedTags)
tags: Array.from(combinedTags),
hasImages: hasImages
};
// Send completion notification for combine mode
let completionMessage = `Combined scraping completed: ${totalWordCount.toLocaleString()} words from ${importedCount} stories`;
if (hasImages) {
completionMessage += ` (embedded images will be processed when story is created)`;
}
await sendProgressUpdate(sessionId, {
type: 'completed',
current: urls.length,
total: urls.length,
message: `Combined scraping completed: ${totalWordCount.toLocaleString()} words from ${importedCount} stories`,
message: completionMessage,
totalWordCount: totalWordCount,
combinedStory: combinedStory
});
@@ -346,7 +364,62 @@ async function processIndividualMode(
}
const createdStory = await createResponse.json();
// Process embedded images if content contains images
let imageProcessingWarnings: string[] = [];
const hasImages = /<img[^>]+src=['"'][^'"']*['"][^>]*>/i.test(scrapedStory.content);
if (hasImages) {
try {
console.log(`Processing embedded images for story: ${createdStory.id}`);
const imageProcessUrl = `http://backend:8080/api/stories/${createdStory.id}/process-content-images`;
const imageProcessResponse = await fetch(imageProcessUrl, {
method: 'POST',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
body: JSON.stringify({ htmlContent: scrapedStory.content }),
});
if (imageProcessResponse.ok) {
const imageResult = await imageProcessResponse.json();
if (imageResult.hasWarnings && imageResult.warnings) {
imageProcessingWarnings = imageResult.warnings;
console.log(`Image processing completed with warnings for story ${createdStory.id}:`, imageResult.warnings);
} else {
console.log(`Image processing completed successfully for story ${createdStory.id}. Downloaded ${imageResult.downloadedImages?.length || 0} images.`);
}
// Update story content with processed images
if (imageResult.processedContent && imageResult.processedContent !== scrapedStory.content) {
const updateUrl = `http://backend:8080/api/stories/${createdStory.id}`;
const updateResponse = await fetch(updateUrl, {
method: 'PUT',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
body: JSON.stringify({
contentHtml: imageResult.processedContent
}),
});
if (!updateResponse.ok) {
console.warn(`Failed to update story content after image processing for ${createdStory.id}`);
imageProcessingWarnings.push('Failed to update story content with processed images');
}
}
} else {
console.warn(`Image processing failed for story ${createdStory.id}:`, imageProcessResponse.status);
imageProcessingWarnings.push('Image processing failed');
}
} catch (error) {
console.error(`Error processing images for story ${createdStory.id}:`, error);
imageProcessingWarnings.push(`Image processing error: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
results.push({
url: trimmedUrl,
status: 'imported',
@@ -356,17 +429,24 @@ async function processIndividualMode(
});
importedCount++;
console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);
console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})${hasImages ? ` with ${imageProcessingWarnings.length > 0 ? 'warnings' : 'successful image processing'}` : ''}`);
// Send progress update for successful import
let progressMessage = `Imported "${scrapedStory.title}" by ${scrapedStory.author}`;
if (hasImages) {
progressMessage += imageProcessingWarnings.length > 0 ? ' (with image warnings)' : ' (with images)';
}
await sendProgressUpdate(sessionId, {
type: 'progress',
current: i + 1,
total: urls.length,
message: `Imported "${scrapedStory.title}" by ${scrapedStory.author}`,
message: progressMessage,
url: trimmedUrl,
title: scrapedStory.title,
author: scrapedStory.author
author: scrapedStory.author,
hasImages: hasImages,
imageWarnings: imageProcessingWarnings
});
} catch (error) {

View File

@@ -19,6 +19,9 @@ export async function POST(request: NextRequest) {
const scraper = new StoryScraper();
const story = await scraper.scrapeStory(url);
// Check if scraped content contains embedded images
const hasImages = story.content ? /<img[^>]+src=['"'][^'"']*['"][^>]*>/i.test(story.content) : false;
// Debug logging
console.log('Scraped story data:', {
url: url,
@@ -28,10 +31,15 @@ export async function POST(request: NextRequest) {
contentLength: story.content?.length || 0,
contentPreview: story.content?.substring(0, 200) + '...',
tags: story.tags,
coverImage: story.coverImage
coverImage: story.coverImage,
hasEmbeddedImages: hasImages
});
return NextResponse.json(story);
// Add image processing flag to response for frontend handling
return NextResponse.json({
...story,
hasEmbeddedImages: hasImages
});
} catch (error) {
console.error('Story scraping error:', error);

View File

@@ -4,7 +4,7 @@ import { useState, useEffect } from 'react';
import AppLayout from '../../components/layout/AppLayout';
import { useTheme } from '../../lib/theme';
import Button from '../../components/ui/Button';
import { storyApi, authorApi, databaseApi } from '../../lib/api';
import { storyApi, authorApi, databaseApi, configApi } from '../../lib/api';
import { useLibraryLayout, LibraryLayoutType } from '../../hooks/useLibraryLayout';
import LibrarySettings from '../../components/library/LibrarySettings';
@@ -51,6 +51,13 @@ export default function SettingsPage() {
completeRestore: { loading: false, message: '' },
completeClear: { loading: false, message: '' }
});
// UI state for the two orphaned-image cleanup actions on this page.
// `preview` tracks the scan and keeps the last scan result in `data`;
// `execute` tracks the actual deletion. `success` stays undefined until an
// action has finished. `data` lists exactly the fields this component reads
// from the preview response, so those reads are type-checked rather than `any`.
const [cleanupStatus, setCleanupStatus] = useState<{
  preview: {
    loading: boolean;
    message: string;
    success?: boolean;
    data?: {
      orphanedCount: number;
      formattedSize: string;
      foldersToDelete: number;
      referencedImagesCount: number;
      errors: string[];
      hasErrors: boolean;
    };
  };
  execute: { loading: boolean; message: string; success?: boolean };
}>({
  preview: { loading: false, message: '' },
  execute: { loading: false, message: '' }
});
// Load settings from localStorage on mount
useEffect(() => {
@@ -310,6 +317,122 @@ export default function SettingsPage() {
}, 10000);
};
/**
 * Calls `configApi.previewImageCleanup()` and records the outcome in
 * `cleanupStatus.preview`.
 *
 * While the request is running the previous scan result is dropped, so the
 * Execute button (which requires `preview.data`) stays disabled until the new
 * result arrives. On success the full response is kept in `preview.data`.
 * The status message is hidden again after 10 seconds; the scan result itself
 * is kept so the user can still run the cleanup afterwards.
 */
const handleImageCleanupPreview = async () => {
  setCleanupStatus(prev => ({
    ...prev,
    preview: { loading: true, message: 'Scanning for orphaned images...', success: undefined }
  }));

  try {
    const result = await configApi.previewImageCleanup();
    if (result.success) {
      setCleanupStatus(prev => ({
        ...prev,
        preview: {
          loading: false,
          message: `Found ${result.orphanedCount} orphaned images (${result.formattedSize}) and ${result.foldersToDelete} empty folders. Referenced images: ${result.referencedImagesCount}`,
          success: true,
          data: result
        }
      }));
    } else {
      setCleanupStatus(prev => ({
        ...prev,
        preview: {
          loading: false,
          message: result.error || 'Preview failed',
          success: false
        }
      }));
    }
  } catch (error: unknown) {
    // Narrow before reading `.message`; anything that is not an Error (or has
    // an empty message) falls back to the generic text.
    const failureMessage =
      (error instanceof Error && error.message) || 'Network error occurred';
    setCleanupStatus(prev => ({
      ...prev,
      preview: {
        loading: false,
        message: failureMessage,
        success: false
      }
    }));
  }

  // Clear message after 10 seconds.
  // Only the message is cleared: `data` and `success` are preserved so the
  // Execute button and the detail panel do not vanish with the banner. If a
  // newer scan is in flight when the timer fires, its loading state is left
  // untouched.
  setTimeout(() => {
    setCleanupStatus(prev =>
      prev.preview.loading
        ? prev
        : { ...prev, preview: { ...prev.preview, message: '' } }
    );
  }, 10000);
};
/**
 * Deletes the orphaned images reported by the last preview by calling
 * `configApi.executeImageCleanup()`, and records the outcome in
 * `cleanupStatus.execute`.
 *
 * Refuses to run (with an explanatory message) when there is no preview
 * result or the preview found nothing to delete, and asks the user to confirm
 * via `window.confirm` before sending the request. On success the preview
 * result is cleared, so a fresh preview is needed before the next cleanup.
 * The status message is removed after 10 seconds.
 */
const handleImageCleanupExecute = async () => {
  const previewData = cleanupStatus.preview.data;

  if (!previewData || previewData.orphanedCount === 0) {
    setCleanupStatus(prev => ({
      ...prev,
      execute: {
        loading: false,
        message: 'Please run preview first to see what will be deleted',
        success: false
      }
    }));
    return;
  }

  const confirmed = window.confirm(
    `Are you sure you want to delete ${previewData.orphanedCount} orphaned images (${previewData.formattedSize})? This action cannot be undone!`
  );

  if (!confirmed) return;

  setCleanupStatus(prev => ({
    ...prev,
    execute: { loading: true, message: 'Deleting orphaned images...', success: undefined }
  }));

  try {
    const result = await configApi.executeImageCleanup();
    if (result.success) {
      setCleanupStatus(prev => ({
        ...prev,
        execute: {
          loading: false,
          message: `Successfully deleted ${result.deletedCount} orphaned images (${result.formattedSize}) and ${result.foldersDeleted} empty folders`,
          success: true
        },
        preview: { loading: false, message: '', success: undefined, data: undefined } // Clear preview after successful cleanup
      }));
    } else {
      setCleanupStatus(prev => ({
        ...prev,
        execute: {
          loading: false,
          message: result.error || 'Cleanup failed',
          success: false
        }
      }));
    }
  } catch (error: unknown) {
    // Narrow before reading `.message`; anything that is not an Error (or has
    // an empty message) falls back to the generic text.
    const failureMessage =
      (error instanceof Error && error.message) || 'Network error occurred';
    setCleanupStatus(prev => ({
      ...prev,
      execute: {
        loading: false,
        message: failureMessage,
        success: false
      }
    }));
  }

  // Clear message after 10 seconds.
  // If another cleanup run has started in the meantime, leave its state alone:
  // resetting `loading` here would re-enable the button while a delete request
  // is still in flight.
  setTimeout(() => {
    setCleanupStatus(prev =>
      prev.execute.loading
        ? prev
        : { ...prev, execute: { loading: false, message: '', success: undefined } }
    );
  }, 10000);
};
return (
<AppLayout>
<div className="max-w-2xl mx-auto space-y-8">
@@ -670,6 +793,109 @@ export default function SettingsPage() {
</div>
</div>
{/* Storage Management */}
<div className="theme-card theme-shadow rounded-lg p-6">
<h2 className="text-xl font-semibold theme-header mb-4">Storage Management</h2>
<p className="theme-text mb-6">
Clean up orphaned content images that are no longer referenced in any story. This can help free up disk space.
</p>
<div className="space-y-6">
{/* Image Cleanup Section */}
<div className="border theme-border rounded-lg p-4">
<h3 className="text-lg font-semibold theme-header mb-3">🖼️ Content Images Cleanup</h3>
<p className="text-sm theme-text mb-4">
Scan for and remove orphaned content images that are no longer referenced in any story content. This includes images from deleted stories and unused downloaded images.
</p>
<div className="flex flex-col sm:flex-row gap-3 mb-3">
<Button
onClick={handleImageCleanupPreview}
disabled={cleanupStatus.preview.loading}
loading={cleanupStatus.preview.loading}
variant="ghost"
className="flex-1"
>
{cleanupStatus.preview.loading ? 'Scanning...' : 'Preview Cleanup'}
</Button>
<Button
onClick={handleImageCleanupExecute}
disabled={cleanupStatus.execute.loading || !cleanupStatus.preview.data || cleanupStatus.preview.data.orphanedCount === 0}
loading={cleanupStatus.execute.loading}
variant="secondary"
className="flex-1"
>
{cleanupStatus.execute.loading ? 'Cleaning...' : 'Execute Cleanup'}
</Button>
</div>
{/* Preview Results */}
{cleanupStatus.preview.message && (
<div className={`text-sm p-3 rounded mb-3 ${
cleanupStatus.preview.success
? 'bg-blue-50 dark:bg-blue-900/20 text-blue-800 dark:text-blue-200'
: 'bg-red-50 dark:bg-red-900/20 text-red-800 dark:text-red-200'
}`}>
{cleanupStatus.preview.message}
{cleanupStatus.preview.data && cleanupStatus.preview.data.hasErrors && (
<div className="mt-2 text-xs">
<details>
<summary className="cursor-pointer font-medium">View Errors ({cleanupStatus.preview.data.errors.length})</summary>
<ul className="mt-1 ml-4 space-y-1">
{cleanupStatus.preview.data.errors.map((error: string, index: number) => (
<li key={index} className="text-red-600 dark:text-red-400">• {error}</li>
))}
</ul>
</details>
</div>
)}
</div>
)}
{/* Execute Results */}
{cleanupStatus.execute.message && (
<div className={`text-sm p-3 rounded mb-3 ${
cleanupStatus.execute.success
? 'bg-green-50 dark:bg-green-900/20 text-green-800 dark:text-green-200'
: 'bg-red-50 dark:bg-red-900/20 text-red-800 dark:text-red-200'
}`}>
{cleanupStatus.execute.message}
</div>
)}
{/* Detailed Preview Information */}
{cleanupStatus.preview.data && cleanupStatus.preview.success && (
<div className="text-sm theme-text bg-gray-50 dark:bg-gray-800 p-3 rounded border">
<div className="grid grid-cols-2 gap-3">
<div>
<span className="font-medium">Orphaned Images:</span> {cleanupStatus.preview.data.orphanedCount}
</div>
<div>
<span className="font-medium">Total Size:</span> {cleanupStatus.preview.data.formattedSize}
</div>
<div>
<span className="font-medium">Empty Folders:</span> {cleanupStatus.preview.data.foldersToDelete}
</div>
<div>
<span className="font-medium">Referenced Images:</span> {cleanupStatus.preview.data.referencedImagesCount}
</div>
</div>
</div>
)}
</div>
<div className="text-sm theme-text bg-blue-50 dark:bg-blue-900/20 p-3 rounded-lg">
<p className="font-medium mb-1">📝 How it works:</p>
<ul className="text-xs space-y-1 ml-4">
<li>• <strong>Preview:</strong> Scans all stories to find images no longer referenced in content</li>
<li>• <strong>Execute:</strong> Permanently deletes orphaned images and empty story directories</li>
<li>• <strong>Safe:</strong> Only removes images not found in any story content</li>
<li>• <strong>Backup recommended:</strong> Consider backing up before large cleanups</li>
</ul>
</div>
</div>
</div>
{/* Database Management */}
<div className="theme-card theme-shadow rounded-lg p-6">
<h2 className="text-xl font-semibold theme-header mb-4">Database Management</h2>

View File

@@ -577,6 +577,38 @@ export const configApi = {
const response = await api.get('/config/html-sanitization');
return response.data;
},
/**
 * Sends a POST to `/config/cleanup/images/preview` and resolves with the
 * response body.
 *
 * NOTE(review): presumably a dry run that deletes nothing (the response
 * carries a `dryRun` flag) — confirm against the backend.
 */
previewImageCleanup: async (): Promise<{
  success: boolean;
  orphanedCount: number;
  totalSizeBytes: number;
  formattedSize: string;
  foldersToDelete: number;
  referencedImagesCount: number;
  errors: string[];
  hasErrors: boolean;
  dryRun: boolean;
  error?: string;
}> => {
  const { data: previewReport } = await api.post('/config/cleanup/images/preview');
  return previewReport;
},
/**
 * Sends a POST to `/config/cleanup/images/execute` and resolves with the
 * response body.
 *
 * NOTE(review): presumably this performs the real deletion of orphaned
 * images on the server — confirm against the backend.
 */
executeImageCleanup: async (): Promise<{
  success: boolean;
  deletedCount: number;
  totalSizeBytes: number;
  formattedSize: string;
  foldersDeleted: number;
  referencedImagesCount: number;
  errors: string[];
  hasErrors: boolean;
  dryRun: boolean;
  error?: string;
}> => {
  const { data: cleanupReport } = await api.post('/config/cleanup/images/execute');
  return cleanupReport;
},
};
// Collection endpoints

File diff suppressed because one or more lines are too long