embedded image finishing

This commit is contained in:
Stefan Hardegger
2025-09-17 10:28:35 +02:00
parent e5596b5a17
commit c0b3ae3b72
8 changed files with 740 additions and 20 deletions

View File

@@ -188,29 +188,47 @@ async function processCombinedMode(
// Check content size to prevent response size issues
const combinedContentString = combinedContent.join('\n');
const contentSizeInMB = new Blob([combinedContentString]).size / (1024 * 1024);
console.log(`Combined content size: ${contentSizeInMB.toFixed(2)} MB`);
console.log(`Combined content character length: ${combinedContentString.length}`);
console.log(`Combined content parts count: ${combinedContent.length}`);
// Handle content truncation if needed
let finalContent = contentSizeInMB > 10 ?
combinedContentString.substring(0, Math.floor(combinedContentString.length * (10 / contentSizeInMB))) + '\n\n<!-- Content truncated due to size limit -->' :
combinedContentString;
let finalSummary = contentSizeInMB > 10 ? baseSummary + ' (Content truncated due to size limit)' : baseSummary;
// Check if combined content has images and mark for processing
const hasImages = /<img[^>]+src=['"'][^'"']*['"][^>]*>/i.test(finalContent);
if (hasImages) {
finalSummary += ' (Contains embedded images - will be processed after story creation)';
console.log(`Combined story contains embedded images - will need processing after creation`);
}
// Return the combined story data via progress update
const combinedStory = {
title: baseTitle,
author: baseAuthor,
content: contentSizeInMB > 10 ?
combinedContentString.substring(0, Math.floor(combinedContentString.length * (10 / contentSizeInMB))) + '\n\n<!-- Content truncated due to size limit -->' :
combinedContentString,
summary: contentSizeInMB > 10 ? baseSummary + ' (Content truncated due to size limit)' : baseSummary,
content: finalContent,
summary: finalSummary,
sourceUrl: baseSourceUrl,
tags: Array.from(combinedTags)
tags: Array.from(combinedTags),
hasImages: hasImages
};
// Send completion notification for combine mode
let completionMessage = `Combined scraping completed: ${totalWordCount.toLocaleString()} words from ${importedCount} stories`;
if (hasImages) {
completionMessage += ` (embedded images will be processed when story is created)`;
}
await sendProgressUpdate(sessionId, {
type: 'completed',
current: urls.length,
total: urls.length,
message: `Combined scraping completed: ${totalWordCount.toLocaleString()} words from ${importedCount} stories`,
message: completionMessage,
totalWordCount: totalWordCount,
combinedStory: combinedStory
});
@@ -346,7 +364,62 @@ async function processIndividualMode(
}
const createdStory = await createResponse.json();
// Process embedded images if content contains images
let imageProcessingWarnings: string[] = [];
const hasImages = /<img[^>]+src=['"'][^'"']*['"][^>]*>/i.test(scrapedStory.content);
if (hasImages) {
try {
console.log(`Processing embedded images for story: ${createdStory.id}`);
const imageProcessUrl = `http://backend:8080/api/stories/${createdStory.id}/process-content-images`;
const imageProcessResponse = await fetch(imageProcessUrl, {
method: 'POST',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
body: JSON.stringify({ htmlContent: scrapedStory.content }),
});
if (imageProcessResponse.ok) {
const imageResult = await imageProcessResponse.json();
if (imageResult.hasWarnings && imageResult.warnings) {
imageProcessingWarnings = imageResult.warnings;
console.log(`Image processing completed with warnings for story ${createdStory.id}:`, imageResult.warnings);
} else {
console.log(`Image processing completed successfully for story ${createdStory.id}. Downloaded ${imageResult.downloadedImages?.length || 0} images.`);
}
// Update story content with processed images
if (imageResult.processedContent && imageResult.processedContent !== scrapedStory.content) {
const updateUrl = `http://backend:8080/api/stories/${createdStory.id}`;
const updateResponse = await fetch(updateUrl, {
method: 'PUT',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
body: JSON.stringify({
contentHtml: imageResult.processedContent
}),
});
if (!updateResponse.ok) {
console.warn(`Failed to update story content after image processing for ${createdStory.id}`);
imageProcessingWarnings.push('Failed to update story content with processed images');
}
}
} else {
console.warn(`Image processing failed for story ${createdStory.id}:`, imageProcessResponse.status);
imageProcessingWarnings.push('Image processing failed');
}
} catch (error) {
console.error(`Error processing images for story ${createdStory.id}:`, error);
imageProcessingWarnings.push(`Image processing error: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
results.push({
url: trimmedUrl,
status: 'imported',
@@ -356,17 +429,24 @@ async function processIndividualMode(
});
importedCount++;
console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);
console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})${hasImages ? ` with ${imageProcessingWarnings.length > 0 ? 'warnings' : 'successful image processing'}` : ''}`);
// Send progress update for successful import
let progressMessage = `Imported "${scrapedStory.title}" by ${scrapedStory.author}`;
if (hasImages) {
progressMessage += imageProcessingWarnings.length > 0 ? ' (with image warnings)' : ' (with images)';
}
await sendProgressUpdate(sessionId, {
type: 'progress',
current: i + 1,
total: urls.length,
message: `Imported "${scrapedStory.title}" by ${scrapedStory.author}`,
message: progressMessage,
url: trimmedUrl,
title: scrapedStory.title,
author: scrapedStory.author
author: scrapedStory.author,
hasImages: hasImages,
imageWarnings: imageProcessingWarnings
});
} catch (error) {