Various improvements & Epub support
frontend/src/app/scrape/bulk/progress/route.ts (new file, 93 lines)
@@ -0,0 +1,93 @@
import { NextRequest } from 'next/server';

// Configure route timeout for long-running progress streams
export const maxDuration = 900; // 15 minutes (900 seconds)

interface ProgressUpdate {
  type: 'progress' | 'completed' | 'error';
  current: number;
  total: number;
  message: string;
  url?: string;
  title?: string;
  author?: string;
  wordCount?: number;
  totalWordCount?: number;
  error?: string;
  combinedStory?: any;
  results?: any[];
  summary?: any;
}

// Global progress storage (in production, use Redis or database)
const progressStore = new Map<string, ProgressUpdate[]>();

export async function GET(request: NextRequest) {
  const searchParams = request.nextUrl.searchParams;
  const sessionId = searchParams.get('sessionId');

  if (!sessionId) {
    return new Response('Session ID required', { status: 400 });
  }

  // Set up Server-Sent Events
  const stream = new ReadableStream({
    start(controller) {
      const encoder = new TextEncoder();

      // Send initial connection message
      const data = `data: ${JSON.stringify({ type: 'connected', sessionId })}\n\n`;
      controller.enqueue(encoder.encode(data));

      // Check for progress updates every 500ms
      const interval = setInterval(() => {
        const updates = progressStore.get(sessionId);
        if (updates && updates.length > 0) {
          // Send all pending updates
          updates.forEach(update => {
            const data = `data: ${JSON.stringify(update)}\n\n`;
            controller.enqueue(encoder.encode(data));
          });

          // Clear sent updates
          progressStore.delete(sessionId);

          // If this was a completion or error, close the stream
          const lastUpdate = updates[updates.length - 1];
          if (lastUpdate.type === 'completed' || lastUpdate.type === 'error') {
            clearInterval(interval);
            controller.close();
          }
        }
      }, 500);

      // Cleanup after timeout
      setTimeout(() => {
        clearInterval(interval);
        progressStore.delete(sessionId);
        try {
          controller.close();
        } catch {
          // The stream may already have been closed by a completed/error update
        }
      }, 900000); // 15 minutes
    }
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'Access-Control-Allow-Origin': '*',
      'Access-Control-Allow-Headers': 'Cache-Control',
    },
  });
}

// Helper function for other routes to send progress updates
export function sendProgressUpdate(sessionId: string, update: ProgressUpdate) {
  if (!progressStore.has(sessionId)) {
    progressStore.set(sessionId, []);
  }
  progressStore.get(sessionId)!.push(update);
}

// Export the progress store for other modules to use
export { progressStore };
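
The Map above lives in the memory of a single Node process, and the inline comment already flags Redis or a database for production. A rough sketch of what a Redis-backed replacement could look like, assuming the ioredis client and a made-up progress:<sessionId> key scheme; this is an editor's sketch, not part of the commit:

// Hypothetical Redis-backed progress store (sketch, not from this commit).
// Assumes the ioredis package; each session's updates live in a Redis list.
import Redis from 'ioredis';

const redis = new Redis(process.env.REDIS_URL ?? 'redis://localhost:6379');

export async function pushProgressUpdate(sessionId: string, update: ProgressUpdate) {
  const key = `progress:${sessionId}`;
  await redis.rpush(key, JSON.stringify(update)); // append, preserving order
  await redis.expire(key, 900);                   // mirror the 15-minute route timeout
}

export async function drainProgressUpdates(sessionId: string): Promise<ProgressUpdate[]> {
  const key = `progress:${sessionId}`;
  // Read then delete; a MULTI/EXEC transaction would make this atomic under concurrency
  const raw = await redis.lrange(key, 0, -1);
  if (raw.length > 0) await redis.del(key);
  return raw.map(entry => JSON.parse(entry));
}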

frontend/src/app/scrape/bulk/route.ts (modified)
@@ -1,7 +1,23 @@
import { NextRequest, NextResponse } from 'next/server';

// Configure route timeout for long-running scraping operations
export const maxDuration = 900; // 15 minutes (900 seconds)

// Import progress tracking helper
async function sendProgressUpdate(sessionId: string, update: any) {
  try {
    // Dynamic import to avoid circular dependency
    const { sendProgressUpdate: sendUpdate } = await import('./progress/route');
    sendUpdate(sessionId, update);
  } catch (error) {
    console.warn('Failed to send progress update:', error);
  }
}

interface BulkImportRequest {
  urls: string[];
  combineIntoOne?: boolean;
  sessionId?: string; // For progress tracking
}

interface ImportResult {
@@ -22,52 +38,430 @@ interface BulkImportResponse {
    skipped: number;
    errors: number;
  };
  combinedStory?: {
    title: string;
    author: string;
    content: string;
    summary?: string;
    sourceUrl: string;
    tags?: string[];
  };
}

// Background processing function for combined mode
async function processCombinedMode(
  urls: string[],
  sessionId: string,
  authorization: string,
  scraper: any
) {
  const results: ImportResult[] = [];
  let importedCount = 0;
  let errorCount = 0;

  const combinedContent: string[] = [];
  let baseTitle = '';
  let baseAuthor = '';
  let baseSummary = '';
  let baseSourceUrl = '';
  const combinedTags = new Set<string>();
  let totalWordCount = 0;

  // Send initial progress update
  await sendProgressUpdate(sessionId, {
    type: 'progress',
    current: 0,
    total: urls.length,
    message: `Starting to scrape ${urls.length} URLs for combining...`,
    totalWordCount: 0
  });

  for (let i = 0; i < urls.length; i++) {
    const url = urls[i];
    console.log(`Scraping URL ${i + 1}/${urls.length} for combine: ${url}`);

    // Send progress update
    await sendProgressUpdate(sessionId, {
      type: 'progress',
      current: i,
      total: urls.length,
      message: `Scraping URL ${i + 1} of ${urls.length}...`,
      url: url,
      totalWordCount
    });

    try {
      const trimmedUrl = url.trim();
      if (!trimmedUrl) {
        results.push({
          url: url || 'Empty URL',
          status: 'error',
          error: 'Empty URL in combined mode'
        });
        errorCount++;
        continue;
      }

      const scrapedStory = await scraper.scrapeStory(trimmedUrl);

      // Check if we got content - this is required for combined mode
      if (!scrapedStory.content || scrapedStory.content.trim() === '') {
        results.push({
          url: trimmedUrl,
          status: 'error',
          error: 'No content found - required for combined mode'
        });
        errorCount++;
        continue;
      }

      // Use first URL for base metadata (title can be empty for combined mode)
      if (i === 0) {
        baseTitle = scrapedStory.title || 'Combined Story';
        baseAuthor = scrapedStory.author || 'Unknown Author';
        baseSummary = scrapedStory.summary || '';
        baseSourceUrl = trimmedUrl;
      }

      // Add content with URL separator
      combinedContent.push(`<!-- Content from: ${trimmedUrl} -->`);
      if (scrapedStory.title && i > 0) {
        combinedContent.push(`<h2>${scrapedStory.title}</h2>`);
      }
      combinedContent.push(scrapedStory.content);
      combinedContent.push('<hr/>'); // Visual separator between parts

      // Calculate word count for this story
      const textContent = scrapedStory.content.replace(/<[^>]*>/g, ''); // Strip HTML
      const wordCount = textContent.split(/\s+/).filter((word: string) => word.length > 0).length;
      totalWordCount += wordCount;

      // Collect tags from all stories
      if (scrapedStory.tags) {
        scrapedStory.tags.forEach((tag: string) => combinedTags.add(tag));
      }

      results.push({
        url: trimmedUrl,
        status: 'imported',
        title: scrapedStory.title,
        author: scrapedStory.author
      });
      importedCount++;

      // Send progress update with word count
      await sendProgressUpdate(sessionId, {
        type: 'progress',
        current: i + 1,
        total: urls.length,
        message: `Scraped "${scrapedStory.title}" (${wordCount.toLocaleString()} words)`,
        url: trimmedUrl,
        title: scrapedStory.title,
        author: scrapedStory.author,
        wordCount: wordCount,
        totalWordCount: totalWordCount
      });

    } catch (error) {
      console.error(`Error processing URL ${url} in combined mode:`, error);
      results.push({
        url: url,
        status: 'error',
        error: error instanceof Error ? error.message : 'Unknown error'
      });
      errorCount++;
    }
  }

  // If we have any errors, fail the entire combined operation
  if (errorCount > 0) {
    await sendProgressUpdate(sessionId, {
      type: 'error',
      current: urls.length,
      total: urls.length,
      message: 'Combined mode failed: some URLs could not be processed',
      error: `${errorCount} URLs failed to process`
    });
    return;
  }

  // Check content size to prevent response size issues
  const combinedContentString = combinedContent.join('\n');
  const contentSizeInMB = new Blob([combinedContentString]).size / (1024 * 1024);

  console.log(`Combined content size: ${contentSizeInMB.toFixed(2)} MB`);
  console.log(`Combined content character length: ${combinedContentString.length}`);
  console.log(`Combined content parts count: ${combinedContent.length}`);

  // Return the combined story data via progress update
  const combinedStory = {
    title: baseTitle,
    author: baseAuthor,
    content: contentSizeInMB > 10 ?
      combinedContentString.substring(0, Math.floor(combinedContentString.length * (10 / contentSizeInMB))) + '\n\n<!-- Content truncated due to size limit -->' :
      combinedContentString,
    summary: contentSizeInMB > 10 ? baseSummary + ' (Content truncated due to size limit)' : baseSummary,
    sourceUrl: baseSourceUrl,
    tags: Array.from(combinedTags)
  };

  // Send completion notification for combine mode
  await sendProgressUpdate(sessionId, {
    type: 'completed',
    current: urls.length,
    total: urls.length,
    message: `Combined scraping completed: ${totalWordCount.toLocaleString()} words from ${importedCount} stories`,
    totalWordCount: totalWordCount,
    combinedStory: combinedStory
  });

  console.log(`Combined scraping completed: ${importedCount} URLs combined into one story`);
}
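
The loop above strips tags with a regex before counting words, and the size check near the end of processCombinedMode keeps a proportional character prefix when the combined HTML exceeds 10 MB. For reference, the same arithmetic as standalone helpers; an editor's sketch, not code from this commit:

// Sketch of the word-count and size-cap arithmetic used by processCombinedMode.
function countWords(html: string): number {
  const text = html.replace(/<[^>]*>/g, ''); // strip HTML tags
  return text.split(/\s+/).filter(word => word.length > 0).length;
}

function truncateToMegabytes(content: string, limitMB = 10): string {
  const sizeInMB = new Blob([content]).size / (1024 * 1024);
  if (sizeInMB <= limitMB) return content;
  // Keep limitMB/sizeInMB of the characters. The ratio is computed on bytes
  // but applied to characters, so it is approximate for multi-byte text.
  const keep = Math.floor(content.length * (limitMB / sizeInMB));
  return content.substring(0, keep) + '\n\n<!-- Content truncated due to size limit -->';
}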

// Background processing function for individual mode
async function processIndividualMode(
  urls: string[],
  sessionId: string,
  authorization: string,
  scraper: any
) {
  const results: ImportResult[] = [];
  let importedCount = 0;
  let skippedCount = 0;
  let errorCount = 0;

  await sendProgressUpdate(sessionId, {
    type: 'progress',
    current: 0,
    total: urls.length,
    message: `Starting to import ${urls.length} URLs individually...`
  });

  for (let i = 0; i < urls.length; i++) {
    const url = urls[i];
    console.log(`Processing URL ${i + 1}/${urls.length}: ${url}`);

    await sendProgressUpdate(sessionId, {
      type: 'progress',
      current: i,
      total: urls.length,
      message: `Processing URL ${i + 1} of ${urls.length}...`,
      url: url
    });

    try {
      // Validate URL format
      if (!url || typeof url !== 'string' || url.trim() === '') {
        results.push({
          url: url || 'Empty URL',
          status: 'error',
          error: 'Invalid URL format'
        });
        errorCount++;
        continue;
      }

      const trimmedUrl = url.trim();

      // Scrape the story
      const scrapedStory = await scraper.scrapeStory(trimmedUrl);

      // Validate required fields
      if (!scrapedStory.title || !scrapedStory.author || !scrapedStory.content) {
        const missingFields = [];
        if (!scrapedStory.title) missingFields.push('title');
        if (!scrapedStory.author) missingFields.push('author');
        if (!scrapedStory.content) missingFields.push('content');

        results.push({
          url: trimmedUrl,
          status: 'skipped',
          reason: `Missing required fields: ${missingFields.join(', ')}`,
          title: scrapedStory.title,
          author: scrapedStory.author
        });
        skippedCount++;
        continue;
      }

      // Check for duplicates using query parameters
      try {
        const duplicateCheckUrl = `http://backend:8080/api/stories/check-duplicate`;
        const params = new URLSearchParams({
          title: scrapedStory.title,
          authorName: scrapedStory.author
        });

        const duplicateCheckResponse = await fetch(`${duplicateCheckUrl}?${params.toString()}`, {
          method: 'GET',
          headers: {
            'Authorization': authorization,
            'Content-Type': 'application/json',
          },
        });

        if (duplicateCheckResponse.ok) {
          const duplicateResult = await duplicateCheckResponse.json();
          if (duplicateResult.hasDuplicates) {
            results.push({
              url: trimmedUrl,
              status: 'skipped',
              reason: `Duplicate story found (${duplicateResult.count} existing)`,
              title: scrapedStory.title,
              author: scrapedStory.author
            });
            skippedCount++;
            continue;
          }
        }
      } catch (error) {
        console.warn('Duplicate check failed:', error);
        // Continue with import if duplicate check fails
      }

      // Create the story
      try {
        const storyData = {
          title: scrapedStory.title,
          summary: scrapedStory.summary || undefined,
          contentHtml: scrapedStory.content,
          sourceUrl: scrapedStory.sourceUrl || trimmedUrl,
          authorName: scrapedStory.author,
          tagNames: scrapedStory.tags && scrapedStory.tags.length > 0 ? scrapedStory.tags : undefined,
        };

        const createUrl = `http://backend:8080/api/stories`;
        const createResponse = await fetch(createUrl, {
          method: 'POST',
          headers: {
            'Authorization': authorization,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(storyData),
        });

        if (!createResponse.ok) {
          const errorData = await createResponse.json();
          throw new Error(errorData.message || 'Failed to create story');
        }

        const createdStory = await createResponse.json();

        results.push({
          url: trimmedUrl,
          status: 'imported',
          title: scrapedStory.title,
          author: scrapedStory.author,
          storyId: createdStory.id
        });
        importedCount++;

        console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);

        // Send progress update for successful import
        await sendProgressUpdate(sessionId, {
          type: 'progress',
          current: i + 1,
          total: urls.length,
          message: `Imported "${scrapedStory.title}" by ${scrapedStory.author}`,
          url: trimmedUrl,
          title: scrapedStory.title,
          author: scrapedStory.author
        });

      } catch (error) {
        console.error(`Failed to create story for ${trimmedUrl}:`, error);

        let errorMessage = 'Failed to create story';
        if (error instanceof Error) {
          errorMessage = error.message;
        }

        results.push({
          url: trimmedUrl,
          status: 'error',
          error: errorMessage,
          title: scrapedStory.title,
          author: scrapedStory.author
        });
        errorCount++;
      }

    } catch (error) {
      console.error(`Error processing URL ${url}:`, error);

      let errorMessage = 'Unknown error';
      if (error instanceof Error) {
        errorMessage = error.message;
      }

      results.push({
        url: url,
        status: 'error',
        error: errorMessage
      });
      errorCount++;
    }
  }

  // Send completion notification
  await sendProgressUpdate(sessionId, {
    type: 'completed',
    current: urls.length,
    total: urls.length,
    message: `Bulk import completed: ${importedCount} imported, ${skippedCount} skipped, ${errorCount} errors`,
    results: results,
    summary: {
      total: urls.length,
      imported: importedCount,
      skipped: skippedCount,
      errors: errorCount
    }
  });

  console.log(`Bulk import completed: ${importedCount} imported, ${skippedCount} skipped, ${errorCount} errors`);

  // Trigger Typesense reindex if any stories were imported
  if (importedCount > 0) {
    try {
      console.log('Triggering Typesense reindex after bulk import...');
      const reindexUrl = `http://backend:8080/api/stories/reindex-typesense`;
      const reindexResponse = await fetch(reindexUrl, {
        method: 'POST',
        headers: {
          'Authorization': authorization,
          'Content-Type': 'application/json',
        },
      });

      if (reindexResponse.ok) {
        const reindexResult = await reindexResponse.json();
        console.log('Typesense reindex completed:', reindexResult);
      } else {
        console.warn('Typesense reindex failed:', reindexResponse.status);
      }
    } catch (error) {
      console.warn('Failed to trigger Typesense reindex:', error);
      // Don't fail the whole request if reindex fails
    }
  }
}

// Background processing function
async function processBulkImport(
  urls: string[],
  combineIntoOne: boolean,
  sessionId: string,
  authorization: string
) {
  try {
    // Dynamic imports to prevent client-side bundling
    const { StoryScraper } = await import('@/lib/scraper/scraper');

    const scraper = new StoryScraper();

    // For server-side API calls in Docker, use direct backend container URL
    // Client-side calls use NEXT_PUBLIC_API_URL through nginx, but server-side needs direct container access
    const serverSideApiBaseUrl = 'http://backend:8080/api';
    console.log(`DEBUG: serverSideApiBaseUrl variable is: ${serverSideApiBaseUrl}`);
    console.log(`Starting bulk scraping for ${urls.length} URLs${combineIntoOne ? ' (combine mode)' : ''}`);
    console.log(`Session ID: ${sessionId}`);

    // Quick test to verify backend connectivity
    try {
@@ -84,208 +478,86 @@ export async function POST(request: NextRequest) {
      console.error(`Backend connectivity test failed:`, error);
    }

    // Handle combined mode
    if (combineIntoOne) {
      await processCombinedMode(urls, sessionId, authorization, scraper);
    } else {
      // Normal individual processing mode
      await processIndividualMode(urls, sessionId, authorization, scraper);
    }

  } catch (error) {
    console.error('Background bulk import error:', error);
    await sendProgressUpdate(sessionId, {
      type: 'error',
      current: 0,
      total: urls.length,
      message: 'Bulk import failed due to an error',
      error: error instanceof Error ? error.message : 'Unknown error'
    });
  }
}

export async function POST(request: NextRequest) {
  try {
    // Check for authentication
    const authorization = request.headers.get('authorization');
    if (!authorization) {
      return NextResponse.json(
        { error: 'Authentication required for bulk import' },
        { status: 401 }
      );
    }

    const body = await request.json();
    const { urls, combineIntoOne = false, sessionId } = body as BulkImportRequest;

    if (!urls || !Array.isArray(urls) || urls.length === 0) {
      return NextResponse.json(
        { error: 'URLs array is required and must not be empty' },
        { status: 400 }
      );
    }

    if (urls.length > 200) {
      return NextResponse.json(
        { error: 'Maximum 200 URLs allowed per bulk import' },
        { status: 400 }
      );
    }

    if (!sessionId) {
      return NextResponse.json(
        { error: 'Session ID is required for progress tracking' },
        { status: 400 }
      );
    }

    // Start the background processing
    processBulkImport(urls, combineIntoOne, sessionId, authorization).catch(error => {
      console.error('Failed to start background processing:', error);
    });

    // Return immediately with session info
    return NextResponse.json({
      message: 'Bulk import started',
      sessionId: sessionId,
      totalUrls: urls.length,
      combineMode: combineIntoOne
    });

  } catch (error) {
    console.error('Bulk import initialization error:', error);

    if (error instanceof Error) {
      return NextResponse.json(
        { error: `Bulk import failed to start: ${error.message}` },
        { status: 500 }
      );
    }

    return NextResponse.json(
      { error: 'Bulk import failed to start due to an unknown error' },
      { status: 500 }
    );
  }
}
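
Taken together, the routes give a start-then-subscribe flow: the client invents a session id, opens the SSE stream, then fires the POST. A hypothetical browser-side sketch (route paths inferred from the file locations under src/app; the auth scheme and wiring are illustrative, not part of this commit):

// Hypothetical client-side usage of the two routes above.
async function startBulkImport(urls: string[], token: string) {
  // Any unique string works as a session id
  const sessionId = crypto.randomUUID();

  // Subscribe before starting so early updates are not missed
  const events = new EventSource(`/scrape/bulk/progress?sessionId=${sessionId}`);
  events.onmessage = (event) => {
    const update = JSON.parse(event.data);
    if (update.type === 'connected') return; // initial handshake message
    console.log(`${update.current}/${update.total}: ${update.message}`);
    if (update.type === 'completed' || update.type === 'error') {
      events.close();
    }
  };

  // Kick off the background import; the route responds immediately
  const response = await fetch('/scrape/bulk', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${token}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ urls, combineIntoOne: false, sessionId }),
  });
  return response.json(); // { message, sessionId, totalUrls, combineMode }
}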