import { NextRequest, NextResponse } from 'next/server';

/** JSON payload accepted by the bulk import endpoint. */
interface BulkImportRequest {
  urls: string[];
}

/** Outcome of processing one URL from the request. */
interface ImportResult {
  url: string;
  status: 'imported' | 'skipped' | 'error';
  /** Why the URL was skipped (set together with status 'skipped'). */
  reason?: string;
  title?: string;
  author?: string;
  /** Failure description (set together with status 'error'). */
  error?: string;
  /** Identifier returned by the backend for a newly created story. */
  storyId?: string;
}

/** Response body: one result per processed URL plus aggregate counters. */
interface BulkImportResponse {
  results: ImportResult[];
  summary: {
    total: number;
    imported: number;
    skipped: number;
    errors: number;
  };
}

/** Upper bound on the number of URLs accepted in a single request. */
const MAX_URLS_PER_IMPORT = 50;

/** Builds the headers sent with every backend call, forwarding the caller's credentials. */
function buildBackendHeaders(authorization: string): Record<string, string> {
  return {
    'Authorization': authorization,
    'Content-Type': 'application/json',
  };
}

/** Returns the message of a thrown `Error`, or `fallback` for any other thrown value. */
function toErrorMessage(error: unknown, fallback: string): string {
  return error instanceof Error ? error.message : fallback;
}

/**
 * Extracts the `message` field from a failed backend response.
 * Falls back to `fallback` when the body has no usable message, and to
 * `fallback` plus the HTTP status when the body is not valid JSON, so a
 * non-JSON error page never masks the real failure with a parse error.
 */
async function readBackendErrorMessage(response: Response, fallback: string): Promise<string> {
  try {
    const data: unknown = await response.json();
    if (data !== null && typeof data === 'object') {
      const message = (data as { message?: unknown }).message;
      if (typeof message === 'string' && message !== '') {
        return message;
      }
    }
    return fallback;
  } catch {
    return `${fallback} (HTTP ${response.status})`;
  }
}

/**
 * POST handler for bulk story import.
 *
 * Expects an `Authorization` header and a JSON body `{ urls: string[] }` with
 * 1 to 50 entries. Each URL is processed sequentially: scraped, validated for
 * required fields, checked against the backend for duplicates, and then
 * created through the backend API. A failure on one URL is recorded in its
 * result entry and does not abort the remaining URLs. When at least one story
 * was imported, a Typesense reindex is requested as a best-effort step.
 *
 * @param request Incoming request carrying the `authorization` header and JSON body.
 * @returns 200 with a `BulkImportResponse`; 401 when the header is missing;
 *          400 when the body is not valid JSON or `urls` is missing, empty or
 *          too long; 500 on an unexpected failure.
 */
export async function POST(request: NextRequest) {
  try {
    // Check for authentication
    const authorization = request.headers.get('authorization');
    if (!authorization) {
      return NextResponse.json(
        { error: 'Authentication required for bulk import' },
        { status: 401 }
      );
    }

    // A malformed body is a client error, so report it as 400 rather than
    // letting the parse exception reach the generic 500 handler below.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json(
        { error: 'Request body must be valid JSON' },
        { status: 400 }
      );
    }

    // Optional chaining guards against a JSON body of `null`, which would
    // otherwise throw while destructuring.
    const urls = (body as Partial<BulkImportRequest> | null)?.urls;

    if (!urls || !Array.isArray(urls) || urls.length === 0) {
      return NextResponse.json(
        { error: 'URLs array is required and must not be empty' },
        { status: 400 }
      );
    }

    if (urls.length > MAX_URLS_PER_IMPORT) {
      return NextResponse.json(
        { error: `Maximum ${MAX_URLS_PER_IMPORT} URLs allowed per bulk import` },
        { status: 400 }
      );
    }

    // Dynamic imports to prevent client-side bundling
    const { StoryScraper } = await import('@/lib/scraper/scraper');

    const scraper = new StoryScraper();
    const results: ImportResult[] = [];
    let importedCount = 0;
    let skippedCount = 0;
    let errorCount = 0;

    console.log(`Starting bulk scraping for ${urls.length} URLs`);
    console.log(`Environment NEXT_PUBLIC_API_URL: ${process.env.NEXT_PUBLIC_API_URL}`);

    // For server-side API calls in Docker, use direct backend container URL
    // Client-side calls use NEXT_PUBLIC_API_URL through nginx, but server-side needs direct container access
    const serverSideApiBaseUrl = 'http://backend:8080/api';
    console.log(`DEBUG: serverSideApiBaseUrl variable is: ${serverSideApiBaseUrl}`);

    // Every backend endpoint is derived from the single base URL above.
    const duplicateCheckUrl = `${serverSideApiBaseUrl}/stories/check-duplicate`;
    const createUrl = `${serverSideApiBaseUrl}/stories`;
    const reindexUrl = `${serverSideApiBaseUrl}/stories/reindex-typesense`;
    const backendHeaders = buildBackendHeaders(authorization);

    // Quick test to verify backend connectivity
    try {
      console.log(`Testing backend connectivity at: ${duplicateCheckUrl}`);
      const testResponse = await fetch(`${duplicateCheckUrl}?title=test&authorName=test`, {
        method: 'GET',
        headers: backendHeaders,
      });
      console.log(`Backend test response status: ${testResponse.status}`);
    } catch (error) {
      // Diagnostic only: a failure here is logged and processing continues.
      console.error(`Backend connectivity test failed:`, error);
    }

    for (const url of urls) {
      console.log(`Processing URL: ${url}`);

      try {
        // Validate URL format
        if (!url || typeof url !== 'string' || url.trim() === '') {
          results.push({
            url: url || 'Empty URL',
            status: 'error',
            error: 'Invalid URL format'
          });
          errorCount++;
          continue;
        }

        const trimmedUrl = url.trim();

        // Scrape the story
        const scrapedStory = await scraper.scrapeStory(trimmedUrl);

        // Validate required fields
        if (!scrapedStory.title || !scrapedStory.author || !scrapedStory.content) {
          const missingFields: string[] = [];
          if (!scrapedStory.title) missingFields.push('title');
          if (!scrapedStory.author) missingFields.push('author');
          if (!scrapedStory.content) missingFields.push('content');

          results.push({
            url: trimmedUrl,
            status: 'skipped',
            reason: `Missing required fields: ${missingFields.join(', ')}`,
            title: scrapedStory.title,
            author: scrapedStory.author
          });
          skippedCount++;
          continue;
        }

        // Check for duplicates using query parameters
        try {
          console.log(`Duplicate check URL: ${duplicateCheckUrl}`);

          const params = new URLSearchParams({
            title: scrapedStory.title,
            authorName: scrapedStory.author
          });

          const duplicateCheckResponse = await fetch(`${duplicateCheckUrl}?${params.toString()}`, {
            method: 'GET',
            headers: backendHeaders,
          });

          if (duplicateCheckResponse.ok) {
            const duplicateResult = await duplicateCheckResponse.json();
            if (duplicateResult.hasDuplicates) {
              results.push({
                url: trimmedUrl,
                status: 'skipped',
                reason: `Duplicate story found (${duplicateResult.count} existing)`,
                title: scrapedStory.title,
                author: scrapedStory.author
              });
              skippedCount++;
              continue;
            }
          } else {
            // The check is best-effort: log the failure instead of ignoring it silently.
            console.warn(
              `Duplicate check returned status ${duplicateCheckResponse.status}; continuing with import`
            );
          }
        } catch (error) {
          console.warn('Duplicate check failed:', error);
          // Continue with import if duplicate check fails
        }

        // Create the story
        try {
          const storyData = {
            title: scrapedStory.title,
            summary: scrapedStory.summary || undefined,
            contentHtml: scrapedStory.content,
            sourceUrl: scrapedStory.sourceUrl || trimmedUrl,
            authorName: scrapedStory.author,
            tagNames: scrapedStory.tags && scrapedStory.tags.length > 0 ? scrapedStory.tags : undefined,
          };

          console.log(`Create story URL: ${createUrl}`);

          const createResponse = await fetch(createUrl, {
            method: 'POST',
            headers: backendHeaders,
            body: JSON.stringify(storyData),
          });

          if (!createResponse.ok) {
            throw new Error(
              await readBackendErrorMessage(createResponse, 'Failed to create story')
            );
          }

          const createdStory = await createResponse.json();

          results.push({
            url: trimmedUrl,
            status: 'imported',
            title: scrapedStory.title,
            author: scrapedStory.author,
            storyId: createdStory.id
          });
          importedCount++;

          console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);
        } catch (error) {
          console.error(`Failed to create story for ${trimmedUrl}:`, error);

          results.push({
            url: trimmedUrl,
            status: 'error',
            error: toErrorMessage(error, 'Failed to create story'),
            title: scrapedStory.title,
            author: scrapedStory.author
          });
          errorCount++;
        }
      } catch (error) {
        // Scraping (or any other unexpected step) failed for this URL only.
        console.error(`Error processing URL ${url}:`, error);

        results.push({
          url: url,
          status: 'error',
          error: toErrorMessage(error, 'Unknown error')
        });
        errorCount++;
      }
    }

    const response: BulkImportResponse = {
      results,
      summary: {
        total: urls.length,
        imported: importedCount,
        skipped: skippedCount,
        errors: errorCount
      }
    };

    console.log(`Bulk import completed:`, response.summary);

    // Trigger Typesense reindex if any stories were imported
    if (importedCount > 0) {
      try {
        console.log('Triggering Typesense reindex after bulk import...');
        const reindexResponse = await fetch(reindexUrl, {
          method: 'POST',
          headers: backendHeaders,
        });

        if (reindexResponse.ok) {
          const reindexResult = await reindexResponse.json();
          console.log('Typesense reindex completed:', reindexResult);
        } else {
          console.warn('Typesense reindex failed:', reindexResponse.status);
        }
      } catch (error) {
        console.warn('Failed to trigger Typesense reindex:', error);
        // Don't fail the whole request if reindex fails
      }
    }

    return NextResponse.json(response);
  } catch (error) {
    console.error('Bulk import error:', error);

    if (error instanceof Error) {
      return NextResponse.json(
        { error: `Bulk import failed: ${error.message}` },
        { status: 500 }
      );
    }

    return NextResponse.json(
      { error: 'Bulk import failed due to an unknown error' },
      { status: 500 }
    );
  }
}