// Listing metadata from the file viewer: 292 lines, 9.0 KiB, TypeScript
import { NextRequest, NextResponse } from 'next/server';
/** JSON body expected by the bulk import endpoint. */
interface BulkImportRequest {
  /** Story page URLs to scrape and import. */
  urls: string[];
}
/** Outcome of processing a single URL from a bulk import request. */
interface ImportResult {
  /** The URL that was processed (trimmed once it has passed the empty-value check). */
  url: string;
  /** `imported` when a story was created, `skipped` when it was intentionally not created, `error` on failure. */
  status: 'imported' | 'skipped' | 'error';
  /** Why the URL was skipped (missing scraped fields or an existing duplicate). */
  reason?: string;
  /** Title reported by the scraper, when scraping got that far. */
  title?: string;
  /** Author name reported by the scraper, when scraping got that far. */
  author?: string;
  /** Failure message; set when `status` is `error`. */
  error?: string;
  /** `id` of the story returned by the backend; set when `status` is `imported`. */
  storyId?: string;
}
/** JSON body returned by the bulk import endpoint on success. */
interface BulkImportResponse {
  /** One entry per processed URL, in request order. */
  results: ImportResult[];
  /** Aggregate counts over `results`. */
  summary: {
    /** Number of URLs received in the request. */
    total: number;
    /** Number of results with status `imported`. */
    imported: number;
    /** Number of results with status `skipped`. */
    skipped: number;
    /** Number of results with status `error`. */
    errors: number;
  };
}
/** Maximum number of URLs accepted by a single bulk import request. */
const MAX_BULK_IMPORT_URLS = 50;

/**
 * Backend API base URL for calls made from this route handler.
 * Hardcoded to the Docker service hostname for container-to-container access;
 * client-side calls go through NEXT_PUBLIC_API_URL instead.
 */
const BACKEND_API_BASE_URL = 'http://backend:8080/api';

/** Returns the message of a thrown value when it is an `Error`, otherwise `fallback`. */
function errorMessageOf(error: unknown, fallback: string): string {
  return error instanceof Error ? error.message : fallback;
}

/**
 * Bulk story import endpoint.
 *
 * Expects an `authorization` header (forwarded unchanged to the backend) and a
 * JSON body of the form `{ urls: string[] }` with 1 to 50 entries. Each URL is
 * scraped, checked against the backend for duplicates, and created as a story.
 * URLs are processed one at a time, in request order.
 *
 * Responses:
 * - 401 when the `authorization` header is missing.
 * - 400 when the body is not valid JSON, `urls` is missing/empty/not an array,
 *   or more than 50 URLs are supplied.
 * - 200 with a `BulkImportResponse` otherwise; per-URL failures are reported in
 *   `results` and do not fail the request.
 * - 500 on any unexpected error outside per-URL processing.
 *
 * After at least one successful import a Typesense reindex is requested from
 * the backend; a reindex failure is logged but does not affect the response.
 */
export async function POST(request: NextRequest) {
  try {
    // Check for authentication
    const authorization = request.headers.get('authorization');
    if (!authorization) {
      return NextResponse.json(
        { error: 'Authentication required for bulk import' },
        { status: 401 }
      );
    }

    // A malformed body is a client error, not a server failure.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json(
        { error: 'Request body must be valid JSON' },
        { status: 400 }
      );
    }

    // Optional chaining keeps a `null` JSON body from throwing here.
    const urls = (body as Partial<BulkImportRequest> | null | undefined)?.urls;

    if (!Array.isArray(urls) || urls.length === 0) {
      return NextResponse.json(
        { error: 'URLs array is required and must not be empty' },
        { status: 400 }
      );
    }

    if (urls.length > MAX_BULK_IMPORT_URLS) {
      return NextResponse.json(
        { error: `Maximum ${MAX_BULK_IMPORT_URLS} URLs allowed per bulk import` },
        { status: 400 }
      );
    }

    // Dynamic imports to prevent client-side bundling
    const { StoryScraper } = await import('@/lib/scraper/scraper');
    const scraper = new StoryScraper();

    // Headers sent with every backend call; the caller's credentials are forwarded as-is.
    const backendHeaders = {
      'Authorization': authorization,
      'Content-Type': 'application/json',
    };

    console.log(`Starting bulk scraping for ${urls.length} URLs`);
    console.log(`Environment NEXT_PUBLIC_API_URL: ${process.env.NEXT_PUBLIC_API_URL}`);
    console.log(`DEBUG: serverSideApiBaseUrl variable is: ${BACKEND_API_BASE_URL}`);

    // Quick test to verify backend connectivity (diagnostic only; failures are logged and ignored)
    try {
      console.log(`Testing backend connectivity at: ${BACKEND_API_BASE_URL}/stories/check-duplicate`);
      const testResponse = await fetch(
        `${BACKEND_API_BASE_URL}/stories/check-duplicate?title=test&authorName=test`,
        { method: 'GET', headers: backendHeaders }
      );
      console.log(`Backend test response status: ${testResponse.status}`);
    } catch (error) {
      console.error(`Backend connectivity test failed:`, error);
    }

    /**
     * Scrapes, de-duplicates and creates a single story.
     * Never throws: every failure is reported through the returned result.
     */
    const importOne = async (url: string): Promise<ImportResult> => {
      try {
        // Runtime guard: array elements come from unvalidated JSON, so they may not be strings.
        if (!url || typeof url !== 'string' || url.trim() === '') {
          return {
            url: url || 'Empty URL',
            status: 'error',
            error: 'Invalid URL format',
          };
        }

        const trimmedUrl = url.trim();

        // NOTE(review): the URL is passed to the scraper without scheme/host validation;
        // confirm the scraper restricts targets, since the value is caller-supplied.
        const scrapedStory = await scraper.scrapeStory(trimmedUrl);

        // Validate required fields
        const missingFields = (['title', 'author', 'content'] as const).filter(
          (field) => !scrapedStory[field]
        );
        if (missingFields.length > 0) {
          return {
            url: trimmedUrl,
            status: 'skipped',
            reason: `Missing required fields: ${missingFields.join(', ')}`,
            title: scrapedStory.title,
            author: scrapedStory.author,
          };
        }

        // Check for duplicates using query parameters
        try {
          const duplicateCheckUrl = `${BACKEND_API_BASE_URL}/stories/check-duplicate`;
          console.log(`Duplicate check URL: ${duplicateCheckUrl}`);
          const params = new URLSearchParams({
            title: scrapedStory.title,
            authorName: scrapedStory.author,
          });

          const duplicateCheckResponse = await fetch(`${duplicateCheckUrl}?${params.toString()}`, {
            method: 'GET',
            headers: backendHeaders,
          });

          if (duplicateCheckResponse.ok) {
            const duplicateResult = await duplicateCheckResponse.json();
            if (duplicateResult.hasDuplicates) {
              return {
                url: trimmedUrl,
                status: 'skipped',
                reason: `Duplicate story found (${duplicateResult.count} existing)`,
                title: scrapedStory.title,
                author: scrapedStory.author,
              };
            }
          }
        } catch (error) {
          // Best effort: continue with the import if the duplicate check fails.
          console.warn('Duplicate check failed:', error);
        }

        // Create the story
        try {
          const storyData = {
            title: scrapedStory.title,
            summary: scrapedStory.summary || undefined,
            contentHtml: scrapedStory.content,
            sourceUrl: scrapedStory.sourceUrl || trimmedUrl,
            authorName: scrapedStory.author,
            tagNames: scrapedStory.tags && scrapedStory.tags.length > 0 ? scrapedStory.tags : undefined,
          };

          const createUrl = `${BACKEND_API_BASE_URL}/stories`;
          console.log(`Create story URL: ${createUrl}`);
          const createResponse = await fetch(createUrl, {
            method: 'POST',
            headers: backendHeaders,
            body: JSON.stringify(storyData),
          });

          if (!createResponse.ok) {
            // The error body may not be JSON (e.g. a proxy error page); don't let a
            // parse failure hide the actual HTTP failure.
            const errorData = await createResponse.json().catch(() => null);
            throw new Error(
              errorData?.message || `Failed to create story (HTTP ${createResponse.status})`
            );
          }

          const createdStory = await createResponse.json();

          console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);

          return {
            url: trimmedUrl,
            status: 'imported',
            title: scrapedStory.title,
            author: scrapedStory.author,
            storyId: createdStory.id,
          };
        } catch (error) {
          console.error(`Failed to create story for ${trimmedUrl}:`, error);

          return {
            url: trimmedUrl,
            status: 'error',
            error: errorMessageOf(error, 'Failed to create story'),
            title: scrapedStory.title,
            author: scrapedStory.author,
          };
        }
      } catch (error) {
        // Scraper failures and anything else unexpected for this URL end up here.
        console.error(`Error processing URL ${url}:`, error);

        return {
          url: url,
          status: 'error',
          error: errorMessageOf(error, 'Unknown error'),
        };
      }
    };

    // Process sequentially, preserving request order in the results.
    const results: ImportResult[] = [];
    for (const url of urls) {
      console.log(`Processing URL: ${url}`);
      results.push(await importOne(url));
    }

    const countWithStatus = (status: ImportResult['status']): number =>
      results.filter((result) => result.status === status).length;

    const importedCount = countWithStatus('imported');

    const response: BulkImportResponse = {
      results,
      summary: {
        total: urls.length,
        imported: importedCount,
        skipped: countWithStatus('skipped'),
        errors: countWithStatus('error'),
      },
    };

    console.log(`Bulk import completed:`, response.summary);

    // Trigger Typesense reindex if any stories were imported
    if (importedCount > 0) {
      try {
        console.log('Triggering Typesense reindex after bulk import...');
        const reindexUrl = `${BACKEND_API_BASE_URL}/stories/reindex-typesense`;
        const reindexResponse = await fetch(reindexUrl, {
          method: 'POST',
          headers: backendHeaders,
        });

        if (reindexResponse.ok) {
          const reindexResult = await reindexResponse.json();
          console.log('Typesense reindex completed:', reindexResult);
        } else {
          console.warn('Typesense reindex failed:', reindexResponse.status);
        }
      } catch (error) {
        // Don't fail the whole request if reindex fails
        console.warn('Failed to trigger Typesense reindex:', error);
      }
    }

    return NextResponse.json(response);
  } catch (error) {
    console.error('Bulk import error:', error);

    if (error instanceof Error) {
      return NextResponse.json(
        { error: `Bulk import failed: ${error.message}` },
        { status: 500 }
      );
    }

    return NextResponse.json(
      { error: 'Bulk import failed due to an unknown error' },
      { status: 500 }
    );
  }
}