scraping and improvements
This commit is contained in:
72
frontend/src/app/scrape/author/route.ts
Normal file
72
frontend/src/app/scrape/author/route.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
|
||||
/**
 * Maps substrings of a thrown `Error`'s message to the client-facing response.
 * Entries are checked in order; the first matching substring wins.
 */
const KNOWN_ERROR_RESPONSES = [
  { needle: 'Invalid URL', message: 'Invalid URL provided', status: 400 },
  {
    needle: 'not supported',
    message: 'Author page scraping is not supported for this website',
    status: 400,
  },
  {
    needle: 'HTTP 404',
    message: 'Author page not found at the provided URL',
    status: 404,
  },
  {
    needle: 'timeout',
    message: 'Request timed out while fetching content',
    status: 408,
  },
] as const;

/**
 * Route handler for POST requests: scrapes an author page and returns its stories.
 *
 * Expects a JSON body of the shape `{ "url": string }`.
 *
 * Responses:
 * - 200 `{ stories }` with the value returned by `StoryScraper.scrapeAuthorPage(url)`.
 * - 400 when the body is not valid JSON, when `url` is missing or not a string,
 *   when a `ScraperError` is thrown, or when the error message contains
 *   "Invalid URL" or "not supported".
 * - 404 when the error message contains "HTTP 404".
 * - 408 when the error message contains "timeout".
 * - 500 for any other failure.
 *
 * @param request Incoming request whose body is parsed as JSON.
 * @returns A JSON response as described above.
 */
export async function POST(request: NextRequest) {
  // Parse the body outside the scraping try/catch so that malformed JSON is
  // reported as a client error (400) instead of the generic 500 fallback.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Request body must be valid JSON' },
      { status: 400 }
    );
  }

  // A JSON `null` or primitive body has no `url`; treat it as missing rather
  // than letting a destructuring TypeError surface as a 500.
  const url =
    body !== null && typeof body === 'object'
      ? (body as { url?: unknown }).url
      : undefined;

  if (!url || typeof url !== 'string') {
    return NextResponse.json(
      { error: 'URL is required and must be a string' },
      { status: 400 }
    );
  }

  try {
    // Dynamic import to prevent client-side bundling
    const { StoryScraper } = await import('@/lib/scraper/scraper');

    const scraper = new StoryScraper();
    const stories = await scraper.scrapeAuthorPage(url);

    return NextResponse.json({ stories });
  } catch (error: unknown) {
    console.error('Author page scraping error:', error);

    // Detect a ScraperError structurally so the class need not be imported at
    // module level. Optional chaining guards objects without a constructor.
    if (error !== null && typeof error === 'object') {
      const candidate = error as {
        name?: unknown;
        message?: unknown;
        url?: unknown;
        constructor?: { name?: unknown };
      };

      // NOTE(review): the `name` fallback assumes ScraperError sets
      // `this.name = 'ScraperError'`; it is there because minifiers may rename
      // the constructor. Confirm against the class definition.
      if (
        candidate.constructor?.name === 'ScraperError' ||
        candidate.name === 'ScraperError'
      ) {
        return NextResponse.json(
          {
            error: candidate.message,
            url: candidate.url,
          },
          { status: 400 }
        );
      }
    }

    if (error instanceof Error) {
      // Handle specific error types by message substring.
      const { message } = error;
      const known = KNOWN_ERROR_RESPONSES.find(({ needle }) =>
        message.includes(needle)
      );
      if (known) {
        return NextResponse.json(
          { error: known.message },
          { status: known.status }
        );
      }
    }

    return NextResponse.json(
      { error: 'Failed to scrape author page. Please try again.' },
      { status: 500 }
    );
  }
}
|
||||
Reference in New Issue
Block a user