scraping and improvements
This commit is contained in:
85
frontend/src/app/scrape/story/route.ts
Normal file
85
frontend/src/app/scrape/story/route.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
|
||||
/**
 * Ordered lookup from a fragment of a failure message to the response sent to
 * the client. The first entry whose `fragment` occurs in the error message wins,
 * so the order of this list is significant.
 */
const KNOWN_SCRAPE_FAILURES = [
  { fragment: 'Invalid URL', error: 'Invalid URL provided', status: 400 },
  { fragment: 'Unsupported site', error: 'This website is not supported for scraping', status: 400 },
  { fragment: 'HTTP 404', error: 'Story not found at the provided URL', status: 404 },
  { fragment: 'timeout', error: 'Request timed out while fetching content', status: 408 },
] as const;

/**
 * Reports whether `error` should be treated as a `ScraperError`.
 *
 * @param error - The value caught by the route handler.
 * @param scraperErrorClass - The dynamically imported `ScraperError` export, or
 *   `undefined` when the failure happened before that import completed.
 * @returns `true` when `error` is an instance of `scraperErrorClass`, or when
 *   its constructor is named `ScraperError`.
 */
function isScraperError(error: unknown, scraperErrorClass: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }

  // Prefer a real instanceof check: it keeps working when a minifier renames
  // the class, which would defeat the name comparison below.
  if (typeof scraperErrorClass === 'function' && error instanceof scraperErrorClass) {
    return true;
  }

  // Name-based fallback for when the class reference is unavailable.
  // Optional chaining guards objects created without a prototype.
  return error.constructor?.name === 'ScraperError';
}

/**
 * Handles `POST` requests that ask the server to scrape a story.
 *
 * Expects a JSON body with a non-empty string `url` property, passes it to
 * `StoryScraper.scrapeStory`, and responds with the scraped story as JSON.
 *
 * Error responses (all JSON, shaped `{ error: string }`, plus `url` for
 * scraper errors):
 * - 400 when the body is not valid JSON or `url` is missing / not a string.
 * - 400 with the error's own `message` and `url` for a `ScraperError`.
 * - The status listed in `KNOWN_SCRAPE_FAILURES` when an `Error` message
 *   contains one of the known fragments.
 * - 500 for anything else.
 *
 * @param request - The incoming request.
 * @returns The JSON response described above.
 */
export async function POST(request: NextRequest) {
  // Parse the body separately so a malformed payload is reported as a client
  // error (400) rather than falling through to the generic 500 handler.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Request body must be valid JSON' },
      { status: 400 }
    );
  }

  // A JSON body may be `null` or a primitive; only read `url` from real objects.
  const url =
    typeof body === 'object' && body !== null
      ? (body as { url?: unknown }).url
      : undefined;

  if (typeof url !== 'string' || url === '') {
    return NextResponse.json(
      { error: 'URL is required and must be a string' },
      { status: 400 }
    );
  }

  // Declared outside the try block so the catch handler can use it.
  let scraperErrorClass: unknown;

  try {
    // Dynamic import to prevent client-side bundling
    const { StoryScraper } = await import('@/lib/scraper/scraper');
    const scraperTypes = await import('@/lib/scraper/types');
    scraperErrorClass = scraperTypes.ScraperError;

    const scraper = new StoryScraper();
    const story = await scraper.scrapeStory(url);

    // Debug logging
    console.log('Scraped story data:', {
      url,
      title: story.title,
      author: story.author,
      summary: story.summary,
      contentLength: story.content?.length ?? 0,
      // Only build a preview when there is content; otherwise the string
      // concatenation would log the literal text "undefined...".
      contentPreview:
        typeof story.content === 'string'
          ? `${story.content.substring(0, 200)}...`
          : '',
      tags: story.tags,
      coverImage: story.coverImage
    });

    return NextResponse.json(story);
  } catch (error) {
    console.error('Story scraping error:', error);

    if (isScraperError(error, scraperErrorClass)) {
      const { message, url: failedUrl } = error as { message?: unknown; url?: unknown };
      return NextResponse.json(
        { error: message, url: failedUrl },
        { status: 400 }
      );
    }

    if (error instanceof Error) {
      // Handle specific error types
      const known = KNOWN_SCRAPE_FAILURES.find(({ fragment }) =>
        error.message.includes(fragment)
      );
      if (known) {
        return NextResponse.json(
          { error: known.error },
          { status: known.status }
        );
      }
    }

    return NextResponse.json(
      { error: 'Failed to scrape story. Please try again.' },
      { status: 500 }
    );
  }
}
|
||||
Reference in New Issue
Block a user