scraping and improvements

This commit is contained in:
Stefan Hardegger
2025-07-28 13:52:09 +02:00
parent f95d7aa8bb
commit fcad028959
31 changed files with 3788 additions and 118 deletions

View File

@@ -0,0 +1,300 @@
'use client';
import { useState } from 'react';
import { useRouter } from 'next/navigation';
import Link from 'next/link';
import { ArrowLeftIcon } from '@heroicons/react/24/outline';
/** Outcome category the bulk import endpoint reports for a single URL. */
type ImportStatus = 'imported' | 'skipped' | 'error';

/**
 * One entry of the per-URL result list returned by a bulk import.
 *
 * Only `url` and `status` are always present; every other field is optional.
 * NOTE(review): which optional fields accompany which status is decided by the
 * server and is not visible from this file — confirm against the API.
 */
interface ImportResult {
  /** The URL this entry refers to. */
  url: string;
  /** Outcome category for this URL. */
  status: ImportStatus;

  /** Identifier of the story — presumably set only when one was created; verify against the API. */
  storyId?: string;
  /** Story title; the page renders it only when `author` is present as well. */
  title?: string;
  /** Story author; the page renders it only when `title` is present as well. */
  author?: string;

  /** Free-text explanation shown under the URL (presumably why it was skipped — confirm). */
  reason?: string;
  /** Failure message; the page renders it prefixed with "Error:". */
  error?: string;
}
/** Aggregate counters describing one bulk import request. */
interface BulkImportSummary {
  /** Number shown as "Total URLs" on the results view. */
  total: number;
  /** Number shown as "Imported". */
  imported: number;
  /** Number shown as "Skipped". */
  skipped: number;
  /** Number shown as "Errors". */
  errors: number;
}

/**
 * JSON body the page expects from a successful `POST /scrape/bulk`:
 * aggregate counters plus one result entry per URL.
 */
interface BulkImportResponse {
  summary: BulkImportSummary;
  results: ImportResult[];
}
/** Maximum number of URLs accepted per submission; checked client-side before any request is sent. */
const MAX_BULK_IMPORT_URLS = 50;

/** Splits the textarea content into trimmed, non-empty lines (one URL per line). */
function parseUrlList(raw: string): string[] {
  return raw
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

/** Tailwind classes for a status badge; unrecognised statuses get a neutral gray style. */
function getStatusColor(status: string): string {
  switch (status) {
    case 'imported':
      return 'text-green-700 bg-green-50 border-green-200';
    case 'skipped':
      return 'text-yellow-700 bg-yellow-50 border-yellow-200';
    case 'error':
      return 'text-red-700 bg-red-50 border-red-200';
    default:
      return 'text-gray-700 bg-gray-50 border-gray-200';
  }
}

/** Glyph shown in front of the status label; empty for unrecognised statuses. */
function getStatusIcon(status: string): string {
  switch (status) {
    case 'imported':
      return '✓';
    case 'skipped':
      return '⚠';
    case 'error':
      return '✗';
    default:
      return '';
  }
}

/** Parses the response body as JSON, yielding `undefined` when the body is not valid JSON. */
async function readJsonBody(response: Response): Promise<unknown> {
  try {
    return await response.json();
  } catch {
    // Error pages from proxies/servers are frequently HTML or empty; treat them as "no body".
    return undefined;
  }
}

/**
 * Picks a user-facing message for a non-OK response.
 *
 * @param body   Parsed JSON body, or `undefined` when it could not be parsed.
 * @param status HTTP status code, included when the body gives no usable message.
 */
function extractErrorMessage(body: unknown, status: number): string {
  if (body === undefined) {
    return `Bulk import failed (HTTP ${status})`;
  }
  if (body !== null && typeof body === 'object') {
    const { error } = body as { error?: unknown };
    if (typeof error === 'string' && error.length > 0) {
      return error;
    }
  }
  return 'Bulk import failed';
}

/** Minimal runtime check that a parsed body has the parts the results view dereferences. */
function isBulkImportResponse(value: unknown): value is BulkImportResponse {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  const candidate = value as Partial<BulkImportResponse>;
  return (
    Array.isArray(candidate.results) &&
    typeof candidate.summary === 'object' &&
    candidate.summary !== null
  );
}

/**
 * Bulk import page.
 *
 * Shows a form with a textarea (one URL per line). On submit the URLs are
 * POSTed as `{ urls: string[] }` to `/scrape/bulk`; once a response arrives the
 * form is replaced by a summary and a per-URL result list. "Import More URLs"
 * resets the page back to the empty form.
 */
export default function BulkImportPage() {
  const [urls, setUrls] = useState('');
  const [isLoading, setIsLoading] = useState(false);
  const [results, setResults] = useState<BulkImportResponse | null>(null);
  const [error, setError] = useState<string | null>(null);

  // Single source of truth for the counter below the textarea and the submit button state.
  const urlCount = parseUrlList(urls).length;

  /** Validates the input, sends the bulk import request and stores either the results or an error. */
  const handleSubmit = async (e: React.FormEvent<HTMLFormElement>) => {
    e.preventDefault();

    // Validate before entering the loading state so early exits need no cleanup.
    const urlList = parseUrlList(urls);
    if (urlList.length === 0) {
      setError('Please enter at least one URL');
      return;
    }
    if (urlList.length > MAX_BULK_IMPORT_URLS) {
      setError(`Maximum ${MAX_BULK_IMPORT_URLS} URLs allowed per bulk import`);
      return;
    }

    setIsLoading(true);
    setError(null);
    setResults(null);

    try {
      // Auth token is read from localStorage; the header is omitted entirely when no token is stored.
      const token = localStorage.getItem('auth-token');
      const headers: Record<string, string> = { 'Content-Type': 'application/json' };
      if (token) {
        headers.Authorization = `Bearer ${token}`;
      }

      const response = await fetch('/scrape/bulk', {
        method: 'POST',
        headers,
        body: JSON.stringify({ urls: urlList }),
      });

      const payload = await readJsonBody(response);
      if (!response.ok) {
        throw new Error(extractErrorMessage(payload, response.status));
      }
      if (!isBulkImportResponse(payload)) {
        // Guard here so a malformed body becomes a visible error instead of a render crash.
        throw new Error('Received an unexpected response from the server');
      }

      setResults(payload);
    } catch (err) {
      console.error('Bulk import error:', err);
      setError(err instanceof Error ? err.message : 'Failed to import stories');
    } finally {
      setIsLoading(false);
    }
  };

  /** Clears the textarea, any previous results and any error, returning to the empty form. */
  const handleReset = () => {
    setUrls('');
    setResults(null);
    setError(null);
  };

  return (
    <div className="container mx-auto px-4 py-6">
      <div className="max-w-4xl mx-auto">
        {/* Header */}
        <div className="mb-6">
          <div className="flex items-center gap-4 mb-4">
            <Link
              href="/library"
              className="inline-flex items-center text-blue-600 hover:text-blue-800"
            >
              <ArrowLeftIcon className="h-4 w-4 mr-1" />
              Back to Library
            </Link>
          </div>
          <h1 className="text-3xl font-bold text-gray-900 mb-2">Bulk Import Stories</h1>
          <p className="text-gray-600">
            Import multiple stories at once by providing a list of URLs. Each URL will be scraped
            and automatically added to your story collection.
          </p>
        </div>

        {!results ? (
          /* Import form */
          <form onSubmit={handleSubmit} className="space-y-6">
            <div>
              <label htmlFor="urls" className="block text-sm font-medium text-gray-700 mb-2">
                Story URLs
              </label>
              <p className="text-sm text-gray-500 mb-3">
                Enter one URL per line. Maximum {MAX_BULK_IMPORT_URLS} URLs per import.
              </p>
              <textarea
                id="urls"
                value={urls}
                onChange={(e) => setUrls(e.target.value)}
                placeholder="https://example.com/story1&#10;https://example.com/story2&#10;https://example.com/story3"
                className="w-full h-64 px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
                disabled={isLoading}
              />
              <p className="mt-2 text-sm text-gray-500">
                URLs: {urlCount}
              </p>
            </div>

            {error && (
              <div role="alert" className="bg-red-50 border border-red-200 rounded-md p-4">
                <div className="flex">
                  <div className="ml-3">
                    <h3 className="text-sm font-medium text-red-800">Error</h3>
                    <div className="mt-2 text-sm text-red-700">
                      {error}
                    </div>
                  </div>
                </div>
              </div>
            )}

            <div className="flex gap-4">
              <button
                type="submit"
                disabled={isLoading || urlCount === 0}
                className="px-6 py-2 bg-blue-600 text-white font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 disabled:opacity-50 disabled:cursor-not-allowed"
              >
                {isLoading ? 'Importing...' : 'Start Import'}
              </button>
              <button
                type="button"
                onClick={handleReset}
                disabled={isLoading}
                className="px-6 py-2 bg-gray-600 text-white font-medium rounded-md hover:bg-gray-700 focus:outline-none focus:ring-2 focus:ring-gray-500 focus:ring-offset-2 disabled:opacity-50 disabled:cursor-not-allowed"
              >
                Clear
              </button>
            </div>

            {isLoading && (
              <div role="status" className="bg-blue-50 border border-blue-200 rounded-md p-4">
                <div className="flex items-center">
                  <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-blue-600 mr-3"></div>
                  <div>
                    <p className="text-sm font-medium text-blue-800">Processing URLs...</p>
                    <p className="text-sm text-blue-600">
                      This may take a few minutes depending on the number of URLs and response times of the source websites.
                    </p>
                  </div>
                </div>
              </div>
            )}
          </form>
        ) : (
          /* Results */
          <div className="space-y-6">
            {/* Summary */}
            <div className="bg-white border border-gray-200 rounded-lg p-6">
              <h2 className="text-xl font-semibold text-gray-900 mb-4">Import Summary</h2>
              <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
                <div className="text-center">
                  <div className="text-2xl font-bold text-gray-900">{results.summary.total}</div>
                  <div className="text-sm text-gray-600">Total URLs</div>
                </div>
                <div className="text-center">
                  <div className="text-2xl font-bold text-green-600">{results.summary.imported}</div>
                  <div className="text-sm text-gray-600">Imported</div>
                </div>
                <div className="text-center">
                  <div className="text-2xl font-bold text-yellow-600">{results.summary.skipped}</div>
                  <div className="text-sm text-gray-600">Skipped</div>
                </div>
                <div className="text-center">
                  <div className="text-2xl font-bold text-red-600">{results.summary.errors}</div>
                  <div className="text-sm text-gray-600">Errors</div>
                </div>
              </div>
            </div>

            {/* Detailed Results */}
            <div className="bg-white border border-gray-200 rounded-lg">
              <div className="px-6 py-4 border-b border-gray-200">
                <h3 className="text-lg font-medium text-gray-900">Detailed Results</h3>
              </div>
              <div className="divide-y divide-gray-200">
                {/* Index keys: the list is rendered once per response and the same URL may appear twice. */}
                {results.results.map((result, index) => (
                  <div key={index} className="p-6">
                    <div className="flex items-start justify-between">
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2 mb-2">
                          <span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${getStatusColor(result.status)}`}>
                            {getStatusIcon(result.status)} {result.status.charAt(0).toUpperCase() + result.status.slice(1)}
                          </span>
                        </div>
                        <p className="text-sm text-gray-900 font-medium truncate mb-1">
                          {result.url}
                        </p>
                        {result.title && result.author && (
                          <p className="text-sm text-gray-600 mb-1">
                            &quot;{result.title}&quot; by {result.author}
                          </p>
                        )}
                        {result.reason && (
                          <p className="text-sm text-gray-500">
                            {result.reason}
                          </p>
                        )}
                        {result.error && (
                          <p className="text-sm text-red-600">
                            Error: {result.error}
                          </p>
                        )}
                      </div>
                    </div>
                  </div>
                ))}
              </div>
            </div>

            {/* Actions */}
            <div className="flex gap-4">
              <button
                type="button"
                onClick={handleReset}
                className="px-6 py-2 bg-blue-600 text-white font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
              >
                Import More URLs
              </button>
              <Link
                href="/stories"
                className="px-6 py-2 bg-gray-600 text-white font-medium rounded-md hover:bg-gray-700 focus:outline-none focus:ring-2 focus:ring-gray-500 focus:ring-offset-2"
              >
                View Stories
              </Link>
            </div>
          </div>
        )}
      </div>
    </div>
  );
}