Add story scraping: single-story, author-page and bulk URL import (UI + API routes)

This commit is contained in:
Stefan Hardegger
2025-07-28 13:52:09 +02:00
parent f95d7aa8bb
commit fcad028959
31 changed files with 3788 additions and 118 deletions

View File

@@ -12,6 +12,9 @@ import ImageUpload from '../../components/ui/ImageUpload';
import { storyApi, authorApi } from '../../lib/api';
export default function AddStoryPage() {
const [importMode, setImportMode] = useState<'manual' | 'url'>('manual');
const [importUrl, setImportUrl] = useState('');
const [scraping, setScraping] = useState(false);
const [formData, setFormData] = useState({
title: '',
summary: '',
@@ -130,6 +133,57 @@ export default function AddStoryPage() {
setFormData(prev => ({ ...prev, tags }));
};
const handleImportFromUrl = async () => {
if (!importUrl.trim()) {
setErrors({ importUrl: 'URL is required' });
return;
}
setScraping(true);
setErrors({});
try {
const response = await fetch('/scrape/story', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ url: importUrl }),
});
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.error || 'Failed to scrape story');
}
const scrapedStory = await response.json();
// Pre-fill the form with scraped data
setFormData({
title: scrapedStory.title || '',
summary: scrapedStory.summary || '',
authorName: scrapedStory.author || '',
contentHtml: scrapedStory.content || '',
sourceUrl: scrapedStory.sourceUrl || importUrl,
tags: scrapedStory.tags || [],
seriesName: '',
volume: '',
});
// Switch to manual mode so user can edit the pre-filled data
setImportMode('manual');
setImportUrl('');
// Show success message
setErrors({ success: 'Story data imported successfully! Review and edit as needed before saving.' });
} catch (error: any) {
console.error('Failed to import story:', error);
setErrors({ importUrl: error.message });
} finally {
setScraping(false);
}
};
const validateForm = () => {
const newErrors: Record<string, string> = {};
@@ -206,7 +260,105 @@ export default function AddStoryPage() {
</p>
</div>
<form onSubmit={handleSubmit} className="space-y-6">
{/* Import Mode Toggle */}
<div className="mb-8">
<div className="flex border-b border-gray-200 dark:border-gray-700">
<button
type="button"
onClick={() => setImportMode('manual')}
className={`px-6 py-3 text-sm font-medium border-b-2 transition-colors ${
importMode === 'manual'
? 'border-theme-accent text-theme-accent'
: 'border-transparent theme-text hover:text-theme-accent'
}`}
>
Manual Entry
</button>
<button
type="button"
onClick={() => setImportMode('url')}
className={`px-6 py-3 text-sm font-medium border-b-2 transition-colors ${
importMode === 'url'
? 'border-theme-accent text-theme-accent'
: 'border-transparent theme-text hover:text-theme-accent'
}`}
>
Import from URL
</button>
</div>
</div>
{/* URL Import Section */}
{importMode === 'url' && (
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-6 mb-8">
<h3 className="text-lg font-medium theme-header mb-4">Import Story from URL</h3>
<p className="theme-text text-sm mb-4">
Enter a URL from a supported story site to automatically extract the story content, title, author, and other metadata.
</p>
<div className="space-y-4">
<Input
label="Story URL"
type="url"
value={importUrl}
onChange={(e) => setImportUrl(e.target.value)}
placeholder="https://example.com/story-url"
error={errors.importUrl}
disabled={scraping}
/>
<div className="flex gap-3">
<Button
type="button"
onClick={handleImportFromUrl}
loading={scraping}
disabled={!importUrl.trim() || scraping}
>
{scraping ? 'Importing...' : 'Import Story'}
</Button>
<Button
type="button"
variant="ghost"
onClick={() => setImportMode('manual')}
disabled={scraping}
>
Enter Manually Instead
</Button>
</div>
<div className="border-t pt-4 mt-4">
<p className="text-sm theme-text mb-2">
Need to import multiple stories at once?
</p>
<Button
type="button"
variant="secondary"
onClick={() => router.push('/stories/import/bulk')}
disabled={scraping}
size="sm"
>
Bulk Import Multiple URLs
</Button>
</div>
<div className="text-xs theme-text">
<p className="font-medium mb-1">Supported Sites:</p>
<p>Archive of Our Own, DeviantArt, FanFiction.Net, Literotica, Royal Road, Wattpad, and more</p>
</div>
</div>
</div>
)}
{/* Success Message */}
{errors.success && (
<div className="p-4 bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-lg mb-6">
<p className="text-green-800 dark:text-green-200">{errors.success}</p>
</div>
)}
{importMode === 'manual' && (
<form onSubmit={handleSubmit} className="space-y-6">
{/* Title */}
<Input
label="Title *"
@@ -379,6 +531,7 @@ export default function AddStoryPage() {
</Button>
</div>
</form>
)}
</div>
</AppLayout>
);

View File

@@ -0,0 +1,72 @@
import { NextRequest, NextResponse } from 'next/server';
export async function POST(request: NextRequest) {
try {
const body = await request.json();
const { url } = body;
if (!url || typeof url !== 'string') {
return NextResponse.json(
{ error: 'URL is required and must be a string' },
{ status: 400 }
);
}
// Dynamic import to prevent client-side bundling
const { StoryScraper } = await import('@/lib/scraper/scraper');
const scraper = new StoryScraper();
const stories = await scraper.scrapeAuthorPage(url);
return NextResponse.json({ stories });
} catch (error) {
console.error('Author page scraping error:', error);
// Check if it's a ScraperError without importing at module level
if (error && typeof error === 'object' && error.constructor.name === 'ScraperError') {
return NextResponse.json(
{
error: (error as any).message,
url: (error as any).url
},
{ status: 400 }
);
}
if (error instanceof Error) {
// Handle specific error types
if (error.message.includes('Invalid URL')) {
return NextResponse.json(
{ error: 'Invalid URL provided' },
{ status: 400 }
);
}
if (error.message.includes('not supported')) {
return NextResponse.json(
{ error: 'Author page scraping is not supported for this website' },
{ status: 400 }
);
}
if (error.message.includes('HTTP 404')) {
return NextResponse.json(
{ error: 'Author page not found at the provided URL' },
{ status: 404 }
);
}
if (error.message.includes('timeout')) {
return NextResponse.json(
{ error: 'Request timed out while fetching content' },
{ status: 408 }
);
}
}
return NextResponse.json(
{ error: 'Failed to scrape author page. Please try again.' },
{ status: 500 }
);
}
}

View File

@@ -0,0 +1,292 @@
import { NextRequest, NextResponse } from 'next/server';
interface BulkImportRequest {
urls: string[];
}
interface ImportResult {
url: string;
status: 'imported' | 'skipped' | 'error';
reason?: string;
title?: string;
author?: string;
error?: string;
storyId?: string;
}
interface BulkImportResponse {
results: ImportResult[];
summary: {
total: number;
imported: number;
skipped: number;
errors: number;
};
}
export async function POST(request: NextRequest) {
try {
// Check for authentication
const authorization = request.headers.get('authorization');
if (!authorization) {
return NextResponse.json(
{ error: 'Authentication required for bulk import' },
{ status: 401 }
);
}
const body = await request.json();
const { urls } = body as BulkImportRequest;
if (!urls || !Array.isArray(urls) || urls.length === 0) {
return NextResponse.json(
{ error: 'URLs array is required and must not be empty' },
{ status: 400 }
);
}
if (urls.length > 50) {
return NextResponse.json(
{ error: 'Maximum 50 URLs allowed per bulk import' },
{ status: 400 }
);
}
// Dynamic imports to prevent client-side bundling
const { StoryScraper } = await import('@/lib/scraper/scraper');
const scraper = new StoryScraper();
const results: ImportResult[] = [];
let importedCount = 0;
let skippedCount = 0;
let errorCount = 0;
console.log(`Starting bulk scraping for ${urls.length} URLs`);
console.log(`Environment NEXT_PUBLIC_API_URL: ${process.env.NEXT_PUBLIC_API_URL}`);
// For server-side API calls in Docker, use direct backend container URL
// Client-side calls use NEXT_PUBLIC_API_URL through nginx, but server-side needs direct container access
const serverSideApiBaseUrl = 'http://backend:8080/api';
console.log(`DEBUG: serverSideApiBaseUrl variable is: ${serverSideApiBaseUrl}`);
// Quick test to verify backend connectivity
try {
console.log(`Testing backend connectivity at: http://backend:8080/api/stories/check-duplicate`);
const testResponse = await fetch(`http://backend:8080/api/stories/check-duplicate?title=test&authorName=test`, {
method: 'GET',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
});
console.log(`Backend test response status: ${testResponse.status}`);
} catch (error) {
console.error(`Backend connectivity test failed:`, error);
}
for (const url of urls) {
console.log(`Processing URL: ${url}`);
try {
// Validate URL format
if (!url || typeof url !== 'string' || url.trim() === '') {
results.push({
url: url || 'Empty URL',
status: 'error',
error: 'Invalid URL format'
});
errorCount++;
continue;
}
const trimmedUrl = url.trim();
// Scrape the story
const scrapedStory = await scraper.scrapeStory(trimmedUrl);
// Validate required fields
if (!scrapedStory.title || !scrapedStory.author || !scrapedStory.content) {
const missingFields = [];
if (!scrapedStory.title) missingFields.push('title');
if (!scrapedStory.author) missingFields.push('author');
if (!scrapedStory.content) missingFields.push('content');
results.push({
url: trimmedUrl,
status: 'skipped',
reason: `Missing required fields: ${missingFields.join(', ')}`,
title: scrapedStory.title,
author: scrapedStory.author
});
skippedCount++;
continue;
}
// Check for duplicates using query parameters
try {
// Use hardcoded backend URL for container-to-container communication
const duplicateCheckUrl = `http://backend:8080/api/stories/check-duplicate`;
console.log(`Duplicate check URL: ${duplicateCheckUrl}`);
const params = new URLSearchParams({
title: scrapedStory.title,
authorName: scrapedStory.author
});
const duplicateCheckResponse = await fetch(`${duplicateCheckUrl}?${params.toString()}`, {
method: 'GET',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
});
if (duplicateCheckResponse.ok) {
const duplicateResult = await duplicateCheckResponse.json();
if (duplicateResult.hasDuplicates) {
results.push({
url: trimmedUrl,
status: 'skipped',
reason: `Duplicate story found (${duplicateResult.count} existing)`,
title: scrapedStory.title,
author: scrapedStory.author
});
skippedCount++;
continue;
}
}
} catch (error) {
console.warn('Duplicate check failed:', error);
// Continue with import if duplicate check fails
}
// Create the story
try {
const storyData = {
title: scrapedStory.title,
summary: scrapedStory.summary || undefined,
contentHtml: scrapedStory.content,
sourceUrl: scrapedStory.sourceUrl || trimmedUrl,
authorName: scrapedStory.author,
tagNames: scrapedStory.tags && scrapedStory.tags.length > 0 ? scrapedStory.tags : undefined,
};
// Use hardcoded backend URL for container-to-container communication
const createUrl = `http://backend:8080/api/stories`;
console.log(`Create story URL: ${createUrl}`);
const createResponse = await fetch(createUrl, {
method: 'POST',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
body: JSON.stringify(storyData),
});
if (!createResponse.ok) {
const errorData = await createResponse.json();
throw new Error(errorData.message || 'Failed to create story');
}
const createdStory = await createResponse.json();
results.push({
url: trimmedUrl,
status: 'imported',
title: scrapedStory.title,
author: scrapedStory.author,
storyId: createdStory.id
});
importedCount++;
console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`);
} catch (error) {
console.error(`Failed to create story for ${trimmedUrl}:`, error);
let errorMessage = 'Failed to create story';
if (error instanceof Error) {
errorMessage = error.message;
}
results.push({
url: trimmedUrl,
status: 'error',
error: errorMessage,
title: scrapedStory.title,
author: scrapedStory.author
});
errorCount++;
}
} catch (error) {
console.error(`Error processing URL ${url}:`, error);
let errorMessage = 'Unknown error';
if (error instanceof Error) {
errorMessage = error.message;
}
results.push({
url: url,
status: 'error',
error: errorMessage
});
errorCount++;
}
}
const response: BulkImportResponse = {
results,
summary: {
total: urls.length,
imported: importedCount,
skipped: skippedCount,
errors: errorCount
}
};
console.log(`Bulk import completed:`, response.summary);
// Trigger Typesense reindex if any stories were imported
if (importedCount > 0) {
try {
console.log('Triggering Typesense reindex after bulk import...');
const reindexUrl = `http://backend:8080/api/stories/reindex-typesense`;
const reindexResponse = await fetch(reindexUrl, {
method: 'POST',
headers: {
'Authorization': authorization,
'Content-Type': 'application/json',
},
});
if (reindexResponse.ok) {
const reindexResult = await reindexResponse.json();
console.log('Typesense reindex completed:', reindexResult);
} else {
console.warn('Typesense reindex failed:', reindexResponse.status);
}
} catch (error) {
console.warn('Failed to trigger Typesense reindex:', error);
// Don't fail the whole request if reindex fails
}
}
return NextResponse.json(response);
} catch (error) {
console.error('Bulk import error:', error);
if (error instanceof Error) {
return NextResponse.json(
{ error: `Bulk import failed: ${error.message}` },
{ status: 500 }
);
}
return NextResponse.json(
{ error: 'Bulk import failed due to an unknown error' },
{ status: 500 }
);
}
}

View File

@@ -0,0 +1,85 @@
import { NextRequest, NextResponse } from 'next/server';
export async function POST(request: NextRequest) {
try {
const body = await request.json();
const { url } = body;
if (!url || typeof url !== 'string') {
return NextResponse.json(
{ error: 'URL is required and must be a string' },
{ status: 400 }
);
}
// Dynamic import to prevent client-side bundling
const { StoryScraper } = await import('@/lib/scraper/scraper');
const { ScraperError } = await import('@/lib/scraper/types');
const scraper = new StoryScraper();
const story = await scraper.scrapeStory(url);
// Debug logging
console.log('Scraped story data:', {
url: url,
title: story.title,
author: story.author,
summary: story.summary,
contentLength: story.content?.length || 0,
contentPreview: story.content?.substring(0, 200) + '...',
tags: story.tags,
coverImage: story.coverImage
});
return NextResponse.json(story);
} catch (error) {
console.error('Story scraping error:', error);
// Check if it's a ScraperError without importing at module level
if (error && typeof error === 'object' && error.constructor.name === 'ScraperError') {
return NextResponse.json(
{
error: (error as any).message,
url: (error as any).url
},
{ status: 400 }
);
}
if (error instanceof Error) {
// Handle specific error types
if (error.message.includes('Invalid URL')) {
return NextResponse.json(
{ error: 'Invalid URL provided' },
{ status: 400 }
);
}
if (error.message.includes('Unsupported site')) {
return NextResponse.json(
{ error: 'This website is not supported for scraping' },
{ status: 400 }
);
}
if (error.message.includes('HTTP 404')) {
return NextResponse.json(
{ error: 'Story not found at the provided URL' },
{ status: 404 }
);
}
if (error.message.includes('timeout')) {
return NextResponse.json(
{ error: 'Request timed out while fetching content' },
{ status: 408 }
);
}
}
return NextResponse.json(
{ error: 'Failed to scrape story. Please try again.' },
{ status: 500 }
);
}
}

View File

@@ -0,0 +1,300 @@
'use client';
import { useState } from 'react';
import { useRouter } from 'next/navigation';
import Link from 'next/link';
import { ArrowLeftIcon } from '@heroicons/react/24/outline';
interface ImportResult {
url: string;
status: 'imported' | 'skipped' | 'error';
reason?: string;
title?: string;
author?: string;
error?: string;
storyId?: string;
}
interface BulkImportResponse {
results: ImportResult[];
summary: {
total: number;
imported: number;
skipped: number;
errors: number;
};
}
export default function BulkImportPage() {
const router = useRouter();
const [urls, setUrls] = useState('');
const [isLoading, setIsLoading] = useState(false);
const [results, setResults] = useState<BulkImportResponse | null>(null);
const [error, setError] = useState<string | null>(null);
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault();
if (!urls.trim()) {
setError('Please enter at least one URL');
return;
}
setIsLoading(true);
setError(null);
setResults(null);
try {
// Parse URLs from textarea (one per line)
const urlList = urls
.split('\n')
.map(url => url.trim())
.filter(url => url.length > 0);
if (urlList.length === 0) {
setError('Please enter at least one valid URL');
setIsLoading(false);
return;
}
if (urlList.length > 50) {
setError('Maximum 50 URLs allowed per bulk import');
setIsLoading(false);
return;
}
// Get auth token for server-side API calls
const token = localStorage.getItem('auth-token');
const response = await fetch('/scrape/bulk', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': token ? `Bearer ${token}` : '',
},
body: JSON.stringify({ urls: urlList }),
});
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.error || 'Bulk import failed');
}
const data: BulkImportResponse = await response.json();
setResults(data);
} catch (err) {
console.error('Bulk import error:', err);
setError(err instanceof Error ? err.message : 'Failed to import stories');
} finally {
setIsLoading(false);
}
};
const handleReset = () => {
setUrls('');
setResults(null);
setError(null);
};
const getStatusColor = (status: string) => {
switch (status) {
case 'imported': return 'text-green-700 bg-green-50 border-green-200';
case 'skipped': return 'text-yellow-700 bg-yellow-50 border-yellow-200';
case 'error': return 'text-red-700 bg-red-50 border-red-200';
default: return 'text-gray-700 bg-gray-50 border-gray-200';
}
};
const getStatusIcon = (status: string) => {
switch (status) {
case 'imported': return '✓';
case 'skipped': return '⚠';
case 'error': return '✗';
default: return '';
}
};
return (
<div className="container mx-auto px-4 py-6">
<div className="max-w-4xl mx-auto">
{/* Header */}
<div className="mb-6">
<div className="flex items-center gap-4 mb-4">
<Link
href="/library"
className="inline-flex items-center text-blue-600 hover:text-blue-800"
>
<ArrowLeftIcon className="h-4 w-4 mr-1" />
Back to Library
</Link>
</div>
<h1 className="text-3xl font-bold text-gray-900 mb-2">Bulk Import Stories</h1>
<p className="text-gray-600">
Import multiple stories at once by providing a list of URLs. Each URL will be scraped
and automatically added to your story collection.
</p>
</div>
{!results ? (
// Import Form
<form onSubmit={handleSubmit} className="space-y-6">
<div>
<label htmlFor="urls" className="block text-sm font-medium text-gray-700 mb-2">
Story URLs
</label>
<p className="text-sm text-gray-500 mb-3">
Enter one URL per line. Maximum 50 URLs per import.
</p>
<textarea
id="urls"
value={urls}
onChange={(e) => setUrls(e.target.value)}
placeholder="https://example.com/story1&#10;https://example.com/story2&#10;https://example.com/story3"
className="w-full h-64 px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
disabled={isLoading}
/>
<p className="mt-2 text-sm text-gray-500">
URLs: {urls.split('\n').filter(url => url.trim().length > 0).length}
</p>
</div>
{error && (
<div className="bg-red-50 border border-red-200 rounded-md p-4">
<div className="flex">
<div className="ml-3">
<h3 className="text-sm font-medium text-red-800">Error</h3>
<div className="mt-2 text-sm text-red-700">
{error}
</div>
</div>
</div>
</div>
)}
<div className="flex gap-4">
<button
type="submit"
disabled={isLoading || !urls.trim()}
className="px-6 py-2 bg-blue-600 text-white font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isLoading ? 'Importing...' : 'Start Import'}
</button>
<button
type="button"
onClick={handleReset}
disabled={isLoading}
className="px-6 py-2 bg-gray-600 text-white font-medium rounded-md hover:bg-gray-700 focus:outline-none focus:ring-2 focus:ring-gray-500 focus:ring-offset-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
Clear
</button>
</div>
{isLoading && (
<div className="bg-blue-50 border border-blue-200 rounded-md p-4">
<div className="flex items-center">
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-blue-600 mr-3"></div>
<div>
<p className="text-sm font-medium text-blue-800">Processing URLs...</p>
<p className="text-sm text-blue-600">
This may take a few minutes depending on the number of URLs and response times of the source websites.
</p>
</div>
</div>
</div>
)}
</form>
) : (
// Results
<div className="space-y-6">
{/* Summary */}
<div className="bg-white border border-gray-200 rounded-lg p-6">
<h2 className="text-xl font-semibold text-gray-900 mb-4">Import Summary</h2>
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
<div className="text-center">
<div className="text-2xl font-bold text-gray-900">{results.summary.total}</div>
<div className="text-sm text-gray-600">Total URLs</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-green-600">{results.summary.imported}</div>
<div className="text-sm text-gray-600">Imported</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-yellow-600">{results.summary.skipped}</div>
<div className="text-sm text-gray-600">Skipped</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-red-600">{results.summary.errors}</div>
<div className="text-sm text-gray-600">Errors</div>
</div>
</div>
</div>
{/* Detailed Results */}
<div className="bg-white border border-gray-200 rounded-lg">
<div className="px-6 py-4 border-b border-gray-200">
<h3 className="text-lg font-medium text-gray-900">Detailed Results</h3>
</div>
<div className="divide-y divide-gray-200">
{results.results.map((result, index) => (
<div key={index} className="p-6">
<div className="flex items-start justify-between">
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 mb-2">
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${getStatusColor(result.status)}`}>
{getStatusIcon(result.status)} {result.status.charAt(0).toUpperCase() + result.status.slice(1)}
</span>
</div>
<p className="text-sm text-gray-900 font-medium truncate mb-1">
{result.url}
</p>
{result.title && result.author && (
<p className="text-sm text-gray-600 mb-1">
"{result.title}" by {result.author}
</p>
)}
{result.reason && (
<p className="text-sm text-gray-500">
{result.reason}
</p>
)}
{result.error && (
<p className="text-sm text-red-600">
Error: {result.error}
</p>
)}
</div>
</div>
</div>
))}
</div>
</div>
{/* Actions */}
<div className="flex gap-4">
<button
onClick={handleReset}
className="px-6 py-2 bg-blue-600 text-white font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
>
Import More URLs
</button>
<Link
href="/stories"
className="px-6 py-2 bg-gray-600 text-white font-medium rounded-md hover:bg-gray-700 focus:outline-none focus:ring-2 focus:ring-gray-500 focus:ring-offset-2"
>
View Stories
</Link>
</div>
</div>
)}
</div>
</div>
);
}

View File

@@ -7,6 +7,7 @@ import { useRouter } from 'next/navigation';
import { useAuth } from '../../contexts/AuthContext';
import { useTheme } from '../../lib/theme';
import Button from '../ui/Button';
import Dropdown from '../ui/Dropdown';
export default function Header() {
const [isMenuOpen, setIsMenuOpen] = useState(false);
@@ -14,6 +15,24 @@ export default function Header() {
const { theme, toggleTheme } = useTheme();
const router = useRouter();
const addStoryItems = [
{
href: '/add-story',
label: 'Manual Entry',
description: 'Add a story by manually entering details'
},
{
href: '/stories/import',
label: 'Import from URL',
description: 'Import a single story from a website'
},
{
href: '/stories/import/bulk',
label: 'Bulk Import',
description: 'Import multiple stories from a list of URLs'
}
];
const handleLogout = () => {
logout();
router.push('/login');
@@ -57,12 +76,10 @@ export default function Header() {
>
Authors
</Link>
<Link
href="/add-story"
className="theme-text hover:theme-accent transition-colors font-medium"
>
Add Story
</Link>
<Dropdown
trigger="Add Story"
items={addStoryItems}
/>
</nav>
{/* Right side actions */}
@@ -131,13 +148,32 @@ export default function Header() {
>
Authors
</Link>
<Link
href="/add-story"
className="theme-text hover:theme-accent transition-colors font-medium px-2 py-1"
onClick={() => setIsMenuOpen(false)}
>
Add Story
</Link>
<div className="px-2 py-1">
<div className="font-medium theme-text mb-1">Add Story</div>
<div className="pl-4 space-y-1">
<Link
href="/add-story"
className="block theme-text hover:theme-accent transition-colors text-sm py-1"
onClick={() => setIsMenuOpen(false)}
>
Manual Entry
</Link>
<Link
href="/stories/import"
className="block theme-text hover:theme-accent transition-colors text-sm py-1"
onClick={() => setIsMenuOpen(false)}
>
Import from URL
</Link>
<Link
href="/stories/import/bulk"
className="block theme-text hover:theme-accent transition-colors text-sm py-1"
onClick={() => setIsMenuOpen(false)}
>
Bulk Import
</Link>
</div>
</div>
<Link
href="/settings"
className="theme-text hover:theme-accent transition-colors font-medium px-2 py-1"

View File

@@ -0,0 +1,98 @@
'use client';
import { useState, useRef, useEffect } from 'react';
import Link from 'next/link';
import { ChevronDownIcon } from '@heroicons/react/24/outline';
interface DropdownItem {
href: string;
label: string;
description?: string;
}
interface DropdownProps {
trigger: string;
items: DropdownItem[];
className?: string;
onItemClick?: () => void;
}
export default function Dropdown({ trigger, items, className = '', onItemClick }: DropdownProps) {
const [isOpen, setIsOpen] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null);
const timeoutRef = useRef<NodeJS.Timeout>();
useEffect(() => {
const handleClickOutside = (event: MouseEvent) => {
if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
setIsOpen(false);
}
};
if (isOpen) {
document.addEventListener('mousedown', handleClickOutside);
}
return () => {
document.removeEventListener('mousedown', handleClickOutside);
if (timeoutRef.current) {
clearTimeout(timeoutRef.current);
}
};
}, [isOpen]);
const handleMouseEnter = () => {
if (timeoutRef.current) {
clearTimeout(timeoutRef.current);
}
setIsOpen(true);
};
const handleMouseLeave = () => {
timeoutRef.current = setTimeout(() => {
setIsOpen(false);
}, 150);
};
const handleItemClick = () => {
setIsOpen(false);
onItemClick?.();
};
return (
<div
className={`relative ${className}`}
ref={dropdownRef}
onMouseEnter={handleMouseEnter}
onMouseLeave={handleMouseLeave}
>
<button
onClick={() => setIsOpen(!isOpen)}
className="theme-text hover:theme-accent transition-colors font-medium flex items-center gap-1"
>
{trigger}
<ChevronDownIcon
className={`h-4 w-4 transition-transform duration-200 ${isOpen ? 'rotate-180' : ''}`}
/>
</button>
{isOpen && (
<div className="absolute top-full left-0 mt-1 w-64 theme-card theme-shadow border theme-border rounded-lg py-2 z-50">
{items.map((item, index) => (
<Link
key={index}
href={item.href}
onClick={handleItemClick}
className="block px-4 py-2 theme-text hover:theme-accent transition-colors"
>
<div className="font-medium">{item.label}</div>
{item.description && (
<div className="text-sm theme-text-secondary mt-1">{item.description}</div>
)}
</Link>
))}
</div>
)}
</div>
);
}

View File

@@ -0,0 +1,334 @@
{
"sites": {
"deviantart.com": {
"story": {
"title": "h1",
"titleFallback": "meta[property='og:title']",
"titleFallbackAttribute": "content",
"author": {
"strategy": "text-pattern",
"pattern": "by ([^\\s]+) on DeviantArt",
"searchAfter": "<title>",
"searchBefore": "</title>"
},
"content": {
"strategy": "text-blocks",
"minLength": 200,
"containerHints": ["journal", "literature", "story", "text", "content"],
"excludeSelectors": ["script", "style", "nav", "header", "footer", ".dev-page-sidebar"]
},
"summary": "meta[property='og:description']",
"summaryAttribute": "content",
"tags": "a[data-tagname]",
"tagsAttribute": "data-tagname",
"coverImage": "meta[property='og:image']",
"coverImageAttribute": "content"
},
"authorPage": {
"storyLinks": "a[data-hook='deviation_link']",
"filterStrategy": "dom-check",
"requiresChildElement": "div[class*='journal']"
}
},
"literotica.com": {
"story": {
"title": "h1",
"titleFallback": "meta[property='og:title']",
"titleFallbackAttribute": "content",
"author": {
"strategy": "link-with-path",
"pathContains": "/authors/",
"searchWithin": "header, .story-info, #story-meta"
},
"content": {
"strategy": "text-blocks",
"minLength": 500,
"containerHints": ["story", "content", "text"],
"excludeSelectors": ["script", "style", "nav", "header", "footer"]
},
"summary": "meta[name='description']",
"summaryAttribute": "content",
"multiPage": {
"enabled": true,
"strategy": "url-pattern",
"pageParam": "page",
"maxPages": 20
}
},
"authorPage": {
"storyLinks": {
"strategy": "href-pattern",
"pattern": "/s/[^/]+$",
"searchWithin": "main, #content, .stories-list"
}
}
},
"mcstories.com": {
"story": {
"title": "title",
"titleTransform": "remove-suffix: - MCStories.com",
"author": "meta[name='dcterms.creator']",
"authorAttribute": "content",
"content": "article#mcstories",
"summary": "meta[name='dcterms.description']",
"summaryAttribute": "content"
},
"authorPage": {
"storyLinks": "a[href$='.html']:not([href*='Authors'])",
"linkPrefix": "https://mcstories.com/"
}
},
"docs-lab.com": {
"story": {
"title": "title",
"titleTransform": "remove-suffix: - Doc's Lab",
"author": "a[href*='/profiles/'] strong",
"content": {
"strategy": "html-between",
"startMarker": "<h2>Story</h2>",
"endMarker": "</div>",
"includeStart": false
},
"tags": "span.label"
},
"authorPage": {
"storyLinks": "a[href*='/submissions/']",
"linkPrefix": "https://docs-lab.com"
}
},
"archiveofourown.org": {
"story": {
"title": "h2.title",
"author": "a[rel='author']",
"content": {
"strategy": "chapters",
"chapterSelector": "div.userstuff[role='article']",
"chaptersWrapper": "#chapters",
"singleChapter": "#workskin"
},
"summary": "div.summary blockquote.userstuff",
"tags": {
"strategy": "multiple-types",
"selectors": {
"fandom": "dd.fandom a.tag",
"warning": "dd.warning a.tag",
"category": "dd.category a.tag",
"relationship": "dd.relationship a.tag",
"character": "dd.character a.tag",
"freeform": "dd.freeform a.tag"
}
},
"multiPage": {
"enabled": true,
"strategy": "chapter-navigation",
"chapterListSelector": "#chapter_index option",
"urlPattern": "/chapters/{chapterId}"
}
},
"authorPage": {
"storyLinks": "h4.heading a[href*='/works/']",
"pagination": {
"enabled": true,
"nextPageSelector": "li.next a[rel='next']"
}
}
},
"fanfiction.net": {
"story": {
"title": "#profile_top b.xcontrast_txt",
"author": "#profile_top a[href*='/u/']",
"content": "#storytext",
"summary": "#profile_top div.xcontrast_txt",
"coverImage": {
"strategy": "lazy-loaded",
"selector": "img.cimage",
"attribute": "data-original"
},
"multiPage": {
"enabled": true,
"strategy": "chapter-dropdown",
"chapterSelector": "select#chap_select option",
"urlPattern": "{baseUrl}/{chapterNumber}"
}
},
"authorPage": {
"storyLinks": "div.z-list a.stitle",
"metadata": {
"strategy": "sibling-text",
"metadataSelector": "div.z-padtop2",
"parsePattern": "Rated: ([^-]+) - .+ - Chapters: (\\d+)"
}
}
},
"royalroad.com": {
"story": {
"title": "h1[property='name']",
"author": "h4[property='author'] a",
"content": {
"strategy": "chapter-content",
"selector": "div.chapter-content",
"cleanupSelectors": [".portlet", ".ads-holder", "div[style*='display:none']"]
},
"summary": "div.description div.hidden-content",
"tags": "span.tags a.fiction-tag",
"coverImage": "img.thumbnail",
"coverImageAttribute": "src",
"multiPage": {
"enabled": true,
"strategy": "table-of-contents",
"tocSelector": "table#chapters tbody tr a[href*='/chapter/']",
"requiresAuth": false
}
},
"authorPage": {
"storyLinks": "div.fiction-list-item h2.fiction-title a",
"additionalInfo": {
"strategy": "data-attributes",
"statsSelector": "div.stats",
"extractStats": ["pages", "followers", "views"]
}
}
},
"wattpad.com": {
"story": {
"title": "h1",
"author": {
"strategy": "schema-org",
"schemaType": "Person",
"property": "name",
"fallbackSelector": "a[href*='/user/']"
},
"content": {
"strategy": "react-content",
"contentClass": "pre-wrap",
"paragraphSelector": "p[data-p-id]",
"requiresJavaScript": true
},
"summary": "h2.description",
"tags": "div.tag-items a.tag",
"coverImage": {
"strategy": "responsive-image",
"selector": "img[alt*='cover']",
"srcsetAttribute": "srcset",
"selectLargest": true
},
"multiPage": {
"enabled": true,
"strategy": "api-based",
"apiPattern": "/v4/parts/{partId}/text",
"tocApiPattern": "/v5/stories/{storyId}/parts",
"requiresAuth": true
}
},
"authorPage": {
"storyLinks": {
"strategy": "infinite-scroll",
"initialSelector": "a[href*='/story/']",
"apiEndpoint": "/v4/users/{userId}/stories",
"requiresJavaScript": true
}
}
}
},
"strategies": {
"text-blocks": {
"description": "Find content by looking for large text blocks",
"implementation": "Find all text nodes, group by parent, select parent with most text"
},
"link-with-path": {
"description": "Find links containing specific path patterns",
"implementation": "querySelector with href*= or iterate and check .href property"
},
"href-pattern": {
"description": "Match links by regex pattern",
"implementation": "Array.from(links).filter(a => pattern.test(a.href))"
},
"text-pattern": {
"description": "Extract text using regex from raw HTML",
"implementation": "Use regex on .html() with proper groups"
},
"html-between": {
"description": "Extract HTML between markers",
"implementation": "indexOf() to find positions, substring to extract"
},
"chapters": {
"description": "Extract story content that may be in chapters",
"implementation": "Check for multiple chapters or single chapter format"
},
"multiple-types": {
"description": "Extract different categories of tags",
"implementation": "Map over selector types and extract each category"
},
"chapter-navigation": {
"description": "Navigate through chapters using chapter index",
"implementation": "Extract chapter IDs and construct URLs"
},
"lazy-loaded": {
"description": "Extract images that are lazy-loaded",
"implementation": "Check data-* attributes for actual image source"
},
"chapter-dropdown": {
"description": "Handle stories with chapter selection dropdown",
"implementation": "Parse dropdown options and construct chapter URLs"
},
"table-of-contents": {
"description": "Extract chapters from a table of contents",
"implementation": "Find all chapter links in TOC structure"
},
"schema-org": {
"description": "Extract data from schema.org structured data",
"implementation": "Parse JSON-LD or microdata for specific properties"
},
"react-content": {
"description": "Extract content from React-rendered pages",
"implementation": "May require JavaScript execution or API access"
},
"responsive-image": {
"description": "Select best quality from responsive images",
"implementation": "Parse srcset and select highest resolution"
},
"api-based": {
"description": "Use API endpoints instead of HTML scraping",
"implementation": "Detect API patterns and make direct API calls"
},
"infinite-scroll": {
"description": "Handle pages with infinite scroll",
"implementation": "Detect scroll API endpoints or pagination"
}
},
"globalOptions": {
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"timeout": 30000,
"retryAttempts": 3,
"rateLimitMs": 1000,
"cacheDuration": 300000,
"javascriptTimeout": 10000
},
"siteNotes": {
"wattpad.com": {
"warning": "Wattpad has aggressive anti-scraping measures. Consider using their API if available.",
"requiresAuth": "Some stories may require login to access full content"
},
"royalroad.com": {
"note": "Very scraper-friendly with good HTML structure"
},
"archiveofourown.org": {
"note": "Respects robots.txt, has good semantic HTML",
"rateLimit": "Be extra respectful of rate limits"
},
"fanfiction.net": {
"note": "Older site with simpler HTML structure",
"warning": "Known to block IPs for aggressive scraping"
}
}
}

View File

@@ -0,0 +1,379 @@
import 'server-only';
// Note: cheerio import is done dynamically to avoid client-side bundling issues
// Using any type for CheerioAPI to prevent bundling issues
import {
SitesConfig,
SiteConfig,
ScrapedStory,
ScrapedAuthorStory,
SelectorStrategy,
MultiPageConfig,
ScraperError
} from './types';
import { RateLimiter } from './utils/rateLimit';
import { ScraperCache } from './utils/cache';
import { UrlParser } from './utils/urlParser';
import {
extractByTextPattern,
extractTextBlocks,
extractHtmlBetween,
extractLinkText,
extractLinkWithPath,
extractHrefPattern,
extractFirstImage,
extractResponsiveImage,
extractLazyLoadedImage,
extractChapters,
extractChapterContent,
extractMultipleTypes,
extractSchemaOrg,
extractReactContent,
cleanHtml,
extractAttribute
} from './strategies';
import sitesConfig from './config/sites.json';
/**
 * Config-driven story scraper.
 *
 * All per-site knowledge (CSS selectors, named extraction strategies,
 * pagination rules) lives in config/sites.json; this class only orchestrates
 * fetching, caching, rate limiting and strategy dispatch. Cheerio is imported
 * dynamically so this server-only module never ends up in a client bundle.
 */
export class StoryScraper {
  private config: SitesConfig;       // parsed config/sites.json
  private cache: ScraperCache;       // URL -> raw HTML cache
  private rateLimiter: RateLimiter;  // enforces min delay between fetches
  constructor() {
    this.config = sitesConfig as SitesConfig;
    this.cache = new ScraperCache(this.config.globalOptions.cacheDuration);
    this.rateLimiter = new RateLimiter(this.config.globalOptions.rateLimitMs);
  }
  /**
   * Scrape one story URL into a ScrapedStory.
   *
   * Looks up the site config by domain, fetches (cache + rate limit), extracts
   * title/author/content (with fallbacks), then optional summary/cover/tags,
   * applies the title transform and sanitizes the content HTML.
   * @throws ScraperError wrapping any failure, with the offending URL attached.
   */
  async scrapeStory(url: string): Promise<ScrapedStory> {
    try {
      if (!UrlParser.validateUrl(url)) {
        throw new Error(`Invalid URL: ${url}`);
      }
      const domain = UrlParser.getDomain(url);
      const siteConfig = this.config.sites[domain];
      if (!siteConfig) {
        throw new Error(`Unsupported site: ${domain}`);
      }
      const html = await this.fetchWithCache(url);
      const cheerio = await import('cheerio');
      const $ = cheerio.load(html);
      const story: ScrapedStory = {
        title: await this.extractFieldWithFallback($, siteConfig.story, 'title', html),
        author: await this.extractFieldWithFallback($, siteConfig.story, 'author', html),
        content: await this.extractContent($, siteConfig.story, url, html),
        sourceUrl: url
      };
      // Extract optional fields
      if (siteConfig.story.summary) {
        story.summary = await this.extractField($, siteConfig.story.summary, html, siteConfig.story.summaryAttribute);
      }
      if (siteConfig.story.coverImage) {
        story.coverImage = await this.extractField($, siteConfig.story.coverImage, html, siteConfig.story.coverImageAttribute);
      }
      if (siteConfig.story.tags) {
        // Strategies may return an array or a single string; normalize to string[].
        const tagsResult = await this.extractTags($, siteConfig.story.tags, html, siteConfig.story.tagsAttribute);
        if (Array.isArray(tagsResult)) {
          story.tags = tagsResult;
        } else if (typeof tagsResult === 'string' && tagsResult) {
          story.tags = [tagsResult];
        }
      }
      // Apply post-processing
      story.title = this.applyTransforms(story.title, siteConfig.story.titleTransform);
      story.content = await cleanHtml(story.content);
      return story;
    } catch (error) {
      if (error instanceof Error) {
        throw new ScraperError(
          `Failed to scrape ${url}: ${error.message}`,
          url,
          error
        );
      }
      throw error;
    }
  }
  /**
   * Scrape an author/profile page and return one entry per story found.
   *
   * Each linked story is scraped sequentially (rate-limited via scrapeStory);
   * individual story failures are logged and skipped rather than aborting the
   * whole run.
   * @throws ScraperError when the author page itself cannot be processed.
   */
  async scrapeAuthorPage(url: string): Promise<ScrapedAuthorStory[]> {
    try {
      if (!UrlParser.validateUrl(url)) {
        throw new Error(`Invalid URL: ${url}`);
      }
      const domain = UrlParser.getDomain(url);
      const siteConfig = this.config.sites[domain];
      if (!siteConfig || !siteConfig.authorPage) {
        throw new Error(`Author page scraping not supported for: ${domain}`);
      }
      const html = await this.fetchWithCache(url);
      const cheerio = await import('cheerio');
      const $ = cheerio.load(html);
      const storyLinks = await this.extractField($, siteConfig.authorPage.storyLinks, html);
      const stories: ScrapedAuthorStory[] = [];
      // NOTE(review): a simple string selector in extractField returns only the
      // FIRST element's text, not an array of hrefs — only strategy-based
      // storyLinks configs reach this loop. Confirm that is intended.
      if (Array.isArray(storyLinks)) {
        for (const link of storyLinks) {
          const storyUrl = UrlParser.normalizeUrl(link, url);
          try {
            const scrapedStory = await this.scrapeStory(storyUrl);
            stories.push({
              url: storyUrl,
              title: scrapedStory.title,
              author: scrapedStory.author,
              summary: scrapedStory.summary
            });
          } catch (error) {
            // Best-effort: skip stories that fail, keep the rest.
            console.warn(`Failed to scrape story ${storyUrl}:`, error);
          }
        }
      }
      return stories;
    } catch (error) {
      if (error instanceof Error) {
        throw new ScraperError(
          `Failed to scrape author page ${url}: ${error.message}`,
          url,
          error
        );
      }
      throw error;
    }
  }
  /**
   * Extract a field using its primary selector, then `<field>Fallback` /
   * `<field>FallbackAttribute` from the site config when the primary result
   * is empty. Always returns a trimmed string ('' when nothing matched).
   */
  private async extractFieldWithFallback(
    $: any,
    config: any,
    fieldName: string,
    html: string
  ): Promise<string> {
    const primarySelector = config[fieldName];
    const fallbackSelector = config[`${fieldName}Fallback`];
    const attribute = config[`${fieldName}Attribute`];
    const fallbackAttribute = config[`${fieldName}FallbackAttribute`];
    // Try primary selector first
    if (primarySelector) {
      const result = await this.extractField($, primarySelector, html, attribute);
      if (result && result.trim()) {
        return result.trim();
      }
    }
    // Try fallback selector if primary failed
    if (fallbackSelector) {
      const result = await this.extractField($, fallbackSelector, html, fallbackAttribute);
      if (result && result.trim()) {
        return result.trim();
      }
    }
    return '';
  }
  /**
   * Extract a single value: plain string selectors read the FIRST matching
   * element (text, or `attribute` when given); SelectorStrategy objects are
   * dispatched to executeStrategy and may return any shape.
   */
  private async extractField(
    $: any,
    selector: string | SelectorStrategy,
    html: string,
    attribute?: string
  ): Promise<any> {
    if (typeof selector === 'string') {
      // Simple CSS selector - always return single value (first element)
      const element = $(selector).first();
      if (attribute) {
        // Extract specific attribute instead of text
        return element.attr(attribute) || '';
      }
      return element.text().trim();
    }
    // Strategy-based extraction
    return await this.executeStrategy($, selector, html);
  }
  /**
   * Like extractField, but a plain string selector collects ALL matching
   * elements (tags are naturally multi-valued). Returns string[] for string
   * selectors; strategies return their own shape.
   */
  private async extractTags(
    $: any,
    selector: string | SelectorStrategy,
    html: string,
    attribute?: string
  ): Promise<any> {
    if (typeof selector === 'string') {
      // Simple CSS selector - collect ALL matching elements for tags
      const elements = $(selector);
      if (elements.length === 0) {
        return [];
      }
      const results: string[] = [];
      elements.each((_: any, elem: any) => {
        const $elem = $(elem);
        const value = attribute ? $elem.attr(attribute) : $elem.text().trim();
        if (value) {
          results.push(value);
        }
      });
      return results;
    }
    // Strategy-based extraction for tags
    return await this.executeStrategy($, selector, html);
  }
  /**
   * Dispatch a SelectorStrategy to its implementation in ./strategies.
   * @throws Error for a strategy name with no implementation here (note:
   * sites.json also mentions e.g. 'data-attributes' / 'sibling-text' /
   * 'infinite-scroll', which are NOT handled by this switch).
   */
  private async executeStrategy(
    $: any,
    strategy: SelectorStrategy,
    html: string
  ): Promise<any> {
    switch (strategy.strategy) {
      case 'text-pattern':
        return extractByTextPattern(html, strategy as any);
      case 'link-with-path':
        return extractLinkWithPath($, strategy as any);
      case 'text-blocks':
        return extractTextBlocks($, strategy as any);
      case 'href-pattern':
        return extractHrefPattern($, strategy as any);
      case 'html-between':
        return extractHtmlBetween(html, strategy as any);
      case 'link-text':
        return extractLinkText($, strategy as any);
      case 'first-image':
        return extractFirstImage($, strategy as any);
      case 'responsive-image':
        return extractResponsiveImage($, strategy as any);
      case 'lazy-loaded':
        return extractLazyLoadedImage($, strategy as any);
      case 'chapters':
        return extractChapters($, strategy as any);
      case 'chapter-content':
        return extractChapterContent($, strategy as any);
      case 'multiple-types':
        return extractMultipleTypes($, strategy as any);
      case 'schema-org':
        return extractSchemaOrg($, strategy as any);
      case 'react-content':
        return extractReactContent($, strategy as any);
      default:
        throw new Error(`Unknown strategy: ${strategy.strategy}`);
    }
  }
  /**
   * Extract story content from the first page, then — when multiPage is
   * enabled — append content from each additional page, separated by blank
   * lines.
   */
  private async extractContent(
    $: any,
    storyConfig: any,
    url: string,
    html: string
  ): Promise<string> {
    let content = await this.extractField($, storyConfig.content, html);
    if (storyConfig.multiPage?.enabled) {
      const additionalPages = await this.fetchAdditionalPages(
        $,
        url,
        storyConfig.multiPage
      );
      for (const pageHtml of additionalPages) {
        const cheerioPage = await import('cheerio');
        const $page = cheerioPage.load(pageHtml);
        const pageContent = await this.extractField(
          $page,
          storyConfig.content,
          pageHtml
        );
        content += '\n\n' + pageContent;
      }
    }
    return content;
  }
  /**
   * Fetch the raw HTML of pages 2..maxPages (default cap 20) of a multi-page
   * story. 'url-pattern' builds each URL from the base; otherwise a
   * nextPageSelector link is followed.
   *
   * NOTE(review): `config.nextPageSelector` is evaluated against the FIRST
   * page's `$` on every iteration, so the same "next" URL would be fetched
   * repeatedly until maxPages — the selector should probably be re-run
   * against each newly fetched page. Confirm intended behavior.
   */
  private async fetchAdditionalPages(
    $: any,
    baseUrl: string,
    config: MultiPageConfig
  ): Promise<string[]> {
    const pages: string[] = [];
    let currentUrl = baseUrl;
    let pageNum = 2;
    while (pageNum <= (config.maxPages || 20)) {
      let nextUrl: string | null = null;
      if (config.strategy === 'url-pattern') {
        nextUrl = UrlParser.buildPageUrl(baseUrl, pageNum, config);
      } else if (config.nextPageSelector) {
        const nextLink = $(config.nextPageSelector).attr('href');
        if (nextLink) {
          nextUrl = UrlParser.normalizeUrl(nextLink, currentUrl);
        }
      }
      if (!nextUrl) break;
      try {
        await this.rateLimiter.throttle();
        const html = await this.fetchWithCache(nextUrl);
        pages.push(html);
        currentUrl = nextUrl;
        pageNum++;
      } catch (error) {
        // Stop at the first page that fails; keep what we have so far.
        console.error(`Failed to fetch page ${pageNum}:`, error);
        break;
      }
    }
    return pages;
  }
  /**
   * Fetch a URL's HTML with caching, rate limiting, the configured
   * User-Agent, and the configured timeout (via AbortSignal).
   * @throws Error on any non-2xx response.
   */
  private async fetchWithCache(url: string): Promise<string> {
    const cached = this.cache.get(url);
    if (cached) {
      return cached;
    }
    await this.rateLimiter.throttle();
    const response = await fetch(url, {
      headers: {
        'User-Agent': this.config.globalOptions.userAgent,
      },
      signal: AbortSignal.timeout(this.config.globalOptions.timeout)
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const html = await response.text();
    this.cache.set(url, html);
    return html;
  }
  /**
   * Apply a configured post-processing transform to extracted text.
   * Currently only "remove-suffix:<text>" (case-insensitive suffix strip).
   *
   * NOTE(review): the suffix is interpolated into a RegExp without escaping,
   * so regex metacharacters in the config value would change the match —
   * confirm config values are plain text.
   */
  private applyTransforms(text: string, transform?: string): string {
    if (!transform) return text;
    if (transform.startsWith('remove-suffix:')) {
      const suffix = transform.substring('remove-suffix:'.length).trim();
      return text.replace(new RegExp(`${suffix}$`, 'i'), '').trim();
    }
    return text;
  }
}

View File

@@ -0,0 +1,164 @@
// Dynamic cheerio import used to avoid client-side bundling issues
// Using any type for CheerioAPI to prevent bundling issues
import {
ChaptersStrategy,
ChapterContentStrategy,
MultipleTypesStrategy,
SchemaOrgStrategy,
ReactContentStrategy
} from '../types';
/**
 * Extract story content that may be split over several chapter nodes.
 * Order of preference: multiple chapters inside `chaptersWrapper`, then the
 * `singleChapter` container, then the first node matching `chapterSelector`.
 */
export function extractChapters(
  $: any,
  config: ChaptersStrategy
): string {
  // A wrapper holding more than one chapter body wins.
  if (config.chaptersWrapper) {
    const wrapper = $(config.chaptersWrapper);
    if (wrapper.length > 0) {
      const chapterNodes = wrapper.find(config.chapterSelector);
      if (chapterNodes.length > 1) {
        const parts: string[] = [];
        chapterNodes.each((_: any, node: any) => {
          parts.push($(node).html() + '\n\n');
        });
        return parts.join('').trim();
      }
    }
  }
  // Single-chapter layout.
  if (config.singleChapter) {
    const single = $(config.singleChapter);
    if (single.length > 0) {
      return single.html() || '';
    }
  }
  // Last resort: the first node matching the chapter selector directly.
  return $(config.chapterSelector).first().html() || '';
}
/**
 * Extract a chapter's inner HTML after stripping ad/junk nodes listed in
 * `cleanupSelectors` from the matched container.
 */
export function extractChapterContent(
  $: any,
  config: ChapterContentStrategy
): string {
  const container = $(config.selector);
  // Strip ads / hidden junk before serializing.
  for (const junkSelector of config.cleanupSelectors ?? []) {
    container.find(junkSelector).remove();
  }
  return container.html() || '';
}
/**
 * Collect tags of several categories, formatted as "category: tag".
 * Empty tag texts are skipped; category order follows `config.selectors`.
 */
export function extractMultipleTypes(
  $: any,
  config: MultipleTypesStrategy
): string[] {
  const collected: string[] = [];
  for (const [category, selector] of Object.entries(config.selectors)) {
    $(selector).each((_: any, node: any) => {
      const label = $(node).text().trim();
      if (label) {
        collected.push(`${category}: ${label}`);
      }
    });
  }
  return collected;
}
/**
 * Extract a property from schema.org structured data.
 *
 * Scans JSON-LD <script> blocks for an item of `config.schemaType` and reads
 * `config.property` from it; falls back to `config.fallbackSelector` text.
 *
 * Bug fix: the original `return item[config.property]` returned from the
 * `.each()` CALLBACK, discarding the value — the function always fell through
 * to the fallback. The result is now captured and iteration stopped.
 */
export function extractSchemaOrg(
  $: any,
  config: SchemaOrgStrategy
): string {
  let found: string | undefined;
  // JSON-LD blocks are the most reliable source of schema.org data.
  $('script[type="application/ld+json"]').each((_: any, elem: any) => {
    try {
      const data = JSON.parse($(elem).html() || '');
      const item = Array.isArray(data)
        ? data.find((entry: any) => entry['@type'] === config.schemaType)
        : (data['@type'] === config.schemaType ? data : undefined);
      if (item && item[config.property]) {
        found = item[config.property];
        return false; // stop iterating once a value is found
      }
    } catch (e) {
      // Invalid JSON in this script tag — skip it and keep scanning.
    }
  });
  if (found) {
    return found;
  }
  // Fallback to a plain CSS selector when no JSON-LD matched.
  if (config.fallbackSelector) {
    return $(config.fallbackSelector).first().text().trim();
  }
  return '';
}
/**
 * Extract server-rendered paragraphs from a React page.
 *
 * Simplified: fully client-rendered content would require JavaScript
 * execution or an API call; here we only gather paragraphs carrying
 * `config.contentClass`, joined by blank lines.
 */
export function extractReactContent(
  $: any,
  config: ReactContentStrategy
): string {
  const pieces: string[] = [];
  $(config.paragraphSelector).each((_: any, node: any) => {
    const $node = $(node);
    if ($node.hasClass(config.contentClass)) {
      pieces.push($node.html() + '\n\n');
    }
  });
  return pieces.join('').trim();
}
/**
 * Sanitize scraped HTML: strip script/style/embed elements and empty
 * paragraphs/divs, then re-serialize. Cheerio is imported dynamically to
 * keep it out of client bundles.
 */
export async function cleanHtml(html: string): Promise<string> {
  // Basic HTML cleaning - remove scripts, styles, and dangerous elements
  const cheerio = await import('cheerio');
  const $ = cheerio.load(html, {
    // Preserve self-closing tags like <br>
    xmlMode: false,
    decodeEntities: false
  });
  // Remove dangerous elements
  $('script, style, iframe, embed, object').remove();
  // Remove empty paragraphs and divs (but preserve <br> tags)
  $('p:empty, div:empty').not(':has(br)').remove();
  // Clean up excessive whitespace in text nodes only, preserve <br> tags
  // NOTE(review): `$('*')` matches element nodes, so `elem.type === 'text'`
  // never holds and this loop appears to be a no-op — confirm, and either
  // drop it or iterate `$('*').contents()` if text-node trimming is wanted.
  $('*').each((_, elem) => {
    const $elem = $(elem);
    if (elem.type === 'text') {
      const text = $elem.text();
      if (text && text.trim() !== text) {
        $elem.replaceWith(text.trim());
      }
    }
  });
  // Return HTML with proper self-closing tag format
  return $.html() || '';
}
/**
 * Read a single attribute off the first element matching `selector`.
 * Returns '' when the element or the attribute is absent.
 */
export function extractAttribute(
  $: any,
  selector: string,
  attribute: string
): string {
  const match = $(selector).first();
  const value = match.attr(attribute);
  return value || '';
}

View File

@@ -0,0 +1,3 @@
// Barrel file: re-export every extraction strategy so consumers can import
// from './strategies' without knowing how the implementations are split.
export * from './textExtractor';
export * from './linkExtractor';
export * from './contentCleaner';

View File

@@ -0,0 +1,98 @@
// Dynamic cheerio import used to avoid client-side bundling issues
// Using any type for CheerioAPI to prevent bundling issues
import {
LinkWithPathStrategy,
HrefPatternStrategy,
FirstImageStrategy,
ResponsiveImageStrategy,
LazyLoadedStrategy
} from '../types';
/**
 * Return the text of the first anchor whose href contains
 * `config.pathContains`, searching inside `config.searchWithin`
 * (or the whole body). '' when no anchor matches.
 */
export function extractLinkWithPath(
  $: any,
  config: LinkWithPathStrategy
): string {
  const scope = config.searchWithin ? $(config.searchWithin) : $('body');
  const anchors = scope.find('a');
  for (let idx = 0; idx < anchors.length; idx++) {
    const anchor = anchors.eq(idx);
    const href = anchor.attr('href');
    if (!href || !href.includes(config.pathContains)) {
      continue;
    }
    return anchor.text().trim();
  }
  return '';
}
/**
 * Collect every anchor href (within `config.searchWithin` or the body)
 * matching the regex `config.pattern`.
 */
export function extractHrefPattern(
  $: any,
  config: HrefPatternStrategy
): string[] {
  const scope = config.searchWithin ? $(config.searchWithin) : $('body');
  const matcher = new RegExp(config.pattern);
  const matched: string[] = [];
  scope.find('a').each((_: any, node: any) => {
    const href = $(node).attr('href');
    if (href && matcher.test(href)) {
      matched.push(href);
    }
  });
  return matched;
}
/**
 * Return `config.attribute` of the first <img> inside `config.searchWithin`
 * (or the body); '' when no image or attribute exists.
 */
export function extractFirstImage(
  $: any,
  config: FirstImageStrategy
): string {
  const scope = config.searchWithin ? $(config.searchWithin) : $('body');
  const firstImage = scope.find('img').first();
  return firstImage.attr(config.attribute) || '';
}
/**
 * Pick the best URL from a responsive image. When `selectLargest` is set and
 * a srcset is present, the entry with the greatest effective width wins
 * (density descriptors like "2x" are scaled by 100 to compare with "Nw"
 * widths); otherwise plain `src` is returned.
 */
export function extractResponsiveImage(
  $: any,
  config: ResponsiveImageStrategy
): string {
  const image = $(config.selector).first();
  if (config.selectLargest && config.srcsetAttribute) {
    const srcset = image.attr(config.srcsetAttribute);
    if (srcset) {
      // Each entry is "url descriptor"; missing descriptor counts as "1x".
      let best: { url: string; width: number } | null = null;
      for (const entry of srcset.split(',')) {
        const pieces = entry.trim().split(' ');
        const url = pieces[0];
        const descriptor = pieces[1] || '1x';
        let width = 100;
        if (descriptor.includes('w')) {
          width = parseInt(descriptor.replace('w', ''));
        } else if (descriptor.includes('x')) {
          width = parseInt(descriptor.replace('x', '')) * 100;
        }
        // ">=" keeps the later entry on ties, matching the original reduce.
        if (!best || width >= best.width) {
          best = { url, width };
        }
      }
      if (best) {
        return best.url;
      }
    }
  }
  return image.attr('src') || '';
}
/**
 * Resolve a lazy-loaded image URL: lazy loaders stash the real source in a
 * data-* attribute (`config.attribute`), so try that first, then `src`.
 */
export function extractLazyLoadedImage(
  $: any,
  config: LazyLoadedStrategy
): string {
  const image = $(config.selector).first();
  const lazySource = image.attr(config.attribute);
  return lazySource || image.attr('src') || '';
}

View File

@@ -0,0 +1,144 @@
import 'server-only';
// Dynamic cheerio import used to avoid client-side bundling issues
// Using any type for CheerioAPI to prevent bundling issues
import {
TextPatternStrategy,
TextBlockStrategy,
HtmlBetweenStrategy,
LinkTextStrategy
} from '../types';
/**
 * Extract a single value from raw HTML with a case-insensitive regex.
 *
 * The search window can be narrowed with `searchAfter` / `searchBefore`
 * (plain substring markers). Returns the trimmed capture group
 * (`config.group`, default 1; 0 = whole match), or '' when nothing matches.
 *
 * Bug fix: `match[config.group || 1].trim()` threw a TypeError when the
 * capture group did not participate in the match, and `|| 1` made
 * `group: 0` (whole match) unreachable. Now uses `?? 1` plus optional
 * chaining.
 */
export function extractByTextPattern(
  html: string,
  config: TextPatternStrategy
): string {
  let searchContent = html;
  // Limit search scope if specified
  if (config.searchAfter) {
    const afterIndex = html.indexOf(config.searchAfter);
    if (afterIndex !== -1) {
      searchContent = html.substring(afterIndex);
    }
  }
  if (config.searchBefore) {
    const beforeIndex = searchContent.indexOf(config.searchBefore);
    if (beforeIndex !== -1) {
      searchContent = searchContent.substring(0, beforeIndex);
    }
  }
  const regex = new RegExp(config.pattern, 'i');
  const match = searchContent.match(regex);
  // `?? 1` keeps group 0 selectable; `?.` guards non-participating groups.
  return match?.[config.group ?? 1]?.trim() ?? '';
}
/**
 * Heuristic content finder: pick the element whose OWN text (excluding
 * children) is long enough (`minLength`, default 500) to be story content.
 * Prefers an element whose class/id contains one of `containerHints`;
 * otherwise falls back to the single qualifying block or the largest one.
 *
 * NOTE: elements matching `excludeSelectors` are removed from the live
 * document, so this mutates the passed-in cheerio tree.
 *
 * Bug fix: when no block qualified, `blocks.reduce(...)` (no initial value)
 * threw a TypeError before the `largestBlock ? ... : ''` guard could run.
 * An explicit empty check now returns '' instead.
 */
export function extractTextBlocks(
  $: any,
  config: TextBlockStrategy
): string {
  const blocks: Array<{element: any, text: string}> = [];
  // Remove excluded elements first
  if (config.excludeSelectors) {
    config.excludeSelectors.forEach(selector => {
      $(selector).remove();
    });
  }
  $('*').each((_: any, elem: any) => {
    const $elem = $(elem);
    // Own text only: clone, drop children, read what is left.
    const text = $elem.clone().children().remove().end().text().trim();
    if (text.length >= (config.minLength || 500)) {
      blocks.push({ element: elem, text });
    }
  });
  // Nothing qualified — bail out instead of crashing in reduce() below.
  if (blocks.length === 0) {
    return '';
  }
  // Find the block that likely contains story content
  const storyBlock = blocks.find(block => {
    if (config.containerHints && config.containerHints.length > 0) {
      const hasHints = config.containerHints.some(hint =>
        $(block.element).attr('class')?.includes(hint) ||
        $(block.element).attr('id')?.includes(hint)
      );
      return hasHints;
    }
    return blocks.length === 1;
  });
  if (storyBlock) {
    return $(storyBlock.element).html() || '';
  }
  // Fallback to largest block (blocks is non-empty here, so reduce is safe)
  const largestBlock = blocks.reduce((prev, current) =>
    prev.text.length > current.text.length ? prev : current
  );
  return $(largestBlock.element).html() || '';
}
/**
 * Extract the raw HTML between two substring markers.
 *
 * Returns '' when the start marker is absent. When the end marker is absent,
 * everything after the start marker is returned UNTRIMMED (matching the
 * original behavior); otherwise the delimited slice is trimmed.
 */
export function extractHtmlBetween(
  html: string,
  config: HtmlBetweenStrategy
): string {
  const startIndex = html.indexOf(config.startMarker);
  if (startIndex === -1) {
    return '';
  }
  const from = config.includeStart
    ? startIndex
    : startIndex + config.startMarker.length;
  const endIndex = html.indexOf(config.endMarker, from);
  if (endIndex === -1) {
    return html.substring(from);
  }
  return html.substring(from, endIndex).trim();
}
/**
 * Find the text of a link located "near" any of the given text fragments.
 *
 * For each hint in `config.nearText` (first hit wins) every element in scope
 * is scanned; when an element's text contains the hint we take, in order of
 * preference: a link inside the element, the element itself if it is a link,
 * or a link within the next three siblings. Returns '' when nothing matches.
 */
export function extractLinkText(
  $: any,
  config: LinkTextStrategy
): string {
  let searchScope = config.searchWithin ? $(config.searchWithin) : $('body');
  // Look for links near the specified text patterns
  let foundText = '';
  config.nearText.forEach(text => {
    if (foundText) return; // Already found
    searchScope.find('*').each((_: any, elem: any) => {
      const $elem = $(elem);
      const elemText = $elem.text().toLowerCase();
      if (elemText.includes(text.toLowerCase())) {
        // Look for nearby links
        const $link = $elem.find('a').first();
        if ($link.length) {
          foundText = $link.text().trim();
          return false; // Break out of each
        }
        // Check if the element itself is a link
        if ($elem.is('a')) {
          foundText = $elem.text().trim();
          return false;
        }
        // Look for links in the next few siblings
        // NOTE(review): .first().each(...) visits at most one node and its
        // `return false` only exits that inner callback, so the outer scan
        // continues (possibly overwriting foundText with a later match) —
        // confirm this fallback behaves as intended.
        const $siblings = $elem.nextAll().slice(0, 3);
        $siblings.find('a').first().each((_: any, link: any) => {
          foundText = $(link).text().trim();
          return false;
        });
      }
    });
  });
  return foundText;
}

View File

@@ -0,0 +1,248 @@
/** Scraping configuration for one site: story-page plus author-page rules. */
export interface SiteConfig {
  story: StorySelectors;
  // Declared required, but the scraper still guards on its presence at runtime.
  authorPage: AuthorPageSelectors;
}
/** How to pull each story field out of a page: a CSS selector or a named strategy. */
export interface StorySelectors {
  title: string | SelectorStrategy;
  author: string | SelectorStrategy;
  content: string | SelectorStrategy;
  summary?: string | SelectorStrategy;
  coverImage?: string | SelectorStrategy;
  tags?: string | SelectorStrategy;
  multiPage?: MultiPageConfig;            // chapter/page traversal rules
  titleFallback?: string;                 // secondary selector when `title` yields nothing
  titleFallbackAttribute?: string;        // attribute read from the fallback match
  titleTransform?: string;                // post-processing, e.g. "remove-suffix:<text>"
  summaryAttribute?: string;              // read this attribute instead of element text
  coverImageAttribute?: string;
  tagsAttribute?: string;
}
/** How to enumerate stories on a site's author/profile page. */
export interface AuthorPageSelectors {
  storyLinks: string | SelectorStrategy;
  pagination?: PaginationConfig;
  linkPrefix?: string;                    // base prepended to relative story links
  filterStrategy?: string;
  requiresChildElement?: string;
  requiresNavigation?: NavigationConfig;
  metadata?: MetadataConfig;
  additionalInfo?: AdditionalInfoConfig;
}
/** Open-ended strategy descriptor; concrete strategies below narrow this. */
export interface SelectorStrategy {
  strategy: string;
  [key: string]: any;
}
/** Multi-page/chapter traversal settings for a story. */
export interface MultiPageConfig {
  enabled: boolean;
  strategy: 'url-pattern' | 'next-link' | 'chapter-navigation' | 'chapter-dropdown' | 'table-of-contents' | 'api-based';
  nextPageSelector?: string;              // anchor pointing at the next page
  pageParam?: string;                     // query parameter carrying the page number
  maxPages?: number;                      // safety cap (scraper defaults to 20)
  chapterListSelector?: string;
  chapterSelector?: string;
  urlPattern?: string;                    // template with {page}/{chapterId}-style placeholders
  tocSelector?: string;
  requiresAuth?: boolean;
  apiPattern?: string;
  tocApiPattern?: string;
}
/** Author-page pagination (follow a "next" link). */
export interface PaginationConfig {
  enabled: boolean;
  nextPageSelector: string;
}
/** Click-through navigation needed before content is reachable (JS-driven pages). */
export interface NavigationConfig {
  enabled: boolean;
  clickText: string;
  waitMs: number;
}
/** Regex-based metadata extraction from a sibling element's text. */
export interface MetadataConfig {
  strategy: string;
  metadataSelector: string;
  parsePattern: string;
}
/** Extra per-story stats pulled from the author listing. */
export interface AdditionalInfoConfig {
  strategy: string;
  statsSelector: string;
  extractStats: string[];
}
/** Result of scraping a single story page. */
export interface ScrapedStory {
  title: string;
  author: string;
  content: string;                        // HTML, sanitized via cleanHtml()
  summary?: string;
  coverImage?: string;
  tags?: string[];
  sourceUrl: string;
}
/** Lightweight story entry harvested from an author page. */
export interface ScrapedAuthorStory {
  url: string;
  title: string;
  author: string;
  summary?: string;
}
/** Shape of config/sites.json. */
export interface SitesConfig {
  sites: Record<string, SiteConfig>;      // keyed by domain without "www."
  strategies: Record<string, StrategyDescription>;
  globalOptions: GlobalOptions;
  siteNotes?: Record<string, SiteNotes>;
}
/** Human-readable strategy documentation (informational; no visible code path reads it). */
export interface StrategyDescription {
  description: string;
  implementation: string;
}
/** Cross-site scraper settings. */
export interface GlobalOptions {
  userAgent: string;
  timeout: number;                        // per-request timeout in ms
  retryAttempts: number;
  rateLimitMs: number;                    // min delay between requests
  cacheDuration?: number;                 // HTML cache TTL in ms
  javascriptTimeout?: number;
}
/** Free-form operator notes per site (warnings, rate-limit advice). */
export interface SiteNotes {
  warning?: string;
  note?: string;
  rateLimit?: string;
  requiresAuth?: string;
}
// Strategy-specific interfaces — each narrows SelectorStrategy to the options
// its implementation in ./strategies actually reads.
/** Regex over raw HTML, optionally windowed by substring markers. */
export interface TextPatternStrategy extends SelectorStrategy {
  strategy: 'text-pattern';
  pattern: string;
  group?: number;                 // capture group to return (default 1)
  searchAfter?: string;           // only search after this substring
  searchBefore?: string;          // only search before this substring
}
/** Text of the first anchor whose href contains a path fragment. */
export interface LinkWithPathStrategy extends SelectorStrategy {
  strategy: 'link-with-path';
  pathContains: string;
  searchWithin?: string;          // scope selector (default: body)
}
/** Heuristic "largest text block" content finder. */
export interface TextBlockStrategy extends SelectorStrategy {
  strategy: 'text-blocks';
  minLength?: number;             // min own-text length to qualify (default 500)
  containerHints?: string[];      // class/id fragments that mark story containers
  excludeSelectors?: string[];    // removed from the document before scanning
}
/** All hrefs matching a regex. */
export interface HrefPatternStrategy extends SelectorStrategy {
  strategy: 'href-pattern';
  pattern: string;
  searchWithin?: string;
}
/** Raw HTML between two substring markers. */
export interface HtmlBetweenStrategy extends SelectorStrategy {
  strategy: 'html-between';
  startMarker: string;
  endMarker: string;
  includeStart?: boolean;         // keep the start marker in the result
}
/** Multi- or single-chapter story content. */
export interface ChaptersStrategy extends SelectorStrategy {
  strategy: 'chapters';
  chapterSelector: string;
  chaptersWrapper?: string;       // container holding several chapters
  singleChapter?: string;         // fallback container for one-chapter works
}
/** Tags of several categories, one selector per category. */
export interface MultipleTypesStrategy extends SelectorStrategy {
  strategy: 'multiple-types';
  selectors: Record<string, string>;
}
/** Link located near any of the given text fragments. */
export interface LinkTextStrategy extends SelectorStrategy {
  strategy: 'link-text';
  nearText: string[];
  searchWithin?: string;
}
/** Attribute of the first <img> inside a scope. */
export interface FirstImageStrategy extends SelectorStrategy {
  strategy: 'first-image';
  searchWithin: string;
  attribute: string;
}
/** Property from schema.org JSON-LD, with optional CSS fallback. */
export interface SchemaOrgStrategy extends SelectorStrategy {
  strategy: 'schema-org';
  schemaType: string;             // value of "@type" to match
  property: string;               // property to read from the matching item
  fallbackSelector?: string;
}
/** Server-rendered paragraphs of a React page. */
export interface ReactContentStrategy extends SelectorStrategy {
  strategy: 'react-content';
  contentClass: string;           // class marking real content paragraphs
  paragraphSelector: string;
  requiresJavaScript: boolean;
}
/** Best-quality candidate from a srcset. */
export interface ResponsiveImageStrategy extends SelectorStrategy {
  strategy: 'responsive-image';
  selector: string;
  srcsetAttribute: string;
  selectLargest: boolean;
}
/** Real URL of a lazy-loaded image (data-* attribute, then src). */
export interface LazyLoadedStrategy extends SelectorStrategy {
  strategy: 'lazy-loaded';
  selector: string;
  attribute: string;
}
/** Chapter container HTML after removing junk nodes. */
export interface ChapterContentStrategy extends SelectorStrategy {
  strategy: 'chapter-content';
  selector: string;
  cleanupSelectors?: string[];
}
/** Stats read from data attributes on an author listing (config-only; no implementation in view). */
export interface DataAttributesStrategy extends SelectorStrategy {
  strategy: 'data-attributes';
  statsSelector: string;
  extractStats: string[];
}
/** Metadata parsed out of a sibling element's text (config-only; no implementation in view). */
export interface SiblingTextStrategy extends SelectorStrategy {
  strategy: 'sibling-text';
  metadataSelector: string;
  parsePattern: string;
}
/** API-endpoint-driven content retrieval (config-only; no implementation in view). */
export interface ApiBasedStrategy extends SelectorStrategy {
  strategy: 'api-based';
  apiPattern: string;
  tocApiPattern?: string;
  requiresAuth: boolean;
}
/** Infinite-scroll listings (config-only; no implementation in view). */
export interface InfiniteScrollStrategy extends SelectorStrategy {
  strategy: 'infinite-scroll';
  initialSelector: string;
  apiEndpoint: string;
  requiresJavaScript: boolean;
}
/**
 * Error wrapper used by the scraper so callers get both the URL being
 * processed and the underlying cause alongside the message.
 */
export class ScraperError extends Error {
  public url: string;
  public originalError?: Error;

  constructor(message: string, url: string, originalError?: Error) {
    super(message);
    this.name = 'ScraperError';
    this.url = url;
    this.originalError = originalError;
  }
}

View File

@@ -0,0 +1,35 @@
/**
 * Tiny in-memory TTL cache for fetched HTML pages.
 * Expiry is lazy: staleness is checked (and the entry evicted) on read,
 * never by a timer.
 */
export class ScraperCache {
  private store = new Map<string, { data: any; timestamp: number }>();
  private ttl: number;

  /** @param ttlMs entry lifetime in milliseconds (default 5 minutes) */
  constructor(ttlMs: number = 300000) {
    this.ttl = ttlMs;
  }

  /** Cached value, or null when missing or expired (expired entries are evicted). */
  get(key: string): any | null {
    const entry = this.store.get(key);
    if (!entry) {
      return null;
    }
    const age = Date.now() - entry.timestamp;
    if (age > this.ttl) {
      this.store.delete(key);
      return null;
    }
    return entry.data;
  }

  /** Store a value stamped with the current time. */
  set(key: string, data: any): void {
    this.store.set(key, { data, timestamp: Date.now() });
  }

  /** Drop every entry. */
  clear(): void {
    this.store.clear();
  }

  /** Number of entries currently held (may include not-yet-evicted stale ones). */
  size(): number {
    return this.store.size;
  }
}

View File

@@ -0,0 +1,23 @@
/**
 * Enforces a minimum delay between consecutive requests.
 * NOTE(review): not designed for concurrent callers — parallel `throttle()`
 * calls read/write `lastRequest` without coordination; fine for the
 * sequential scraping loop that uses it.
 */
export class RateLimiter {
  private lastRequest = 0;
  private minDelay: number;

  constructor(minDelayMs: number = 1000) {
    this.minDelay = minDelayMs;
  }

  /** Resolve once at least `minDelay` ms have passed since the previous call. */
  async throttle(): Promise<void> {
    const elapsed = Date.now() - this.lastRequest;
    const remaining = this.minDelay - elapsed;
    if (remaining > 0) {
      await this.delay(remaining);
    }
    this.lastRequest = Date.now();
  }

  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

View File

@@ -0,0 +1,61 @@
/**
 * Static helpers for URL validation, normalization and pagination URLs.
 */
export class UrlParser {
  /** Hostname without a leading "www."; throws on unparsable input. */
  static getDomain(url: string): string {
    try {
      return new URL(url).hostname.replace(/^www\./, '');
    } catch (error) {
      throw new Error(`Invalid URL: ${url}`);
    }
  }

  /** True only for well-formed http/https URLs. */
  static validateUrl(url: string): boolean {
    try {
      const { protocol } = new URL(url);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /** Build the URL of page `pageNum`, via a query param or a {page} pattern. */
  static buildPageUrl(baseUrl: string, pageNum: number, config: any): string {
    try {
      const urlObj = new URL(baseUrl);
      if (config.pageParam) {
        urlObj.searchParams.set(config.pageParam, pageNum.toString());
      } else if (config.urlPattern) {
        // Replace {page} or similar patterns in URL
        return config.urlPattern.replace(/\{page\}/g, pageNum.toString());
      }
      return urlObj.toString();
    } catch (error) {
      throw new Error(`Failed to build page URL: ${error}`);
    }
  }

  /** Resolve a possibly-relative URL against `baseUrl`; absolute URLs pass through. */
  static normalizeUrl(url: string, baseUrl?: string): string {
    try {
      if (/^https?:\/\//.test(url)) {
        return url;
      }
      return baseUrl ? new URL(url, baseUrl).toString() : url;
    } catch (error) {
      throw new Error(`Failed to normalize URL: ${url}`);
    }
  }

  /** Look up the per-site config for a URL's domain; throws when unsupported. */
  static extractDomainConfig(url: string, sitesConfig: any): any {
    const domain = this.getDomain(url);
    const config = sitesConfig.sites[domain];
    if (!config) {
      throw new Error(`Unsupported site: ${domain}`);
    }
    return config;
  }
}