From 6b837833812ec68d87fe7277270adcec55c2095a Mon Sep 17 00:00:00 2001 From: Stefan Hardegger Date: Fri, 15 Aug 2025 07:58:36 +0200 Subject: [PATCH] Small improvements --- frontend/src/app/add-story/page.tsx | 2 +- .../lib/scraper/strategies/linkExtractor.ts | 4 +++ .../lib/scraper/strategies/textExtractor.ts | 27 ++++++++++++++++++- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/frontend/src/app/add-story/page.tsx b/frontend/src/app/add-story/page.tsx index 47cb6a9..f43fbcb 100644 --- a/frontend/src/app/add-story/page.tsx +++ b/frontend/src/app/add-story/page.tsx @@ -265,7 +265,7 @@ export default function AddStoryPage() { await storyApi.uploadCover(story.id, coverImage); } - router.push(`/stories/${story.id}`); + router.push(`/stories/${story.id}/detail`); } catch (error: any) { console.error('Failed to create story:', error); const errorMessage = error.response?.data?.message || 'Failed to create story'; diff --git a/frontend/src/lib/scraper/strategies/linkExtractor.ts b/frontend/src/lib/scraper/strategies/linkExtractor.ts index b8117b3..c4162af 100644 --- a/frontend/src/lib/scraper/strategies/linkExtractor.ts +++ b/frontend/src/lib/scraper/strategies/linkExtractor.ts @@ -78,6 +78,10 @@ export function extractResponsiveImage( return { url, width }; }); + if (sources.length === 0) { + return img.attr('src') || ''; + } + const largest = sources.reduce((prev: any, current: any) => prev.width > current.width ? prev : current ); diff --git a/frontend/src/lib/scraper/strategies/textExtractor.ts b/frontend/src/lib/scraper/strategies/textExtractor.ts index 355d0f1..73b90a8 100644 --- a/frontend/src/lib/scraper/strategies/textExtractor.ts +++ b/frontend/src/lib/scraper/strategies/textExtractor.ts @@ -75,6 +75,10 @@ export function extractTextBlocks( } // Fallback to largest block + if (blocks.length === 0) { + return ''; + } + const largestBlock = blocks.reduce((prev, current) => prev.text.length > current.text.length ? 
prev : current ); @@ -86,6 +90,20 @@ export function extractDeviantArtContent( $: cheerio.CheerioAPI, config: TextBlockStrategy ): string { + // Check for mature content warning or login requirement + const matureWarning = $('.deviation-overlay.blocked.mature, .mature-filter, .ismature').first(); + if (matureWarning.length > 0) { + throw new Error('Content is restricted by mature content filter. Login may be required to access this story.'); + } + + const loginRequired = $('a[href*="join"][href*="mature"], a[href*="login"]').filter((_, elem) => { + const text = $(elem).text().toLowerCase(); + return text.includes('log in') || text.includes('sign up'); + }); + if (loginRequired.length > 0) { + throw new Error('Login is required to access this DeviantArt content.'); + } + // Remove excluded elements first if (config.excludeSelectors) { config.excludeSelectors.forEach(selector => { @@ -93,9 +111,10 @@ export function extractDeviantArtContent( }); } - // DeviantArt has two main content structures: + // DeviantArt has multiple content structures: // 1. Old format:
<div class="text"> containing the full story // 2. New format:
<div class="_83r8m"> or similar classes containing multiple
<p>
elements + // 3. Legacy journal format: .legacy-journal .text // Try the old format first (single text div) const textDiv = $('.text'); @@ -103,6 +122,12 @@ export function extractDeviantArtContent( return textDiv.html() || ''; } + // Try legacy journal format + const legacyJournal = $('.legacy-journal .text, .legacy-journal .journal-text'); + if (legacyJournal.length > 0 && legacyJournal.text().trim().length >= (config.minLength || 200)) { + return legacyJournal.html() || ''; + } + // Try the new format (multiple paragraphs in specific containers) const newFormatSelectors = [ 'div[class*="_83r8m"] p', // Main story content container