From 5746001c4a07688114847200dcf07956da9eb03d Mon Sep 17 00:00:00 2001 From: Stefan Hardegger Date: Tue, 29 Jul 2025 11:02:46 +0200 Subject: [PATCH] Bugfixes --- .../storycove/service/TypesenseService.java | 42 +++++++++++++-- frontend/src/app/library/page.tsx | 1 + frontend/src/lib/api.ts | 4 ++ frontend/src/lib/scraper/config/sites.json | 2 +- frontend/src/lib/scraper/scraper.ts | 3 ++ .../lib/scraper/strategies/textExtractor.ts | 52 +++++++++++++++++++ package.json | 5 ++ 7 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 package.json diff --git a/backend/src/main/java/com/storycove/service/TypesenseService.java b/backend/src/main/java/com/storycove/service/TypesenseService.java index 804ff86..bec6bcd 100644 --- a/backend/src/main/java/com/storycove/service/TypesenseService.java +++ b/backend/src/main/java/com/storycove/service/TypesenseService.java @@ -77,7 +77,7 @@ public class TypesenseService { new Field().name("authorName").type("string").facet(true).sort(true), new Field().name("seriesId").type("string").facet(true).optional(true), new Field().name("seriesName").type("string").facet(true).sort(true).optional(true), - new Field().name("tagNames").type("string[]").facet(true).optional(true), + new Field().name("tagNames").type("string[]").facet(true), new Field().name("rating").type("int32").facet(true).sort(true).optional(true), new Field().name("wordCount").type("int32").facet(true).sort(true).optional(true), new Field().name("volume").type("int32").facet(true).sort(true).optional(true), @@ -232,6 +232,9 @@ public class TypesenseService { .maxFacetValues(100) .sortBy(buildSortParameter(normalizedQuery, sortBy, sortDir)); + logger.debug("Typesense search parameters - facetBy: {}, maxFacetValues: {}", + searchParameters.getFacetBy(), searchParameters.getMaxFacetValues()); + // Add filters List filterConditions = new ArrayList<>(); @@ -269,6 +272,7 @@ public class TypesenseService { .documents() .search(searchParameters); + 
logger.debug("Search result facet counts: {}", searchResult.getFacetCounts()); List results = convertSearchResult(searchResult); Map> facets = processFacetCounts(searchResult); @@ -375,7 +379,10 @@ public class TypesenseService { .map(tag -> tag.getName()) .collect(Collectors.toList()); document.put("tagNames", tagNames); + logger.debug("Story '{}' has {} tags: {}", story.getTitle(), tagNames.size(), tagNames); } else { + document.put("tagNames", new ArrayList<>()); + logger.debug("Story '{}' has no tags, setting empty array", story.getTitle()); } document.put("rating", story.getRating() != null ? story.getRating() : 0); @@ -406,15 +413,34 @@ public class TypesenseService { List facetValues = new ArrayList<>(); if (facetCounts.getCounts() != null) { + for (Object countObj : facetCounts.getCounts()) { - if (countObj instanceof Map) { - Map countMap = (Map) countObj; - String value = (String) countMap.get("value"); - Integer count = (Integer) countMap.get("count"); + if (countObj instanceof org.typesense.model.FacetCountsCounts) { + org.typesense.model.FacetCountsCounts facetCount = (org.typesense.model.FacetCountsCounts) countObj; + String value = facetCount.getValue(); + Integer count = facetCount.getCount(); if (value != null && count != null && count > 0) { facetValues.add(new FacetCountDto(value, count)); } + } else if (countObj instanceof Map) { + // Fallback for Map-based responses + Map countMap = (Map) countObj; + String value = (String) countMap.get("value"); + Object countValue = countMap.get("count"); + + if (value != null && countValue != null) { + Integer count = null; + if (countValue instanceof Integer) { + count = (Integer) countValue; + } else if (countValue instanceof Number) { + count = ((Number) countValue).intValue(); + } + + if (count != null && count > 0) { + facetValues.add(new FacetCountDto(value, count)); + } + } } } } @@ -432,6 +458,12 @@ public class TypesenseService { } } + // DEBUG: Log final facet processing results + 
logger.info("FACET DEBUG: Final facetMap contents: {}", facetMap); + if (facetMap.isEmpty()) { + logger.info("FACET DEBUG: No facets were processed - investigating why"); + } + return facetMap; } diff --git a/frontend/src/app/library/page.tsx b/frontend/src/app/library/page.tsx index 5aa1089..40e82a7 100644 --- a/frontend/src/app/library/page.tsx +++ b/frontend/src/app/library/page.tsx @@ -57,6 +57,7 @@ export default function LibraryPage() { tags: selectedTags.length > 0 ? selectedTags : undefined, sortBy: sortOption, sortDir: sortDirection, + facetBy: ['tagNames'], // Request tag facets for the filter UI }); const currentStories = result?.results || []; diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index acee1cc..e641554 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -314,6 +314,7 @@ export const searchApi = { maxRating?: number; sortBy?: string; sortDir?: string; + facetBy?: string[]; }): Promise => { // Create URLSearchParams to properly handle array parameters const searchParams = new URLSearchParams(); @@ -334,6 +335,9 @@ export const searchApi = { if (params.tags && params.tags.length > 0) { params.tags.forEach(tag => searchParams.append('tags', tag)); } + if (params.facetBy && params.facetBy.length > 0) { + params.facetBy.forEach(facet => searchParams.append('facetBy', facet)); + } const response = await api.get(`/stories/search?${searchParams.toString()}`); return response.data; diff --git a/frontend/src/lib/scraper/config/sites.json b/frontend/src/lib/scraper/config/sites.json index 50f4104..1f6dd71 100644 --- a/frontend/src/lib/scraper/config/sites.json +++ b/frontend/src/lib/scraper/config/sites.json @@ -12,7 +12,7 @@ "searchBefore": "" }, "content": { - "strategy": "text-blocks", + "strategy": "deviantart-content", "minLength": 200, "containerHints": ["journal", "literature", "story", "text", "content"], "excludeSelectors": ["script", "style", "nav", "header", "footer", ".dev-page-sidebar"] diff --git 
/**
 * Extracts story HTML from a DeviantArt page.
 *
 * Candidates are tried in this order:
 *  1. Legacy layout: an element with class `text` that holds the full story.
 *  2. Current layout: paragraph elements inside a `div` whose class attribute
 *     contains `_83r8m`, `_2CKTq` or `journal`.
 *  3. The generic `extractTextBlocks` strategy as a last resort.
 *
 * Side effect: every element matching `config.excludeSelectors` is removed
 * from the document behind `$` before extraction, so the caller's DOM is
 * mutated.
 *
 * @param $ - Cheerio handle for the loaded page.
 * @param config - Strategy settings. `minLength` is the minimum trimmed text
 *   length a candidate must reach; a missing or zero value falls back to 200.
 * @returns Inner HTML of the legacy text element, or the concatenated outer
 *   HTML of the matching paragraphs, or whatever `extractTextBlocks` returns
 *   when neither layout yields enough text.
 */
export function extractDeviantArtContent(
  $: cheerio.CheerioAPI,
  config: TextBlockStrategy
): string {
  // Resolved once so every branch applies the same threshold.
  const minLength = config.minLength || 200;

  // Strip excluded elements first so they cannot count toward any candidate.
  if (config.excludeSelectors) {
    for (const selector of config.excludeSelectors) {
      $(selector).remove();
    }
  }

  // Legacy layout. Each `.text` element is judged on its own text: measuring
  // the combined text of all matches while returning only the first match's
  // HTML could hand back a short, unrelated element.
  for (const element of $('.text').toArray()) {
    const candidate = $(element);
    if (candidate.text().trim().length >= minLength) {
      return candidate.html() || '';
    }
  }

  // Current layout: paragraphs inside specific containers.
  const newFormatSelectors = [
    'div[class*="_83r8m"] p', // Main story content container
    'div[class*="_2CKTq"] p', // Alternate story content container
    'div[class*="journal"] p' // Generic journal container
  ];

  for (const selector of newFormatSelectors) {
    const paragraphs = $(selector).toArray();
    if (paragraphs.length === 0) {
      continue;
    }

    // Sum of the trimmed text lengths of all matched paragraphs.
    const textLength = paragraphs.reduce(
      (sum, paragraph) => sum + $(paragraph).text().trim().length,
      0
    );

    if (textLength >= minLength) {
      // Stitch the paragraphs back together as one HTML string.
      return paragraphs
        .map(paragraph => $(paragraph).prop('outerHTML') || '')
        .join('');
    }
  }

  // Neither layout produced enough text; defer to the generic strategy.
  return extractTextBlocks($, config);
}