Bugfixes
This commit is contained in:
@@ -77,7 +77,7 @@ public class TypesenseService {
|
|||||||
new Field().name("authorName").type("string").facet(true).sort(true),
|
new Field().name("authorName").type("string").facet(true).sort(true),
|
||||||
new Field().name("seriesId").type("string").facet(true).optional(true),
|
new Field().name("seriesId").type("string").facet(true).optional(true),
|
||||||
new Field().name("seriesName").type("string").facet(true).sort(true).optional(true),
|
new Field().name("seriesName").type("string").facet(true).sort(true).optional(true),
|
||||||
new Field().name("tagNames").type("string[]").facet(true).optional(true),
|
new Field().name("tagNames").type("string[]").facet(true),
|
||||||
new Field().name("rating").type("int32").facet(true).sort(true).optional(true),
|
new Field().name("rating").type("int32").facet(true).sort(true).optional(true),
|
||||||
new Field().name("wordCount").type("int32").facet(true).sort(true).optional(true),
|
new Field().name("wordCount").type("int32").facet(true).sort(true).optional(true),
|
||||||
new Field().name("volume").type("int32").facet(true).sort(true).optional(true),
|
new Field().name("volume").type("int32").facet(true).sort(true).optional(true),
|
||||||
@@ -232,6 +232,9 @@ public class TypesenseService {
|
|||||||
.maxFacetValues(100)
|
.maxFacetValues(100)
|
||||||
.sortBy(buildSortParameter(normalizedQuery, sortBy, sortDir));
|
.sortBy(buildSortParameter(normalizedQuery, sortBy, sortDir));
|
||||||
|
|
||||||
|
logger.debug("Typesense search parameters - facetBy: {}, maxFacetValues: {}",
|
||||||
|
searchParameters.getFacetBy(), searchParameters.getMaxFacetValues());
|
||||||
|
|
||||||
// Add filters
|
// Add filters
|
||||||
List<String> filterConditions = new ArrayList<>();
|
List<String> filterConditions = new ArrayList<>();
|
||||||
|
|
||||||
@@ -269,6 +272,7 @@ public class TypesenseService {
|
|||||||
.documents()
|
.documents()
|
||||||
.search(searchParameters);
|
.search(searchParameters);
|
||||||
|
|
||||||
|
logger.debug("Search result facet counts: {}", searchResult.getFacetCounts());
|
||||||
|
|
||||||
List<StorySearchDto> results = convertSearchResult(searchResult);
|
List<StorySearchDto> results = convertSearchResult(searchResult);
|
||||||
Map<String, List<FacetCountDto>> facets = processFacetCounts(searchResult);
|
Map<String, List<FacetCountDto>> facets = processFacetCounts(searchResult);
|
||||||
@@ -375,7 +379,10 @@ public class TypesenseService {
|
|||||||
.map(tag -> tag.getName())
|
.map(tag -> tag.getName())
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
document.put("tagNames", tagNames);
|
document.put("tagNames", tagNames);
|
||||||
|
logger.debug("Story '{}' has {} tags: {}", story.getTitle(), tagNames.size(), tagNames);
|
||||||
} else {
|
} else {
|
||||||
|
document.put("tagNames", new ArrayList<>());
|
||||||
|
logger.debug("Story '{}' has no tags, setting empty array", story.getTitle());
|
||||||
}
|
}
|
||||||
|
|
||||||
document.put("rating", story.getRating() != null ? story.getRating() : 0);
|
document.put("rating", story.getRating() != null ? story.getRating() : 0);
|
||||||
@@ -406,15 +413,34 @@ public class TypesenseService {
|
|||||||
List<FacetCountDto> facetValues = new ArrayList<>();
|
List<FacetCountDto> facetValues = new ArrayList<>();
|
||||||
|
|
||||||
if (facetCounts.getCounts() != null) {
|
if (facetCounts.getCounts() != null) {
|
||||||
|
|
||||||
for (Object countObj : facetCounts.getCounts()) {
|
for (Object countObj : facetCounts.getCounts()) {
|
||||||
if (countObj instanceof Map) {
|
if (countObj instanceof org.typesense.model.FacetCountsCounts) {
|
||||||
Map<String, Object> countMap = (Map<String, Object>) countObj;
|
org.typesense.model.FacetCountsCounts facetCount = (org.typesense.model.FacetCountsCounts) countObj;
|
||||||
String value = (String) countMap.get("value");
|
String value = facetCount.getValue();
|
||||||
Integer count = (Integer) countMap.get("count");
|
Integer count = facetCount.getCount();
|
||||||
|
|
||||||
if (value != null && count != null && count > 0) {
|
if (value != null && count != null && count > 0) {
|
||||||
facetValues.add(new FacetCountDto(value, count));
|
facetValues.add(new FacetCountDto(value, count));
|
||||||
}
|
}
|
||||||
|
} else if (countObj instanceof Map) {
|
||||||
|
// Fallback for Map-based responses
|
||||||
|
Map<String, Object> countMap = (Map<String, Object>) countObj;
|
||||||
|
String value = (String) countMap.get("value");
|
||||||
|
Object countValue = countMap.get("count");
|
||||||
|
|
||||||
|
if (value != null && countValue != null) {
|
||||||
|
Integer count = null;
|
||||||
|
if (countValue instanceof Integer) {
|
||||||
|
count = (Integer) countValue;
|
||||||
|
} else if (countValue instanceof Number) {
|
||||||
|
count = ((Number) countValue).intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count != null && count > 0) {
|
||||||
|
facetValues.add(new FacetCountDto(value, count));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -432,6 +458,12 @@ public class TypesenseService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DEBUG: Log final facet processing results
|
||||||
|
logger.info("FACET DEBUG: Final facetMap contents: {}", facetMap);
|
||||||
|
if (facetMap.isEmpty()) {
|
||||||
|
logger.info("FACET DEBUG: No facets were processed - investigating why");
|
||||||
|
}
|
||||||
|
|
||||||
return facetMap;
|
return facetMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ export default function LibraryPage() {
|
|||||||
tags: selectedTags.length > 0 ? selectedTags : undefined,
|
tags: selectedTags.length > 0 ? selectedTags : undefined,
|
||||||
sortBy: sortOption,
|
sortBy: sortOption,
|
||||||
sortDir: sortDirection,
|
sortDir: sortDirection,
|
||||||
|
facetBy: ['tagNames'], // Request tag facets for the filter UI
|
||||||
});
|
});
|
||||||
|
|
||||||
const currentStories = result?.results || [];
|
const currentStories = result?.results || [];
|
||||||
|
|||||||
@@ -314,6 +314,7 @@ export const searchApi = {
|
|||||||
maxRating?: number;
|
maxRating?: number;
|
||||||
sortBy?: string;
|
sortBy?: string;
|
||||||
sortDir?: string;
|
sortDir?: string;
|
||||||
|
facetBy?: string[];
|
||||||
}): Promise<SearchResult> => {
|
}): Promise<SearchResult> => {
|
||||||
// Create URLSearchParams to properly handle array parameters
|
// Create URLSearchParams to properly handle array parameters
|
||||||
const searchParams = new URLSearchParams();
|
const searchParams = new URLSearchParams();
|
||||||
@@ -334,6 +335,9 @@ export const searchApi = {
|
|||||||
if (params.tags && params.tags.length > 0) {
|
if (params.tags && params.tags.length > 0) {
|
||||||
params.tags.forEach(tag => searchParams.append('tags', tag));
|
params.tags.forEach(tag => searchParams.append('tags', tag));
|
||||||
}
|
}
|
||||||
|
if (params.facetBy && params.facetBy.length > 0) {
|
||||||
|
params.facetBy.forEach(facet => searchParams.append('facetBy', facet));
|
||||||
|
}
|
||||||
|
|
||||||
const response = await api.get(`/stories/search?${searchParams.toString()}`);
|
const response = await api.get(`/stories/search?${searchParams.toString()}`);
|
||||||
return response.data;
|
return response.data;
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
"searchBefore": "</title>"
|
"searchBefore": "</title>"
|
||||||
},
|
},
|
||||||
"content": {
|
"content": {
|
||||||
"strategy": "text-blocks",
|
"strategy": "deviantart-content",
|
||||||
"minLength": 200,
|
"minLength": 200,
|
||||||
"containerHints": ["journal", "literature", "story", "text", "content"],
|
"containerHints": ["journal", "literature", "story", "text", "content"],
|
||||||
"excludeSelectors": ["script", "style", "nav", "header", "footer", ".dev-page-sidebar"]
|
"excludeSelectors": ["script", "style", "nav", "header", "footer", ".dev-page-sidebar"]
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import { UrlParser } from './utils/urlParser';
|
|||||||
import {
|
import {
|
||||||
extractByTextPattern,
|
extractByTextPattern,
|
||||||
extractTextBlocks,
|
extractTextBlocks,
|
||||||
|
extractDeviantArtContent,
|
||||||
extractHtmlBetween,
|
extractHtmlBetween,
|
||||||
extractLinkText,
|
extractLinkText,
|
||||||
extractLinkWithPath,
|
extractLinkWithPath,
|
||||||
@@ -246,6 +247,8 @@ export class StoryScraper {
|
|||||||
return extractLinkWithPath($, strategy as any);
|
return extractLinkWithPath($, strategy as any);
|
||||||
case 'text-blocks':
|
case 'text-blocks':
|
||||||
return extractTextBlocks($, strategy as any);
|
return extractTextBlocks($, strategy as any);
|
||||||
|
case 'deviantart-content':
|
||||||
|
return extractDeviantArtContent($, strategy as any);
|
||||||
case 'href-pattern':
|
case 'href-pattern':
|
||||||
return extractHrefPattern($, strategy as any);
|
return extractHrefPattern($, strategy as any);
|
||||||
case 'html-between':
|
case 'html-between':
|
||||||
|
|||||||
@@ -82,6 +82,58 @@ export function extractTextBlocks(
|
|||||||
return largestBlock ? $(largestBlock.element).html() || '' : '';
|
return largestBlock ? $(largestBlock.element).html() || '' : '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts story HTML from a DeviantArt page.
 *
 * Strategies are tried in order:
 *  1. Old format: an element with class `text` that holds the full story.
 *  2. New format: `<p>` elements inside containers whose class attribute
 *     contains `_83r8m`, `_2CKTq` or `journal`.
 *  3. Fallback: the generic `extractTextBlocks` strategy.
 *
 * Note: elements matching `config.excludeSelectors` are removed from the
 * document behind `$` before extraction, so the caller's document is mutated.
 *
 * @param $ - Cheerio API bound to the page being scraped.
 * @param config - Strategy settings; `minLength` (treated as 200 when unset
 *   or 0) is the minimum amount of trimmed text a candidate must contain.
 * @returns The extracted HTML, or whatever `extractTextBlocks` returns when
 *   neither DeviantArt-specific format yields enough text.
 */
export function extractDeviantArtContent(
  $: cheerio.CheerioAPI,
  config: TextBlockStrategy
): string {
  const minLength = config.minLength || 200;

  // Remove excluded elements first so they cannot contribute text or markup
  if (config.excludeSelectors) {
    config.excludeSelectors.forEach(selector => {
      $(selector).remove();
    });
  }

  // Old format (single text div). A page may contain several `.text`
  // elements: `.text()` on the whole selection concatenates all of them while
  // `.html()` only returns the first one, so each match is checked on its own
  // and the first one that is long enough by itself is returned.
  for (const element of $('.text').toArray()) {
    const candidate = $(element);
    if (candidate.text().trim().length >= minLength) {
      return candidate.html() || '';
    }
  }

  // New format (multiple paragraphs in specific containers)
  const newFormatSelectors = [
    'div[class*="_83r8m"] p', // Main story content container
    'div[class*="_2CKTq"] p', // Alternate story content container
    'div[class*="journal"] p' // Generic journal container
  ];

  for (const selector of newFormatSelectors) {
    const paragraphs = $(selector);
    if (paragraphs.length === 0) {
      continue;
    }

    // Only the amount of text matters here, so sum lengths instead of
    // building a throwaway concatenated string.
    let textLength = 0;
    paragraphs.each((_, p) => {
      textLength += $(p).text().trim().length;
    });

    if (textLength >= minLength) {
      // Combine all paragraphs into a single HTML string
      let combinedHtml = '';
      paragraphs.each((_, p) => {
        combinedHtml += $(p).prop('outerHTML') || '';
      });
      return combinedHtml;
    }
  }

  // Fallback to the original text-blocks strategy
  return extractTextBlocks($, config);
}
|
||||||
|
|
||||||
export function extractHtmlBetween(
|
export function extractHtmlBetween(
|
||||||
html: string,
|
html: string,
|
||||||
config: HtmlBetweenStrategy
|
config: HtmlBetweenStrategy
|
||||||
|
|||||||
5
package.json
Normal file
5
package.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"dependencies": {
|
||||||
|
"cheerio": "^1.1.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user