Bugfixes
This commit is contained in:
@@ -77,7 +77,7 @@ public class TypesenseService {
|
||||
new Field().name("authorName").type("string").facet(true).sort(true),
|
||||
new Field().name("seriesId").type("string").facet(true).optional(true),
|
||||
new Field().name("seriesName").type("string").facet(true).sort(true).optional(true),
|
||||
new Field().name("tagNames").type("string[]").facet(true).optional(true),
|
||||
new Field().name("tagNames").type("string[]").facet(true),
|
||||
new Field().name("rating").type("int32").facet(true).sort(true).optional(true),
|
||||
new Field().name("wordCount").type("int32").facet(true).sort(true).optional(true),
|
||||
new Field().name("volume").type("int32").facet(true).sort(true).optional(true),
|
||||
@@ -232,6 +232,9 @@ public class TypesenseService {
|
||||
.maxFacetValues(100)
|
||||
.sortBy(buildSortParameter(normalizedQuery, sortBy, sortDir));
|
||||
|
||||
logger.debug("Typesense search parameters - facetBy: {}, maxFacetValues: {}",
|
||||
searchParameters.getFacetBy(), searchParameters.getMaxFacetValues());
|
||||
|
||||
// Add filters
|
||||
List<String> filterConditions = new ArrayList<>();
|
||||
|
||||
@@ -269,6 +272,7 @@ public class TypesenseService {
|
||||
.documents()
|
||||
.search(searchParameters);
|
||||
|
||||
logger.debug("Search result facet counts: {}", searchResult.getFacetCounts());
|
||||
|
||||
List<StorySearchDto> results = convertSearchResult(searchResult);
|
||||
Map<String, List<FacetCountDto>> facets = processFacetCounts(searchResult);
|
||||
@@ -375,7 +379,10 @@ public class TypesenseService {
|
||||
.map(tag -> tag.getName())
|
||||
.collect(Collectors.toList());
|
||||
document.put("tagNames", tagNames);
|
||||
logger.debug("Story '{}' has {} tags: {}", story.getTitle(), tagNames.size(), tagNames);
|
||||
} else {
|
||||
document.put("tagNames", new ArrayList<>());
|
||||
logger.debug("Story '{}' has no tags, setting empty array", story.getTitle());
|
||||
}
|
||||
|
||||
document.put("rating", story.getRating() != null ? story.getRating() : 0);
|
||||
@@ -406,15 +413,34 @@ public class TypesenseService {
|
||||
List<FacetCountDto> facetValues = new ArrayList<>();
|
||||
|
||||
if (facetCounts.getCounts() != null) {
|
||||
|
||||
for (Object countObj : facetCounts.getCounts()) {
|
||||
if (countObj instanceof Map) {
|
||||
Map<String, Object> countMap = (Map<String, Object>) countObj;
|
||||
String value = (String) countMap.get("value");
|
||||
Integer count = (Integer) countMap.get("count");
|
||||
if (countObj instanceof org.typesense.model.FacetCountsCounts) {
|
||||
org.typesense.model.FacetCountsCounts facetCount = (org.typesense.model.FacetCountsCounts) countObj;
|
||||
String value = facetCount.getValue();
|
||||
Integer count = facetCount.getCount();
|
||||
|
||||
if (value != null && count != null && count > 0) {
|
||||
facetValues.add(new FacetCountDto(value, count));
|
||||
}
|
||||
} else if (countObj instanceof Map) {
|
||||
// Fallback for Map-based responses
|
||||
Map<String, Object> countMap = (Map<String, Object>) countObj;
|
||||
String value = (String) countMap.get("value");
|
||||
Object countValue = countMap.get("count");
|
||||
|
||||
if (value != null && countValue != null) {
|
||||
Integer count = null;
|
||||
if (countValue instanceof Integer) {
|
||||
count = (Integer) countValue;
|
||||
} else if (countValue instanceof Number) {
|
||||
count = ((Number) countValue).intValue();
|
||||
}
|
||||
|
||||
if (count != null && count > 0) {
|
||||
facetValues.add(new FacetCountDto(value, count));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -432,6 +458,12 @@ public class TypesenseService {
|
||||
}
|
||||
}
|
||||
|
||||
// DEBUG: Log final facet processing results
|
||||
logger.info("FACET DEBUG: Final facetMap contents: {}", facetMap);
|
||||
if (facetMap.isEmpty()) {
|
||||
logger.info("FACET DEBUG: No facets were processed - investigating why");
|
||||
}
|
||||
|
||||
return facetMap;
|
||||
}
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ export default function LibraryPage() {
|
||||
tags: selectedTags.length > 0 ? selectedTags : undefined,
|
||||
sortBy: sortOption,
|
||||
sortDir: sortDirection,
|
||||
facetBy: ['tagNames'], // Request tag facets for the filter UI
|
||||
});
|
||||
|
||||
const currentStories = result?.results || [];
|
||||
|
||||
@@ -314,6 +314,7 @@ export const searchApi = {
|
||||
maxRating?: number;
|
||||
sortBy?: string;
|
||||
sortDir?: string;
|
||||
facetBy?: string[];
|
||||
}): Promise<SearchResult> => {
|
||||
// Create URLSearchParams to properly handle array parameters
|
||||
const searchParams = new URLSearchParams();
|
||||
@@ -334,6 +335,9 @@ export const searchApi = {
|
||||
if (params.tags && params.tags.length > 0) {
|
||||
params.tags.forEach(tag => searchParams.append('tags', tag));
|
||||
}
|
||||
if (params.facetBy && params.facetBy.length > 0) {
|
||||
params.facetBy.forEach(facet => searchParams.append('facetBy', facet));
|
||||
}
|
||||
|
||||
const response = await api.get(`/stories/search?${searchParams.toString()}`);
|
||||
return response.data;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"searchBefore": "</title>"
|
||||
},
|
||||
"content": {
|
||||
"strategy": "text-blocks",
|
||||
"strategy": "deviantart-content",
|
||||
"minLength": 200,
|
||||
"containerHints": ["journal", "literature", "story", "text", "content"],
|
||||
"excludeSelectors": ["script", "style", "nav", "header", "footer", ".dev-page-sidebar"]
|
||||
|
||||
@@ -17,6 +17,7 @@ import { UrlParser } from './utils/urlParser';
|
||||
import {
|
||||
extractByTextPattern,
|
||||
extractTextBlocks,
|
||||
extractDeviantArtContent,
|
||||
extractHtmlBetween,
|
||||
extractLinkText,
|
||||
extractLinkWithPath,
|
||||
@@ -246,6 +247,8 @@ export class StoryScraper {
|
||||
return extractLinkWithPath($, strategy as any);
|
||||
case 'text-blocks':
|
||||
return extractTextBlocks($, strategy as any);
|
||||
case 'deviantart-content':
|
||||
return extractDeviantArtContent($, strategy as any);
|
||||
case 'href-pattern':
|
||||
return extractHrefPattern($, strategy as any);
|
||||
case 'html-between':
|
||||
|
||||
@@ -82,6 +82,58 @@ export function extractTextBlocks(
|
||||
return largestBlock ? $(largestBlock.element).html() || '' : '';
|
||||
}
|
||||
|
||||
/**
 * Extracts the story body from a DeviantArt page.
 *
 * Strategies are tried in order, and the first one that yields enough text wins:
 *  1. Old layout: a single element with class `text` holding the whole story.
 *  2. New layout: `<p>` elements inside containers whose class attribute
 *     contains `_83r8m`, `_2CKTq` or `journal`.
 *  3. Fallback: the generic `extractTextBlocks` strategy.
 *
 * Side effect: every element matching `config.excludeSelectors` is removed
 * from the document behind `$` before any extraction is attempted.
 *
 * @param $ - Cheerio handle for the loaded page.
 * @param config - Strategy options. `minLength` is the minimum number of
 *   trimmed text characters a candidate must contain (200 when missing or 0);
 *   `excludeSelectors` lists elements to strip first.
 * @returns The inner HTML of the matching `.text` element, or the concatenated
 *   outer HTML of the matched paragraphs, or the result of `extractTextBlocks`.
 */
export function extractDeviantArtContent(
  $: cheerio.CheerioAPI,
  config: TextBlockStrategy
): string {
  const minLength = config.minLength || 200;

  // Remove excluded elements first so they cannot contribute text or markup.
  if (config.excludeSelectors) {
    config.excludeSelectors.forEach(selector => {
      $(selector).remove();
    });
  }

  // Old format: <div class="text"> containing the full story.
  // Each match is measured on its own: `.text()` on a multi-element selection
  // concatenates all matches while `.html()` only returns the first one, so
  // checking the whole selection could return a short, unrelated fragment.
  for (const candidate of $('.text').toArray()) {
    const node = $(candidate);
    if (node.text().trim().length >= minLength) {
      return node.html() || '';
    }
  }

  // New format: multiple <p> elements inside specific containers.
  const newFormatSelectors = [
    'div[class*="_83r8m"] p', // Main story content container
    'div[class*="_2CKTq"] p', // Alternate story content container
    'div[class*="journal"] p' // Generic journal container
  ];

  for (const selector of newFormatSelectors) {
    const paragraphs = $(selector);
    if (paragraphs.length === 0) {
      continue;
    }

    // Single pass: measure the trimmed text and collect the markup together.
    let textLength = 0;
    let combinedHtml = '';
    paragraphs.each((_, p) => {
      const paragraph = $(p);
      textLength += paragraph.text().trim().length;
      combinedHtml += paragraph.prop('outerHTML') || '';
    });

    // Only accept this container when it holds enough content.
    if (textLength >= minLength) {
      return combinedHtml;
    }
  }

  // Fallback to the original text-blocks strategy.
  return extractTextBlocks($, config);
}
|
||||
|
||||
export function extractHtmlBetween(
|
||||
html: string,
|
||||
config: HtmlBetweenStrategy
|
||||
|
||||
5
package.json
Normal file
5
package.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"cheerio": "^1.1.2"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user