Small improvements
This commit is contained in:
@@ -78,6 +78,10 @@ export function extractResponsiveImage(
|
||||
return { url, width };
|
||||
});
|
||||
|
||||
if (sources.length === 0) {
|
||||
return img.attr('src') || '';
|
||||
}
|
||||
|
||||
const largest = sources.reduce((prev: any, current: any) =>
|
||||
prev.width > current.width ? prev : current
|
||||
);
|
||||
|
||||
@@ -75,6 +75,10 @@ export function extractTextBlocks(
|
||||
}
|
||||
|
||||
// Fallback to largest block
|
||||
if (blocks.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const largestBlock = blocks.reduce((prev, current) =>
|
||||
prev.text.length > current.text.length ? prev : current
|
||||
);
|
||||
@@ -86,6 +90,20 @@ export function extractDeviantArtContent(
|
||||
$: cheerio.CheerioAPI,
|
||||
config: TextBlockStrategy
|
||||
): string {
|
||||
// Check for mature content warning or login requirement
|
||||
const matureWarning = $('.deviation-overlay.blocked.mature, .mature-filter, .ismature').first();
|
||||
if (matureWarning.length > 0) {
|
||||
throw new Error('Content is restricted by mature content filter. Login may be required to access this story.');
|
||||
}
|
||||
|
||||
const loginRequired = $('a[href*="join"][href*="mature"], a[href*="login"]').filter((_, elem) => {
|
||||
const text = $(elem).text().toLowerCase();
|
||||
return text.includes('log in') || text.includes('sign up');
|
||||
});
|
||||
if (loginRequired.length > 0) {
|
||||
throw new Error('Login is required to access this DeviantArt content.');
|
||||
}
|
||||
|
||||
// Remove excluded elements first
|
||||
if (config.excludeSelectors) {
|
||||
config.excludeSelectors.forEach(selector => {
|
||||
@@ -93,9 +111,10 @@ export function extractDeviantArtContent(
|
||||
});
|
||||
}
|
||||
|
||||
// DeviantArt has multiple content structures:
|
||||
// 1. Old format: <div class="text"> containing the full story
|
||||
// 2. New format: <div class="_83r8m _2CKTq"> or similar classes containing multiple <p> elements
|
||||
// 3. Legacy journal format: .legacy-journal .text
|
||||
|
||||
// Try the old format first (single text div)
|
||||
const textDiv = $('.text');
|
||||
@@ -103,6 +122,12 @@ export function extractDeviantArtContent(
|
||||
return textDiv.html() || '';
|
||||
}
|
||||
|
||||
// Try legacy journal format
|
||||
const legacyJournal = $('.legacy-journal .text, .legacy-journal .journal-text');
|
||||
if (legacyJournal.length > 0 && legacyJournal.text().trim().length >= (config.minLength || 200)) {
|
||||
return legacyJournal.html() || '';
|
||||
}
|
||||
|
||||
// Try the new format (multiple paragraphs in specific containers)
|
||||
const newFormatSelectors = [
|
||||
'div[class*="_83r8m"] p', // Main story content container
|
||||
|
||||
Reference in New Issue
Block a user