Saving reading position
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import 'server-only';
|
||||
|
||||
// Dynamic cheerio import used to avoid client-side bundling issues
|
||||
@@ -36,7 +37,7 @@ export function extractByTextPattern(
|
||||
}
|
||||
|
||||
export function extractTextBlocks(
|
||||
$: any,
|
||||
$: cheerio.CheerioAPI,
|
||||
config: TextBlockStrategy
|
||||
): string {
|
||||
const blocks: Array<{element: any, text: string}> = [];
|
||||
@@ -48,7 +49,7 @@ export function extractTextBlocks(
|
||||
});
|
||||
}
|
||||
|
||||
$('*').each((_: any, elem: any) => {
|
||||
$('*').each((_, elem) => {
|
||||
const $elem = $(elem);
|
||||
const text = $elem.clone().children().remove().end().text().trim();
|
||||
|
||||
@@ -101,10 +102,16 @@ export function extractHtmlBetween(
|
||||
}
|
||||
|
||||
export function extractLinkText(
|
||||
$: any,
|
||||
$: cheerio.CheerioAPI,
|
||||
config: LinkTextStrategy
|
||||
): string {
|
||||
let searchScope = config.searchWithin ? $(config.searchWithin) : $('body');
|
||||
let searchScope: cheerio.Cheerio<cheerio.AnyNode>;
|
||||
|
||||
if (config.searchWithin) {
|
||||
searchScope = $(config.searchWithin);
|
||||
} else {
|
||||
searchScope = $('body').length ? $('body') : $('*');
|
||||
}
|
||||
|
||||
// Look for links near the specified text patterns
|
||||
let foundText = '';
|
||||
@@ -112,7 +119,7 @@ export function extractLinkText(
|
||||
config.nearText.forEach(text => {
|
||||
if (foundText) return; // Already found
|
||||
|
||||
searchScope.find('*').each((_: any, elem: any) => {
|
||||
searchScope.find('*').each((_, elem) => {
|
||||
const $elem = $(elem);
|
||||
const elemText = $elem.text().toLowerCase();
|
||||
|
||||
@@ -132,7 +139,7 @@ export function extractLinkText(
|
||||
|
||||
// Look for links in the next few siblings
|
||||
const $siblings = $elem.nextAll().slice(0, 3);
|
||||
$siblings.find('a').first().each((_: any, link: any) => {
|
||||
$siblings.find('a').first().each((_, link) => {
|
||||
foundText = $(link).text().trim();
|
||||
return false;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user