Saving reading position

This commit is contained in:
Stefan Hardegger
2025-07-28 14:09:19 +02:00
parent fcad028959
commit a501b27169
10 changed files with 473 additions and 6 deletions

View File

@@ -1,3 +1,4 @@
import * as cheerio from 'cheerio';
import 'server-only';
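// Runtime cheerio loading stays dynamic to avoid client-side bundling issues; the static import above appears to be used only for type annotations (TODO confirm it is elided at compile time, or switch it to `import type`).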
@@ -36,7 +37,7 @@ export function extractByTextPattern(
}
export function extractTextBlocks(
$: any,
$: cheerio.CheerioAPI,
config: TextBlockStrategy
): string {
const blocks: Array<{element: any, text: string}> = [];
@@ -48,7 +49,7 @@ export function extractTextBlocks(
});
}
$('*').each((_: any, elem: any) => {
$('*').each((_, elem) => {
const $elem = $(elem);
const text = $elem.clone().children().remove().end().text().trim();
@@ -101,10 +102,16 @@ export function extractHtmlBetween(
}
export function extractLinkText(
$: any,
$: cheerio.CheerioAPI,
config: LinkTextStrategy
): string {
let searchScope = config.searchWithin ? $(config.searchWithin) : $('body');
let searchScope: cheerio.Cheerio<cheerio.AnyNode>;
if (config.searchWithin) {
searchScope = $(config.searchWithin);
} else {
searchScope = $('body').length ? $('body') : $('*');
}
// Look for links near the specified text patterns
let foundText = '';
@@ -112,7 +119,7 @@ export function extractLinkText(
config.nearText.forEach(text => {
if (foundText) return; // Already found
searchScope.find('*').each((_: any, elem: any) => {
searchScope.find('*').each((_, elem) => {
const $elem = $(elem);
const elemText = $elem.text().toLowerCase();
@@ -132,7 +139,7 @@ export function extractLinkText(
// Look for links in the next few siblings
const $siblings = $elem.nextAll().slice(0, 3);
$siblings.find('a').first().each((_: any, link: any) => {
$siblings.find('a').first().each((_, link) => {
foundText = $(link).text().trim();
return false;
});