// Dynamic cheerio import used to avoid client-side bundling issues.
// Using the `any` type for CheerioAPI to prevent bundling issues.
import {
|
|
LinkWithPathStrategy,
|
|
HrefPatternStrategy,
|
|
FirstImageStrategy,
|
|
ResponsiveImageStrategy,
|
|
LazyLoadedStrategy
|
|
} from '../types';
|
|
|
|
/**
 * Returns the trimmed text of the first anchor whose `href` contains
 * `config.pathContains`.
 *
 * @param $ - Cheerio-style query function (typed `any`, per the file header).
 * @param config - Strategy options. `searchWithin` optionally narrows the
 *   search to the elements matched by that selector (otherwise `body` is
 *   used); `pathContains` is the substring looked for in each `href`.
 * @returns The first matching link's trimmed text, or `''` when none matches.
 */
export function extractLinkWithPath(
  $: any,
  config: LinkWithPathStrategy
): string {
  const searchScope = config.searchWithin ? $(config.searchWithin) : $('body');
  const links = searchScope.find('a');

  for (let i = 0; i < links.length; i++) {
    const link = links.eq(i);
    const href = link.attr('href');

    // Anchors with a missing or empty href are skipped outright.
    if (href && href.includes(config.pathContains)) {
      return link.text().trim();
    }
  }

  return '';
}
|
|
|
|
/**
 * Collects the `href` of every anchor whose `href` matches `config.pattern`.
 *
 * @param $ - Cheerio-style query function (typed `any`, per the file header).
 * @param config - Strategy options. `searchWithin` optionally narrows the
 *   search to the elements matched by that selector (otherwise `body` is
 *   used); `pattern` is passed to the `RegExp` constructor.
 * @returns Matching `href` values in the order the anchors are visited;
 *   an empty array when nothing matches.
 * @throws SyntaxError if `config.pattern` is not a valid regular expression
 *   (raised by the `RegExp` constructor).
 */
export function extractHrefPattern(
  $: any,
  config: HrefPatternStrategy
): string[] {
  const searchScope = config.searchWithin ? $(config.searchWithin) : $('body');

  // Compiled once, outside the per-anchor callback.
  const pattern = new RegExp(config.pattern);
  const links: string[] = [];

  searchScope.find('a').each((_: any, elem: any) => {
    const href = $(elem).attr('href');

    // NOTE(review): if `config.pattern` can be a RegExp carrying the `g` or
    // `y` flag (its declared type is not visible here), `test()` would resume
    // from the previous match position and skip hrefs. Resetting `lastIndex`
    // keeps every test independent; it is a no-op for flagless patterns.
    pattern.lastIndex = 0;

    if (href && pattern.test(href)) {
      links.push(href);
    }
  });

  return links;
}
|
|
|
|
/**
 * Reads `config.attribute` from the first `<img>` found in the search scope.
 *
 * @param $ - Cheerio-style query function (typed `any`, per the file header).
 * @param config - Strategy options. `searchWithin` optionally narrows the
 *   search to the elements matched by that selector (otherwise `body` is
 *   used); `attribute` names the attribute to read from the image.
 * @returns The attribute value, or `''` when there is no image or the
 *   attribute is missing or empty.
 */
export function extractFirstImage(
  $: any,
  config: FirstImageStrategy
): string {
  const searchScope = config.searchWithin ? $(config.searchWithin) : $('body');
  const img = searchScope.find('img').first();

  return img.attr(config.attribute) || '';
}
|
|
|
|
/**
 * Resolves an image URL from the first element matching `config.selector`.
 *
 * When both `config.selectLargest` and `config.srcsetAttribute` are set and
 * that attribute is non-empty, the srcset is parsed and the URL of the
 * heaviest candidate is returned. Otherwise (or when the srcset yields no
 * usable candidate) the element's `src` is returned.
 *
 * Candidate weighting: a `w` descriptor counts as its numeric value, an `x`
 * descriptor counts as density * 100, and a missing or unrecognised
 * descriptor counts as 100 (i.e. `1x`). On equal weights the later candidate
 * wins.
 *
 * Limitation: candidates are split on `,`, so URLs that themselves contain a
 * comma are not handled.
 *
 * @param $ - Cheerio-style query function (typed `any`, per the file header).
 * @param config - Strategy options: `selector`, and optionally
 *   `selectLargest` plus `srcsetAttribute`.
 * @returns The chosen URL, or `''` when nothing usable is found.
 */
export function extractResponsiveImage(
  $: any,
  config: ResponsiveImageStrategy
): string {
  const DEFAULT_WEIGHT = 100;

  // Maps a srcset descriptor ("640w", "1.5x", ...) to a comparable weight.
  const weightOf = (descriptor: string): number => {
    // parseFloat (not parseInt) so fractional densities such as 1.5x are kept.
    const value = parseFloat(descriptor);
    if (!Number.isFinite(value)) {
      return DEFAULT_WEIGHT;
    }
    if (descriptor.endsWith('w')) {
      return value;
    }
    if (descriptor.endsWith('x')) {
      return value * 100;
    }
    return DEFAULT_WEIGHT;
  };

  const img = $(config.selector).first();

  if (config.selectLargest && config.srcsetAttribute) {
    const srcset = img.attr(config.srcsetAttribute);

    if (srcset) {
      const entries: string[] = srcset.split(',');
      const candidates = entries
        .map((entry) => entry.trim())
        // Trailing or doubled commas produce empty entries; drop them so an
        // empty URL can never be selected.
        .filter((entry) => entry.length > 0)
        .map((entry) => {
          // Split on any whitespace run so repeated spaces, tabs or newlines
          // between URL and descriptor do not hide the descriptor.
          const [url, descriptor = '1x'] = entry.split(/\s+/);
          return { url, width: weightOf(descriptor) };
        });

      if (candidates.length > 0) {
        const largest = candidates.reduce((best, candidate) =>
          best.width > candidate.width ? best : candidate
        );
        return largest.url;
      }
    }
  }

  return img.attr('src') || '';
}
|
|
|
|
/**
 * Resolves the URL of a lazy-loaded image from the first element matching
 * `config.selector`.
 *
 * @param $ - Cheerio-style query function (typed `any`, per the file header).
 * @param config - Strategy options: `selector` locates the element and
 *   `attribute` names the attribute that is read first.
 * @returns The value of `config.attribute` if present and non-empty,
 *   otherwise the element's `src`, otherwise `''`.
 */
export function extractLazyLoadedImage(
  $: any,
  config: LazyLoadedStrategy
): string {
  const img = $(config.selector).first();

  return img.attr(config.attribute) || img.attr('src') || '';
}