// Web-clip article extraction: Readability parsing, HTML sanitisation and RTL-aware source footers.
import { Readability } from '@mozilla/readability'
import { JSDOM } from 'jsdom'
import DOMPurify from 'isomorphic-dompurify'
import {
  applyRtlToHtmlBlocks,
  readPageLocaleFromHtml,
  resolveClipLocale,
  wrapClipArticleHtml,
  type ClipLocaleHint,
} from '@/lib/clip/rtl-content'

/** Result of extracting a readable article from a raw HTML page. */
export interface ExtractedArticle {
  /** Article title, trimmed of surrounding whitespace (empty string when none was found). */
  title: string
  /** Sanitized article HTML after RTL block processing and clip wrapping. */
  content: string
  /** Plain-text rendering of the article, trimmed. */
  textContent: string
  /** Excerpt reported by the extractor, trimmed (empty string when none was found). */
  excerpt: string
  /** Direction/language hint merged from the extractor, the page markup and the locale resolver. */
  locale: ClipLocaleHint
}

/**
 * Extracts the main article from a raw HTML page and prepares it for clipping.
 *
 * The page is parsed with Readability, its direction/language is merged from
 * three sources (Readability metadata, the page markup, and the locale
 * resolver), and the article HTML is sanitized before RTL processing and
 * wrapping.
 *
 * @param html - Raw HTML of the page.
 * @param pageUrl - URL of the page; passed to JSDOM as the document URL and to the locale resolver.
 * @returns The extracted article, or `null` when Readability finds no article.
 */
export function extractArticleFromHtml(html: string, pageUrl: string): ExtractedArticle | null {
  const article = parseReadableArticle(html, pageUrl)
  if (!article) return null

  const pageLocale = readPageLocaleFromHtml(html)
  const readabilityDir = article.dir?.toLowerCase() === 'rtl' ? 'rtl' : 'ltr'
  const readabilityLang = article.lang?.split('-')[0]?.toLowerCase()
  const locale = resolveClipLocale(pageUrl, article.title || '', article.textContent || '')

  // Any single source reporting RTL is enough to treat the whole clip as RTL.
  const isRtl =
    readabilityDir === 'rtl' || pageLocale.direction === 'rtl' || locale.direction === 'rtl'

  // Readability's language only takes precedence when it is one of the RTL
  // languages handled here; otherwise the resolver, then the page markup, decide.
  const rtlReadabilityLang =
    readabilityLang === 'fa' || readabilityLang === 'ar' || readabilityLang === 'he'
      ? readabilityLang
      : undefined

  const mergedLocale: ClipLocaleHint = {
    direction: isRtl ? 'rtl' : 'ltr',
    lang: rtlReadabilityLang || locale.lang || pageLocale.lang,
  }

  // Sanitize first so the RTL helpers only ever see cleaned markup.
  const sanitized = DOMPurify.sanitize(article.content || '')
  const rtlBlocks = applyRtlToHtmlBlocks(sanitized, mergedLocale)
  const content = wrapClipArticleHtml(rtlBlocks, mergedLocale)

  return {
    title: (article.title || '').trim(),
    content,
    textContent: (article.textContent || '').trim(),
    excerpt: (article.excerpt || '').trim(),
    locale: mergedLocale,
  }
}

/**
 * Parses `html` in a throwaway JSDOM and runs Readability over its document.
 *
 * The JSDOM window is closed in `finally`, so it is released even when parsing
 * throws. Callers only receive Readability's parse result, never the document.
 */
function parseReadableArticle(html: string, pageUrl: string) {
  const dom = new JSDOM(html, { url: pageUrl })
  try {
    return new Readability(dom.window.document).parse()
  } finally {
    dom.window.close()
  }
}

/**
 * Maps a language hint to the locale tag used when rendering the clip footer.
 *
 * Only the primary language subtag is considered, case-insensitively, so
 * `fa`, `FA` and `fa-IR` all resolve to the same tag.
 *
 * @param lang - Language code or tag (for example `fa`, `ar-EG`); may be omitted.
 * @returns `fa-IR`, `ar` or `he-IL` for Persian, Arabic and Hebrew; `fr-FR` for anything else.
 */
export function clipFooterLocaleTag(lang?: string): string {
  // Accept both `-` and `_` separators since locale strings appear in either form.
  const primary = lang?.trim().toLowerCase().split(/[-_]/)[0]
  switch (primary) {
    case 'fa':
      return 'fa-IR'
    case 'ar':
      return 'ar'
    case 'he':
      return 'he-IL'
    default:
      return 'fr-FR'
  }
}

/**
 * Builds the HTML footer crediting the source of a clip.
 *
 * The date is formatted with `localeTag`, and the label language is chosen by
 * the tag's prefix: Persian (`fa…`), Arabic (`ar…`), Hebrew (`he…`), otherwise
 * French. The paragraph gets `dir="rtl"` exactly when an RTL label is used, so
 * direction and label language always agree.
 *
 * @param domain - Source domain shown in the label.
 * @param date - Date shown in the label.
 * @param localeTag - Locale tag used for date formatting and label selection; defaults to `fr-FR`.
 * @returns An `<hr/>` followed by a `<p><small>…</small></p>` containing the sanitized label.
 */
export function buildClipSourceFooter(domain: string, date: Date, localeTag = 'fr-FR'): string {
  const formatted = date.toLocaleDateString(localeTag, { day: 'numeric', month: 'long', year: 'numeric' })

  let label: string
  let isRtl = true
  if (localeTag.startsWith('fa')) {
    label = `برگرفته از ${domain} — ${formatted}`
  } else if (localeTag.startsWith('ar')) {
    label = `مقتبس من ${domain} — ${formatted}`
  } else if (localeTag.startsWith('he')) {
    label = `מתוך ${domain} — ${formatted}`
  } else {
    label = `Extrait de ${domain} le ${formatted}`
    isRtl = false
  }

  const dirAttr = isRtl ? ' dir="rtl"' : ''
  // The label embeds caller-supplied text, so it is sanitized before being placed in markup.
  return `<hr/><p${dirAttr}><small>${DOMPurify.sanitize(label)}</small></p>`
}