refactor(ux): consolidate BMAD skills, update design system, and clean up Prisma generated client
This commit is contained in:
92
keep-notes/lib/ai/services/rss.service.ts
Normal file
92
keep-notes/lib/ai/services/rss.service.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
/**
|
||||
* RSS/Atom Feed Service
|
||||
* Parses RSS and Atom feeds and returns structured article entries.
|
||||
* Used by the scraper pipeline to get individual article URLs from feeds.
|
||||
*/
|
||||
|
||||
import Parser from 'rss-parser'
|
||||
|
||||
export interface FeedArticle {
|
||||
title: string
|
||||
link: string
|
||||
pubDate?: string
|
||||
contentSnippet?: string
|
||||
content?: string
|
||||
creator?: string
|
||||
}
|
||||
|
||||
export interface ParsedFeed {
|
||||
title: string
|
||||
description?: string
|
||||
link?: string
|
||||
articles: FeedArticle[]
|
||||
}
|
||||
|
||||
const parser = new Parser({
|
||||
timeout: 15000,
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
'Accept': 'application/rss+xml, application/xml, text/xml, application/atom+xml, text/html;q=0.9',
|
||||
},
|
||||
})
|
||||
|
||||
const MAX_ARTICLES_PER_FEED = 8
|
||||
|
||||
export class RssService {
|
||||
/**
|
||||
* Detect if a URL looks like an RSS/Atom feed
|
||||
*/
|
||||
isFeedUrl(url: string): boolean {
|
||||
const feedPatterns = [
|
||||
'/feed', '/rss', '/atom', '/feed/', '/rss/',
|
||||
'.xml', '.rss', '.atom',
|
||||
'/feed/json',
|
||||
]
|
||||
const lower = url.toLowerCase()
|
||||
return feedPatterns.some(p => lower.includes(p))
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to parse a URL as an RSS/Atom feed.
|
||||
* Returns null if the URL is not a valid feed.
|
||||
*/
|
||||
async parseFeed(feedUrl: string): Promise<ParsedFeed | null> {
|
||||
try {
|
||||
const result = await parser.parseURL(feedUrl)
|
||||
return {
|
||||
title: result.title || feedUrl,
|
||||
description: result.description,
|
||||
link: result.link,
|
||||
articles: (result.items || [])
|
||||
.slice(0, MAX_ARTICLES_PER_FEED)
|
||||
.map(item => ({
|
||||
title: item.title || 'Sans titre',
|
||||
link: item.link || '',
|
||||
pubDate: item.pubDate || item.isoDate,
|
||||
contentSnippet: (item.contentSnippet || '').substring(0, 500),
|
||||
content: item['content:encoded'] || item.content || '',
|
||||
creator: item.creator || item.dc?.creator,
|
||||
}))
|
||||
.filter(a => a.link), // Only keep entries with a link
|
||||
}
|
||||
} catch {
|
||||
// Not a valid feed or fetch failed
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch an RSS feed and return only the article URLs for scraping.
|
||||
* Useful when you want to scrape articles individually.
|
||||
*/
|
||||
async getArticleUrls(feedUrl: string): Promise<{ feedTitle: string; urls: string[] }> {
|
||||
const feed = await this.parseFeed(feedUrl)
|
||||
if (!feed) return { feedTitle: '', urls: [] }
|
||||
return {
|
||||
feedTitle: feed.title,
|
||||
urls: feed.articles.map(a => a.link),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Shared singleton instance; all callers reuse the one module-level parser. */
export const rssService = new RssService()
|
||||
Reference in New Issue
Block a user