MCP server: - Fix Prisma imports from stale client-generated path to @prisma/client - Switch schema from SQLite to PostgreSQL for Docker compatibility - Add prisma generate step to Dockerfile with proper binaryTargets - Include index-sse.js in Docker build (was excluded by .dockerignore) - Install openssl and libc6-compat in Alpine image for Prisma runtime Docker: - Fix memento-note healthcheck (wget unavailable in bullseye-slim) Minor fixes: - scrape.service SSRF protection, middleware route coverage - canvas-board and note-input type fixes - next.config turbopack and devIndicators adjustments Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
69 lines
1.9 KiB
TypeScript
69 lines
1.9 KiB
TypeScript
/**
|
|
* Scrape Service
|
|
* Advanced content extraction using Readability and jsdom
|
|
*/
|
|
|
|
import { JSDOM } from 'jsdom'
|
|
import { Readability } from '@mozilla/readability'
|
|
|
|
/**
 * Readable article data extracted from a single web page.
 *
 * Every text field falls back to an empty string when the extractor does not
 * provide a value, so consumers never have to handle `null`/`undefined`.
 */
export interface ScrapedContent {
  /** Article title. */
  title: string
  /** Article body as the HTML fragment produced by Readability (not Markdown). */
  content: string
  /** Article body as clean text, with markup removed. */
  textContent: string
  /** Short description or excerpt of the article. */
  excerpt: string
  /** Author / byline metadata. */
  byline: string
  /** Name of the publishing site. */
  siteName: string
  /** URL that was actually requested, including the protocol. */
  url: string
}
|
|
|
|
/**
 * Downloads web pages and extracts their main article content with
 * Readability running on a jsdom document.
 */
export class ScrapeService {
  /**
   * Fetches `url` and extracts its readable article.
   *
   * The input may omit the protocol, in which case `https://` is assumed.
   * Only public `http(s)` targets are requested: other schemes, malformed
   * URLs, URLs with embedded credentials, and literal loopback / private /
   * link-local hosts are refused before any network access.
   *
   * @param url - Page address, with or without a protocol.
   * @returns The extracted article, or `null` when the URL is refused, the
   *   request fails, the server answers with a non-2xx status, or Readability
   *   cannot find an article. This method never throws.
   */
  async scrapeUrl(url: string): Promise<ScrapedContent | null> {
    let dom: JSDOM | undefined

    try {
      const targetUrl = ScrapeService.resolveTargetUrl(url)
      if (targetUrl === null) {
        console.warn(`[ScrapeService] Refusing to scrape ${url}: not a public http(s) URL`)
        return null
      }

      console.log(`[ScrapeService] Fetching ${targetUrl}...`)

      const response = await fetch(targetUrl, {
        headers: {
          // Desktop Chrome user agent string, sent instead of the runtime default.
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        },
        // NOTE(review): `next` looks like the Next.js fetch extension (reuse the
        // cached response for up to an hour) — confirm this only runs under Next.js.
        next: { revalidate: 3600 }
      })

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`)
      }

      const html = await response.text()
      // The page URL is passed so relative links resolve against it.
      dom = new JSDOM(html, { url: targetUrl })

      const article = new Readability(dom.window.document).parse()
      if (!article) {
        return null
      }

      return {
        title: article.title ?? '',
        content: article.content ?? '', // HTML fragment from readability
        textContent: article.textContent ?? '', // Clean text
        excerpt: article.excerpt ?? '',
        byline: article.byline ?? '',
        siteName: article.siteName ?? '',
        url: targetUrl
      }
    } catch (error) {
      console.error(`[ScrapeService] Error scraping ${url}:`, error)
      return null
    } finally {
      // Release the jsdom window; only plain strings leave this method.
      dom?.window.close()
    }
  }

  /**
   * Normalizes user input into an absolute URL that is safe to request.
   *
   * Returns the parsed URL's `href` so that the string handed to `fetch` is
   * exactly the one that was validated, or `null` when the input must not be
   * fetched.
   */
  private static resolveTargetUrl(rawUrl: string): string | null {
    const trimmed = rawUrl.trim()
    if (trimmed === '') {
      return null
    }

    // Detect any explicit scheme case-insensitively, so "HTTP://x" is kept
    // as-is and "ftp://x" is rejected below instead of becoming "https://ftp://x".
    const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)

    let parsed: URL
    try {
      parsed = new URL(hasScheme ? trimmed : `https://${trimmed}`)
    } catch {
      return null
    }

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return null
    }
    // Credentials in a scrape target are never legitimate ("mailto:a@b.c"
    // without a scheme would otherwise parse as user "mailto" on host "b.c").
    if (parsed.username !== '' || parsed.password !== '') {
      return null
    }
    if (ScrapeService.isInternalHost(parsed.hostname)) {
      return null
    }

    return parsed.href
  }

  /**
   * Reports whether a URL hostname literally names a loopback, private or
   * link-local destination.
   *
   * This inspects the host text only. Public names that resolve to internal
   * addresses through DNS, and redirect targets followed by `fetch`, are NOT
   * covered here and need network-level controls.
   */
  private static isInternalHost(hostname: string): boolean {
    // URL hostnames wrap IPv6 literals in brackets and may carry a root dot.
    const host = hostname.toLowerCase().replace(/^\[|\]$/g, '').replace(/\.$/, '')

    if (host === 'localhost' || host.endsWith('.localhost')) {
      return true
    }

    // The URL parser has already canonicalized hex/octal/integer IPv4 forms
    // into dotted decimal, so a plain dotted-quad match is sufficient.
    const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host)
    if (ipv4) {
      return ScrapeService.isInternalIpv4(Number(ipv4[1]), Number(ipv4[2]))
    }

    if (host.includes(':')) {
      if (host === '::' || host === '::1') {
        return true // unspecified / loopback
      }
      if (/^f[cd][0-9a-f]{2}:/.test(host)) {
        return true // fc00::/7 unique local
      }
      if (/^fe[89ab][0-9a-f]:/.test(host)) {
        return true // fe80::/10 link-local
      }
      // IPv4-mapped addresses are serialized as ::ffff:HHHH:HHHH; the first
      // hextet carries the first two IPv4 octets.
      const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(host)
      if (mapped) {
        const high = parseInt(mapped[1] ?? '', 16)
        return ScrapeService.isInternalIpv4(high >> 8, high & 0xff)
      }
    }

    return false
  }

  /** Checks the first two octets of an IPv4 address against non-public ranges. */
  private static isInternalIpv4(first: number, second: number): boolean {
    return (
      first === 0 || // 0.0.0.0/8 "this network"
      first === 10 || // 10.0.0.0/8 private
      first === 127 || // 127.0.0.0/8 loopback
      (first === 100 && second >= 64 && second <= 127) || // 100.64.0.0/10 shared address space
      (first === 169 && second === 254) || // 169.254.0.0/16 link-local (cloud metadata)
      (first === 172 && second >= 16 && second <= 31) || // 172.16.0.0/12 private
      (first === 192 && second === 168) // 192.168.0.0/16 private
    )
  }
}
|
|
|
|
/** Shared, ready-to-use ScrapeService instance created when this module loads. */
const scrapeService = new ScrapeService()

export { scrapeService }
|