Security:
- Add auth + file type/size validation to upload API
- Add admin auth to /api/admin/ endpoints
- Add SSRF protection to scrape action
- Whitelist fields in PUT /api/notes/[id] to prevent mass assignment
- Protect /lab, /agents, /chat, /canvas, /notebooks routes in middleware

AI provider fixes:
- Add deepseek/openrouter to factory ProviderType (was silently falling back to ollama)
- Fix title-suggestion.service.ts to use factory instead of hardcoded OpenAI
- Fix getAIProvider→getChatProvider in memory-echo, notebook-summary, agent-executor
- Fix getAIProvider→getTagsProvider in notebook-suggestion, title-suggestions, transform-markdown

Functional bugs:
- Fix ALLOW_REGISTRATION AND→OR logic
- Fix note-editor.tsx passing stale props to useAutoTagging instead of local state
- Fix stale Note.embedding type (migrated to NoteEmbedding table)
- Remove hardcoded SQLite path from prisma.ts

Frontend:
- Add AbortController to useAutoTagging and useTitleSuggestions hooks
- Add error rollback to optimistic UI in note-inline-editor
- Remove stale closure over notebookId/language in useAutoTagging

Cleanup:
- Rename docker-compose from keepnotes→memento
- Remove unused unstable_cache import from config.ts
- Remove dead useUndoRedo hook
- Fix TagSuggestion type (add isNewLabel, reasoning)
- Remove dead AIConfig/AIProviderType types
- Fix ghost-tags unused isEmpty var and as any cast
- Fix note-editor titleSuggestions typed as any[]

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
68 lines
2.4 KiB
TypeScript
68 lines
2.4 KiB
TypeScript
'use server'
|
|
|
|
import * as cheerio from 'cheerio';
|
|
|
|
/**
 * Link-preview metadata extracted from a web page.
 *
 * Only `url` is guaranteed to be present; every other field depends on what
 * the page exposes through its meta tags.
 */
export interface LinkMetadata {
  /** The URL the metadata was fetched for. */
  url: string;
  /** Page title, when one could be determined. */
  title?: string;
  /** Short description of the page, when one could be determined. */
  description?: string;
  /** URL of a preview image advertised by the page, if any. */
  imageUrl?: string;
  /** Name of the site hosting the page, if advertised. */
  siteName?: string;
}
|
|
|
|
/** Maximum number of HTTP redirects followed before the fetch is abandoned. */
const MAX_REDIRECTS = 5;

/** Returns true when the first two octets of an IPv4 address fall in a loopback, private, link-local or otherwise non-public range. */
function isBlockedIPv4(a: number, b: number): boolean {
  return (
    a === 0 || // 0.0.0.0/8 ("this" network)
    a === 10 || // 10.0.0.0/8 (private)
    a === 127 || // 127.0.0.0/8 (loopback)
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 (shared address space)
    (a === 169 && b === 254) || // 169.254.0.0/16 (link-local, incl. cloud metadata)
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 (private)
    (a === 192 && b === 168) // 192.168.0.0/16 (private)
  );
}

/** Parses a dotted-decimal IPv4 address into its four octets, or returns null when `host` is not one. */
function parseIPv4(host: string): [number, number, number, number] | null {
  const match = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (!match) return null;
  const octets: [number, number, number, number] = [
    Number(match[1]),
    Number(match[2]),
    Number(match[3]),
    Number(match[4]),
  ];
  return octets.some((octet) => octet > 255) ? null : octets;
}

/** Returns true when an IPv6 address (without brackets, lower-case) is unspecified, loopback, unique-local, link-local, or maps to a blocked IPv4 address. */
function isBlockedIPv6(addr: string): boolean {
  if (addr === '::' || addr === '::1') return true;

  // IPv4-mapped addresses: the URL parser serialises them in hex
  // (::ffff:7f00:1), but accept the dotted form as well.
  const mappedHex = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(addr);
  if (mappedHex) {
    const high = parseInt(mappedHex[1] ?? '0', 16);
    return isBlockedIPv4(high >> 8, high & 0xff);
  }
  const mappedDotted = /^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/.exec(addr);
  if (mappedDotted) {
    const octets = parseIPv4(mappedDotted[1] ?? '');
    return octets === null || isBlockedIPv4(octets[0], octets[1]);
  }

  // A leading "::" means the first group is zero.
  const firstGroup = parseInt(addr.split(':')[0] || '0', 16);
  return (
    (firstGroup & 0xfe00) === 0xfc00 || // fc00::/7 (unique local)
    (firstGroup & 0xffc0) === 0xfe80 // fe80::/10 (link-local)
  );
}

/** Returns true when `target` uses http(s) and its host is not a loopback/private/link-local literal or a localhost name. */
function isSafeFetchTarget(target: URL): boolean {
  if (target.protocol !== 'http:' && target.protocol !== 'https:') return false;

  let host = target.hostname.toLowerCase();
  // URL.hostname keeps the square brackets around IPv6 literals.
  if (host.startsWith('[') && host.endsWith(']')) host = host.slice(1, -1);
  // A trailing dot denotes the same (fully-qualified) name.
  if (host.endsWith('.')) host = host.slice(0, -1);

  if (host === '' || host === 'localhost' || host.endsWith('.localhost')) return false;

  const ipv4 = parseIPv4(host);
  if (ipv4) return !isBlockedIPv4(ipv4[0], ipv4[1]);

  // Only IP literals are range-checked; ordinary domain names (including
  // ones that merely start with "fc"/"fd") are allowed.
  if (host.includes(':')) return !isBlockedIPv6(host);

  return true;
}

/**
 * Fetches a web page and extracts link-preview metadata from its HTML.
 *
 * The title is taken from `og:title`, the `<title>` element, `twitter:title`
 * or finally the input URL; description, image and site name are read from
 * the corresponding Open Graph / Twitter / standard meta tags.
 *
 * SSRF protection: only http(s) URLs are fetched, and requests to localhost or
 * to loopback / private / link-local IP literals are refused. Redirects are
 * followed manually (at most `MAX_REDIRECTS` hops) so every hop is validated.
 * NOTE(review): hostnames are not DNS-resolved here, so a public name that
 * resolves to an internal address is not detected by this check.
 *
 * @param url - Page address; `https://` is prepended when no http(s) scheme is present.
 * @returns The extracted metadata (title capped at 100 characters, description
 *   at 200), or `null` when the URL is refused, the response is not OK, or any
 *   error occurs (errors are logged, never thrown).
 */
export async function fetchLinkMetadata(url: string): Promise<LinkMetadata | null> {
  try {
    // Add protocol if missing (URL schemes are case-insensitive)
    const targetUrl = /^https?:\/\//i.test(url) ? url : 'https://' + url;

    let currentUrl = new URL(targetUrl);
    if (!isSafeFetchTarget(currentUrl)) return null;

    let response: Response | undefined;
    for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
      const hopResponse = await fetch(currentUrl.href, {
        headers: {
          // Use a real browser User-Agent to avoid 403 Forbidden from strict sites
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
          'Accept-Language': 'en-US,en;q=0.5'
        },
        // Redirects are handled below so each destination can be validated.
        redirect: 'manual',
        next: { revalidate: 3600 } // Cache for 1 hour
      });

      const isRedirect = hopResponse.status >= 300 && hopResponse.status < 400;
      const location = isRedirect ? hopResponse.headers.get('location') : null;
      if (!location) {
        response = hopResponse;
        break;
      }

      // Location may be relative; resolve it against the current hop.
      currentUrl = new URL(location, currentUrl);
      if (!isSafeFetchTarget(currentUrl)) return null;
    }

    // `response` stays undefined when the redirect limit was exceeded.
    if (!response || !response.ok) {
      return null;
    }

    const html = await response.text();
    const $ = cheerio.load(html);

    const getMeta = (prop: string) =>
      $(`meta[property="${prop}"]`).attr('content') ||
      $(`meta[name="${prop}"]`).attr('content');

    // Robust extraction with fallbacks. `||` is intentional: empty strings
    // must fall through to the next candidate.
    const title = getMeta('og:title') || $('title').first().text().trim() || getMeta('twitter:title') || url;
    const description = getMeta('og:description') || getMeta('description') || getMeta('twitter:description') || '';
    const imageUrl = getMeta('og:image') || getMeta('twitter:image') || $('link[rel="image_src"]').attr('href');
    const siteName = getMeta('og:site_name') || '';

    return {
      url: targetUrl,
      title: title.substring(0, 100),
      description: description.substring(0, 200),
      imageUrl,
      siteName
    };
  } catch (error) {
    console.error(`[Scrape] Error fetching ${url}:`, error);
    return null;
  }
}
|