'use server';

import * as cheerio from 'cheerio';

/** Link-preview metadata scraped from a page's HTML. */
export interface LinkMetadata {
  /** The URL that was fetched (`https://` is prepended when no protocol was given). */
  url: string;
  /** Page title, truncated to 100 characters. */
  title?: string;
  /** Page description, truncated to 200 characters; empty when none was found. */
  description?: string;
  /** Preview image URL exactly as the page declares it (it may be relative). */
  imageUrl?: string;
  /** Value of the `og:site_name` meta tag; empty when absent. */
  siteName?: string;
}

/**
 * Returns true when the first two octets of an IPv4 address fall in a range
 * that must never be fetched: "this network" (0/8), private (10/8, 172.16/12,
 * 192.168/16), loopback (127/8) or link-local (169.254/16, which includes the
 * 169.254.169.254 cloud metadata endpoint).
 */
function isInternalIPv4(first: number, second: number): boolean {
  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168)
  );
}

/**
 * Decides whether a hostname taken from `URL.hostname` points at a local or
 * private destination.
 *
 * Relies on the URL parser's normalisation: IPv4 hosts are serialised as
 * dotted decimal, and IPv6 hosts are wrapped in square brackets and written in
 * lower-case compressed form.
 */
function isBlockedHostname(rawHostname: string): boolean {
  // A trailing dot denotes the same host ("localhost." === "localhost").
  const hostname = rawHostname.toLowerCase().replace(/\.$/, '');

  if (hostname === 'localhost' || hostname.endsWith('.localhost')) {
    return true;
  }

  // Only a complete dotted quad is treated as an IP address, so domain names
  // such as "10.com" are not rejected by accident.
  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(hostname);
  if (quad) {
    return isInternalIPv4(Number(quad[1]), Number(quad[2]));
  }

  // IPv6 literals arrive bracketed, e.g. "[::1]"; anything else is a domain
  // name (so "fda.gov" or "fcbarcelona.com" must not match the fc/fd check).
  if (hostname.startsWith('[') && hostname.endsWith(']')) {
    const address = hostname.slice(1, -1);

    // Unspecified address and loopback.
    if (address === '::' || address === '::1') {
      return true;
    }
    // Unique local addresses (fc00::/7) and link-local addresses (fe80::/10).
    if (/^f[cd][0-9a-f]{2}:/.test(address) || /^fe[89ab][0-9a-f]:/.test(address)) {
      return true;
    }
    // IPv4-mapped addresses are serialised in hex, e.g. "::ffff:7f00:1" for
    // 127.0.0.1; decode the leading 16 bits back into the first two octets.
    const mapped = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(address);
    if (mapped) {
      const [, highHex = ''] = mapped;
      const high = parseInt(highHex, 16);
      return isInternalIPv4(high >> 8, high & 0xff);
    }
  }

  return false;
}

/**
 * Fetches a page and extracts link-preview metadata (title, description,
 * image, site name) from its Open Graph, Twitter and standard meta tags.
 *
 * @param url - Address to scrape. `https://` is prepended when the value does
 *   not already start with `http://` or `https://` (case-insensitive).
 * @returns The extracted metadata, or `null` when the host is blocked, the
 *   response status is not OK, or any error occurs (errors are logged, never
 *   thrown).
 */
export async function fetchLinkMetadata(url: string): Promise<LinkMetadata | null> {
  try {
    // Add protocol if missing.
    const targetUrl = /^https?:\/\//i.test(url) ? url : 'https://' + url;

    // SSRF protection: only plain web URLs to non-internal hosts are fetched.
    // NOTE(review): this validates the literal hostname only. Hosts that
    // resolve to private addresses via DNS, and redirects to internal hosts,
    // are not covered here.
    const parsed = new URL(targetUrl);
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return null;
    }
    if (isBlockedHostname(parsed.hostname)) {
      return null;
    }

    const response = await fetch(targetUrl, {
      headers: {
        // Use a real browser User-Agent to avoid 403 Forbidden from strict sites
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5'
      },
      next: { revalidate: 3600 } // Cache for 1 hour
    });

    if (!response.ok) {
      return null;
    }

    const html = await response.text();
    const $ = cheerio.load(html);

    // Looks a tag up by `property` first (Open Graph) and then by `name`.
    const getMeta = (prop: string) =>
      $(`meta[property="${prop}"]`).attr('content') ||
      $(`meta[name="${prop}"]`).attr('content');

    // Robust extraction with fallbacks. `||` is deliberate: an empty
    // `content` attribute should fall through to the next source.
    const title =
      getMeta('og:title') || $('title').text() || getMeta('twitter:title') || url;
    const description =
      getMeta('og:description') ||
      getMeta('description') ||
      getMeta('twitter:description') ||
      '';
    const imageUrl =
      getMeta('og:image') ||
      getMeta('twitter:image') ||
      $('link[rel="image_src"]').attr('href');
    const siteName = getMeta('og:site_name') || '';

    return {
      url: targetUrl,
      title: title.substring(0, 100),
      description: description.substring(0, 200),
      imageUrl,
      siteName
    };
  } catch (error) {
    console.error(`[Scrape] Error fetching ${url}:`, error);
    return null;
  }
}