'use server';

import * as cheerio from 'cheerio';

/** Link-preview metadata scraped from a web page. */
export interface LinkMetadata {
  /** The URL that was requested (with `https://` prepended when the input had no protocol). */
  url: string;
  /** Page title, truncated to at most 100 UTF-16 code units. */
  title?: string;
  /** Page description, truncated to at most 200 UTF-16 code units; `''` when none was found. */
  description?: string;
  /** Preview image URL, resolved to an absolute URL when possible. */
  imageUrl?: string;
  /** Value of `og:site_name`; `''` when the page does not declare one. */
  siteName?: string;
}

const MAX_TITLE_LENGTH = 100;
const MAX_DESCRIPTION_LENGTH = 200;

/** Trims the input and prepends `https://` unless it already starts with an http(s) scheme. */
function withProtocol(rawUrl: string): string {
  const trimmed = rawUrl.trim();
  return /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
}

/** Cuts `text` to at most `maxLength` code units without splitting a surrogate pair. */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) {
    return text;
  }
  const lastKept = text.charCodeAt(maxLength - 1);
  // A high surrogate at the cut point would be left dangling; drop it too.
  const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return text.slice(0, isHighSurrogate ? maxLength - 1 : maxLength);
}

/** Resolves a possibly relative URL against `base`; returns the input unchanged if it cannot be parsed. */
function toAbsoluteUrl(candidate: string | undefined, base: string): string | undefined {
  if (!candidate) {
    return undefined;
  }
  try {
    return new URL(candidate, base).href;
  } catch {
    return candidate;
  }
}

/**
 * Extracts preview metadata from an HTML document.
 *
 * @param html - Raw HTML of the page.
 * @param requestedUrl - URL reported back in the result.
 * @param baseUrl - URL used to resolve relative image links.
 * @param fallbackTitle - Title used when the page declares none.
 */
function parseMetadata(
  html: string,
  requestedUrl: string,
  baseUrl: string,
  fallbackTitle: string,
): LinkMetadata {
  const $ = cheerio.load(html);

  // Looks a tag up by `property` first (Open Graph) and then by `name`.
  // `||` (not `??`) is deliberate throughout: an empty value should fall
  // through to the next candidate.
  const getMeta = (prop: string): string | undefined =>
    $(`meta[property="${prop}"]`).attr('content')?.trim() ||
    $(`meta[name="${prop}"]`).attr('content')?.trim() ||
    undefined;

  // `.first()` avoids concatenating the text of every <title> element in the
  // document (inline SVGs commonly carry their own <title>).
  const title =
    getMeta('og:title') ||
    $('title').first().text().trim() ||
    getMeta('twitter:title') ||
    fallbackTitle;

  const description =
    getMeta('og:description') ||
    getMeta('description') ||
    getMeta('twitter:description') ||
    '';

  const rawImageUrl =
    getMeta('og:image') ||
    getMeta('twitter:image') ||
    $('link[rel="image_src"]').attr('href')?.trim();

  const siteName = getMeta('og:site_name') || '';

  return {
    url: requestedUrl,
    title: truncate(title, MAX_TITLE_LENGTH),
    description: truncate(description, MAX_DESCRIPTION_LENGTH),
    // Image links are frequently relative or protocol-relative.
    imageUrl: toAbsoluteUrl(rawImageUrl, baseUrl),
    siteName,
  };
}

/**
 * Fetches a page and scrapes its link-preview metadata (Open Graph, Twitter
 * card and plain HTML tags, in that order of preference per field).
 *
 * @param url - Page address; `https://` is prepended when no http(s) protocol is present.
 * @returns The extracted metadata, or `null` when the input is empty, the
 *   response status is not OK, or the request/parsing throws.
 */
export async function fetchLinkMetadata(url: string): Promise<LinkMetadata | null> {
  try {
    if (!url.trim()) {
      return null;
    }

    // Add protocol if missing
    const targetUrl = withProtocol(url);

    // NOTE(review): this server action fetches a caller-supplied URL from the
    // server, which is a potential SSRF vector (internal hosts, cloud metadata
    // endpoints). Only http(s) URLs can reach this point, but hosts are not
    // filtered — confirm whether callers are trusted or add an allow/deny list.
    const response = await fetch(targetUrl, {
      headers: {
        // Use a real browser User-Agent to avoid 403 Forbidden from strict sites
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5'
      },
      next: { revalidate: 3600 } // Cache for 1 hour
    });

    if (!response.ok) {
      return null;
    }

    const html = await response.text();

    // Prefer the final (post-redirect) URL as the base for relative links.
    return parseMetadata(html, targetUrl, response.url || targetUrl, url);
  } catch (error) {
    console.error(`[Scrape] Error fetching ${url}:`, error);
    return null;
  }
}