refactor(ux): consolidate BMAD skills, update design system, and clean up Prisma generated client
This commit is contained in:
167
keep-notes/lib/ai/tools/extract-images.ts
Normal file
167
keep-notes/lib/ai/tools/extract-images.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
/**
|
||||
* Image Extraction Utility
|
||||
* Extracts image URLs from web pages using Cheerio.
|
||||
* Downloads and saves images locally for agent note attachment.
|
||||
*/
|
||||
|
||||
import * as cheerio from 'cheerio'
|
||||
import { promises as fs } from 'fs'
|
||||
import path from 'path'
|
||||
import { randomUUID } from 'crypto'
|
||||
import sharp from 'sharp'
|
||||
|
||||
// Filesystem directory (relative to process.cwd()) where downloaded images are written.
const UPLOADS_DIR = 'public/uploads/notes'
// Public URL prefix corresponding to UPLOADS_DIR; returned paths are served from here.
const URL_PREFIX = '/uploads/notes'
// Hard cap on how many images are extracted/downloaded for a single page.
const MAX_IMAGES_PER_PAGE = 3
const MIN_IMAGE_SIZE = 200 // px -- skip icons, spacers, tracking pixels
const MAX_IMAGE_WIDTH = 600 // px -- resize for note-friendly display
|
||||
|
||||
/**
 * An image discovered on a web page.
 *
 * NOTE(review): `localPath` is declared but never populated by the functions
 * in this file (they return plain string URLs) — confirm callers elsewhere
 * actually construct this shape.
 */
export interface ExtractedImage {
  // Absolute URL of the image as found on the page.
  url: string
  // Local path of the downloaded copy, if it was saved.
  localPath?: string
}
|
||||
|
||||
/**
|
||||
* Extract image URLs from an HTML page.
|
||||
* Prioritizes og:image, then article images with size filtering.
|
||||
*/
|
||||
export function extractImageUrlsFromHtml(html: string, pageUrl: string): string[] {
|
||||
const $ = cheerio.load(html)
|
||||
const images: string[] = []
|
||||
const seen = new Set<string>()
|
||||
|
||||
// 1. Open Graph image
|
||||
const ogImage = $('meta[property="og:image"]').attr('content')
|
||||
if (ogImage) {
|
||||
const resolved = resolveUrl(ogImage, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Twitter card image
|
||||
const twitterImage = $('meta[name="twitter:image"]').attr('content')
|
||||
if (twitterImage) {
|
||||
const resolved = resolveUrl(twitterImage, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Article body images (filter by size and relevance)
|
||||
$('article img, main img, .content img, .post-content img, .entry-content img, .article-body img').each((_, el) => {
|
||||
if (images.length >= MAX_IMAGES_PER_PAGE) return false
|
||||
const src = $(el).attr('src') || $(el).attr('data-src')
|
||||
if (!src) return
|
||||
const width = parseInt($(el).attr('width') || '0', 10)
|
||||
const height = parseInt($(el).attr('height') || '0', 10)
|
||||
// Skip if explicitly sized too small
|
||||
if ((width > 0 && width < MIN_IMAGE_SIZE) || (height > 0 && height < MIN_IMAGE_SIZE)) return
|
||||
// Skip common non-content patterns
|
||||
if (src.includes('avatar') || src.includes('icon') || src.includes('logo') || src.includes('badge') || src.includes('spinner')) return
|
||||
const resolved = resolveUrl(src, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
})
|
||||
|
||||
// 4. Fallback: any large images in the page if we still have room
|
||||
if (images.length < MAX_IMAGES_PER_PAGE) {
|
||||
$('img').each((_, el) => {
|
||||
if (images.length >= MAX_IMAGES_PER_PAGE) return false
|
||||
const src = $(el).attr('src') || $(el).attr('data-src')
|
||||
if (!src) return
|
||||
const width = parseInt($(el).attr('width') || '0', 10)
|
||||
const height = parseInt($(el).attr('height') || '0', 10)
|
||||
if ((width > 0 && width < MIN_IMAGE_SIZE) || (height > 0 && height < MIN_IMAGE_SIZE)) return
|
||||
if (src.includes('avatar') || src.includes('icon') || src.includes('logo') || src.includes('badge') || src.includes('spinner') || src.includes('pixel') || src.includes('tracking')) return
|
||||
const resolved = resolveUrl(src, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return images.slice(0, MAX_IMAGES_PER_PAGE)
|
||||
}
|
||||
|
||||
/**
|
||||
* Download an image and save it locally.
|
||||
*/
|
||||
export async function downloadImage(imageUrl: string): Promise<string | null> {
|
||||
try {
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 10000)
|
||||
|
||||
const response = await fetch(imageUrl, {
|
||||
signal: controller.signal,
|
||||
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; KeepBot/1.0)' },
|
||||
})
|
||||
clearTimeout(timeout)
|
||||
|
||||
if (!response.ok) return null
|
||||
|
||||
const contentType = response.headers.get('content-type') || ''
|
||||
if (!contentType.startsWith('image/')) return null
|
||||
|
||||
const buffer = Buffer.from(await response.arrayBuffer())
|
||||
if (buffer.length < 1024) return null // Skip tiny files
|
||||
|
||||
const ext = contentType.split('/')[1]?.replace('jpeg', 'jpg') || 'jpg'
|
||||
const filename = `${randomUUID()}.${ext}`
|
||||
|
||||
await fs.mkdir(path.join(process.cwd(), UPLOADS_DIR), { recursive: true })
|
||||
|
||||
// Resize to max width for note-friendly display
|
||||
try {
|
||||
await sharp(buffer)
|
||||
.resize(MAX_IMAGE_WIDTH, null, { withoutEnlargement: true })
|
||||
.jpeg({ quality: 80 })
|
||||
.toFile(path.join(process.cwd(), UPLOADS_DIR, filename.replace(/\.\w+$/, '.jpg')))
|
||||
} catch {
|
||||
// Sharp failed (e.g. SVG, WebP unsupported) — save raw buffer
|
||||
await fs.writeFile(path.join(process.cwd(), UPLOADS_DIR, filename), buffer)
|
||||
}
|
||||
|
||||
// Always reference as .jpg since sharp converts to jpeg
|
||||
return `${URL_PREFIX}/${filename.replace(/\.\w+$/, '.jpg')}`
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract and download images from a web page.
|
||||
* Returns local URLs for successfully downloaded images.
|
||||
*/
|
||||
export async function extractAndDownloadImages(html: string, pageUrl: string): Promise<string[]> {
|
||||
const imageUrls = extractImageUrlsFromHtml(html, pageUrl)
|
||||
const localUrls: string[] = []
|
||||
|
||||
for (const url of imageUrls) {
|
||||
const localPath = await downloadImage(url)
|
||||
if (localPath) {
|
||||
localUrls.push(localPath)
|
||||
}
|
||||
}
|
||||
|
||||
return localUrls
|
||||
}
|
||||
|
||||
function resolveUrl(src: string, pageUrl: string): string | null {
|
||||
try {
|
||||
if (src.startsWith('//')) return `https:${src}`
|
||||
if (src.startsWith('http://') || src.startsWith('https://')) return src
|
||||
if (src.startsWith('/') || src.startsWith('./')) {
|
||||
const base = new URL(pageUrl)
|
||||
return new URL(src, base.origin).href
|
||||
}
|
||||
return new URL(src, pageUrl).href
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user