refactor(ux): consolidate BMAD skills, update design system, and clean up Prisma generated client
This commit is contained in:
167
keep-notes/lib/ai/tools/extract-images.ts
Normal file
167
keep-notes/lib/ai/tools/extract-images.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
/**
 * Image Extraction Utility
 * Extracts image URLs from web pages using Cheerio.
 * Downloads and saves images locally for agent note attachment.
 */

import * as cheerio from 'cheerio'
import { promises as fs } from 'fs'
import path from 'path'
import { randomUUID } from 'crypto'
import sharp from 'sharp'

// Filesystem directory (relative to process.cwd()) where downloaded images are written.
const UPLOADS_DIR = 'public/uploads/notes'
// Public URL prefix under which UPLOADS_DIR is served — presumably via Next.js `public/`; verify against server config.
const URL_PREFIX = '/uploads/notes'
// Hard cap on how many images are extracted/downloaded per page.
const MAX_IMAGES_PER_PAGE = 3
const MIN_IMAGE_SIZE = 200 // px -- skip icons, spacers, tracking pixels
const MAX_IMAGE_WIDTH = 600 // px -- resize for note-friendly display

// One candidate image: its remote URL and, once downloaded, its local URL path.
export interface ExtractedImage {
  url: string
  localPath?: string
}
|
||||
|
||||
/**
|
||||
* Extract image URLs from an HTML page.
|
||||
* Prioritizes og:image, then article images with size filtering.
|
||||
*/
|
||||
export function extractImageUrlsFromHtml(html: string, pageUrl: string): string[] {
|
||||
const $ = cheerio.load(html)
|
||||
const images: string[] = []
|
||||
const seen = new Set<string>()
|
||||
|
||||
// 1. Open Graph image
|
||||
const ogImage = $('meta[property="og:image"]').attr('content')
|
||||
if (ogImage) {
|
||||
const resolved = resolveUrl(ogImage, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Twitter card image
|
||||
const twitterImage = $('meta[name="twitter:image"]').attr('content')
|
||||
if (twitterImage) {
|
||||
const resolved = resolveUrl(twitterImage, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Article body images (filter by size and relevance)
|
||||
$('article img, main img, .content img, .post-content img, .entry-content img, .article-body img').each((_, el) => {
|
||||
if (images.length >= MAX_IMAGES_PER_PAGE) return false
|
||||
const src = $(el).attr('src') || $(el).attr('data-src')
|
||||
if (!src) return
|
||||
const width = parseInt($(el).attr('width') || '0', 10)
|
||||
const height = parseInt($(el).attr('height') || '0', 10)
|
||||
// Skip if explicitly sized too small
|
||||
if ((width > 0 && width < MIN_IMAGE_SIZE) || (height > 0 && height < MIN_IMAGE_SIZE)) return
|
||||
// Skip common non-content patterns
|
||||
if (src.includes('avatar') || src.includes('icon') || src.includes('logo') || src.includes('badge') || src.includes('spinner')) return
|
||||
const resolved = resolveUrl(src, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
})
|
||||
|
||||
// 4. Fallback: any large images in the page if we still have room
|
||||
if (images.length < MAX_IMAGES_PER_PAGE) {
|
||||
$('img').each((_, el) => {
|
||||
if (images.length >= MAX_IMAGES_PER_PAGE) return false
|
||||
const src = $(el).attr('src') || $(el).attr('data-src')
|
||||
if (!src) return
|
||||
const width = parseInt($(el).attr('width') || '0', 10)
|
||||
const height = parseInt($(el).attr('height') || '0', 10)
|
||||
if ((width > 0 && width < MIN_IMAGE_SIZE) || (height > 0 && height < MIN_IMAGE_SIZE)) return
|
||||
if (src.includes('avatar') || src.includes('icon') || src.includes('logo') || src.includes('badge') || src.includes('spinner') || src.includes('pixel') || src.includes('tracking')) return
|
||||
const resolved = resolveUrl(src, pageUrl)
|
||||
if (resolved && !seen.has(resolved)) {
|
||||
images.push(resolved)
|
||||
seen.add(resolved)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return images.slice(0, MAX_IMAGES_PER_PAGE)
|
||||
}
|
||||
|
||||
/**
|
||||
* Download an image and save it locally.
|
||||
*/
|
||||
export async function downloadImage(imageUrl: string): Promise<string | null> {
|
||||
try {
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort(), 10000)
|
||||
|
||||
const response = await fetch(imageUrl, {
|
||||
signal: controller.signal,
|
||||
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; KeepBot/1.0)' },
|
||||
})
|
||||
clearTimeout(timeout)
|
||||
|
||||
if (!response.ok) return null
|
||||
|
||||
const contentType = response.headers.get('content-type') || ''
|
||||
if (!contentType.startsWith('image/')) return null
|
||||
|
||||
const buffer = Buffer.from(await response.arrayBuffer())
|
||||
if (buffer.length < 1024) return null // Skip tiny files
|
||||
|
||||
const ext = contentType.split('/')[1]?.replace('jpeg', 'jpg') || 'jpg'
|
||||
const filename = `${randomUUID()}.${ext}`
|
||||
|
||||
await fs.mkdir(path.join(process.cwd(), UPLOADS_DIR), { recursive: true })
|
||||
|
||||
// Resize to max width for note-friendly display
|
||||
try {
|
||||
await sharp(buffer)
|
||||
.resize(MAX_IMAGE_WIDTH, null, { withoutEnlargement: true })
|
||||
.jpeg({ quality: 80 })
|
||||
.toFile(path.join(process.cwd(), UPLOADS_DIR, filename.replace(/\.\w+$/, '.jpg')))
|
||||
} catch {
|
||||
// Sharp failed (e.g. SVG, WebP unsupported) — save raw buffer
|
||||
await fs.writeFile(path.join(process.cwd(), UPLOADS_DIR, filename), buffer)
|
||||
}
|
||||
|
||||
// Always reference as .jpg since sharp converts to jpeg
|
||||
return `${URL_PREFIX}/${filename.replace(/\.\w+$/, '.jpg')}`
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract and download images from a web page.
|
||||
* Returns local URLs for successfully downloaded images.
|
||||
*/
|
||||
export async function extractAndDownloadImages(html: string, pageUrl: string): Promise<string[]> {
|
||||
const imageUrls = extractImageUrlsFromHtml(html, pageUrl)
|
||||
const localUrls: string[] = []
|
||||
|
||||
for (const url of imageUrls) {
|
||||
const localPath = await downloadImage(url)
|
||||
if (localPath) {
|
||||
localUrls.push(localPath)
|
||||
}
|
||||
}
|
||||
|
||||
return localUrls
|
||||
}
|
||||
|
||||
function resolveUrl(src: string, pageUrl: string): string | null {
|
||||
try {
|
||||
if (src.startsWith('//')) return `https:${src}`
|
||||
if (src.startsWith('http://') || src.startsWith('https://')) return src
|
||||
if (src.startsWith('/') || src.startsWith('./')) {
|
||||
const base = new URL(pageUrl)
|
||||
return new URL(src, base.origin).href
|
||||
}
|
||||
return new URL(src, pageUrl).href
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
15
keep-notes/lib/ai/tools/index.ts
Normal file
15
keep-notes/lib/ai/tools/index.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* Tools Index
|
||||
* Side-effect imports register all tools into the registry.
|
||||
*/
|
||||
|
||||
// Import all tools (side-effect registration)
|
||||
import './web-search.tool'
|
||||
import './note-search.tool'
|
||||
import './note-crud.tool'
|
||||
import './web-scrape.tool'
|
||||
import './url-fetch.tool'
|
||||
import './memory.tool'
|
||||
|
||||
// Re-export registry
|
||||
export { toolRegistry, type ToolContext, type RegisteredTool } from './registry'
|
||||
62
keep-notes/lib/ai/tools/memory.tool.ts
Normal file
62
keep-notes/lib/ai/tools/memory.tool.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Memory Search Tool
|
||||
* Searches past AgentActions (logs, toolLogs, inputs) for context.
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
import { prisma } from '@/lib/prisma'
|
||||
|
||||
// memory_search: keyword-scored lookup over this agent's past successful runs.
// Fetches recent AgentActions from Prisma, then scores them in memory by how
// many query keywords appear in their log/input/toolLog text.
toolRegistry.register({
  name: 'memory_search',
  description: 'Search past agent execution history for relevant information. Looks through previous logs, tool traces, and inputs.',
  isInternal: true,
  buildTool: (ctx) =>
    tool({
      description: 'Search past agent executions for context. Searches through logs and tool traces from previous runs.',
      inputSchema: z.object({
        query: z.string().describe('What to search for in past executions'),
        limit: z.number().optional().describe('Max results (default 5)').default(5),
      }),
      execute: async ({ query, limit = 5 }) => {
        try {
          // Get past actions for this agent; over-fetch (limit * 2) so keyword
          // filtering still has enough candidates after low-scoring rows drop out.
          const actions = await prisma.agentAction.findMany({
            where: {
              agentId: ctx.agentId,
              status: 'success',
            },
            orderBy: { createdAt: 'desc' },
            take: limit * 2,
            select: { id: true, log: true, input: true, toolLog: true, createdAt: true },
          })

          // Keywords shorter than 3 chars are dropped (stop-word-ish noise).
          // NOTE(review): a query made only of short words yields zero keywords,
          // so every score is 0 and the "no match" path is taken.
          const keywords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2)

          // Score = number of distinct keywords present in the action's combined text.
          const results = actions
            .map(a => {
              const searchable = `${a.log || ''} ${a.input || ''} ${a.toolLog || ''}`.toLowerCase()
              const score = keywords.reduce((acc, kw) => acc + (searchable.includes(kw) ? 1 : 0), 0)
              return { ...a, score }
            })
            .filter(r => r.score > 0)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit)

          if (results.length === 0) {
            return { message: 'No matching past executions found.', query }
          }

          // Truncate long fields so tool output stays model-friendly.
          // NOTE(review): assumes r.input is a string (substring call) — confirm
          // against the AgentAction schema.
          return results.map(r => ({
            actionId: r.id,
            date: r.createdAt.toISOString(),
            log: (r.log || '').substring(0, 800),
            input: r.input ? (r.input).substring(0, 500) : null,
          }))
        } catch (e: any) {
          return { error: `Memory search failed: ${e.message}` }
        }
      },
    }),
})
|
||||
104
keep-notes/lib/ai/tools/note-crud.tool.ts
Normal file
104
keep-notes/lib/ai/tools/note-crud.tool.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Note CRUD Tools
|
||||
* note_create, note_read, note_update
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
import { prisma } from '@/lib/prisma'
|
||||
|
||||
// --- note_read ---
|
||||
toolRegistry.register({
|
||||
name: 'note_read',
|
||||
description: 'Read a specific note by its ID. Returns the full note content.',
|
||||
isInternal: true,
|
||||
buildTool: (ctx) =>
|
||||
tool({
|
||||
description: 'Read a specific note by ID. Returns the full content.',
|
||||
inputSchema: z.object({
|
||||
noteId: z.string().describe('The ID of the note to read'),
|
||||
}),
|
||||
execute: async ({ noteId }) => {
|
||||
try {
|
||||
const note = await prisma.note.findFirst({
|
||||
where: { id: noteId, userId: ctx.userId },
|
||||
select: { id: true, title: true, content: true, isMarkdown: true, createdAt: true, updatedAt: true },
|
||||
})
|
||||
if (!note) return { error: 'Note not found' }
|
||||
return note
|
||||
} catch (e: any) {
|
||||
return { error: `Read note failed: ${e.message}` }
|
||||
}
|
||||
},
|
||||
}),
|
||||
})
|
||||
|
||||
// --- note_create ---
|
||||
toolRegistry.register({
|
||||
name: 'note_create',
|
||||
description: 'Create a new note with a title and content.',
|
||||
isInternal: true,
|
||||
buildTool: (ctx) =>
|
||||
tool({
|
||||
description: 'Create a new note.',
|
||||
inputSchema: z.object({
|
||||
title: z.string().describe('Title for the note'),
|
||||
content: z.string().describe('Content of the note (markdown supported)'),
|
||||
notebookId: z.string().optional().describe('Optional notebook ID to place the note in'),
|
||||
images: z.array(z.string()).optional().describe('Optional array of local image URL paths to attach to the note (e.g. ["/uploads/notes/abc.jpg"])'),
|
||||
}),
|
||||
execute: async ({ title, content, notebookId, images }) => {
|
||||
try {
|
||||
const note = await prisma.note.create({
|
||||
data: {
|
||||
title,
|
||||
content,
|
||||
isMarkdown: true,
|
||||
autoGenerated: true,
|
||||
userId: ctx.userId,
|
||||
notebookId: notebookId || null,
|
||||
images: images && images.length > 0 ? JSON.stringify(images) : null,
|
||||
},
|
||||
select: { id: true, title: true },
|
||||
})
|
||||
return { success: true, noteId: note.id, title: note.title }
|
||||
} catch (e: any) {
|
||||
return { error: `Create note failed: ${e.message}` }
|
||||
}
|
||||
},
|
||||
}),
|
||||
})
|
||||
|
||||
// --- note_update ---
|
||||
toolRegistry.register({
|
||||
name: 'note_update',
|
||||
description: 'Update an existing note\'s content.',
|
||||
isInternal: true,
|
||||
buildTool: (ctx) =>
|
||||
tool({
|
||||
description: 'Update an existing note.',
|
||||
inputSchema: z.object({
|
||||
noteId: z.string().describe('The ID of the note to update'),
|
||||
title: z.string().optional().describe('New title (optional)'),
|
||||
content: z.string().optional().describe('New content (optional)'),
|
||||
}),
|
||||
execute: async ({ noteId, title, content }) => {
|
||||
try {
|
||||
const existing = await prisma.note.findFirst({
|
||||
where: { id: noteId, userId: ctx.userId },
|
||||
})
|
||||
if (!existing) return { error: 'Note not found' }
|
||||
|
||||
const data: Record<string, any> = {}
|
||||
if (title !== undefined) data.title = title
|
||||
if (content !== undefined) data.content = content
|
||||
|
||||
await prisma.note.update({ where: { id: noteId }, data })
|
||||
return { success: true, noteId }
|
||||
} catch (e: any) {
|
||||
return { error: `Update note failed: ${e.message}` }
|
||||
}
|
||||
},
|
||||
}),
|
||||
})
|
||||
54
keep-notes/lib/ai/tools/note-search.tool.ts
Normal file
54
keep-notes/lib/ai/tools/note-search.tool.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Note Search Tool
|
||||
 * Keyword-based Prisma search over the user's notes (fallback while semantic search is unavailable).
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
import { prisma } from '@/lib/prisma'
|
||||
|
||||
// note_search: keyword search over the user's active (non-archived, non-trashed)
// notes. Despite the tool description mentioning semantic meaning, the current
// implementation is a Prisma `contains` keyword match.
toolRegistry.register({
  name: 'note_search',
  description: 'Search the user\'s notes using semantic search. Returns matching notes with titles and content excerpts.',
  isInternal: true,
  buildTool: (ctx) =>
    tool({
      description: 'Search the user\'s notes by keyword or semantic meaning. Returns matching notes with titles and content excerpts.',
      inputSchema: z.object({
        query: z.string().describe('The search query'),
        limit: z.number().optional().describe('Max results to return (default 5)').default(5),
      }),
      execute: async ({ query, limit = 5 }) => {
        try {
          // Keyword fallback search using Prisma: each keyword (>2 chars) may
          // match either the title or the content.
          const keywords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2)
          const conditions = keywords.flatMap(term => [
            { title: { contains: term } },
            { content: { contains: term } }
          ])

          // With no usable keywords the OR clause is omitted, so the newest
          // notes are returned unfiltered.
          const notes = await prisma.note.findMany({
            where: {
              userId: ctx.userId,
              ...(conditions.length > 0 ? { OR: conditions } : {}),
              isArchived: false,
              trashedAt: null,
            },
            select: { id: true, title: true, content: true, createdAt: true },
            take: limit,
            orderBy: { createdAt: 'desc' },
          })

          // Return compact excerpts so tool output stays model-friendly.
          return notes.map(n => ({
            id: n.id,
            title: n.title || 'Untitled',
            excerpt: n.content.substring(0, 300),
            createdAt: n.createdAt.toISOString(),
          }))
        } catch (e: any) {
          return { error: `Note search failed: ${e.message}` }
        }
      },
    }),
})
|
||||
56
keep-notes/lib/ai/tools/registry.ts
Normal file
56
keep-notes/lib/ai/tools/registry.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Tool Registry
|
||||
* Central registry for all agent tools.
|
||||
* Tools self-register on import via side-effect in index.ts.
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
|
||||
export interface ToolContext {
|
||||
userId: string
|
||||
agentId: string
|
||||
actionId: string
|
||||
config: Record<string, string>
|
||||
}
|
||||
|
||||
export interface RegisteredTool {
|
||||
name: string
|
||||
description: string
|
||||
buildTool: (ctx: ToolContext) => any // Returns an AI SDK tool() synchronously
|
||||
isInternal: boolean // true = no API key needed
|
||||
}
|
||||
|
||||
class ToolRegistry {
|
||||
private tools: Map<string, RegisteredTool> = new Map()
|
||||
|
||||
register(tool: RegisteredTool): void {
|
||||
this.tools.set(tool.name, tool)
|
||||
}
|
||||
|
||||
get(name: string): RegisteredTool | undefined {
|
||||
return this.tools.get(name)
|
||||
}
|
||||
|
||||
buildToolsForAgent(toolNames: string[], ctx: ToolContext): Record<string, any> {
|
||||
const built: Record<string, any> = {}
|
||||
for (const name of toolNames) {
|
||||
const registered = this.tools.get(name)
|
||||
if (registered) {
|
||||
built[name] = registered.buildTool(ctx)
|
||||
}
|
||||
}
|
||||
return built
|
||||
}
|
||||
|
||||
getAvailableTools(): Array<{ name: string; description: string; isInternal: boolean }> {
|
||||
return Array.from(this.tools.values()).map(t => ({
|
||||
name: t.name,
|
||||
description: t.description,
|
||||
isInternal: t.isInternal,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton
|
||||
export const toolRegistry = new ToolRegistry()
|
||||
55
keep-notes/lib/ai/tools/url-fetch.tool.ts
Normal file
55
keep-notes/lib/ai/tools/url-fetch.tool.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* URL Fetch Tool
|
||||
* Fetches a URL and returns parsed content (JSON, CSV, or text).
|
||||
* Max 5MB response.
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
|
||||
const MAX_SIZE = 5 * 1024 * 1024 // 5MB
|
||||
|
||||
toolRegistry.register({
|
||||
name: 'url_fetch',
|
||||
description: 'Fetch a URL and return its content. Supports JSON, CSV, and plain text responses. Max 5MB.',
|
||||
isInternal: true,
|
||||
buildTool: (_ctx) =>
|
||||
tool({
|
||||
description: 'Fetch a URL and return its parsed content. Supports JSON, CSV, and text.',
|
||||
inputSchema: z.object({
|
||||
url: z.string().describe('The URL to fetch'),
|
||||
method: z.enum(['GET', 'POST']).optional().describe('HTTP method (default GET)').default('GET'),
|
||||
}),
|
||||
execute: async ({ url, method = 'GET' }) => {
|
||||
try {
|
||||
const response = await fetch(url, { method })
|
||||
if (!response.ok) return { error: `HTTP ${response.status}: ${response.statusText}` }
|
||||
|
||||
const contentLength = parseInt(response.headers.get('content-length') || '0')
|
||||
if (contentLength > MAX_SIZE) return { error: 'Response too large (max 5MB)' }
|
||||
|
||||
const contentType = response.headers.get('content-type') || ''
|
||||
const text = await response.text()
|
||||
|
||||
if (text.length > MAX_SIZE) return { error: 'Response too large (max 5MB)' }
|
||||
|
||||
if (contentType.includes('application/json')) {
|
||||
try {
|
||||
return { type: 'json', data: JSON.parse(text) }
|
||||
} catch {
|
||||
return { type: 'text', content: text.substring(0, 10000) }
|
||||
}
|
||||
}
|
||||
|
||||
if (contentType.includes('text/csv')) {
|
||||
return { type: 'csv', content: text.substring(0, 10000) }
|
||||
}
|
||||
|
||||
return { type: 'text', content: text.substring(0, 10000) }
|
||||
} catch (e: any) {
|
||||
return { error: `Fetch failed: ${e.message}` }
|
||||
}
|
||||
},
|
||||
}),
|
||||
})
|
||||
88
keep-notes/lib/ai/tools/web-scrape.tool.ts
Normal file
88
keep-notes/lib/ai/tools/web-scrape.tool.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
* Web Scrape Tool
|
||||
* Uses Jina Reader API (r.jina.ai) to scrape a URL into markdown.
|
||||
* Falls back to basic fetch on error.
|
||||
* Supports RSS/Atom feeds: parses the feed and scrapes top articles.
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
import { rssService } from '../services/rss.service'
|
||||
|
||||
const MAX_ARTICLE_CONTENT = 4000
|
||||
const MAX_TOTAL_CONTENT = 15000
|
||||
const MAX_ARTICLES_FROM_FEED = 5
|
||||
|
||||
async function scrapeSingleUrl(url: string, jinaKey?: string): Promise<{ content: string; url: string }> {
|
||||
const headers: Record<string, string> = { 'Accept': 'text/markdown' }
|
||||
if (jinaKey) {
|
||||
headers['Authorization'] = `Bearer ${jinaKey}`
|
||||
}
|
||||
|
||||
const response = await fetch(`https://r.jina.ai/${url}`, { headers })
|
||||
|
||||
if (!response.ok) {
|
||||
const fallback = await fetch(url)
|
||||
if (!fallback.ok) return { content: `Failed to fetch ${url}: ${fallback.status}`, url }
|
||||
const text = await fallback.text()
|
||||
return { content: text.substring(0, 10000), url }
|
||||
}
|
||||
|
||||
const markdown = await response.text()
|
||||
return { content: markdown.substring(0, MAX_TOTAL_CONTENT), url }
|
||||
}
|
||||
|
||||
// web_scrape: markdown scraping of a single page, with special handling for
// RSS/Atom feeds — feeds are expanded into their latest articles, each scraped
// individually and concatenated under a shared character budget.
toolRegistry.register({
  name: 'web_scrape',
  description: 'Scrape a web page and return its content as markdown. Supports RSS/Atom feeds — will automatically parse feeds and scrape individual articles.',
  isInternal: false,
  buildTool: (ctx) =>
    tool({
      description: 'Scrape a web page URL and return its content as clean markdown text. If the URL is an RSS/Atom feed, it will parse the feed and scrape the latest articles automatically.',
      inputSchema: z.object({
        url: z.string().describe('The URL to scrape. Can be a regular web page or an RSS/Atom feed URL.'),
      }),
      execute: async ({ url }) => {
        try {
          // Try RSS feed detection first
          if (rssService.isFeedUrl(url)) {
            const feed = await rssService.parseFeed(url)
            if (feed && feed.articles.length > 0) {
              const jinaKey = ctx.config.JINA_API_KEY
              const articlesToScrape = feed.articles.slice(0, MAX_ARTICLES_FROM_FEED)

              // Scrape articles in parallel; allSettled keeps one failed
              // article from discarding the rest.
              const results = await Promise.allSettled(
                articlesToScrape.map(article => scrapeSingleUrl(article.link, jinaKey))
              )

              const parts: string[] = []
              parts.push(`# ${feed.title}\n_Flux RSS: ${url} — ${feed.articles.length} articles disponibles, ${articlesToScrape.length} scrapés_\n`)

              // Append articles in feed order until MAX_TOTAL_CONTENT is reached;
              // each article is individually capped at MAX_ARTICLE_CONTENT.
              let totalLen = 0
              for (let i = 0; i < results.length; i++) {
                const r = results[i]
                if (r.status === 'fulfilled' && r.value.content) {
                  const article = articlesToScrape[i]
                  const header = `\n---\n\n## ${article.title}\n_Source: ${article.link}_${article.pubDate ? ` — ${new Date(article.pubDate).toISOString().split('T')[0]}` : ''}\n\n`
                  const content = r.value.content.substring(0, MAX_ARTICLE_CONTENT)
                  // Stop (not skip) once the budget would overflow
                  if (totalLen + header.length + content.length > MAX_TOTAL_CONTENT) break
                  parts.push(header + content)
                  totalLen += header.length + content.length
                }
              }

              return { content: parts.join(''), url, feedTitle: feed.title, articlesScraped: articlesToScrape.length }
            }
            // If feed parsing failed, fall through to normal scraping
          }

          // Normal web page scraping
          const result = await scrapeSingleUrl(url, ctx.config.JINA_API_KEY)
          return result
        } catch (e: any) {
          return { error: `Scrape failed: ${e.message}` }
        }
      },
    }),
})
|
||||
65
keep-notes/lib/ai/tools/web-search.tool.ts
Normal file
65
keep-notes/lib/ai/tools/web-search.tool.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Web Search Tool
|
||||
* Uses SearXNG or Brave Search API.
|
||||
*/
|
||||
|
||||
import { tool } from 'ai'
|
||||
import { z } from 'zod'
|
||||
import { toolRegistry } from './registry'
|
||||
|
||||
async function searchSearXNG(query: string, searxngUrl: string): Promise<any> {
|
||||
const url = `${searxngUrl.replace(/\/+$/, '')}/search?q=${encodeURIComponent(query)}&format=json`
|
||||
const response = await fetch(url, { headers: { 'Accept': 'application/json' } })
|
||||
if (!response.ok) throw new Error(`SearXNG error: ${response.status}`)
|
||||
const data = await response.json()
|
||||
return (data.results || []).slice(0, 8).map((r: any) => ({
|
||||
title: r.title,
|
||||
url: r.url,
|
||||
snippet: r.content || '',
|
||||
}))
|
||||
}
|
||||
|
||||
async function searchBrave(query: string, apiKey: string): Promise<any> {
|
||||
const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=8`
|
||||
const response = await fetch(url, {
|
||||
headers: { 'Accept': 'application/json', 'X-Subscription-Token': apiKey }
|
||||
})
|
||||
if (!response.ok) throw new Error(`Brave error: ${response.status}`)
|
||||
const data = await response.json()
|
||||
return (data.web?.results || []).map((r: any) => ({
|
||||
title: r.title,
|
||||
url: r.url,
|
||||
snippet: r.description || '',
|
||||
}))
|
||||
}
|
||||
|
||||
// web_search: provider-selecting search tool. Uses Brave when configured (and
// an API key is present), otherwise falls back to a SearXNG instance.
toolRegistry.register({
  name: 'web_search',
  description: 'Search the web for information. Returns a list of results with titles, URLs and snippets.',
  isInternal: false,
  buildTool: (ctx) =>
    tool({
      description: 'Search the web for information. Returns results with titles, URLs and snippets.',
      inputSchema: z.object({
        query: z.string().describe('The search query'),
      }),
      execute: async ({ query }) => {
        try {
          const provider = ctx.config.WEB_SEARCH_PROVIDER || 'searxng'

          // Brave is only used when its API key is configured; otherwise we
          // silently fall through to SearXNG (also the behavior for 'both').
          if (provider === 'brave' || provider === 'both') {
            const apiKey = ctx.config.BRAVE_SEARCH_API_KEY
            if (apiKey) {
              return await searchBrave(query, apiKey)
            }
          }

          // Default: SearXNG
          const searxngUrl = ctx.config.SEARXNG_URL || 'http://localhost:8080'
          return await searchSearXNG(query, searxngUrl)
        } catch (e: any) {
          return { error: `Web search failed: ${e.message}` }
        }
      },
    }),
})
|
||||
Reference in New Issue
Block a user