// Momento/memento-note/app/api/chat/route.ts

import { streamText, UIMessage, stepCountIs } from 'ai'
import { resolveAiRouteWithTiming, formatAiRouteDebug } from '@/lib/ai/router'
import { runLaneWithBillingUser, willUseByokForLane } from '@/lib/ai/provider-for-user'
import { getSystemConfig } from '@/lib/config'
import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
import { prisma } from '@/lib/prisma'
import { auth } from '@/auth'
import { loadTranslations, getTranslationValue, SupportedLanguage } from '@/lib/i18n'
import { toolRegistry } from '@/lib/ai/tools'
import { checkEntitlementOrThrow, QuotaExceededError, incrementUsageAsync } from '@/lib/entitlements'
import { trackFeatureUsage } from '@/lib/usage-tracker'
import { readFile } from 'fs/promises'
import path from 'path'

// Route-level setting exported next to the POST handler.
// NOTE(review): presumably the framework's per-route maximum execution time, in seconds — confirm.
export const maxDuration = 60

/**
 * Returns the plain text carried by a chat message.
 *
 * A string `content` takes precedence and is returned as-is (even when empty).
 * Otherwise the `text` of every `type: 'text'` part is concatenated in order.
 * Messages with neither yield the empty string.
 */
function extractTextFromUIMessage(msg: { parts?: Array<{ type: string; text?: string }>; content?: string }): string {
  const { content, parts } = msg

  if (typeof content === 'string') {
    return content
  }
  if (!Array.isArray(parts)) {
    return ''
  }

  let combined = ''
  for (const part of parts) {
    // Only textual parts with an actual string payload contribute.
    if (part.type === 'text' && typeof part.text === 'string') {
      combined += part.text
    }
  }
  return combined
}

/**
 * Reduces client-side UI messages to the `{ role, content }` pairs handed to streamText.
 *
 * Only `user` and `assistant` messages are kept, each flattened to its text via
 * `extractTextFromUIMessage`; messages whose text is empty are dropped.
 */
function toCoreMessages(uiMessages: UIMessage[]): Array<{ role: 'user' | 'assistant'; content: string }> {
  const converted: Array<{ role: 'user' | 'assistant'; content: string }> = []

  for (const message of uiMessages) {
    const isChatRole = message.role === 'user' || message.role === 'assistant'
    if (!isChatRole) continue

    const content = extractTextFromUIMessage(message)
    if (content.length === 0) continue

    converted.push({ role: message.role as 'user' | 'assistant', content })
  }

  return converted
}

/** Upper bound on how many note images are read from disk per request. */
const MAX_NOTE_IMAGES = 4

/** English names of the supported response languages, used in the language rule of the system prompt. */
const RESPONSE_LANGUAGE_NAMES = new Map<string, string>([
  ['en', 'English'],
  ['fr', 'French'],
  ['fa', 'Persian (Farsi)'],
  ['es', 'Spanish'],
])

/**
 * Resolves a client-supplied image path inside `<cwd>/data`.
 *
 * Returns null when the value is not a non-empty string or when the
 * normalized path would leave the data directory (e.g. via `..` segments).
 */
function resolveNoteImagePath(imgPath: unknown): string | null {
  if (typeof imgPath !== 'string' || imgPath.length === 0) return null
  const dataDir = path.join(process.cwd(), 'data')
  // path.join normalizes `..` segments, so a prefix check is enough to detect escapes.
  const fullPath = path.join(dataDir, imgPath)
  return fullPath.startsWith(dataDir + path.sep) ? fullPath : null
}

/**
 * Chat endpoint: authenticates the caller, checks quota, loads or creates the
 * conversation, gathers note/document context, builds the system prompt and
 * streams the model response.
 *
 * Responses:
 * - 401 when there is no authenticated user.
 * - 402 (JSON) when the quota check raises QuotaExceededError.
 * - 400 when the body is not valid JSON, `messages` is not an array, or no
 *   message carries any text.
 * - 404 when `conversationId` does not match a conversation of this user.
 * - Otherwise the UI message stream produced by streamText.
 *
 * On stream completion the last incoming message and the assistant reply are
 * persisted, and usage is recorded unless the user's own key (BYOK) was used.
 */
export async function POST(req: Request) {
  // 1. Auth check
  const session = await auth()
  if (!session?.user?.id) {
    return new Response('Unauthorized', { status: 401 })
  }
  const userId = session.user.id

  // 1.5 Quota check (per-provider BYOK bypass — only when BYOK will be used for resolved provider)
  try {
    const sysConfigEarly = await getSystemConfig()
    const { usedByok: willUseByok } = await willUseByokForLane('chat', sysConfigEarly, userId)
    if (!willUseByok) {
      await checkEntitlementOrThrow(userId, 'chat')
    }
  } catch (err) {
    if (err instanceof QuotaExceededError) {
      return Response.json(err.toJSON(), { status: 402 })
    }
    // Any other failure is deliberately fail-open: the request proceeds.
    console.error('[chat] Quota check error (fail-open):', err)
  }

  // 2. Parse request body — the payload is untrusted, so reject malformed input with 400
  let body: unknown
  try {
    body = await req.json()
  } catch {
    return new Response('Invalid JSON body', { status: 400 })
  }
  if (typeof body !== 'object' || body === null || !Array.isArray((body as { messages?: unknown }).messages)) {
    return new Response('Invalid request: "messages" must be an array', { status: 400 })
  }
  const { messages: rawMessages, conversationId, notebookId, language, webSearch, noteContext, format, noteId } = body as {
    messages: UIMessage[]
    conversationId?: string
    notebookId?: string
    language?: string
    webSearch?: boolean
    noteContext?: { title: string; content: string; tone: string; images?: string[] }
    format?: 'html' | 'markdown'
    noteId?: string
  }

  const incomingMessages = toCoreMessages(rawMessages)
  if (incomingMessages.length === 0) {
    // Nothing to send to the model; bail out before creating a conversation row.
    return new Response('Invalid request: no message content', { status: 400 })
  }
  // Non-empty by construction: toCoreMessages drops messages without text.
  const currentMessage = incomingMessages[incomingMessages.length - 1].content

  // 3. Manage conversation
  let conversation: { id: string; messages: Array<{ role: string; content: string }> }
  if (conversationId) {
    const existing = await prisma.conversation.findUnique({
      where: { id: conversationId, userId },
      include: { messages: { orderBy: { createdAt: 'asc' } } },
    })
    if (!existing) return new Response('Conversation not found', { status: 404 })
    conversation = existing
  } else {
    // New conversations are titled with the first 50 characters of the latest message.
    conversation = await prisma.conversation.create({
      data: {
        userId,
        notebookId: notebookId || null,
        title: currentMessage.substring(0, 50) + (currentMessage.length > 50 ? '...' : ''),
      },
      include: { messages: true },
    })
  }

  // 4. RAG retrieval
  const lang = (typeof language === 'string' && language ? language : 'en') as SupportedLanguage
  const translations = await loadTranslations(lang)
  const untitledText = getTranslationValue(translations, 'notes.untitled') || 'Untitled'

  let notebookContext = ''
  let searchNotes = ''

  if (!noteContext) {
    if (notebookId) {
      // Include the 20 most recently updated notes of the notebook, each truncated to 1500 characters.
      const notebookNotes = await prisma.note.findMany({
        where: { notebookId, userId, trashedAt: null },
        orderBy: { updatedAt: 'desc' },
        take: 20,
        select: { id: true, title: true, content: true, updatedAt: true },
      })
      if (notebookNotes.length > 0) {
        notebookContext = notebookNotes
          .map(n => `NOTE [${n.title || untitledText}] (updated ${n.updatedAt.toLocaleDateString()}):\n${(n.content || '').substring(0, 1500)}`)
          .join('\n\n---\n\n')
      }
    }

    // A notebook scope widens the search: more results and a lower similarity threshold.
    const searchOptions = {
      notebookId,
      limit: notebookId ? 10 : 5,
      threshold: notebookId ? 0.3 : 0.5,
      defaultTitle: untitledText,
    }

    let searchResults: any[] = []
    try {
      // Document passages are only searched when the message mentions a document/file.
      const documentMention = currentMessage.match(
        /\b(pdf|document|fichier|pi[eè]ce jointe|attachment|file)\b/i
      )

      if (documentMention) {
        searchResults = await semanticSearchService.searchWithDocuments(
          userId, currentMessage, { ...searchOptions, includeDocuments: true }
        )
      } else {
        // NOTE(review): unlike searchWithDocuments, this call is not given userId —
        // confirm the service scopes results to the current user on its own.
        searchResults = await semanticSearchService.search(currentMessage, searchOptions)
      }
    } catch (err) {
      // Best-effort: the chat still works without retrieved context.
      console.warn('[chat] Semantic search failed (continuing without results):', err)
    }

    searchNotes = searchResults
      .map((r) => {
        if ((r as any).source === 'document') {
          return `DOCUMENT [${(r as any).fileName} p.${(r as any).pageNumber}] (from note: ${r.title || untitledText}):\n${r.content}`
        }
        return `NOTE [${r.title || untitledText}]: ${r.content}`
      })
      .join('\n\n---\n\n')
  } else if (noteId) {
    // Copilot mode: only document passages attached to the current note are added.
    try {
      const docResults = await semanticSearchService.searchWithDocuments(
        userId, currentMessage, {
          noteId,
          limit: 8,
          threshold: 0.3,
          includeDocuments: true,
          defaultTitle: untitledText,
        }
      )
      searchNotes = docResults
        .map((r) => {
          if ((r as any).source === 'document') {
            return `DOCUMENT [${(r as any).fileName} p.${(r as any).pageNumber}]:\n${r.content}`
          }
          return ''
        })
        .filter(Boolean)
        .join('\n\n---\n\n')
    } catch (err) {
      // Best-effort: continue without document passages.
      console.warn('[chat] Note document search failed (continuing without results):', err)
    }
  }

  const contextNotes = [notebookContext, searchNotes].filter(Boolean).join('\n\n---\n\n')

  // 5. System prompt synthesis
  // NOTE(review): the prompts mix the names "Memento" and "Momento", and the en/fr tone rule
  // reads `"Momento" they mean Momento` — confirm the intended product name and wording.
  const promptLang: Record<string, { contextWithNotes: string; contextNoNotes: string; system: string }> = {
    en: {
      contextWithNotes: `## User's notes & documents\n\n${contextNotes}\n\nWhen using info from the notes above, cite the source note title in parentheses, e.g.: "Deployment is done via Docker (💻 Development Guide)". For document passages, cite the filename and page number, e.g.: "The revenue was $5M (📄 report.pdf p.12)". Don't copy word for word — rephrase. If the notes don't cover the topic, say so and supplement with your general knowledge.`,
      contextNoNotes: "No relevant notes found for this question. Answer with your general knowledge.",
      system: `You are the AI assistant of Memento. The user asks you questions about their projects, technical docs, and notes. You must respond in a structured and helpful way.

## Format rules
- ${format === 'html' ? `Respond MANDATORILY using valid HTML fragments (e.g., <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- Do NOT use Markdown symbols (no #, *, -, etc.).
- Do not wrap your HTML code in a Markdown code block.` : 'Use markdown freely: headings (##, ###), lists, code blocks, bold, tables — anything that makes the response readable.'}
- Structure your response with sections for technical questions or complex topics.
- For simple, short questions, a direct paragraph is enough.` + (format === 'html' ? `

## HTML OUTPUT EXAMPLE
<h3>Section Title</h3>
<p>Here is an explanation with <strong>bold text</strong> and a list:</p>
<ul>
  <li>First important point</li>
  <li>Second important point</li>
</ul>` : '') + `

## Tone rules
- Natural tone, neither corporate nor too casual.
- No unnecessary intro phrases. Answer directly.
- No upsell questions at the end. If you have useful additional info, just give it.
- If the user says "Momento" they mean Momento (this app).

## About Momento
Momento is an intelligent note-taking application. Key features include:
- **Notes & Editor**: Create rich Markdown notes with an integrated AI Copilot to rewrite, summarize, or translate content.
- **Organization**: Group notes into Notebooks and tag them with Labels.
- **Search**: Advanced semantic search to find notes by meaning, not just keywords, and Web Search integration.
- **Agents**: Create specialized AI Agents with custom system prompts for specific recurring tasks.
- **Lab**: Experimental AI tools for data analysis and deeper insights.

## Available tools
You have access to: note_search, note_read, document_search, task_extract, web_search, web_scrape.
Only use tools if you need more information. Never invent note IDs or URLs.
- document_search: Searches attached PDF documents for the current note/notebook. Use when the user asks about documents or files.
- task_extract: Extracts action items from notes and creates a synthesis note. Use when the user asks to extract tasks or TODOs.`,
    },
    fr: {
      contextWithNotes: `## Notes et documents de l'utilisateur\n\n${contextNotes}\n\nQuand tu utilises une info venant des notes ci-dessus, cite le titre de la note source entre parenthèses, ex: "Le déploiement se fait via Docker (💻 Development Guide)". Pour les documents PDF, cite le nom du fichier et la page, ex: "Le chiffre d'affaires est de 5M$ (📄 rapport.pdf p.12)". Ne recopie pas mot pour mot — reformule.`,
      contextNoNotes: "Aucune note pertinente trouvée pour cette question. Réponds avec tes connaissances générales.",
      system: `Tu es l'assistant IA de Memento. L'utilisateur te pose des questions sur ses projets, sa doc technique, ses notes. Tu dois répondre de façon structurée et utile.

## Règles de format
- ${format === 'html' ? `Réponds OBLIGATOIREMENT en utilisant des fragments HTML valides (ex: <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- N'utilise PAS de symboles Markdown.
- Ne mets pas ton code HTML dans un bloc de code Markdown.` : 'Utilise le markdown librement : titres (##, ###), listes, code blocks, gras, tables.'}
- Structure ta réponse avec des sections quand c'est une question technique ou un sujet complexe.
- Pour les questions simples et courtes, un paragraphe direct suffit.` + (format === 'html' ? `

## EXEMPLE DE SORTIE HTML
<h3>Titre de section</h3>
<p>Voici une explication avec du <strong>texte en gras</strong> et une liste :</p>
<ul>
  <li>Premier point important</li>
  <li>Deuxième point important</li>
</ul>` : '') + `

## Règles de ton
- Ton naturel, direct, sans phrases d'intro inutiles.
- Pas de question upsell à la fin.
- Si l'utilisateur dit "Momento" il parle de Momento (cette application).

## À propos de Momento
Momento est une application de prise de notes intelligente. Ses fonctionnalités : Éditeur Markdown riche, Copilot IA, Organisation par Carnets, Recherche sémantique, Agents IA, Lab.

## Outils disponibles
Tu as accès à : note_search, note_read, document_search, task_extract, web_search, web_scrape.
- document_search : Recherche dans les documents PDF attachés à la note/au carnet.
- task_extract : Extrait les tâches/action items des notes et crée une note de synthèse.`,
    },
    fa: {
      contextWithNotes: `## یادداشت‌های کاربر\n\n${contextNotes}\n\nهنگام استفاده از اطلاعات یادداشت‌های بالا، عنوان یادداشت منبع را در پرانتز ذکر کنید.`,
      contextNoNotes: "هیچ یادداشت مرتبطی برای این سؤال یافت نشد. با دانش عمومی خود پاسخ دهید.",
      system: `شما دستیار هوش مصنوعی Memento هستید. کاربر از شما درباره پروژه‌ها، مستندات فنی و یادداشت‌هایش سؤال می‌کند. باید به شکلی ساختاریافته و مفید پاسخ دهید.

## قوانین قالب‌بندی
- ${format === 'html' ? `حتماً از تگ‌های HTML معتبر استفاده کنید (مانند <p>, <strong>, <em>, <ul>, <li>, <h3>).
- از نمادهای مارک‌داون استفاده نکنید.` : 'از مارک‌داون آزادانه استفاده کنید: عناوین (##, ###)، لیست‌ها، بلوک‌های کد، پررنگ، جداول.'}
- برای سؤالات فنی یا موضوعات پیچیده، پاسخ خود را بخش‌بندی کنید.
- برای سؤالات ساده و کوتاه، یک پاراگراف مستقیم کافی است.` + (format === 'html' ? `

## نمونه خروجی HTML
<h3>عنوان بخش</h3>
<p>این یک توضیح با <strong>متن برجسته</strong> و یک لیست است:</p>
<ul>
  <li>نکته اول</li>
  <li>نکته دوم</li>
</ul>` : '') + `

## قوانین لحن
- لحن طبیعی، مستقیم، بدون مقدمه اضافی.
- اگر کاربر "Momento" می‌گوید، منظورش Memento (این برنامه) است.`,
    },
    es: {
      contextWithNotes: `## Notas del usuario\n\n${contextNotes}\n\nCuando uses información de las notas anteriores, cita el título de la nota fuente entre paréntesis.`,
      contextNoNotes: "No se encontraron notas relevantes para esta pregunta. Responde con tu conocimiento general.",
      system: `Eres el asistente de IA de Memento. El usuario te hace preguntas sobre sus proyectos, documentación técnica y notas.

## Reglas de formato
- ${format === 'html' ? `Responde OBLIGATORIAMENTE usando fragmentos HTML válidos (ej: <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- NO uses símbolos Markdown.` : 'Usa markdown libremente: títulos (##, ###), listas, negritas, tablas.'}
- Estructura tu respuesta con secciones para temas complejos.
- Para preguntas simples, un párrafo directo es suficiente.` + (format === 'html' ? `

## EJEMPLO DE SALIDA HTML
<h3>Título de sección</h3>
<p>Aquí hay una explicación con <strong>texto en negrita</strong> y una lista:</p>
<ul>
  <li>Primer punto importante</li>
  <li>Segundo punto importante</li>
</ul>` : ''),
    },
  }

  // `lang` comes from the request: use an own-property check so keys such as
  // "constructor" cannot resolve to Object.prototype members.
  const prompts = Object.prototype.hasOwnProperty.call(promptLang, lang) ? promptLang[lang] : promptLang.en
  const contextBlock = contextNotes.length > 0 ? prompts.contextWithNotes : prompts.contextNoNotes

  // Load note images for vision
  // NOTE(review): the collected parts are not passed to streamText below — confirm whether
  // they should be attached to the user message or whether this loading can be removed.
  const imageContextParts: Array<{ type: 'image'; image: string }> = []
  const requestedImages = noteContext?.images
  if (Array.isArray(requestedImages)) {
    for (const imgPath of requestedImages.slice(0, MAX_NOTE_IMAGES)) {
      const fullPath = resolveNoteImagePath(imgPath)
      if (!fullPath) {
        // Paths are client-supplied; never read outside the data directory.
        console.warn('[chat] Ignoring note image path outside the data directory')
        continue
      }
      try {
        const buffer = await readFile(fullPath)
        const ext = path.extname(imgPath).toLowerCase()
        // Anything that is not png/gif/webp is labelled as JPEG.
        const mime = ext === '.png' ? 'image/png' : ext === '.gif' ? 'image/gif' : ext === '.webp' ? 'image/webp' : 'image/jpeg'
        imageContextParts.push({ type: 'image', image: `data:${mime};base64,${buffer.toString('base64')}` })
      } catch (err) {
        // Best-effort: an unreadable image is skipped.
        console.warn('[chat] Failed to read note image (skipped):', err)
      }
    }
  }

  let copilotContext = ''
  if (noteContext) {
    copilotContext = `\n\n## Current Note Context
You are helping the user edit a specific note: ${noteContext.title || 'Untitled'}.
Tone: ${noteContext.tone || 'professional'}.
Content: ${noteContext.content || '(empty)'}
Focus ONLY on this note unless asked otherwise.`
  }

  // Notebook scope directive — tells the AI to stay within the selected notebook
  let notebookScopeDirective = ''
  if (notebookId) {
    // NOTE(review): this lookup is keyed by id only (no userId filter) — confirm that a
    // notebook name cannot leak across users here. A failed lookup falls back to the raw id.
    const scopedNotebook = await prisma.notebook.findUnique({ where: { id: notebookId }, select: { name: true } }).catch(() => null)
    const notebookName = scopedNotebook?.name || notebookId
    notebookScopeDirective = `\n\n## NOTEBOOK SCOPE\nThe user has scoped this conversation to the notebook "${notebookName}". When using the note_search tool, ALWAYS pass notebookId="${notebookId}" to restrict results to this notebook. Only reference notes from this notebook unless the user explicitly asks otherwise.`
  }

  // Unknown language codes fall back to English, matching the prompt fallback above.
  const responseLanguage = RESPONSE_LANGUAGE_NAMES.get(lang) ?? 'English'
  const systemPrompt = `${prompts.system}\n${copilotContext}${notebookScopeDirective}\n\n${contextBlock}\n\n## LANGUAGE RULE (MANDATORY)\nYou MUST respond in ${responseLanguage}.`

  // 6. Execute stream
  const sysConfig = await getSystemConfig()

  const routeDebug =
    process.env.NODE_ENV !== 'production' || process.env.MEMENTO_AI_ROUTE_DEBUG === '1'
  if (routeDebug) {
    console.debug('[ai-route]', formatAiRouteDebug(resolveAiRouteWithTiming('chat', sysConfig)))
  }

  // The same tool set is built whether or not a note context is present.
  const chatTools = toolRegistry.buildToolsForChat({ userId, config: sysConfig, webSearch, notebookId: notebookId || undefined })

  const { result, usedByok } = await runLaneWithBillingUser(
    'chat',
    sysConfig,
    userId,
    async (provider) =>
      streamText({
        model: provider.getModel(),
        system: systemPrompt,
        messages: incomingMessages,
        tools: chatTools,
        stopWhen: stepCountIs(5),
        onFinish: async (final) => {
          // Persist the exchange: the latest incoming message, then the assistant reply.
          await prisma.chatMessage.create({
            data: { conversationId: conversation.id, role: 'user', content: currentMessage },
          })
          await prisma.chatMessage.create({
            data: { conversationId: conversation.id, role: 'assistant', content: final.text },
          })
          // `usedByok` is bound once runLaneWithBillingUser resolves.
          // NOTE(review): this relies on onFinish firing after that point — confirm.
          if (!usedByok) {
            trackFeatureUsage(userId, 'chat', final.usage?.totalTokens ?? 0)
            incrementUsageAsync(userId, 'chat')
          }
        },
      }),
  )

  return result.toUIMessageStreamResponse()
}