// Momento/memento-note/app/api/chat/route.ts

import { streamText, UIMessage, stepCountIs } from 'ai'
import { resolveAiRouteWithTiming, formatAiRouteDebug } from '@/lib/ai/router'
import { runLaneWithBillingUser, willUseByokForLane } from '@/lib/ai/provider-for-user'
import { getSystemConfig } from '@/lib/config'
import { getChatProvider } from '@/lib/ai/factory'
import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
import { prisma } from '@/lib/prisma'
import { auth } from '@/auth'
import { hasUserAiConsent } from '@/lib/consent/server-consent'
import { loadTranslations, getTranslationValue, SupportedLanguage } from '@/lib/i18n'
import { toolRegistry } from '@/lib/ai/tools'
import { reserveUsageOrThrow, QuotaExceededError, QuotaServiceUnavailableError } from '@/lib/entitlements'
import { ByokUnavailableError } from '@/lib/byok'
import { trackFeatureUsage } from '@/lib/usage-tracker'
import { readFile } from 'fs/promises'
import path from 'path'
import { logAuditEvent, getClientIp } from '@/lib/audit-log'

// Exported route-level setting. NOTE(review): presumably the Next.js route
// segment config that caps this handler's execution time, in seconds — confirm
// against the hosting platform's limits.
export const maxDuration = 60

/**
 * Return the plain text carried by a chat message.
 *
 * A string `content` field wins outright (even when empty). Otherwise the
 * `text` of every part whose `type` is `'text'` is concatenated in order,
 * with no separator. Messages with neither yield an empty string.
 *
 * @param msg Message-like object with optional `content` and `parts`.
 * @returns The message text, or `''` when there is none.
 */
function extractTextFromUIMessage(msg: { parts?: Array<{ type: string; text?: string }>; content?: string }): string {
  const { content, parts } = msg

  if (typeof content === 'string') {
    return content
  }

  if (!parts || !Array.isArray(parts)) {
    return ''
  }

  let collected = ''
  for (const part of parts) {
    // Only textual parts that actually carry a string contribute.
    if (part.type === 'text' && typeof part.text === 'string') {
      collected += part.text
    }
  }
  return collected
}

/**
 * Reduce client-side UIMessages to the plain `{ role, content }` pairs passed
 * to streamText.
 *
 * Only `user` and `assistant` messages are kept, and any message whose
 * extracted text is empty is dropped. Input order is preserved.
 *
 * @param uiMessages Messages as received from the client.
 * @returns Text-only user/assistant messages.
 */
function toCoreMessages(uiMessages: UIMessage[]): Array<{ role: 'user' | 'assistant'; content: string }> {
  const converted: Array<{ role: 'user' | 'assistant'; content: string }> = []

  for (const message of uiMessages) {
    const role = message.role
    if (role !== 'user' && role !== 'assistant') {
      continue
    }

    const content = extractTextFromUIMessage(message)
    if (content.length === 0) {
      continue
    }

    converted.push({ role, content })
  }

  return converted
}

/**
 * POST handler for the chat endpoint: streams an AI answer for the signed-in user.
 *
 * Pipeline:
 *  1. Authenticate the session and check AI consent.
 *  2. Parse and validate the JSON body (done before any quota is reserved so a
 *     malformed request does not consume usage).
 *  3. Reserve chat quota, unless the user's own API key (BYOK) will be used.
 *  4. Load the referenced conversation, or create a new one.
 *  5. Once the stored history is long enough, compress older turns into a
 *     summary that is persisted on the conversation.
 *  6. Gather context: recent notebook notes and semantic-search hits.
 *  7. Build the localized system prompt.
 *  8. Stream the model output; when it finishes, persist the user and assistant
 *     messages, record usage (non-BYOK only) and write an audit event.
 *
 * Error responses produced here:
 *  - 400 invalid JSON, missing `messages` array, or no non-empty user/assistant message
 *  - 401 no authenticated user
 *  - 402 quota exceeded
 *  - 403 AI consent missing
 *  - 404 `conversationId` not found for this user
 *  - 503 BYOK key unavailable or quota service unavailable
 *
 * @param req Request whose JSON body carries `messages` plus the optional
 *   `conversationId`, `notebookId`, `language`, `webSearch`, `noteContext`,
 *   `format` and `noteId` fields.
 * @returns The UI message stream response on success, otherwise one of the
 *   error responses listed above.
 * @throws Re-throws any error raised while starting the stream that is not a
 *   ByokUnavailableError.
 */
export async function POST(req: Request) {
  // 1. Auth check
  const session = await auth()
  if (!session?.user?.id) {
    return new Response('Unauthorized', { status: 401 })
  }
  const userId = session.user.id

  // GDPR AI Consent check
  if (!(await hasUserAiConsent())) {
    return new Response(JSON.stringify({ error: 'ai_consent_required' }), {
      status: 403,
      headers: { 'Content-Type': 'application/json' },
    })
  }

  // 2. Parse and validate the request body.
  // The body is untrusted client input: reject anything that cannot be processed
  // before reserving quota or touching the database.
  let body: {
    messages: UIMessage[]
    conversationId?: string
    notebookId?: string
    language?: string
    webSearch?: boolean
    noteContext?: { title: string; content: string; tone: string; images?: string[] }
    format?: 'html' | 'markdown'
    noteId?: string
  }
  try {
    body = await req.json()
  } catch {
    return Response.json(
      { error: 'invalid_request', message: 'Request body must be valid JSON.' },
      { status: 400 }
    )
  }
  if (!body || typeof body !== 'object' || !Array.isArray(body.messages)) {
    return Response.json(
      { error: 'invalid_request', message: 'Request body must include a "messages" array.' },
      { status: 400 }
    )
  }
  const { messages: rawMessages, conversationId, notebookId, language, webSearch, noteContext, format, noteId } = body

  const incomingMessages = toCoreMessages(rawMessages)
  const latestMessage = incomingMessages[incomingMessages.length - 1]
  if (!latestMessage) {
    // Nothing to answer: every message was filtered out (wrong role or empty text).
    return Response.json(
      { error: 'invalid_request', message: 'At least one non-empty user or assistant message is required.' },
      { status: 400 }
    )
  }
  // Text of the latest message; drives the title, retrieval, chart detection and persistence.
  const currentMessage = latestMessage.content

  // 3. Quota check (per-provider BYOK bypass — only when BYOK will be used for resolved provider)
  try {
    const sysConfigEarly = await getSystemConfig()
    const { usedByok: willUseByok } = await willUseByokForLane('chat', sysConfigEarly, userId)
    if (!willUseByok) {
      await reserveUsageOrThrow(userId, 'chat')
    }
  } catch (err) {
    if (err instanceof QuotaExceededError) {
      return Response.json(err.toJSON(), { status: 402 })
    }
    if (err instanceof ByokUnavailableError) {
      return Response.json(
        { error: 'byok_unavailable', message: 'Votre clé API BYOK est configurée mais n\'a pas pu être chargée. Vérifiez vos paramètres BYOK.' },
        { status: 503 }
      )
    }
    if (err instanceof QuotaServiceUnavailableError) {
      return Response.json({ error: err.code }, { status: 503 })
    }
    // Unknown failure: fail open outside production, fail closed in production.
    if (process.env.NODE_ENV !== 'production') {
      console.error('[chat] Quota check error (fail-open):', err)
    } else {
      console.error('[chat] Quota check error:', err)
      return Response.json({ error: 'QUOTA_SERVICE_UNAVAILABLE' }, { status: 503 })
    }
  }

  // 4. Manage conversation
  let conversation: { id: string; summary: string | null; messages: Array<{ id: string | null; role: string; content: string }> }
  if (conversationId) {
    // Scoped by userId so a user can only continue their own conversations.
    const existing = await prisma.conversation.findUnique({
      where: { id: conversationId, userId },
      include: { messages: { orderBy: { createdAt: 'asc' }, select: { id: true, role: true, content: true } } },
    })
    if (!existing) return new Response('Conversation not found', { status: 404 })
    conversation = existing
  } else {
    // NOTE(review): notebookId is stored as received; this handler does not
    // verify that the notebook belongs to the user — confirm whether it should.
    conversation = await prisma.conversation.create({
      data: {
        userId,
        notebookId: notebookId || null,
        // Title is the first 50 characters of the latest message.
        title: currentMessage.substring(0, 50) + (currentMessage.length > 50 ? '...' : ''),
      },
      include: { messages: { select: { id: true, role: true, content: true } } },
    })
  }

  // 5. Progressive summary — compress old messages to save tokens
  const SUMMARY_THRESHOLD = 8
  const RECENT_KEEP = 4
  // Widened to allow the synthetic `system` entry that carries the summary.
  let messagesForModel: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = incomingMessages

  if (conversationId && conversation.messages.length >= SUMMARY_THRESHOLD) {
    try {
      const sysConfig = await getSystemConfig()
      // Regenerate when there is no summary yet, or once the stored history
      // reaches SUMMARY_THRESHOLD + 4 messages.
      // NOTE(review): past that size this is true on every request, so a summary
      // is regenerated each time — confirm this is intended.
      const shouldRegenerate = !conversation.summary || conversation.messages.length >= SUMMARY_THRESHOLD + 4

      if (shouldRegenerate) {
        const oldMessages = conversation.messages.slice(0, conversation.messages.length - RECENT_KEEP)
        const existingSummary = conversation.summary
          ? `Résumé actuel:\n${conversation.summary}\n\nNouveaux échanges à intégrer:\n`
          : ''

        const summaryPrompt = `${existingSummary}${oldMessages.map(m => `${m.role}: ${m.content.slice(0, 300)}`).join('\n')}

Fais un résumé concis (max 200 mots) de cette conversation. Garde les informations clés, décisions, et contexte important.`
        // NOTE(review): the summary uses getChatProvider directly rather than
        // runLaneWithBillingUser — verify how this call should be billed/routed.
        const provider = getChatProvider(sysConfig)
        const newSummary = await provider.generateText(summaryPrompt)
        // Stored and injected summaries are both capped at 1000 characters.
        const cappedSummary = newSummary.slice(0, 1000)

        await prisma.conversation.update({
          where: { id: conversation.id },
          data: { summary: cappedSummary },
        })

        messagesForModel = [
          { role: 'system' as const, content: `Contexte de la conversation précédente:\n${cappedSummary}` },
          ...incomingMessages.slice(-RECENT_KEEP),
        ]
      } else if (conversation.summary) {
        messagesForModel = [
          { role: 'system' as const, content: `Contexte de la conversation précédente:\n${conversation.summary}` },
          ...incomingMessages.slice(-RECENT_KEEP),
        ]
      }
    } catch (e) {
      // Best effort: on failure the full incoming history is sent unchanged.
      console.error('[Chat] Summary generation failed, using full context:', e)
    }
  }

  // 6. RAG retrieval
  const lang = (language || 'en') as SupportedLanguage
  const translations = await loadTranslations(lang)
  const untitledText = getTranslationValue(translations, 'notes.untitled') || 'Untitled'

  let notebookContext = ''
  let searchNotes = ''

  if (!noteContext) {
    if (notebookId) {
      // Up to 20 most recently updated, non-trashed notes of this user in the notebook.
      const notebookNotes = await prisma.note.findMany({
        where: { notebookId, userId, trashedAt: null },
        orderBy: { updatedAt: 'desc' },
        take: 20,
        select: { id: true, title: true, content: true, updatedAt: true },
      })
      if (notebookNotes.length > 0) {
        notebookContext = notebookNotes
          .map(n => `NOTE [${n.title || untitledText}] (updated ${n.updatedAt.toLocaleDateString()}):\n${(n.content || '').substring(0, 1500)}`)
          .join('\n\n---\n\n')
      }
    }

    let searchResults: any[] = []
    try {
      // Mentioning a document/file switches to the document-aware search.
      const documentMention = currentMessage.match(
        /\b(pdf|document|fichier|pi[eè]ce jointe|attachment|file)\b/i
      )

      if (documentMention) {
        const docResults = await semanticSearchService.searchWithDocuments(
          userId, currentMessage, {
            notebookId,
            limit: notebookId ? 10 : 5,
            threshold: notebookId ? 0.3 : 0.5,
            includeDocuments: true,
            defaultTitle: untitledText,
          }
        )
        searchResults = docResults
      } else {
        searchResults = await semanticSearchService.search(currentMessage, {
          notebookId,
          limit: notebookId ? 10 : 5,
          threshold: notebookId ? 0.3 : 0.5,
          defaultTitle: untitledText,
        })
      }
    } catch (err) {
      // Retrieval is best effort: answer without search context rather than fail.
      console.warn('[chat] Semantic search failed, continuing without search context:', err)
    }

    searchNotes = searchResults
      .map((r) => {
        if (r.source === 'document') {
          return `DOCUMENT [${r.fileName} p.${r.pageNumber}] (from note: ${r.title || untitledText}):\n${r.content}`
        }
        return `NOTE [${r.title || untitledText}]: ${r.content}`
      })
      .join('\n\n---\n\n')
  } else if (noteId) {
    // Editing a specific note: only passages from its documents are added.
    try {
      const docResults = await semanticSearchService.searchWithDocuments(
        userId, currentMessage, {
          noteId,
          limit: 8,
          threshold: 0.3,
          includeDocuments: true,
          defaultTitle: untitledText,
        }
      )
      searchNotes = docResults
        .map((r) => {
          if ((r as any).source === 'document') {
            return `DOCUMENT [${(r as any).fileName} p.${(r as any).pageNumber}]:\n${r.content}`
          }
          return ''
        })
        .filter(Boolean)
        .join('\n\n---\n\n')
    } catch (err) {
      console.warn('[chat] Document search failed, continuing without document context:', err)
    }
  }

  const contextNotes = [notebookContext, searchNotes].filter(Boolean).join('\n\n---\n\n')

  // 7. System prompt synthesis
  const promptLang: Record<string, { contextWithNotes: string; contextNoNotes: string; system: string }> = {
    en: {
      contextWithNotes: `## User's notes & documents\n\n${contextNotes}\n\nWhen using info from the notes above, cite the source note title in parentheses, e.g.: "Deployment is done via Docker (💻 Development Guide)". For document passages, cite the filename and page number, e.g.: "The revenue was $5M (📄 report.pdf p.12)". Don't copy word for word — rephrase. If the notes don't cover the topic, say so and supplement with your general knowledge.`,
      contextNoNotes: "No relevant notes found for this question. Answer with your general knowledge.",
      system: `You are the AI assistant of Memento. The user asks you questions about their projects, technical docs, and notes. You must respond in a structured and helpful way.

## Format rules
- ${format === 'html' ? `Respond MANDATORILY using valid HTML fragments (e.g., <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- Do NOT use Markdown symbols (no #, *, -, etc.).
- Do not wrap your HTML code in a Markdown code block.` : 'Use markdown freely: headings (##, ###), lists, code blocks, bold, tables — anything that makes the response readable.'}
- Structure your response with sections for technical questions or complex topics.
- For simple, short questions, a direct paragraph is enough.` + (format === 'html' ? `

## HTML OUTPUT EXAMPLE
<h3>Section Title</h3>
<p>Here is an explanation with <strong>bold text</strong> and a list:</p>
<ul>
  <li>First important point</li>
  <li>Second important point</li>
</ul>` : '') + `

## Tone rules
- Natural tone, neither corporate nor too casual.
- No unnecessary intro phrases. Answer directly.
- No upsell questions at the end. If you have useful additional info, just give it.
- If the user says "Memento" they mean Memento (this app).

## About Memento
Memento is an intelligent note-taking application. Key features include:
- **Notes & Editor**: Create rich Markdown notes with an integrated AI Copilot to rewrite, summarize, or translate content.
- **Organization**: Group notes into Notebooks and tag them with Labels.
- **Search**: Advanced semantic search to find notes by meaning, not just keywords, and Web Search integration.
- **Agents**: Create specialized AI Agents with custom system prompts for specific recurring tasks.
- **Lab**: Experimental AI tools for data analysis and deeper insights.

## Available tools
You have access to: note_search, note_read, note_find_and_update, document_search, task_extract, web_search, web_scrape, insert_chart.
Only use tools if you need more information. Never invent note IDs or URLs.
- document_search: Searches attached PDF documents for the current note/notebook. Use when the user asks about documents or files.
- task_extract: Extracts action items from notes and creates a synthesis note. Use when the user asks to extract tasks or TODOs.
- note_find_and_update: Finds a note by search query and appends/prepends/replaces content. Use when the user says "find the note about X and add Y to it".
- insert_chart: Generates a chart and inserts it directly into the note. Use when the user asks "make a chart", "create a graph", "visualize this data", "show me a chart of X".
IMPORTANT: Chart format MUST be exactly:
\`\`\`chart
{type}
{title}
{label}: {value}
{label}: {value}
\`\`\`

Example for sales chart:
\`\`\`chart
bar
Sales by Month
Jan: 5000
Feb: 7500
Mar: 6200
\`\`\`

Available types: bar, horizontal-bar, line, area, pie, radar. NEVER use Mermaid or other formats.`,
    },
    fr: {
      contextWithNotes: `## Notes et documents de l'utilisateur\n\n${contextNotes}\n\nQuand tu utilises une info venant des notes ci-dessus, cite le titre de la note source entre parenthèses, ex: "Le déploiement se fait via Docker (💻 Development Guide)". Pour les documents PDF, cite le nom du fichier et la page, ex: "Le chiffre d'affaires est de 5M$ (📄 rapport.pdf p.12)". Ne recopie pas mot pour mot — reformule.`,
      contextNoNotes: "Aucune note pertinente trouvée pour cette question. Réponds avec tes connaissances générales.",
      system: `Tu es l'assistant IA de Memento. L'utilisateur te pose des questions sur ses projets, sa doc technique, ses notes. Tu dois répondre de façon structurée et utile.

## Règles de format
- ${format === 'html' ? `Réponds OBLIGATOIREMENT en utilisant des fragments HTML valides (ex: <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- N'utilise PAS de symboles Markdown.
- Ne mets pas ton code HTML dans un bloc de code Markdown.` : 'Utilise le markdown librement : titres (##, ###), listes, code blocks, gras, tables.'}
- Structure ta réponse avec des sections quand c'est une question technique ou un sujet complexe.
- Pour les questions simples et courtes, un paragraphe direct suffit.` + (format === 'html' ? `

## EXEMPLE DE SORTIE HTML
<h3>Titre de section</h3>
<p>Voici une explication avec du <strong>texte en gras</strong> et une liste :</p>
<ul>
  <li>Premier point important</li>
  <li>Deuxième point important</li>
</ul>` : '') + `

## Règles de ton
- Ton naturel, direct, sans phrases d'intro inutiles.
- Pas de question upsell à la fin.
- Si l'utilisateur dit "Memento" il parle de Memento (cette application).

## À propos de Memento
Memento est une application de prise de notes intelligente. Ses fonctionnalités : Éditeur Markdown riche, Copilot IA, Organisation par Carnets, Recherche sémantique, Agents IA, Lab.

## Outils disponibles
Tu as accès à : note_search, note_read, note_find_and_update, document_search, task_extract, web_search, web_scrape, insert_chart.
- document_search : Recherche dans les documents PDF attachés à la note/au carnet.
- task_extract : Extrait les tâches/action items des notes et crée une note de synthèse.
- note_find_and_update : Trouve une note par recherche textuelle et ajoute/prépose/remplace du contenu. Utilise quand l'utilisateur dit "trouve la note sur X et ajoute-y Y".
- insert_chart : Génère un graphique et l'insère directement dans la note. Utilise quand l'utilisateur demande "fais un graphique", "crée un chart", "visualise ces données".
IMPORTANT : Le format du graphique DOIT être exactement :
\`\`\`chart
{type}
{titre}
{label}: {valeur}
{label}: {valeur}
\`\`\`

Exemple pour un graphique de ventes :
\`\`\`chart
bar
Ventes par mois
Jan: 5000
Fév: 7500
Mar: 6200
\`\`\`

Types disponibles : bar, horizontal-bar, line, area, pie, radar. JAMAIS utiliser Mermaid ou d'autres formats.`,
    },
    fa: {
      contextWithNotes: `## یادداشت‌های کاربر\n\n${contextNotes}\n\nهنگام استفاده از اطلاعات یادداشت‌های بالا، عنوان یادداشت منبع را در پرانتز ذکر کنید.`,
      contextNoNotes: "هیچ یادداشت مرتبطی برای این سؤال یافت نشد. با دانش عمومی خود پاسخ دهید.",
      system: `شما دستیار هوش مصنوعی Memento هستید. کاربر از شما درباره پروژه‌ها، مستندات فنی و یادداشت‌هایش سؤال می‌کند. باید به شکلی ساختاریافته و مفید پاسخ دهید.

## قوانین قالب‌بندی
- ${format === 'html' ? `حتماً از تگ‌های HTML معتبر استفاده کنید (مانند <p>, <strong>, <em>, <ul>, <li>, <h3>).
- از نمادهای مارک‌داون استفاده نکنید.` : 'از مارک‌داون آزادانه استفاده کنید: عناوین (##, ###)، لیست‌ها، بلوک‌های کد، پررنگ، جداول.'}
- برای سؤالات فنی یا موضوعات پیچیده، پاسخ خود را بخش‌بندی کنید.
- برای سؤالات ساده و کوتاه، یک پاراگراف مستقیم کافی است.` + (format === 'html' ? `

## نمونه خروجی HTML
<h3>عنوان بخش</h3>
<p>این یک توضیح با <strong>متن برجسته</strong> و یک لیست است:</p>
<ul>
  <li>نکته اول</li>
  <li>نکته دوم</li>
</ul>` : '') + `

## قوانین لحن
- لحن طبیعی، مستقیم، بدون مقدمه اضافی.
- اگر کاربر "Memento" می‌گوید، منظورش Memento (این برنامه) است.`,
    },
    es: {
      contextWithNotes: `## Notas del usuario\n\n${contextNotes}\n\nCuando uses información de las notas anteriores, cita el título de la nota fuente entre paréntesis.`,
      contextNoNotes: "No se encontraron notas relevantes para esta pregunta. Responde con tu conocimiento general.",
      system: `Eres el asistente de IA de Memento. El usuario te hace preguntas sobre sus proyectos, documentación técnica y notas.

## Reglas de formato
- ${format === 'html' ? `Responde OBLIGATORIAMENTE usando fragmentos HTML válidos (ej: <p>, <strong>, <em>, <ul>, <li>, <h3>, <table>, <tr>, <td>).
- NO uses símbolos Markdown.` : 'Usa markdown libremente: títulos (##, ###), listas, negritas, tablas.'}
- Estructura tu respuesta con secciones para temas complejos.
- Para preguntas simples, un párrafo directo es suficiente.` + (format === 'html' ? `

## EJEMPLO DE SALIDA HTML
<h3>Título de sección</h3>
<p>Aquí hay una explicación con <strong>texto en negrita</strong> y una lista:</p>
<ul>
  <li>Primer punto importante</li>
  <li>Segundo punto importante</li>
</ul>` : ''),
    },
  }

  // `lang` comes from the client: only accept own keys of the table so values
  // such as "constructor" cannot resolve to an inherited property.
  const prompts =
    (Object.prototype.hasOwnProperty.call(promptLang, lang) && promptLang[lang]) || promptLang.en
  const contextBlock = contextNotes.length > 0 ? prompts.contextWithNotes : prompts.contextNoNotes

  // Load note images for vision
  // NOTE(review): imageContextParts is populated here but is not passed to
  // streamText below, so the images currently never reach the model.
  const imageContextParts: Array<{ type: 'image'; image: string }> = []
  const dataRoot = path.join(process.cwd(), 'data')
  const requestedImages = noteContext && Array.isArray(noteContext.images) ? noteContext.images : []
  for (const imgPath of requestedImages.slice(0, 4)) {
    if (typeof imgPath !== 'string') continue
    // Image paths are client-supplied: path.join normalizes ".." segments, so
    // anything that resolves outside the data directory is rejected.
    const fullPath = path.join(dataRoot, imgPath)
    if (!fullPath.startsWith(dataRoot + path.sep)) {
      console.warn('[chat] Ignoring note image path outside the data directory')
      continue
    }
    try {
      const buffer = await readFile(fullPath)
      const ext = path.extname(imgPath).toLowerCase()
      // Unknown extensions fall back to JPEG.
      const mime = ext === '.png' ? 'image/png' : ext === '.gif' ? 'image/gif' : ext === '.webp' ? 'image/webp' : 'image/jpeg'
      imageContextParts.push({ type: 'image', image: `data:${mime};base64,${buffer.toString('base64')}` })
    } catch (err) {
      // A missing or unreadable image must not fail the whole chat request.
      console.warn('[chat] Failed to load note image:', err)
    }
  }

  let copilotContext = ''
  if (noteContext) {
     copilotContext = `\n\n## Current Note Context
You are helping the user edit a specific note: ${noteContext.title || 'Untitled'}.
Tone: ${noteContext.tone || 'professional'}.
Content: ${noteContext.content || '(empty)'}
Focus ONLY on this note unless asked otherwise.`
  }

  // Notebook scope directive — tells the AI to stay within the selected notebook
  let notebookScopeDirective = ''
  if (notebookId) {
    // NOTE(review): this lookup filters by id only, not by userId, so another
    // user's notebook name could end up in the prompt — confirm whether the
    // notebook model allows scoping this query to the current user.
    const scopedNotebook = await prisma.notebook.findUnique({ where: { id: notebookId }, select: { name: true } }).catch(() => null)
    const notebookName = scopedNotebook?.name || notebookId
    notebookScopeDirective = `\n\n## NOTEBOOK SCOPE\nThe user has scoped this conversation to the notebook "${notebookName}". When using the note_search tool, ALWAYS pass notebookId="${notebookId}" to restrict results to this notebook. Only reference notes from this notebook unless the user explicitly asks otherwise.`
  }

  const systemPrompt = `${prompts.system}\n${copilotContext}${notebookScopeDirective}\n\n${contextBlock}\n\n## LANGUAGE RULE (MANDATORY)\nYou MUST respond in ${lang === 'en' ? 'English' : lang === 'fr' ? 'French' : lang === 'fa' ? 'Persian (Farsi)' : lang === 'es' ? 'Spanish' : 'English'}.`

  // 8. Execute stream
  const sysConfig = await getSystemConfig()

  const routeDebug =
    process.env.NODE_ENV !== 'production' || process.env.MEMENTO_AI_ROUTE_DEBUG === '1'
  if (routeDebug) {
    console.debug('[ai-route]', formatAiRouteDebug(resolveAiRouteWithTiming('chat', sysConfig)))
  }

  // The same tool set is built whether or not a note context is present.
  const chatTools = toolRegistry.buildToolsForChat({
    userId,
    config: sysConfig,
    webSearch,
    notebookId: notebookId || undefined,
    noteId,
  })

  // Detect if user is asking for a chart/visualization to force tool usage
  // NOTE(review): matching is by substring, so short keywords such as "bar",
  // "line", "arr" or "stat" also match unrelated words ("status", "arrive") and
  // would force insert_chart — consider tightening this heuristic.
  const lastMessage = currentMessage.toLowerCase()
  const chartKeywords = [
    'chart', 'graph', 'graphique', 'graphe', 'charte', 'visuali', 'diagramme',
    'plot', 'courbe', 'histogram', 'bar', 'pie', 'line', 'area', 'radar',
    'données', 'donnée', 'data', 'stat', 'mrr', 'arr', 'revenu', 'sales', 'vente'
  ]
  const wantsChart = chartKeywords.some(k => lastMessage.includes(k))

  try {
    const { result, usedByok } = await runLaneWithBillingUser(
      'chat',
      sysConfig,
      userId,
      async (provider) =>
        streamText({
          model: provider.getModel(),
          system: systemPrompt,
          messages: messagesForModel,
          tools: chatTools,
          toolChoice: wantsChart && chatTools.insert_chart ? { type: 'tool', toolName: 'insert_chart' } : undefined,
          stopWhen: stepCountIs(5),
          onFinish: async (final) => {
            // Persist the exchange: the latest incoming message, then the model's answer.
            await prisma.chatMessage.create({
              data: { conversationId: conversation.id, role: 'user', content: currentMessage },
            })
            await prisma.chatMessage.create({
              data: { conversationId: conversation.id, role: 'assistant', content: final.text },
            })
            // Usage is only tracked when the platform key was used, not the user's own key.
            if (!usedByok) {
              trackFeatureUsage(userId, 'chat', final.usage?.totalTokens ?? 0)
            }
            logAuditEvent({
              userId,
              action: 'AI_REQUEST',
              resource: 'chat',
              metadata: { tokens: final.usage?.totalTokens, byok: usedByok },
              ip: getClientIp(req),
            })
          },
        }),
    )
    return result.toUIMessageStreamResponse()
  } catch (err) {
    if (err instanceof ByokUnavailableError) {
      return Response.json(
        { error: 'byok_unavailable', message: 'Votre clé API BYOK est configurée mais n\'a pas pu être chargée. Vérifiez vos paramètres dans Réglages > Clés API.' },
        { status: 503 }
      )
    }
    throw err
  }
}