feat: résumé progressif chat — compression automatique du contexte

- Migration: champs summary + summaryUpTo sur Conversation
- Compression tous les 8 messages (garde les 4 derniers intacts)
- Résumé régénéré tous les 4 nouveaux messages
- Fallback gracieux: si la génération échoue, envoie tout le contexte
- getChatProvider import ajouté
- i18n non requis (optimisation backend)
2026-06-19 19:54:02 +00:00
parent a4238dc204
commit 5b9930b02e
3 changed files with 61 additions and 13 deletions
--- a/memento-note/app/api/chat/route.ts
+++ b/memento-note/app/api/chat/route.ts
@@ -2,6 +2,7 @@ import { streamText, UIMessage, stepCountIs } from 'ai'
 import { resolveAiRouteWithTiming, formatAiRouteDebug } from '@/lib/ai/router'
 import { runLaneWithBillingUser, willUseByokForLane } from '@/lib/ai/provider-for-user'
 import { getSystemConfig } from '@/lib/config'
+import { getChatProvider } from '@/lib/ai/factory'
 import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
 import { prisma } from '@/lib/prisma'
 import { auth } from '@/auth'
@@ -96,11 +97,11 @@ export async function POST(req: Request) {
  const incomingMessages = toCoreMessages(rawMessages)

  // 3. Manage conversation
-  let conversation: { id: string; messages: Array<{ role: string; content: string }> }
+  let conversation: { id: string; summary: string | null; messages: Array<{ id: string | null; role: string; content: string }> }
  if (conversationId) {
    const existing = await prisma.conversation.findUnique({
      where: { id: conversationId, userId },
-      include: { messages: { orderBy: { createdAt: 'asc' } } },
+      include: { messages: { orderBy: { createdAt: 'asc' }, select: { id: true, role: true, content: true } } },
    })
    if (!existing) return new Response('Conversation not found', { status: 404 })
    conversation = existing
@@ -112,10 +113,52 @@ export async function POST(req: Request) {
        notebookId: notebookId || null,
        title: userMessage.substring(0, 50) + (userMessage.length > 50 ? '...' : ''),
      },
-      include: { messages: true },
+      include: { messages: { select: { id: true, role: true, content: true } } },
    })
  }

+  // 3.5 Progressive summary — compress old messages to save tokens
+  const SUMMARY_THRESHOLD = 8
+  const RECENT_KEEP = 4
+  let messagesForModel = incomingMessages
+
+  if (conversationId && conversation.messages.length >= SUMMARY_THRESHOLD) {
+    try {
+      const sysConfig = await getSystemConfig()
+      const shouldRegenerate = !conversation.summary || conversation.messages.length >= SUMMARY_THRESHOLD + 4
+
+      if (shouldRegenerate) {
+        const oldMessages = conversation.messages.slice(0, conversation.messages.length - RECENT_KEEP)
+        const existingSummary = conversation.summary
+          ? `Résumé actuel:\n${conversation.summary}\n\nNouveaux échanges à intégrer:\n`
+          : ''
+
+        const summaryPrompt = `${existingSummary}${oldMessages.map(m => `${m.role}: ${m.content.slice(0, 300)}`).join('\n')}
+
+Fais un résumé concis (max 200 mots) de cette conversation. Garde les informations clés, décisions, et contexte important.`
+        const provider = getChatProvider(sysConfig)
+        const newSummary = await provider.generateText(summaryPrompt)
+
+        await prisma.conversation.update({
+          where: { id: conversation.id },
+          data: { summary: newSummary.slice(0, 1000) },
+        })
+
+        messagesForModel = [
+          { role: 'system' as const, content: `Contexte de la conversation précédente:\n${newSummary.slice(0, 1000)}` },
+          ...incomingMessages.slice(-RECENT_KEEP),
+        ]
+      } else if (conversation.summary) {
+        messagesForModel = [
+          { role: 'system' as const, content: `Contexte de la conversation précédente:\n${conversation.summary}` },
+          ...incomingMessages.slice(-RECENT_KEEP),
+        ]
+      }
+    } catch (e) {
+      console.error('[Chat] Summary generation failed, using full context:', e)
+    }
+  }
+
  // 4. RAG retrieval
  const currentMessage = incomingMessages[incomingMessages.length - 1]?.content || ''
  const lang = (language || 'en') as SupportedLanguage
@@ -426,7 +469,7 @@ Focus ONLY on this note unless asked otherwise.`
        streamText({
          model: provider.getModel(),
          system: systemPrompt,
-          messages: incomingMessages,
+          messages: messagesForModel,
          tools: chatTools,
          toolChoice: wantsChart && chatTools.insert_chart ? { type: 'tool', toolName: 'insert_chart' } : undefined,
          stopWhen: stepCountIs(5),