feat: résumé progressif chat — compression automatique du contexte
All checks were successful
CI / Lint, Unit Tests & Build (push) Successful in 7m0s
CI / Deploy production (on server) (push) Successful in 1m28s

- Migration: champs summary + summaryUpTo sur Conversation
- Compression à partir de 8 messages dans la conversation (garde les 4 derniers intacts)
- Résumé régénéré s'il n'existe pas encore ou dès que la conversation atteint 12 messages (seuil de 8 + 4)
- Fallback gracieux: si la génération échoue, envoie tout le contexte
- getChatProvider import ajouté
- i18n non requis (optimisation backend)
This commit is contained in:
Antigravity
2026-06-19 19:54:02 +00:00
parent a4238dc204
commit 5b9930b02e
3 changed files with 61 additions and 13 deletions

View File

@@ -2,6 +2,7 @@ import { streamText, UIMessage, stepCountIs } from 'ai'
import { resolveAiRouteWithTiming, formatAiRouteDebug } from '@/lib/ai/router'
import { runLaneWithBillingUser, willUseByokForLane } from '@/lib/ai/provider-for-user'
import { getSystemConfig } from '@/lib/config'
import { getChatProvider } from '@/lib/ai/factory'
import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
import { prisma } from '@/lib/prisma'
import { auth } from '@/auth'
@@ -96,11 +97,11 @@ export async function POST(req: Request) {
const incomingMessages = toCoreMessages(rawMessages)
// 3. Manage conversation
let conversation: { id: string; messages: Array<{ role: string; content: string }> }
let conversation: { id: string; summary: string | null; messages: Array<{ id: string | null; role: string; content: string }> }
if (conversationId) {
const existing = await prisma.conversation.findUnique({
where: { id: conversationId, userId },
include: { messages: { orderBy: { createdAt: 'asc' } } },
include: { messages: { orderBy: { createdAt: 'asc' }, select: { id: true, role: true, content: true } } },
})
if (!existing) return new Response('Conversation not found', { status: 404 })
conversation = existing
@@ -112,10 +113,52 @@ export async function POST(req: Request) {
notebookId: notebookId || null,
title: userMessage.substring(0, 50) + (userMessage.length > 50 ? '...' : ''),
},
include: { messages: true },
include: { messages: { select: { id: true, role: true, content: true } } },
})
}
// 3.5 Progressive summary — compress old messages to save tokens
const SUMMARY_THRESHOLD = 8
const RECENT_KEEP = 4
let messagesForModel = incomingMessages
if (conversationId && conversation.messages.length >= SUMMARY_THRESHOLD) {
try {
const sysConfig = await getSystemConfig()
const shouldRegenerate = !conversation.summary || conversation.messages.length >= SUMMARY_THRESHOLD + 4
if (shouldRegenerate) {
const oldMessages = conversation.messages.slice(0, conversation.messages.length - RECENT_KEEP)
const existingSummary = conversation.summary
? `Résumé actuel:\n${conversation.summary}\n\nNouveaux échanges à intégrer:\n`
: ''
const summaryPrompt = `${existingSummary}${oldMessages.map(m => `${m.role}: ${m.content.slice(0, 300)}`).join('\n')}
Fais un résumé concis (max 200 mots) de cette conversation. Garde les informations clés, décisions, et contexte important.`
const provider = getChatProvider(sysConfig)
const newSummary = await provider.generateText(summaryPrompt)
await prisma.conversation.update({
where: { id: conversation.id },
data: { summary: newSummary.slice(0, 1000) },
})
messagesForModel = [
{ role: 'system' as const, content: `Contexte de la conversation précédente:\n${newSummary.slice(0, 1000)}` },
...incomingMessages.slice(-RECENT_KEEP),
]
} else if (conversation.summary) {
messagesForModel = [
{ role: 'system' as const, content: `Contexte de la conversation précédente:\n${conversation.summary}` },
...incomingMessages.slice(-RECENT_KEEP),
]
}
} catch (e) {
console.error('[Chat] Summary generation failed, using full context:', e)
}
}
// 4. RAG retrieval
const currentMessage = incomingMessages[incomingMessages.length - 1]?.content || ''
const lang = (language || 'en') as SupportedLanguage
@@ -426,7 +469,7 @@ Focus ONLY on this note unless asked otherwise.`
streamText({
model: provider.getModel(),
system: systemPrompt,
messages: incomingMessages,
messages: messagesForModel,
tools: chatTools,
toolChoice: wantsChart && chatTools.insert_chart ? { type: 'tool', toolName: 'insert_chart' } : undefined,
stopWhen: stepCountIs(5),