import { streamText, UIMessage, stepCountIs } from 'ai' import { resolveAiRouteWithTiming, formatAiRouteDebug } from '@/lib/ai/router' import { runLaneWithBillingUser, willUseByokForLane } from '@/lib/ai/provider-for-user' import { getSystemConfig } from '@/lib/config' import { getChatProvider } from '@/lib/ai/factory' import { semanticSearchService } from '@/lib/ai/services/semantic-search.service' import { prisma } from '@/lib/prisma' import { auth } from '@/auth' import { hasUserAiConsent } from '@/lib/consent/server-consent' import { loadTranslations, getTranslationValue, SupportedLanguage } from '@/lib/i18n' import { toolRegistry } from '@/lib/ai/tools' import { reserveUsageOrThrow, QuotaExceededError, QuotaServiceUnavailableError } from '@/lib/entitlements' import { ByokUnavailableError } from '@/lib/byok' import { trackFeatureUsage } from '@/lib/usage-tracker' import { readFile } from 'fs/promises' import path from 'path' import { logAuditEvent, getClientIp } from '@/lib/audit-log' export const maxDuration = 60 /** * Extract text content from a UIMessage's parts array. */ function extractTextFromUIMessage(msg: { parts?: Array<{ type: string; text?: string }>; content?: string }): string { if (typeof msg.content === 'string') return msg.content if (msg.parts && Array.isArray(msg.parts)) { return msg.parts .filter((p) => p.type === 'text' && typeof p.text === 'string') .map((p) => p.text!) .join('') } return '' } /** * Convert an array of UIMessages (from the client) to CoreMessage[] for streamText. */ function toCoreMessages(uiMessages: UIMessage[]): Array<{ role: 'user' | 'assistant'; content: string }> { return uiMessages .filter((m) => m.role === 'user' || m.role === 'assistant') .map((m) => ({ role: m.role as 'user' | 'assistant', content: extractTextFromUIMessage(m), })) .filter((m) => m.content.length > 0) } export async function POST(req: Request) { // 1. Auth check const session = await auth() if (!session?.user?.id) { return new Response('Unauthorized', { status: 401 }) } const userId = session.user.id // GDPR AI Consent check if (!(await hasUserAiConsent())) { return new Response(JSON.stringify({ error: 'ai_consent_required' }), { status: 403, headers: { 'Content-Type': 'application/json' }, }) } // 1.5 Quota check (per-provider BYOK bypass — only when BYOK will be used for resolved provider) try { const sysConfigEarly = await getSystemConfig() const { usedByok: willUseByok } = await willUseByokForLane('chat', sysConfigEarly, userId) if (!willUseByok) { await reserveUsageOrThrow(userId, 'chat') } } catch (err) { if (err instanceof QuotaExceededError) { return Response.json(err.toJSON(), { status: 402 }) } if (err instanceof ByokUnavailableError) { return Response.json( { error: 'byok_unavailable', message: 'Votre clé API BYOK est configurée mais n\'a pas pu être chargée. Vérifiez vos paramètres BYOK.' }, { status: 503 } ) } if (err instanceof QuotaServiceUnavailableError) { return Response.json({ error: err.code }, { status: 503 }) } if (process.env.NODE_ENV !== 'production') { console.error('[chat] Quota check error (fail-open):', err) } else { console.error('[chat] Quota check error:', err) return Response.json({ error: 'QUOTA_SERVICE_UNAVAILABLE' }, { status: 503 }) } } // 2. Parse request body const body = await req.json() const { messages: rawMessages, conversationId, notebookId, language, webSearch, noteContext, format, noteId } = body as { messages: UIMessage[] conversationId?: string notebookId?: string language?: string webSearch?: boolean noteContext?: { title: string; content: string; tone: string; images?: string[] } format?: 'html' | 'markdown' noteId?: string } const incomingMessages = toCoreMessages(rawMessages) // 3. Manage conversation let conversation: { id: string; summary: string | null; messages: Array<{ id: string | null; role: string; content: string }> } if (conversationId) { const existing = await prisma.conversation.findUnique({ where: { id: conversationId, userId }, include: { messages: { orderBy: { createdAt: 'asc' }, select: { id: true, role: true, content: true } } }, }) if (!existing) return new Response('Conversation not found', { status: 404 }) conversation = existing } else { const userMessage = incomingMessages[incomingMessages.length - 1]?.content || 'New conversation' conversation = await prisma.conversation.create({ data: { userId, notebookId: notebookId || null, title: userMessage.substring(0, 50) + (userMessage.length > 50 ? '...' : ''), }, include: { messages: { select: { id: true, role: true, content: true } } }, }) } // 3.5 Progressive summary — compress old messages to save tokens const SUMMARY_THRESHOLD = 8 const RECENT_KEEP = 4 let messagesForModel = incomingMessages if (conversationId && conversation.messages.length >= SUMMARY_THRESHOLD) { try { const sysConfig = await getSystemConfig() const shouldRegenerate = !conversation.summary || conversation.messages.length >= SUMMARY_THRESHOLD + 4 if (shouldRegenerate) { const oldMessages = conversation.messages.slice(0, conversation.messages.length - RECENT_KEEP) const existingSummary = conversation.summary ? `Résumé actuel:\n${conversation.summary}\n\nNouveaux échanges à intégrer:\n` : '' const summaryPrompt = `${existingSummary}${oldMessages.map(m => `${m.role}: ${m.content.slice(0, 300)}`).join('\n')} Fais un résumé concis (max 200 mots) de cette conversation. Garde les informations clés, décisions, et contexte important.` const provider = getChatProvider(sysConfig) const newSummary = await provider.generateText(summaryPrompt) await prisma.conversation.update({ where: { id: conversation.id }, data: { summary: newSummary.slice(0, 1000) }, }) messagesForModel = [ { role: 'system' as const, content: `Contexte de la conversation précédente:\n${newSummary.slice(0, 1000)}` }, ...incomingMessages.slice(-RECENT_KEEP), ] } else if (conversation.summary) { messagesForModel = [ { role: 'system' as const, content: `Contexte de la conversation précédente:\n${conversation.summary}` }, ...incomingMessages.slice(-RECENT_KEEP), ] } } catch (e) { console.error('[Chat] Summary generation failed, using full context:', e) } } // 4. RAG retrieval const currentMessage = incomingMessages[incomingMessages.length - 1]?.content || '' const lang = (language || 'en') as SupportedLanguage const translations = await loadTranslations(lang) const untitledText = getTranslationValue(translations, 'notes.untitled') || 'Untitled' let notebookContext = '' let searchNotes = '' if (!noteContext) { if (notebookId) { const notebookNotes = await prisma.note.findMany({ where: { notebookId, userId, trashedAt: null }, orderBy: { updatedAt: 'desc' }, take: 20, select: { id: true, title: true, content: true, updatedAt: true }, }) if (notebookNotes.length > 0) { notebookContext = notebookNotes .map(n => `NOTE [${n.title || untitledText}] (updated ${n.updatedAt.toLocaleDateString()}):\n${(n.content || '').substring(0, 1500)}`) .join('\n\n---\n\n') } } let searchResults: any[] = [] try { const documentMention = currentMessage.match( /\b(pdf|document|fichier|pi[eè]ce jointe|attachment|file)\b/i ) if (documentMention) { const docResults = await semanticSearchService.searchWithDocuments( userId, currentMessage, { notebookId, limit: notebookId ? 10 : 5, threshold: notebookId ? 0.3 : 0.5, includeDocuments: true, defaultTitle: untitledText, } ) searchResults = docResults } else { searchResults = await semanticSearchService.search(currentMessage, { notebookId, limit: notebookId ? 10 : 5, threshold: notebookId ? 0.3 : 0.5, defaultTitle: untitledText, }) } } catch {} searchNotes = searchResults .map((r) => { if ((r as any).source === 'document') { return `DOCUMENT [${(r as any).fileName} p.${(r as any).pageNumber}] (from note: ${r.title || untitledText}):\n${r.content}` } return `NOTE [${r.title || untitledText}]: ${r.content}` }) .join('\n\n---\n\n') } else if (noteId) { try { const docResults = await semanticSearchService.searchWithDocuments( userId, currentMessage, { noteId, limit: 8, threshold: 0.3, includeDocuments: true, defaultTitle: untitledText, } ) searchNotes = docResults .map((r) => { if ((r as any).source === 'document') { return `DOCUMENT [${(r as any).fileName} p.${(r as any).pageNumber}]:\n${r.content}` } return '' }) .filter(Boolean) .join('\n\n---\n\n') } catch {} } const contextNotes = [notebookContext, searchNotes].filter(Boolean).join('\n\n---\n\n') // 5. System prompt synthesis const promptLang: Record = { en: { contextWithNotes: `## User's notes & documents\n\n${contextNotes}\n\nWhen using info from the notes above, cite the source note title in parentheses, e.g.: "Deployment is done via Docker (💻 Development Guide)". For document passages, cite the filename and page number, e.g.: "The revenue was $5M (📄 report.pdf p.12)". Don't copy word for word — rephrase. If the notes don't cover the topic, say so and supplement with your general knowledge.`, contextNoNotes: "No relevant notes found for this question. Answer with your general knowledge.", system: `You are the AI assistant of Memento. The user asks you questions about their projects, technical docs, and notes. You must respond in a structured and helpful way. ## Format rules - ${format === 'html' ? `Respond MANDATORILY using valid HTML fragments (e.g.,

, , ,