From 3ef59150622cdf8851f84826166031b421083d5f Mon Sep 17 00:00:00 2001 From: Sepehr Ramezani Date: Fri, 17 Apr 2026 22:05:19 +0200 Subject: [PATCH] feat(db): extraction des embeddings + mode WAL + config DB provider-agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Ajout de la table de relation 1-1 NoteEmbedding pour alléger Model Note - Refactor complet des actions IA sémantique et Memory Echo pour utiliser la jointure - Migration propre des 85 embeddings locaux existants - Ajout PRAGMA journal_mode=WAL pour la concurrence au sein de lib/prisma - Ajout npm run db:switch pour configuration auto SQLite / PostgreSQL - Fix du compilateur Turbopack et Next-PWA --- keep-notes/app/actions/notes.ts | 37 +++++++---- .../api/admin/embeddings/validate/route.ts | 8 +-- .../lib/ai/services/memory-echo.service.ts | 22 +++---- .../ai/services/semantic-search.service.ts | 18 +++--- keep-notes/lib/prisma.ts | 6 ++ keep-notes/next.config.ts | 3 + keep-notes/package.json | 1 + keep-notes/prisma/schema.prisma | 12 +++- keep-notes/scripts/migrate-embeddings.ts | 61 +++++++++++++++++++ keep-notes/scripts/switch-db.js | 48 +++++++++++++++ 10 files changed, 180 insertions(+), 36 deletions(-) create mode 100644 keep-notes/scripts/migrate-embeddings.ts create mode 100644 keep-notes/scripts/switch-db.js diff --git a/keep-notes/app/actions/notes.ts b/keep-notes/app/actions/notes.ts index a5e9b73..ae4de6b 100644 --- a/keep-notes/app/actions/notes.ts +++ b/keep-notes/app/actions/notes.ts @@ -349,8 +349,9 @@ async function semanticSearch(query: string, userId: string, notebookId?: string where: { userId: userId, isArchived: false, - ...(notebookId !== undefined ? { notebookId } : {}) // NEW: Filter by notebook (IA5) - } + ...(notebookId !== undefined ? 
{ notebookId } : {}) + }, + include: { noteEmbedding: true } }); const queryLower = query.toLowerCase().trim(); @@ -380,8 +381,8 @@ async function semanticSearch(query: string, userId: string, notebookId?: string // Semantic match (if embedding available) let semanticMatch = false; let similarity = 0; - if (queryEmbedding && note.embedding) { - similarity = cosineSimilarity(queryEmbedding, JSON.parse(note.embedding)); + if (queryEmbedding && note.noteEmbedding?.embedding) { + similarity = cosineSimilarity(queryEmbedding, JSON.parse(note.noteEmbedding.embedding)); semanticMatch = similarity > 0.3; // 30% threshold - works well for related concepts } @@ -450,7 +451,6 @@ export async function createNote(data: { reminder: data.reminder || null, isMarkdown: data.isMarkdown || false, size: data.size || 'small', - embedding: null, // Generated in background autoGenerated: data.autoGenerated || null, notebookId: data.notebookId || null, } @@ -480,9 +480,10 @@ export async function createNote(data: { const provider = getAIProvider(await getSystemConfig()) const embedding = await provider.getEmbeddings(content) if (embedding) { - await prisma.note.update({ - where: { id: noteId }, - data: { embedding: JSON.stringify(embedding) } + await prisma.noteEmbedding.upsert({ + where: { noteId: noteId }, + create: { noteId: noteId, embedding: JSON.stringify(embedding) }, + update: { embedding: JSON.stringify(embedding) } }) } } catch (e) { @@ -579,9 +580,10 @@ export async function updateNote(id: string, data: { const provider = getAIProvider(await getSystemConfig()); const embedding = await provider.getEmbeddings(content); if (embedding) { - await prisma.note.update({ - where: { id: noteId }, - data: { embedding: JSON.stringify(embedding) } + await prisma.noteEmbedding.upsert({ + where: { noteId: noteId }, + create: { noteId: noteId, embedding: JSON.stringify(embedding) }, + update: { embedding: JSON.stringify(embedding) } }) } } catch (e) { @@ -863,14 +865,23 @@ export async 
function syncAllEmbeddings() { const userId = session.user.id; let updatedCount = 0; try { - const notesToSync = await prisma.note.findMany({ where: { userId, embedding: null } }) + const notesToSync = await prisma.note.findMany({ + where: { + userId, + noteEmbedding: { is: null } + } + }) const provider = getAIProvider(await getSystemConfig()); for (const note of notesToSync) { if (!note.content) continue; try { const embedding = await provider.getEmbeddings(note.content); if (embedding) { - await prisma.note.update({ where: { id: note.id }, data: { embedding: JSON.stringify(embedding) } }) + await prisma.noteEmbedding.upsert({ + where: { noteId: note.id }, + create: { noteId: note.id, embedding: JSON.stringify(embedding) }, + update: { embedding: JSON.stringify(embedding) } + }) updatedCount++; } } catch (e) { } diff --git a/keep-notes/app/api/admin/embeddings/validate/route.ts b/keep-notes/app/api/admin/embeddings/validate/route.ts index 1532039..6f765a7 100644 --- a/keep-notes/app/api/admin/embeddings/validate/route.ts +++ b/keep-notes/app/api/admin/embeddings/validate/route.ts @@ -29,7 +29,7 @@ export async function GET() { select: { id: true, title: true, - embedding: true + noteEmbedding: true } }) @@ -45,7 +45,7 @@ export async function GET() { for (const note of allNotes) { // Check if embedding is missing - if (!note.embedding) { + if (!note.noteEmbedding?.embedding) { missingCount++ invalidNotes.push({ id: note.id, @@ -57,8 +57,8 @@ export async function GET() { // Validate embedding try { - if (!note.embedding) continue - const embedding = JSON.parse(note.embedding) as number[] + if (!note.noteEmbedding?.embedding) continue + const embedding = JSON.parse(note.noteEmbedding.embedding) as number[] const validation = validateEmbedding(embedding) if (!validation.valid) { diff --git a/keep-notes/lib/ai/services/memory-echo.service.ts b/keep-notes/lib/ai/services/memory-echo.service.ts index 0aea325..7f00984 100644 --- 
a/keep-notes/lib/ai/services/memory-echo.service.ts +++ b/keep-notes/lib/ai/services/memory-echo.service.ts @@ -61,13 +61,13 @@ export class MemoryEchoService { where: { userId, isArchived: false, - embedding: { not: null } // Only notes with embeddings + noteEmbedding: { isNot: null } // Only notes with embeddings }, select: { id: true, title: true, content: true, - embedding: true, + noteEmbedding: true, createdAt: true }, orderBy: { createdAt: 'desc' } @@ -81,7 +81,7 @@ export class MemoryEchoService { const notesWithEmbeddings = notes .map(note => ({ ...note, - embedding: note.embedding ? JSON.parse(note.embedding) as number[] : null + embedding: note.noteEmbedding?.embedding ? JSON.parse(note.noteEmbedding.embedding) as number[] : null })) .filter(note => note.embedding && Array.isArray(note.embedding)) @@ -367,7 +367,7 @@ Explain in one brief sentence (max 15 words) why these notes are connected. Focu id: true, title: true, content: true, - embedding: true, + noteEmbedding: true, createdAt: true, userId: true } @@ -377,7 +377,7 @@ Explain in one brief sentence (max 15 words) why these notes are connected. Focu return [] // Note not found or doesn't belong to user } - if (!targetNote.embedding) { + if (!targetNote.noteEmbedding) { return [] // Note has no embedding } @@ -408,15 +408,15 @@ Explain in one brief sentence (max 15 words) why these notes are connected. Focu const otherNotes = await prisma.note.findMany({ where: { userId, - id: { not: noteId }, // Exclude the target note + id: { not: noteId }, isArchived: false, - embedding: { not: null } + noteEmbedding: { isNot: null } }, select: { id: true, title: true, content: true, - embedding: true, + noteEmbedding: true, createdAt: true }, orderBy: { createdAt: 'desc' } @@ -427,7 +427,7 @@ Explain in one brief sentence (max 15 words) why these notes are connected. Focu } // Target note embedding (already native Json from PostgreSQL) - const targetEmbedding = targetNote.embedding ? 
JSON.parse(targetNote.embedding) as number[] : null + const targetEmbedding = targetNote.noteEmbedding?.embedding ? JSON.parse(targetNote.noteEmbedding.embedding) as number[] : null if (!targetEmbedding) return [] // Check if user has demo mode enabled @@ -443,9 +443,9 @@ Explain in one brief sentence (max 15 words) why these notes are connected. Focu // Compare target note with all other notes for (const otherNote of otherNotes) { - if (!otherNote.embedding) continue + if (!otherNote.noteEmbedding) continue - const otherEmbedding = otherNote.embedding ? JSON.parse(otherNote.embedding) as number[] : null + const otherEmbedding = otherNote.noteEmbedding?.embedding ? JSON.parse(otherNote.noteEmbedding.embedding) as number[] : null if (!otherEmbedding) continue // Check if this connection was dismissed diff --git a/keep-notes/lib/ai/services/semantic-search.service.ts b/keep-notes/lib/ai/services/semantic-search.service.ts index c763af9..51e1703 100644 --- a/keep-notes/lib/ai/services/semantic-search.service.ts +++ b/keep-notes/lib/ai/services/semantic-search.service.ts @@ -177,12 +177,12 @@ export class SemanticSearchService { const notes = await prisma.note.findMany({ where: { ...(userId ? { userId } : {}), - ...(notebookId !== undefined ? { notebookId } : {}), // NEW: Notebook filter - embedding: { not: null } + ...(notebookId !== undefined ? { notebookId } : {}), + noteEmbedding: { isNot: null } }, select: { id: true, - embedding: true + noteEmbedding: true } }) @@ -192,7 +192,7 @@ export class SemanticSearchService { // Calculate similarities for all notes const similarities = notes.map(note => { - const noteEmbedding = note.embedding ? JSON.parse(note.embedding) as number[] : [] + const noteEmbedding = note.noteEmbedding?.embedding ? 
JSON.parse(note.noteEmbedding.embedding) as number[] : [] const similarity = embeddingService.calculateCosineSimilarity( queryEmbedding, noteEmbedding @@ -273,7 +273,7 @@ export class SemanticSearchService { try { const note = await prisma.note.findUnique({ where: { id: noteId }, - select: { content: true, embedding: true, lastAiAnalysis: true } + select: { content: true, noteEmbedding: true, lastAiAnalysis: true } }) if (!note) { @@ -283,7 +283,7 @@ export class SemanticSearchService { // Check if embedding needs regeneration const shouldRegenerate = embeddingService.shouldRegenerateEmbedding( note.content, - note.embedding as any, + note.noteEmbedding?.embedding as any, note.lastAiAnalysis ) @@ -295,10 +295,14 @@ export class SemanticSearchService { const { embedding } = await embeddingService.generateEmbedding(note.content) // Save to database + await prisma.noteEmbedding.upsert({ + where: { noteId: noteId }, + create: { noteId: noteId, embedding: embeddingService.serialize(embedding) as any }, + update: { embedding: embeddingService.serialize(embedding) as any } + }) await prisma.note.update({ where: { id: noteId }, data: { - embedding: embeddingService.serialize(embedding) as any, lastAiAnalysis: new Date() } }) diff --git a/keep-notes/lib/prisma.ts b/keep-notes/lib/prisma.ts index 833e829..7ec0c4f 100644 --- a/keep-notes/lib/prisma.ts +++ b/keep-notes/lib/prisma.ts @@ -17,6 +17,12 @@ declare const globalThis: { const prisma = globalThis.prismaGlobal ?? 
prismaClientSingleton()
+// Enable WAL mode for SQLite to improve concurrent read/write performance. Prisma SQLite URLs use the `file:` scheme, so test for that prefix; the two original 'sqlite' substring checks are kept for backward compatibility.
+if (process.env.DATABASE_URL?.startsWith('file:') || process.env.DATABASE_URL?.includes('sqlite') || prismaClientSingleton.toString().includes('sqlite')) {
+  // Fire-and-forget: `PRAGMA journal_mode` returns a row, which $executeRawUnsafe rejects on SQLite, so it goes through $queryRawUnsafe. WAL mode is stored in the database file, so setting it once at startup is enough.
+  prisma.$queryRawUnsafe('PRAGMA journal_mode = WAL;').catch(console.error)
+}
+
 export { prisma } export default prisma diff --git a/keep-notes/next.config.ts b/keep-notes/next.config.ts index 88c4d40..8a317a6 100644 --- a/keep-notes/next.config.ts +++ b/keep-notes/next.config.ts @@ -21,6 +21,9 @@ const nextConfig: NextConfig = { // Hide the "compiling" indicator devIndicators: false, + + // Silence warning from Next-PWA custom webpack injections + turbopack: {}, }; export default withPWA(nextConfig); diff --git a/keep-notes/package.json b/keep-notes/package.json index 827e126..996d6e4 100644 --- a/keep-notes/package.json +++ b/keep-notes/package.json @@ -12,6 +12,7 @@ "db:push": "prisma db push", "db:studio": "prisma studio", "db:reset": "prisma migrate reset", + "db:switch": "node scripts/switch-db.js", "test": "playwright test", "test:ui": "playwright test --ui", "test:headed": "playwright test --headed", diff --git a/keep-notes/prisma/schema.prisma b/keep-notes/prisma/schema.prisma index 33f160e..991d7fe 100644 --- a/keep-notes/prisma/schema.prisma +++ b/keep-notes/prisma/schema.prisma @@ -123,7 +123,6 @@ model Note { reminderLocation String? isMarkdown Boolean @default(false) size String @default("small") - embedding String? sharedWith String? userId String? order Int @default(0) @@ -144,6 +143,7 @@ model Note { user User? @relation(fields: [userId], references: [id], onDelete: Cascade) shares NoteShare[] labelRelations Label[] @relation("LabelToNote") + noteEmbedding NoteEmbedding? 
@@index([isPinned]) @@index([isArchived]) @@ -241,3 +241,13 @@ model UserAISettings { @@index([memoryEchoFrequency]) @@index([preferredLanguage]) } + +model NoteEmbedding { + id String @id @default(cuid()) + noteId String @unique + embedding String + createdAt DateTime @default(now()) + note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) + + @@index([noteId]) +}
diff --git a/keep-notes/scripts/migrate-embeddings.ts b/keep-notes/scripts/migrate-embeddings.ts
new file mode 100644
index 0000000..a2a5cb0
--- /dev/null
+++ b/keep-notes/scripts/migrate-embeddings.ts
@@ -0,0 +1,74 @@
+// scripts/migrate-embeddings.ts
+//
+// One-off migration: copies every non-null Note.embedding value into the
+// NoteEmbedding table (one row per note, keyed by noteId).
+//
+// NOTE(review): this reads Note.embedding, which the schema change shipped with
+// this script removes. It presumably has to run against a generated client and
+// database that still expose that column — confirm the intended run order.
+const { PrismaClient } = require('../prisma/client-generated')
+
+const prisma = new PrismaClient({
+  datasources: {
+    db: {
+      url: process.env.DATABASE_URL || "file:../prisma/dev.db"
+    }
+  }
+})
+
+/**
+ * Upserts a NoteEmbedding row for each note that still has an embedding.
+ * Upsert keeps the script safe to re-run: existing rows are overwritten.
+ */
+async function main() {
+  console.log("Fetching notes with embeddings...")
+  const notes = await prisma.note.findMany({
+    where: {
+      embedding: { not: null }
+    },
+    select: {
+      id: true,
+      embedding: true
+    }
+  })
+
+  console.log(`Found ${notes.length} notes with an embedding.`)
+
+  if (notes.length === 0) {
+    console.log("Nothing to migrate.")
+    return
+  }
+
+  let count = 0
+  for (const note of notes) {
+    if (!note.embedding) continue
+
+    await prisma.noteEmbedding.upsert({
+      where: { noteId: note.id },
+      create: {
+        noteId: note.id,
+        embedding: note.embedding
+      },
+      update: {
+        embedding: note.embedding
+      }
+    })
+    count++
+    if (count % 10 === 0) {
+      console.log(`Migrated ${count}/${notes.length}...`)
+    }
+  }
+
+  console.log(`✅ Successfully migrated ${count} note embeddings to the NoteEmbedding table.`)
+}
+
+main()
+  .catch((e) => {
+    console.error("Migration failed:", e)
+    // Set the exit code instead of calling process.exit(1): exiting here would
+    // terminate the process before the `finally` handler can disconnect.
+    process.exitCode = 1
+  })
+  .finally(async () => {
+    await prisma.$disconnect()
+  })
diff --git a/keep-notes/scripts/switch-db.js b/keep-notes/scripts/switch-db.js new file mode 100644 index 0000000..03e0b85 --- /dev/null +++ b/keep-notes/scripts/switch-db.js @@ -0,0 +1,48 
@@
+// scripts/switch-db.js
+// Switches the Prisma datasource between SQLite and PostgreSQL by rewriting the
+// provider in prisma/schema.prisma and the DATABASE_URL entry in .env.
+// Usage: node scripts/switch-db.js [sqlite|postgresql]
+const fs = require('fs')
+const path = require('path')
+
+const envPath = path.join(__dirname, '..', '.env')
+const schemaPath = path.join(__dirname, '..', 'prisma', 'schema.prisma')
+
+// Default connection string written to .env for each supported provider
+const DATABASE_URLS = {
+  sqlite: 'file:./dev.db',
+  postgresql: 'postgresql://postgres:postgres@localhost:5432/keep_notes?schema=public'
+}
+
+const target = process.argv[2]
+if (!Object.prototype.hasOwnProperty.call(DATABASE_URLS, target)) {
+  console.error("Usage: node scripts/switch-db.js [sqlite|postgresql]")
+  process.exit(1)
+}
+
+// 1. Update schema.prisma: swap only the provider value inside `datasource db { ... }`
+const providerPattern = /(datasource\s+db\s*\{[^}]*?\bprovider\s*=\s*")[^"]+(")/
+const schemaContent = fs.readFileSync(schemaPath, 'utf8')
+if (!providerPattern.test(schemaContent)) {
+  // Fail loudly instead of reporting success on an unchanged schema
+  console.error(`Could not find a "datasource db" provider in ${schemaPath}`)
+  process.exit(1)
+}
+fs.writeFileSync(schemaPath, schemaContent.replace(providerPattern, `$1${target}$2`))
+
+// 2. Update .env: replace the existing DATABASE_URL line, or append one
+const envLine = `DATABASE_URL="${DATABASE_URLS[target]}"`
+let envContent = fs.existsSync(envPath) ? fs.readFileSync(envPath, 'utf8') : ''
+if (/^DATABASE_URL=.*$/m.test(envContent)) {
+  // Function replacer so `$` in a URL is never read as a replacement pattern
+  envContent = envContent.replace(/^DATABASE_URL=.*$/m, () => envLine)
+} else {
+  // Only add a separating newline when the file does not already end with one
+  envContent += `${envContent === '' || envContent.endsWith('\n') ? '' : '\n'}${envLine}\n`
+}
+fs.writeFileSync(envPath, envContent)
+
+console.log(`✅ Successfully switched database provider to ${target}`)
+console.log('You should now run:')
+console.log(' npx prisma generate')
+console.log(' npx prisma db push')