feat: migrate semantic search to pgvector + full-text search
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
Replace JSON-string embeddings with native pgvector(1536) storage and add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion for hybrid keyword + semantic ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search with RRF fusion
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
import { auth } from '@/auth'
|
||||
import { prisma } from '@/lib/prisma'
|
||||
import { EmbeddingService } from '@/lib/ai/services/embedding.service'
|
||||
import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
|
||||
|
||||
export async function POST(req: NextRequest) {
|
||||
try {
|
||||
@@ -12,41 +12,31 @@ export async function POST(req: NextRequest) {
|
||||
|
||||
const userId = session.user.id
|
||||
|
||||
// Fetch all notes for the user
|
||||
const notes = await prisma.note.findMany({
|
||||
where: { userId, trashedAt: null },
|
||||
select: { id: true, title: true, content: true }
|
||||
select: { id: true }
|
||||
})
|
||||
|
||||
const embeddingService = new EmbeddingService()
|
||||
let processedCount = 0
|
||||
let failedCount = 0
|
||||
const BATCH_SIZE = 20
|
||||
|
||||
// Process in small batches to avoid timeouts if possible
|
||||
// Note: In a real production app, this should be a background job
|
||||
for (const note of notes) {
|
||||
try {
|
||||
const textToEmbed = `${note.title || ''}\n${note.content}`
|
||||
if (textToEmbed.trim()) {
|
||||
const embedding = await embeddingService.generateEmbedding(textToEmbed)
|
||||
|
||||
await prisma.noteEmbedding.upsert({
|
||||
where: { noteId: note.id },
|
||||
update: { embedding: JSON.stringify(embedding) },
|
||||
create: {
|
||||
noteId: note.id,
|
||||
embedding: JSON.stringify(embedding)
|
||||
}
|
||||
})
|
||||
processedCount++
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`Failed to reindex note ${note.id}:`, err)
|
||||
for (let i = 0; i < notes.length; i += BATCH_SIZE) {
|
||||
const batch = notes.slice(i, i + BATCH_SIZE)
|
||||
const results = await Promise.allSettled(
|
||||
batch.map(note => semanticSearchService.indexNote(note.id))
|
||||
)
|
||||
|
||||
for (const r of results) {
|
||||
if (r.status === 'fulfilled') processedCount++
|
||||
else failedCount++
|
||||
}
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
count: processedCount,
|
||||
failed: failedCount,
|
||||
total: notes.length
|
||||
})
|
||||
} catch (error) {
|
||||
|
||||
Reference in New Issue
Block a user