feat: migrate semantic search to pgvector + full-text search
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s

Replace JSON-string embeddings with native vector(1536) storage (pgvector
extension) and add PostgreSQL full-text search (tsvector/GIN). Keyword and
semantic results are merged with Reciprocal Rank Fusion (RRF) for hybrid ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search, merged via Reciprocal Rank Fusion (RRF)
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Antigravity
2026-05-12 07:03:56 +00:00
parent 92c3a6f307
commit 03e6a62b80
43 changed files with 4024 additions and 786 deletions

View File

@@ -1,7 +1,7 @@
import { NextRequest, NextResponse } from 'next/server'
import { auth } from '@/auth'
import { prisma } from '@/lib/prisma'
import { EmbeddingService } from '@/lib/ai/services/embedding.service'
import { semanticSearchService } from '@/lib/ai/services/semantic-search.service'
export async function POST(req: NextRequest) {
try {
@@ -12,41 +12,31 @@ export async function POST(req: NextRequest) {
const userId = session.user.id
// Fetch all notes for the user
const notes = await prisma.note.findMany({
where: { userId, trashedAt: null },
select: { id: true, title: true, content: true }
select: { id: true }
})
const embeddingService = new EmbeddingService()
let processedCount = 0
let failedCount = 0
const BATCH_SIZE = 20
// Process in small batches to avoid timeouts if possible
// Note: In a real production app, this should be a background job
for (const note of notes) {
try {
const textToEmbed = `${note.title || ''}\n${note.content}`
if (textToEmbed.trim()) {
const embedding = await embeddingService.generateEmbedding(textToEmbed)
await prisma.noteEmbedding.upsert({
where: { noteId: note.id },
update: { embedding: JSON.stringify(embedding) },
create: {
noteId: note.id,
embedding: JSON.stringify(embedding)
}
})
processedCount++
}
} catch (err) {
console.error(`Failed to reindex note ${note.id}:`, err)
for (let i = 0; i < notes.length; i += BATCH_SIZE) {
const batch = notes.slice(i, i + BATCH_SIZE)
const results = await Promise.allSettled(
batch.map(note => semanticSearchService.indexNote(note.id))
)
for (const r of results) {
if (r.status === 'fulfilled') processedCount++
else failedCount++
}
}
return NextResponse.json({
success: true,
count: processedCount,
failed: failedCount,
total: notes.length
})
} catch (error) {