feat: migrate semantic search to pgvector + full-text search
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s

Replace JSON-string embeddings with native pgvector(1536) storage and
add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion
for hybrid keyword + semantic ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search, results merged with RRF
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Antigravity
2026-05-12 07:03:56 +00:00
parent 92c3a6f307
commit 03e6a62b80
43 changed files with 4024 additions and 786 deletions

View File

@@ -220,32 +220,30 @@ describe('Data Integrity Tests', () => {
expect(parsedLabels).toContain('project')
})
test('should preserve embedding JSON structure', async () => {
const embedding = JSON.stringify({
vector: [0.1, 0.2, 0.3, 0.4, 0.5],
model: 'text-embedding-ada-002',
timestamp: new Date().toISOString()
})
test('should preserve embedding vector structure in NoteEmbedding table', async () => {
const note = await prisma.note.create({
data: {
title: 'Embedding Test Note',
content: 'Note with embedding',
embedding,
userId: 'test-user-id'
}
})
// Verify embedding is preserved and can be parsed
const retrieved = await prisma.note.findUnique({
where: { id: note.id }
})
expect(retrieved?.embedding).toBeDefined()
const parsedEmbedding = JSON.parse(retrieved?.embedding || '{}')
expect(parsedEmbedding.vector).toEqual([0.1, 0.2, 0.3, 0.4, 0.5])
expect(parsedEmbedding.model).toBe('text-embedding-ada-002')
// pgvector rejects a value whose dimension count differs from the declared
// vector(1536) type, so the fixture has to be a full 1536-element vector
// (a 5-element literal fails the ::vector(1536) cast below).
const embeddingDims = 1536
const vecValues = Array.from({ length: embeddingDims }, (_, i) => ((i % 5) + 1) / 10)
const vecStr = `[${vecValues.join(',')}]`
await prisma.$executeRawUnsafe(
`INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt")
VALUES (gen_random_uuid(), $1, $2::vector(1536), now(), now())`,
note.id,
vecStr
)
// Read the stored dimension count back so the test checks the vector itself,
// not only that a row exists for the note.
const retrieved: Array<{ noteId: string; dims: number }> = await prisma.$queryRawUnsafe(
`SELECT "noteId", vector_dims("embedding") AS dims FROM "NoteEmbedding" WHERE "noteId" = $1`,
note.id
)
expect(retrieved.length).toBe(1)
expect(retrieved[0].noteId).toBe(note.id)
expect(retrieved[0].dims).toBe(embeddingDims)
})
test('should preserve links JSON structure', async () => {