feat: migrate semantic search to pgvector + full-text search
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
Replace JSON-string embeddings with native pgvector(1536) storage and add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion for hybrid keyword + semantic ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search with RRF fusion
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -220,32 +220,30 @@ describe('Data Integrity Tests', () => {
|
||||
expect(parsedLabels).toContain('project')
|
||||
})
|
||||
|
||||
test('should preserve embedding JSON structure', async () => {
|
||||
const embedding = JSON.stringify({
|
||||
vector: [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
model: 'text-embedding-ada-002',
|
||||
timestamp: new Date().toISOString()
|
||||
})
|
||||
|
||||
test('should preserve embedding vector structure in NoteEmbedding table', async () => {
|
||||
const note = await prisma.note.create({
|
||||
data: {
|
||||
title: 'Embedding Test Note',
|
||||
content: 'Note with embedding',
|
||||
embedding,
|
||||
userId: 'test-user-id'
|
||||
}
|
||||
})
|
||||
|
||||
// Verify embedding is preserved and can be parsed
|
||||
const retrieved = await prisma.note.findUnique({
|
||||
where: { id: note.id }
|
||||
})
|
||||
|
||||
expect(retrieved?.embedding).toBeDefined()
|
||||
|
||||
const parsedEmbedding = JSON.parse(retrieved?.embedding || '{}')
|
||||
expect(parsedEmbedding.vector).toEqual([0.1, 0.2, 0.3, 0.4, 0.5])
|
||||
expect(parsedEmbedding.model).toBe('text-embedding-ada-002')
|
||||
|
||||
const vecStr = '[0.1,0.2,0.3,0.4,0.5]'
|
||||
await prisma.$executeRawUnsafe(
|
||||
`INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt")
|
||||
VALUES (gen_random_uuid(), $1, $2::vector(1536), now(), now())`,
|
||||
note.id,
|
||||
vecStr
|
||||
)
|
||||
|
||||
const retrieved: Array<{ noteId: string }> = await prisma.$queryRawUnsafe(
|
||||
`SELECT "noteId" FROM "NoteEmbedding" WHERE "noteId" = $1`,
|
||||
note.id
|
||||
)
|
||||
|
||||
expect(retrieved.length).toBe(1)
|
||||
expect(retrieved[0].noteId).toBe(note.id)
|
||||
})
|
||||
|
||||
test('should preserve links JSON structure', async () => {
|
||||
|
||||
Reference in New Issue
Block a user