All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
Replace JSON-string embeddings with native pgvector(1536) storage and add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion for hybrid keyword + semantic ranking. Changes: - NoteEmbedding.embedding: String → vector(1536) via pgvector - NoteEmbedding: added updatedAt for reindex tracking - Note: added tsv (tsvector) with auto-update trigger for FTS - semantic-search.service: hybrid FTS + vector search with RRF fusion - embedding.service: toVectorString() for pgvector SQL literals - Removed JS-side cosine similarity loops (now DB-side via <=>) - Added HNSW index on NoteEmbedding.embedding (cosine distance) - Added GIN index on Note.tsv for FTS queries Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/ Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
70 lines
1.7 KiB
TypeScript
70 lines
1.7 KiB
TypeScript
// scripts/migrate-embeddings.ts
// Creates placeholder NoteEmbedding rows (pgvector format) for all notes that lack one.
// Real embeddings are populated afterwards by running /api/notes/reindex.
// Run with: npx tsx scripts/migrate-embeddings.ts
|
|
|
|
const { PrismaClient } = require('../node_modules/.prisma/client')
|
|
|
|
// Prisma client for this script; the `db` datasource URL is taken explicitly
// from the DATABASE_URL environment variable.
const prisma = new PrismaClient({
  datasources: { db: { url: process.env.DATABASE_URL } }
})
|
|
|
|
/**
 * Creates a placeholder NoteEmbedding row for every non-trashed note that has
 * content but no embedding row yet.
 *
 * Notes with empty content are skipped. A failed insert is logged and counted
 * but does not abort the run; an error from the initial query propagates to
 * the caller. The placeholders carry no semantic information: real embeddings
 * are expected to be written later by /api/notes/reindex.
 *
 * @returns {Promise<void>} Resolves once every candidate note was processed.
 */
async function main() {
  // pgvector refuses to cast a literal whose length differs from the declared
  // dimension ('[0]'::vector(1536) fails with "expected 1536 dimensions, not 1"),
  // so the placeholder has to spell out all 1536 components.
  const dimensions = 1536
  const zeroVector = `[${new Array(dimensions).fill(0).join(',')}]`

  // Built once: the vector literal is a local constant, and the only value
  // that varies per row (the note id) is bound as parameter $1.
  // NOTE(review): an all-zero vector has no meaningful cosine distance;
  // confirm that search ignores rows which have not been reindexed yet.
  const insertSql = `INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt")
       VALUES (gen_random_uuid(), $1, '${zeroVector}'::vector(${dimensions}), now(), now())
       ON CONFLICT ("noteId") DO NOTHING`

  console.log('Fetching notes without embeddings...')
  const notes = await prisma.note.findMany({
    where: {
      trashedAt: null,
      noteEmbedding: { is: null }
    },
    // Only the id is written; content is read solely to skip empty notes.
    select: {
      id: true,
      content: true
    }
  })

  console.log(`Found ${notes.length} notes without an embedding.`)

  if (notes.length === 0) {
    console.log('Nothing to migrate.')
    return
  }

  let count = 0
  let failed = 0
  let skipped = 0
  for (const note of notes) {
    if (!note.content) {
      skipped++
      continue
    }
    try {
      // ON CONFLICT keeps the script safe to re-run and tolerant of a row
      // that appeared after the query above.
      await prisma.$executeRawUnsafe(insertSql, note.id)
      count++
      if (count % 10 === 0) {
        console.log(`Placeholder for ${count}/${notes.length}...`)
      }
    } catch (e) {
      failed++
      // Anything can be thrown, so do not assume an Error instance.
      const reason = e instanceof Error ? e.message : String(e)
      console.error(`Failed for note ${note.id}:`, reason)
    }
  }

  if (skipped > 0) {
    console.log(`Skipped ${skipped} notes with empty content.`)
  }
  console.log(`Created ${count} embedding placeholders (${failed} failed).`)
  console.log('Run /api/notes/reindex to populate with real embeddings.')
}
|
|
|
|
// Script entry point: run the migration, report any failure, and always
// release the database connection.
main()
  .catch((e) => {
    console.error('Migration failed:', e)
    // Record the failure without calling process.exit(): exiting here would
    // end the process before the disconnect below gets a chance to run.
    process.exitCode = 1
  })
  .finally(async () => {
    await prisma.$disconnect()
  })
|