Files
Momento/memento-note/app/api/admin/embeddings/validate/route.ts
Antigravity 03e6a62b80
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
feat: migrate semantic search to pgvector + full-text search
Replace JSON-string embeddings with native pgvector(1536) storage and
add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion
for hybrid keyword + semantic ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search with RRF fusion
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 07:03:56 +00:00

62 lines
1.9 KiB
TypeScript

import { NextResponse } from 'next/server'
import { prisma } from '@/lib/prisma'
import { auth } from '@/auth'
/**
 * Admin endpoint that summarises the state of the pgvector embeddings.
 *
 * Responds with:
 * - 401 `{ error }` when there is no authenticated session user,
 * - 403 `{ error }` when the user is missing or its role is not `ADMIN`,
 * - 200 `{ success: true, summary: { total, valid, missing, invalid }, invalidNotes: [] }`
 *   on success (`invalidNotes` is always an empty array),
 * - 500 `{ success: false, error }` when anything throws.
 *
 * All counting is done in SQL; no embedding data is loaded into the process.
 */
export async function GET() {
  try {
    const session = await auth()
    if (!session?.user?.id) {
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    // The role is read from the database rather than taken from the session.
    const user = await prisma.user.findUnique({
      where: { id: session.user.id },
      select: { role: true }
    })
    if (!user || user.role !== 'ADMIN') {
      return NextResponse.json({ error: 'Forbidden - Admin only' }, { status: 403 })
    }

    type CountRow = { count: bigint }

    // COUNT(*) comes back as a bigint; an empty result set is treated as zero.
    const readCount = (rows: CountRow[]): number => Number(rows[0]?.count ?? 0)

    // The three counts are independent of each other, so they are issued
    // concurrently. The SQL is constant, so the tagged-template `$queryRaw`
    // is used instead of `$queryRawUnsafe`.
    const [noteRows, embeddingRows, invalidRows] = await Promise.all([
      prisma.$queryRaw<CountRow[]>`
        SELECT COUNT(*)::bigint AS count
        FROM "Note"
        WHERE "trashedAt" IS NULL`,
      prisma.$queryRaw<CountRow[]>`
        SELECT COUNT(*)::bigint AS count
        FROM "NoteEmbedding"`,
      // vector_dims() is pgvector's dimension accessor; it replaces casting
      // each vector to text and counting comma-separated components.
      // Assumes "embedding" is a pgvector `vector` column — confirm against the schema.
      prisma.$queryRaw<CountRow[]>`
        SELECT COUNT(*)::bigint AS count
        FROM "NoteEmbedding" e
        WHERE e."embedding" IS NULL
           OR vector_dims(e."embedding") <> 1536`
    ])

    const total = readCount(noteRows)
    const embeddingCount = readCount(embeddingRows)
    const invalidCount = readCount(invalidRows)

    // NOTE(review): `total` excludes trashed notes, but `embeddingCount` covers
    // every NoteEmbedding row. If trashed notes keep their embeddings, `missing`
    // is under-reported — confirm and, if so, count via a join on the note key.
    const missingCount = Math.max(0, total - embeddingCount)

    return NextResponse.json({
      success: true,
      summary: {
        total,
        valid: embeddingCount - invalidCount,
        missing: missingCount,
        invalid: invalidCount
      },
      invalidNotes: []
    })
  } catch (error) {
    console.error('[EMBEDDING_VALIDATION] Error:', error)
    return NextResponse.json(
      { success: false, error: String(error) },
      { status: 500 }
    )
  }
}