feat: migrate semantic search to pgvector + full-text search
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m12s
Replace JSON-string embeddings with native pgvector(1536) storage and add PostgreSQL full-text search (tsvector/GIN) with Reciprocal Rank Fusion for hybrid keyword + semantic ranking.

Changes:
- NoteEmbedding.embedding: String → vector(1536) via pgvector
- NoteEmbedding: added updatedAt for reindex tracking
- Note: added tsv (tsvector) with auto-update trigger for FTS
- semantic-search.service: hybrid FTS + vector search with RRF fusion
- embedding.service: toVectorString() for pgvector SQL literals
- Removed JS-side cosine similarity loops (now DB-side via <=>)
- Added HNSW index on NoteEmbedding.embedding (cosine distance)
- Added GIN index on Note.tsv for FTS queries

Schema migration in: prisma/migrations/20260512120000_pgvector_and_fts_search/

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
/**
|
||||
* Semantic Search Service
|
||||
* Hybrid search combining keyword matching and semantic similarity
|
||||
* Uses Reciprocal Rank Fusion (RRF) for result ranking
|
||||
*
|
||||
* Unified hybrid search combining:
|
||||
* 1. PostgreSQL full-text search (tsvector / tsquery) via GIN index
|
||||
* 2. pgvector cosine-distance nearest-neighbor search via HNSW index
|
||||
* 3. Reciprocal Rank Fusion (RRF) for final ranking
|
||||
*
|
||||
* All vector operations happen in the database — no JS cosine-similarity loops.
|
||||
*/
|
||||
|
||||
import { embeddingService } from './embedding.service'
|
||||
@@ -19,19 +24,22 @@ export interface SearchResult {
|
||||
|
||||
/**
 * Options accepted by the hybrid search entry points.
 * Every field is optional; the service substitutes its own defaults.
 */
export interface SearchOptions {
  /** Maximum number of results to return. */
  limit?: number
  /** Minimum similarity score (0-1) a semantic match must reach to be kept. */
  threshold?: number
  /**
   * NOTE(review): destructured by the search entry points, but no visible
   * code path reads it afterwards — confirm before relying on it.
   */
  includeExactMatches?: boolean
  /** Restricts the search to one notebook (contextual search). */
  notebookId?: string
  /** Title substituted for untitled notes (i18n). */
  defaultTitle?: string
}
|
||||
|
||||
export class SemanticSearchService {
|
||||
private readonly RRF_K = 60 // RRF constant (default recommended value)
|
||||
private readonly RRF_K = 60
|
||||
private readonly DEFAULT_LIMIT = 20
|
||||
private readonly DEFAULT_THRESHOLD = 0.6
|
||||
private readonly DEFAULT_THRESHOLD = 0.3
|
||||
private readonly VECTOR_CANDIDATES = 50
|
||||
private readonly FTS_CANDIDATES = 50
|
||||
|
||||
/**
|
||||
* Hybrid search: keyword + semantic with RRF fusion
|
||||
* Hybrid search: FTS + pgvector with RRF fusion.
|
||||
* Accepts an optional userId to skip auth() (used by agent tools).
|
||||
*/
|
||||
async search(
|
||||
query: string,
|
||||
@@ -40,292 +48,15 @@ export class SemanticSearchService {
|
||||
const {
|
||||
limit = this.DEFAULT_LIMIT,
|
||||
threshold = this.DEFAULT_THRESHOLD,
|
||||
includeExactMatches = true,
|
||||
notebookId, // NEW: Contextual search within notebook (IA5)
|
||||
defaultTitle = 'Untitled' // Default title for i18n
|
||||
notebookId,
|
||||
defaultTitle = 'Untitled'
|
||||
} = options
|
||||
|
||||
if (!query || query.trim().length < 2) {
|
||||
return []
|
||||
}
|
||||
if (!query || query.trim().length < 2) return []
|
||||
|
||||
const session = await auth()
|
||||
const userId = session?.user?.id || null
|
||||
|
||||
try {
|
||||
// 1. Keyword search (SQLite FTS)
|
||||
const keywordResults = await this.keywordSearch(query, userId, notebookId)
|
||||
|
||||
// 2. Semantic search (vector similarity)
|
||||
const semanticResults = await this.semanticVectorSearch(query, userId, threshold, notebookId)
|
||||
|
||||
// 3. Reciprocal Rank Fusion
|
||||
const fusedResults = await this.reciprocalRankFusion(
|
||||
keywordResults,
|
||||
semanticResults
|
||||
)
|
||||
|
||||
// 4. Sort by final score and limit
|
||||
return fusedResults
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, limit)
|
||||
.map(result => ({
|
||||
...result,
|
||||
title: result.title || defaultTitle,
|
||||
matchType: result.score > 0.8 ? 'exact' : 'related'
|
||||
}))
|
||||
} catch (error) {
|
||||
console.error('Error in hybrid search:', error)
|
||||
// Fallback to keyword-only search
|
||||
const keywordResults = await this.keywordSearch(query, userId)
|
||||
|
||||
// Fetch note details for keyword results
|
||||
const noteIds = keywordResults.slice(0, limit).map(r => r.noteId)
|
||||
const notes = await prisma.note.findMany({
|
||||
where: { id: { in: noteIds }, trashedAt: null },
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
content: true,
|
||||
language: true
|
||||
}
|
||||
})
|
||||
|
||||
return notes.map(note => ({
|
||||
noteId: note.id,
|
||||
title: note.title || defaultTitle,
|
||||
content: note.content,
|
||||
score: 1.0, // Default score for keyword-only results
|
||||
matchType: 'related' as const,
|
||||
language: note.language
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Keyword search over note titles and contents (case-insensitive substring
 * matching through Prisma `contains`).
 *
 * The query is split into keywords (more than 3 characters, minus a small
 * French stop-word list). A note is a candidate when any keyword occurs in its
 * title or content. Candidates are then ordered by a heuristic computed from
 * the full query string: title hits beat content hits, and earlier positions
 * and more occurrences score better.
 *
 * @param query - Raw user query.
 * @param userId - Owner filter; `null` applies no owner filter.
 * @param notebookId - When not `undefined`, restricts results to that notebook.
 * @returns Note ids with 1-based ordinal ranks (1 = best match).
 */
private async keywordSearch(
  query: string,
  userId: string | null,
  notebookId?: string
): Promise<Array<{ noteId: string; rank: number }>> {
  // Extract keywords (more than 3 characters) so a whole-sentence query can still match.
  const stopWords = new Set(['comment', 'pourquoi', 'lequel', 'laquelle', 'avec', 'pour', 'dans', 'sur', 'est-ce'])
  const keywords = query.toLowerCase()
    .split(/[^a-z0-9àáâäçéèêëíìîïñóòôöúùûü]/i)
    .filter(w => w.length > 3 && !stopWords.has(w))

  // With no usable keyword, fall back to the raw query (which will likely match nothing).
  const searchTerms = keywords.length > 0 ? keywords : [query]

  // One OR clause per keyword, for both title and content.
  const searchConditions = searchTerms.flatMap(term => [
    { title: { contains: term, mode: 'insensitive' as const } },
    { content: { contains: term, mode: 'insensitive' as const } }
  ])

  const notes = await prisma.note.findMany({
    where: {
      ...(userId ? { userId } : {}),
      ...(notebookId !== undefined ? { notebookId } : {}),
      trashedAt: null,
      OR: searchConditions
    },
    select: {
      id: true,
      title: true,
      content: true
    }
  })

  // Loop-invariant work: lowercase the query and compile the occurrence regex once.
  // Regex special characters are escaped so arbitrary user input cannot throw.
  const queryLower = query.toLowerCase()
  const escaped = queryLower.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // String.prototype.match resets lastIndex for global regexes, so reuse is safe.
  const occurrences = new RegExp(escaped, 'gi')
  const countMatches = (text: string): number => (text.match(occurrences) ?? []).length

  const scored = notes.map(note => {
    const title = note.title || ''
    const content = note.content || ''

    const titleMatches = countMatches(title)
    const contentMatches = countMatches(content)

    // Heuristic score, lower is better. It may drop to zero or below for
    // notes with many hits, so it is only used for ordering, never as a rank.
    let heuristic = 100
    if (titleMatches > 0) {
      heuristic = title.toLowerCase().indexOf(queryLower) === 0 ? 1 : 10
      heuristic -= titleMatches * 2
    } else if (contentMatches > 0) {
      heuristic = content.toLowerCase().indexOf(queryLower) < 100 ? 20 : 30
      heuristic -= contentMatches
    }

    return { noteId: note.id, heuristic }
  })

  // Emit 1-based ordinal positions: rank fusion computes 1 / (k + rank), which
  // misbehaves if the raw heuristic (possibly <= 0) is passed through as a rank.
  return scored
    .sort((a, b) => a.heuristic - b.heuristic)
    .map((entry, index) => ({ noteId: entry.noteId, rank: index + 1 }))
}
|
||||
|
||||
/**
 * Semantic search computed in application code.
 *
 * Embeds the query, loads every non-trashed note that has a stored embedding
 * (optionally filtered by owner and notebook), parses each JSON-encoded
 * embedding and scores it against the query with cosine similarity.
 *
 * @param query - Raw user query to embed.
 * @param userId - Owner filter; `null` applies no owner filter.
 * @param threshold - Minimum similarity a note must reach to be returned.
 * @param notebookId - When not `undefined`, restricts results to that notebook.
 * @returns Note ids with 1-based ranks, most similar first. Any failure is
 *          logged and yields an empty list.
 */
private async semanticVectorSearch(
  query: string,
  userId: string | null,
  threshold: number,
  notebookId?: string
): Promise<Array<{ noteId: string; rank: number }>> {
  try {
    const generated = await embeddingService.generateEmbedding(query)
    const queryVector = generated.embedding

    const candidates = await prisma.note.findMany({
      where: {
        ...(userId ? { userId } : {}),
        ...(notebookId !== undefined ? { notebookId } : {}),
        trashedAt: null,
        noteEmbedding: { isNot: null }
      },
      select: { id: true, noteEmbedding: true }
    })

    if (candidates.length === 0) return []

    const kept: Array<{ noteId: string; similarity: number }> = []
    for (const candidate of candidates) {
      const stored = candidate.noteEmbedding?.embedding
      // A missing embedding is scored as an empty vector.
      const vector = stored ? (JSON.parse(stored) as number[]) : []
      const similarity = embeddingService.calculateCosineSimilarity(queryVector, vector)
      if (similarity >= threshold) {
        kept.push({ noteId: candidate.id, similarity })
      }
    }

    kept.sort((a, b) => b.similarity - a.similarity)

    // Position in the sorted list becomes the 1-based rank.
    return kept.map((entry, position) => ({
      noteId: entry.noteId,
      rank: position + 1
    }))
  } catch (error) {
    console.error('Error in semantic vector search:', error)
    return []
  }
}
|
||||
|
||||
/**
 * Reciprocal Rank Fusion.
 *
 * Merges the keyword and semantic rankings into one score per note: each
 * appearance of a note in a list contributes 1 / (RRF_K + rank), and the
 * contributions are summed. Note details are then loaded for every scored id.
 *
 * @param keywordResults - Ranked ids from keyword search.
 * @param semanticResults - Ranked ids from semantic search.
 * @returns One entry per non-trashed note found, carrying its fused score.
 *          The list is not sorted here; `matchType` is always 'related'.
 */
private async reciprocalRankFusion(
  keywordResults: Array<{ noteId: string; rank: number }>,
  semanticResults: Array<{ noteId: string; rank: number }>
): Promise<SearchResult[]> {
  const fused = new Map<string, number>()

  // Keyword list first, then semantic, so the accumulation order is stable.
  for (const ranking of [keywordResults, semanticResults]) {
    for (const { noteId, rank } of ranking) {
      const contribution = 1 / (this.RRF_K + rank)
      fused.set(noteId, (fused.get(noteId) || 0) + contribution)
    }
  }

  const notes = await prisma.note.findMany({
    where: { id: { in: Array.from(fused.keys()) }, trashedAt: null },
    select: { id: true, title: true, content: true, language: true }
  })

  return notes.map(({ id, title, content, language }) => ({
    noteId: id,
    title,
    content,
    score: fused.get(id) || 0,
    matchType: 'related' as const,
    language
  }))
}
|
||||
|
||||
/**
|
||||
* Generate or update embedding for a note
|
||||
* Called when note is created or significantly updated
|
||||
*/
|
||||
async indexNote(noteId: string): Promise<void> {
|
||||
try {
|
||||
const note = await prisma.note.findUnique({
|
||||
where: { id: noteId },
|
||||
select: { content: true, noteEmbedding: true, lastAiAnalysis: true }
|
||||
})
|
||||
|
||||
if (!note) {
|
||||
throw new Error('Note not found')
|
||||
}
|
||||
|
||||
// Check if embedding needs regeneration
|
||||
const shouldRegenerate = embeddingService.shouldRegenerateEmbedding(
|
||||
note.content,
|
||||
note.noteEmbedding?.embedding as any,
|
||||
note.lastAiAnalysis
|
||||
)
|
||||
|
||||
if (!shouldRegenerate) {
|
||||
return
|
||||
}
|
||||
|
||||
// Generate new embedding
|
||||
const { embedding } = await embeddingService.generateEmbedding(note.content)
|
||||
|
||||
// Save to database
|
||||
await prisma.noteEmbedding.upsert({
|
||||
where: { noteId: noteId },
|
||||
create: { noteId: noteId, embedding: embeddingService.serialize(embedding) as any },
|
||||
update: { embedding: embeddingService.serialize(embedding) as any }
|
||||
})
|
||||
await prisma.note.update({
|
||||
where: { id: noteId },
|
||||
data: {
|
||||
lastAiAnalysis: new Date()
|
||||
}
|
||||
})
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Error indexing note ${noteId}:`, error)
|
||||
throw error
|
||||
}
|
||||
return this._doSearch(query, userId, { limit, threshold, notebookId, defaultTitle })
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -340,50 +71,251 @@ export class SemanticSearchService {
|
||||
const {
|
||||
limit = this.DEFAULT_LIMIT,
|
||||
threshold = this.DEFAULT_THRESHOLD,
|
||||
includeExactMatches = true,
|
||||
notebookId,
|
||||
defaultTitle = 'Untitled'
|
||||
} = options
|
||||
|
||||
if (!query || query.trim().length < 2) {
|
||||
return []
|
||||
}
|
||||
if (!query || query.trim().length < 2) return []
|
||||
return this._doSearch(query, userId, { limit, threshold, notebookId, defaultTitle })
|
||||
}
|
||||
|
||||
/**
 * Runs the hybrid search for an already-resolved user.
 *
 * Full-text and vector searches run concurrently, their rankings are merged
 * with Reciprocal Rank Fusion, and the merged list is sorted by score and
 * truncated to `opts.limit`. If any step throws, the error is logged and the
 * FTS-only fallback is returned instead.
 *
 * @param query - Raw user query (callers have already rejected short queries).
 * @param userId - Owner filter; `null` applies no owner filter.
 * @param opts - Resolved search options (defaults already applied).
 * @returns Search results, best score first.
 */
private async _doSearch(
  query: string,
  userId: string | null,
  opts: { limit: number; threshold: number; notebookId?: string; defaultTitle: string }
): Promise<SearchResult[]> {
  try {
    const [keywordResults, semanticResults] = await Promise.all([
      this.ftsSearch(query, userId, opts.notebookId),
      this.vectorSearch(query, userId, opts.threshold, opts.notebookId)
    ])

    // reciprocalRankFusion is async (it loads note details). Without the
    // await, `.sort` would be called on a Promise and throw, so every search
    // would end up in the fallback below.
    const fusedResults = await this.reciprocalRankFusion(keywordResults, semanticResults)

    return fusedResults
      .sort((a, b) => b.score - a.score)
      .slice(0, opts.limit)
      .map(result => ({
        ...result,
        title: result.title || opts.defaultTitle,
        // NOTE(review): a fused score from two lists is at most 2 / (RRF_K + 1),
        // so with RRF_K = 60 it never exceeds 0.8 and every result is
        // 'related'. Confirm the intended criterion for 'exact'.
        matchType: result.score > 0.8 ? ('exact' as const) : ('related' as const)
      }))
  } catch (error) {
    console.error('Error in hybrid search:', error)
    return this._ftsFallback(query, userId, opts)
  }
}
|
||||
|
||||
/**
 * PostgreSQL full-text search over the `tsv` column of "Note".
 *
 * Matches `plainto_tsquery('simple', query)` against `tsv`, excludes trashed
 * and archived notes, orders by `ts_rank` descending and keeps at most
 * FTS_CANDIDATES rows.
 *
 * All caller-supplied values (query, user id, notebook id) are sent as bound
 * parameters and never spliced into the SQL text, so they cannot alter the
 * statement.
 *
 * @param query - Raw user query.
 * @param userId - Owner filter; `null` (or empty) applies no owner filter.
 * @param notebookId - `undefined` applies no notebook filter; an empty string
 *                     selects notes with no notebook; any other value selects
 *                     that notebook.
 * @returns Note ids with 1-based ordinal ranks (1 = highest ts_rank).
 */
private async ftsSearch(
  query: string,
  userId: string | null,
  notebookId?: string
): Promise<Array<{ noteId: string; rank: number }>> {
  // $1 is always the query text; optional filters append further parameters.
  const params: string[] = [query]
  const filters: string[] = []

  if (userId) {
    params.push(userId)
    filters.push(`AND "userId" = $${params.length}`)
  }

  if (notebookId !== undefined) {
    if (notebookId) {
      params.push(notebookId)
      filters.push(`AND "notebookId" = $${params.length}`)
    } else {
      filters.push('AND "notebookId" IS NULL')
    }
  }

  // Only internal constants and generated placeholders are interpolated here.
  const sql = `
    SELECT id AS "noteId", ts_rank("tsv", plainto_tsquery('simple', $1)) AS rank
    FROM "Note"
    WHERE "tsv" @@ plainto_tsquery('simple', $1)
      AND "trashedAt" IS NULL
      AND "isArchived" = false
      ${filters.join('\n      ')}
    ORDER BY rank DESC
    LIMIT ${this.FTS_CANDIDATES}
  `

  const rows: Array<{ noteId: string; rank: number }> = await prisma.$queryRawUnsafe(sql, ...params)

  // Rows arrive best-first; rank fusion only needs the ordinal position.
  return rows.map((row, index) => ({
    noteId: row.noteId,
    rank: index + 1
  }))
}
|
||||
|
||||
/**
 * pgvector cosine-distance search over "NoteEmbedding".
 *
 * Embeds the query, then selects non-trashed, non-archived notes whose cosine
 * similarity (1 - cosine distance, via the `<=>` operator) is at least
 * `threshold`, nearest first, keeping at most VECTOR_CANDIDATES rows.
 *
 * The query vector, threshold, user id and notebook id are sent as bound
 * parameters and never spliced into the SQL text.
 *
 * @param query - Raw user query to embed.
 * @param userId - Owner filter; `null` (or empty) applies no owner filter.
 * @param threshold - Minimum cosine similarity a note must reach.
 * @param notebookId - `undefined` applies no notebook filter; an empty string
 *                     selects notes with no notebook; any other value selects
 *                     that notebook.
 * @returns Note ids with 1-based ordinal ranks (1 = nearest). Returns an
 *          empty list when the query embedding cannot be generated; database
 *          errors propagate to the caller.
 */
private async vectorSearch(
  query: string,
  userId: string | null,
  threshold: number,
  notebookId?: string
): Promise<Array<{ noteId: string; rank: number }>> {
  let queryEmbedding: number[]
  try {
    const result = await embeddingService.generateEmbedding(query)
    queryEmbedding = result.embedding
  } catch (error) {
    console.error('Failed to generate query embedding:', error)
    return []
  }

  // $1 = query vector literal, $2 = similarity threshold; filters append more.
  const params: Array<string | number> = [embeddingService.toVectorString(queryEmbedding), threshold]
  const filters: string[] = []

  if (userId) {
    params.push(userId)
    filters.push(`AND n."userId" = $${params.length}`)
  }

  if (notebookId !== undefined) {
    if (notebookId) {
      params.push(notebookId)
      filters.push(`AND n."notebookId" = $${params.length}`)
    } else {
      filters.push('AND n."notebookId" IS NULL')
    }
  }

  // Only internal constants and generated placeholders are interpolated here.
  const sql = `
    SELECT n.id AS "noteId",
           1 - (e."embedding" <=> $1::vector) AS similarity
    FROM "Note" n
    INNER JOIN "NoteEmbedding" e ON e."noteId" = n.id
    WHERE n."trashedAt" IS NULL
      AND n."isArchived" = false
      ${filters.join('\n      ')}
      AND 1 - (e."embedding" <=> $1::vector) >= $2::float8
    ORDER BY e."embedding" <=> $1::vector ASC
    LIMIT ${this.VECTOR_CANDIDATES}
  `

  const rows: Array<{ noteId: string; similarity: number }> = await prisma.$queryRawUnsafe(sql, ...params)

  // Rows arrive nearest-first; rank fusion only needs the ordinal position.
  return rows.map((row, index) => ({
    noteId: row.noteId,
    rank: index + 1
  }))
}
|
||||
|
||||
/**
 * Reciprocal Rank Fusion.
 *
 * Sums, per note, a contribution of 1 / (RRF_K + rank) for each list the note
 * appears in, then loads title, content and language for every scored note.
 *
 * @param keywordResults - Ranked ids from full-text search.
 * @param semanticResults - Ranked ids from vector search.
 * @returns One entry per non-trashed note found, carrying its fused score;
 *          empty when both input lists are empty. The list is not sorted
 *          here, and `matchType` is always 'related'.
 */
private async reciprocalRankFusion(
  keywordResults: Array<{ noteId: string; rank: number }>,
  semanticResults: Array<{ noteId: string; rank: number }>
): Promise<SearchResult[]> {
  const fused = new Map<string, number>()

  // Keyword list first, then semantic, so the accumulation order is stable.
  for (const ranking of [keywordResults, semanticResults]) {
    for (const { noteId, rank } of ranking) {
      const contribution = 1 / (this.RRF_K + rank)
      fused.set(noteId, (fused.get(noteId) || 0) + contribution)
    }
  }

  // Nothing scored: skip the database round-trip.
  if (fused.size === 0) return []

  const notes = await prisma.note.findMany({
    where: { id: { in: Array.from(fused.keys()) }, trashedAt: null },
    select: { id: true, title: true, content: true, language: true }
  })

  return notes.map(({ id, title, content, language }) => ({
    noteId: id,
    title,
    content,
    score: fused.get(id) || 0,
    matchType: 'related' as const,
    language
  }))
}
|
||||
|
||||
/**
 * FTS-only fallback used when the hybrid search throws.
 *
 * Runs the full-text search alone, keeps the top `opts.limit` ids and loads
 * their details. Results are returned in full-text rank order, each with a
 * fixed score of 1.0 and `matchType` 'related'.
 *
 * @param query - Raw user query.
 * @param userId - Owner filter; `null` applies no owner filter.
 * @param opts - Resolved search options (`threshold` is not used here).
 * @returns Matching notes, best full-text match first. Any failure is logged
 *          and yields an empty list.
 */
private async _ftsFallback(
  query: string,
  userId: string | null,
  opts: { limit: number; threshold: number; notebookId?: string; defaultTitle: string }
): Promise<SearchResult[]> {
  try {
    const keywordResults = await this.ftsSearch(query, userId, opts.notebookId)
    const noteIds = keywordResults.slice(0, opts.limit).map(r => r.noteId)
    if (noteIds.length === 0) return []

    const notes = await prisma.note.findMany({
      where: { id: { in: noteIds }, trashedAt: null },
      select: { id: true, title: true, content: true, language: true }
    })

    // An `in` filter gives no ordering guarantee, so restore the FTS order
    // explicitly; otherwise the ranking computed above is lost.
    const position = new Map(noteIds.map((id, index) => [id, index] as const))

    return notes
      .map(note => ({
        noteId: note.id,
        title: note.title || opts.defaultTitle,
        content: note.content,
        score: 1.0,
        matchType: 'related' as const,
        language: note.language
      }))
      .sort((a, b) => (position.get(a.noteId) ?? 0) - (position.get(b.noteId) ?? 0))
  } catch (error) {
    // Last line of defence: still return nothing, but leave a trace.
    console.error('Error in FTS fallback search:', error)
    return []
  }
}
|
||||
|
||||
/**
 * Generates or refreshes the embedding of a single note.
 *
 * Loads the note, asks the embedding service whether regeneration is needed,
 * and if so embeds the content, upserts it into "NoteEmbedding" as a native
 * pgvector value through raw SQL, then stamps `lastAiAnalysis` on the note.
 *
 * @param noteId - Id of the note to index.
 * @throws Error('Note not found') when the note does not exist; any other
 *         failure is logged and rethrown unchanged.
 */
async indexNote(noteId: string): Promise<void> {
  try {
    const note = await prisma.note.findUnique({
      where: { id: noteId },
      select: { content: true, lastAiAnalysis: true }
    })
    if (!note) throw new Error('Note not found')

    // The stored embedding is no longer loaded here, so `null` is passed in its place.
    const needsEmbedding = embeddingService.shouldRegenerateEmbedding(
      note.content,
      null,
      note.lastAiAnalysis
    )
    if (!needsEmbedding) return

    const generated = await embeddingService.generateEmbedding(note.content)
    const vectorLiteral = embeddingService.toVectorString(generated.embedding)

    // Insert a new row, or overwrite the embedding of the existing row for this note.
    const upsertSql = [
      'INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt")',
      'VALUES (gen_random_uuid(), $1, $2::vector, now(), now())',
      'ON CONFLICT ("noteId")',
      'DO UPDATE SET "embedding" = $2::vector, "updatedAt" = now()'
    ].join('\n')
    await prisma.$executeRawUnsafe(upsertSql, noteId, vectorLiteral)

    await prisma.note.update({
      where: { id: noteId },
      data: { lastAiAnalysis: new Date() }
    })
  } catch (error) {
    console.error(`Error indexing note ${noteId}:`, error)
    throw error
  }
}
|
||||
|
||||
/**
 * Indexes many notes, a fixed-size batch at a time.
 *
 * Notes within a batch are indexed concurrently; the next batch starts only
 * once every note of the current one has settled. A failure on one note does
 * not stop the others: `indexNote` logs its own errors, and the rejection is
 * absorbed by `Promise.allSettled`.
 *
 * @param noteIds - Ids of the notes to index.
 */
async indexBatchNotes(noteIds: string[]): Promise<void> {
  // Upper bound on concurrent indexNote calls.
  const BATCH_SIZE = 20

  for (let i = 0; i < noteIds.length; i += BATCH_SIZE) {
    const batch = noteIds.slice(i, i + BATCH_SIZE)
    // Each batch is processed exactly once.
    await Promise.allSettled(batch.map(noteId => this.indexNote(noteId)))
  }
}
|
||||
}
|
||||
|
||||
// Singleton instance
|
||||
export const semanticSearchService = new SemanticSearchService()
|
||||
|
||||
Reference in New Issue
Block a user