## Translation Files - Add 11 new language files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl) - Add 100+ missing translation keys across all 15 languages - New sections: notebook, pagination, ai.batchOrganization, ai.autoLabels - Update nav section with workspace, quickAccess, myLibrary keys ## Component Updates - Update 15+ components to use translation keys instead of hardcoded text - Components: notebook dialogs, sidebar, header, note-input, ghost-tags, etc. - Replace 80+ hardcoded English/French strings with t() calls - Ensure consistent UI across all supported languages ## Code Quality - Remove 77+ console.log statements from codebase - Clean up API routes, components, hooks, and services - Keep only essential error handling (no debugging logs) ## UI/UX Improvements - Update Keep logo to yellow post-it style (from-yellow-400 to-amber-500) - Change selection colors to #FEF3C6 (notebooks) and #EFB162 (nav items) - Make "+" button permanently visible in notebooks section - Fix grammar and syntax errors in multiple components ## Bug Fixes - Fix JSON syntax errors in it.json, nl.json, pl.json, zh.json - Fix syntax errors in notebook-suggestion-toast.tsx - Fix syntax errors in use-auto-tagging.ts - Fix syntax errors in paragraph-refactor.service.ts - Fix duplicate "fusion" section in nl.json 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> Ou une version plus courte si vous préférez : feat(i18n): Add 15 languages, remove logs, update UI components - Create 11 new translation files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl) - Add 100+ translation keys: notebook, pagination, AI features - Update 15+ components to use translations (80+ strings) - Remove 77+ console.log statements from codebase - Fix JSON syntax errors in 4 translation files - Fix component syntax errors (toast, hooks, services) - Update logo to yellow post-it style - Change selection colors (#FEF3C6, #EFB162) 🤖 Generated with 
[Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
331 lines
9.1 KiB
TypeScript
331 lines
9.1 KiB
TypeScript
/**
 * Semantic Search Service
 * Hybrid search combining keyword matching and semantic similarity
 * Uses Reciprocal Rank Fusion (RRF) for result ranking
 */
|
|
|
|
import { embeddingService } from './embedding.service'
|
|
import { prisma } from '@/lib/prisma'
|
|
import { auth } from '@/auth'
|
|
|
|
/**
 * A single note returned by the search service.
 */
export interface SearchResult {
  /** Identifier of the matching note. */
  noteId: string
  /** Note title, or `null` when the note has none. */
  title: string | null
  /** Note body text. */
  content: string
  /** Relevance score; results are ordered by this value, highest first. */
  score: number
  /** Coarse label for how the note matched the query. */
  matchType: 'exact' | 'related'
  /** Language stored on the note, when available. */
  language?: string | null
}
|
|
|
|
/**
 * Optional tuning parameters for `SemanticSearchService.search`.
 */
export interface SearchOptions {
  /** Maximum number of results to return. */
  limit?: number
  /** Minimum similarity score (0-1) a note needs to count as a semantic match. */
  threshold?: number
  /**
   * NOTE(review): accepted for compatibility, but `search` does not consult
   * this flag — confirm the intended behavior before relying on it.
   */
  includeExactMatches?: boolean
  /** Restrict the search to a single notebook (contextual search, IA5). */
  notebookId?: string
}
|
|
|
|
/**
 * Hybrid note search.
 *
 * Runs a keyword (substring) search and an embedding-similarity search, then
 * merges the two ranked lists with Reciprocal Rank Fusion (RRF). Also exposes
 * helpers to (re)generate the embeddings the semantic half relies on.
 */
export class SemanticSearchService {
  /** RRF constant `k`; larger values flatten the gap between adjacent ranks. */
  private readonly RRF_K = 60
  /** Result cap used when the caller does not pass `limit`. */
  private readonly DEFAULT_LIMIT = 20
  /** Cosine-similarity cut-off used when the caller does not pass `threshold`. */
  private readonly DEFAULT_THRESHOLD = 0.6

  /**
   * Hybrid search: keyword + semantic, fused with RRF.
   *
   * @param query - Free-text query. Leading/trailing whitespace is ignored;
   *   queries shorter than two characters after trimming return `[]`.
   * @param options - See {@link SearchOptions}.
   * @returns Results ordered by descending score, at most `limit` entries.
   *   If the hybrid pipeline throws, the error is logged and a keyword-only
   *   result list (score fixed at 1.0) is returned instead.
   */
  async search(
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResult[]> {
    const {
      limit = this.DEFAULT_LIMIT,
      threshold = this.DEFAULT_THRESHOLD,
      notebookId
    } = options

    // Validate AND search with the same trimmed string, so that " foo " does
    // not pass validation and then fail to match because of the padding.
    const normalizedQuery = query ? query.trim() : ''
    if (normalizedQuery.length < 2) {
      return []
    }

    const session = await auth()
    // NOTE(review): when there is no session, userId is null and the queries
    // below apply no owner filter at all, i.e. they span every user's notes.
    // Confirm that this is intended for unauthenticated callers.
    const userId = session?.user?.id || null

    try {
      // The two retrieval passes are independent, so run them concurrently.
      const [keywordResults, semanticResults] = await Promise.all([
        this.keywordSearch(normalizedQuery, userId, notebookId),
        this.semanticVectorSearch(normalizedQuery, userId, threshold, notebookId)
      ])

      const fused = await this.reciprocalRankFusion(keywordResults, semanticResults)

      return fused
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((result): SearchResult => ({
          ...result,
          // NOTE(review): RRF scores are bounded by 2 / (RRF_K + 1) ≈ 0.033,
          // so this comparison never yields 'exact'. The cut-off is kept as-is
          // because the intended meaning of 'exact' is not clear from here.
          matchType: result.score > 0.8 ? 'exact' : 'related'
        }))
    } catch (error) {
      console.error('Error in hybrid search:', error)

      // Fallback: keyword-only search, still scoped to the requested notebook.
      const keywordResults = await this.keywordSearch(normalizedQuery, userId, notebookId)
      const rankedIds = keywordResults.slice(0, limit).map(r => r.noteId)
      const notes = await this.fetchNoteDetails(rankedIds)

      // `findMany({ id: { in } })` does not return rows in list order, so
      // restore the keyword ranking explicitly.
      const position = new Map(rankedIds.map((id, index) => [id, index] as const))

      return [...notes]
        .sort((a, b) => (position.get(a.id) ?? 0) - (position.get(b.id) ?? 0))
        .map((note): SearchResult => ({
          noteId: note.id,
          title: note.title,
          content: note.content,
          score: 1.0, // Default score for keyword-only results
          matchType: 'related',
          language: note.language
        }))
    }
  }

  /**
   * Loads the fields needed to build a {@link SearchResult} for the given ids.
   * Row order is whatever the database returns.
   */
  private fetchNoteDetails(noteIds: string[]) {
    return prisma.note.findMany({
      where: { id: { in: noteIds } },
      select: {
        id: true,
        title: true,
        content: true,
        language: true
      }
    })
  }

  /**
   * Keyword search via Prisma `contains` on title and content.
   *
   * Matching notes are ordered by a simple relevance heuristic (title hits
   * before content hits, earlier and more frequent hits first) and then
   * assigned 1-based positional ranks, which is what RRF expects.
   *
   * @param query - Text to look for.
   * @param userId - Owner filter; `null` applies no owner filter.
   * @param notebookId - Optional notebook filter (IA5).
   * @returns Note ids with `rank` 1 for the best match, 2 for the next, etc.
   */
  private async keywordSearch(
    query: string,
    userId: string | null,
    notebookId?: string
  ): Promise<Array<{ noteId: string; rank: number }>> {
    const notes = await prisma.note.findMany({
      where: {
        ...(userId ? { userId } : {}),
        ...(notebookId !== undefined ? { notebookId } : {}),
        OR: [
          { title: { contains: query } },
          { content: { contains: query } }
        ]
      },
      select: {
        id: true,
        title: true,
        content: true
      }
    })

    const needle = query.toLowerCase()
    // The query is user input: escape regex metacharacters so that text such
    // as "c++" or "(draft" is counted literally instead of throwing.
    // Built once here rather than twice per note.
    const occurrences = new RegExp(needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi')

    const scored = notes.map(note => {
      const title = note.title || ''
      const content = note.content || ''

      const titleMatches = (title.match(occurrences) || []).length
      const contentMatches = (content.match(occurrences) || []).length

      const titlePosition = title.toLowerCase().indexOf(needle)
      const contentPosition = content.toLowerCase().indexOf(needle)

      // Heuristic relevance, lower is better. Used only to ORDER the notes.
      let relevance = 100
      if (titleMatches > 0) {
        relevance = titlePosition === 0 ? 1 : 10
        relevance -= titleMatches * 2
      } else if (contentMatches > 0) {
        relevance = contentPosition < 100 ? 20 : 30
        relevance -= contentMatches
      }

      return { noteId: note.id, relevance }
    })

    // Convert to positional ranks. Feeding the raw heuristic into
    // 1 / (RRF_K + rank) is unsafe: it can reach -RRF_K (division by zero)
    // or go below it (negative score) for notes with many occurrences.
    return scored
      .sort((a, b) => a.relevance - b.relevance)
      .map((entry, index) => ({
        noteId: entry.noteId,
        rank: index + 1
      }))
  }

  /**
   * Semantic search: embeds the query and compares it against every stored
   * note embedding using cosine similarity.
   *
   * @param query - Text to embed.
   * @param userId - Owner filter; `null` applies no owner filter.
   * @param threshold - Minimum similarity a note needs to be kept.
   * @param notebookId - Optional notebook filter (IA5).
   * @returns Note ids with 1-based ranks, most similar first. Any failure is
   *   logged and reported as an empty list so keyword results still surface.
   */
  private async semanticVectorSearch(
    query: string,
    userId: string | null,
    threshold: number,
    notebookId?: string
  ): Promise<Array<{ noteId: string; rank: number }>> {
    try {
      const { embedding: queryEmbedding } = await embeddingService.generateEmbedding(query)

      // Only notes that already have an embedding can take part.
      const candidates = await prisma.note.findMany({
        where: {
          ...(userId ? { userId } : {}),
          ...(notebookId !== undefined ? { notebookId } : {}),
          embedding: { not: null }
        },
        select: {
          id: true,
          embedding: true
        }
      })

      if (candidates.length === 0) {
        return []
      }

      const similarities = candidates.map(note => ({
        noteId: note.id,
        similarity: embeddingService.calculateCosineSimilarity(
          queryEmbedding,
          embeddingService.deserialize(note.embedding || '[]')
        )
      }))

      return similarities
        .filter(s => s.similarity >= threshold)
        .sort((a, b) => b.similarity - a.similarity)
        .map((s, index) => ({
          noteId: s.noteId,
          rank: index + 1 // 1-based rank
        }))
    } catch (error) {
      console.error('Error in semantic vector search:', error)
      return []
    }
  }

  /**
   * Reciprocal Rank Fusion: merges ranked lists into one scored list.
   *
   * Each list contributes `1 / (k + rank)` per note, with k = `RRF_K`;
   * contributions for the same note are summed.
   *
   * @param keywordResults - Keyword hits with 1-based ranks.
   * @param semanticResults - Semantic hits with 1-based ranks.
   * @returns One entry per distinct note, unsorted, `matchType` set to
   *   `'related'` (the caller assigns the final label).
   */
  private async reciprocalRankFusion(
    keywordResults: Array<{ noteId: string; rank: number }>,
    semanticResults: Array<{ noteId: string; rank: number }>
  ): Promise<SearchResult[]> {
    const scores = new Map<string, number>()

    for (const rankedList of [keywordResults, semanticResults]) {
      for (const { noteId, rank } of rankedList) {
        const contribution = 1 / (this.RRF_K + rank)
        scores.set(noteId, (scores.get(noteId) ?? 0) + contribution)
      }
    }

    // Nothing matched: skip the database round-trip.
    if (scores.size === 0) {
      return []
    }

    const notes = await this.fetchNoteDetails(Array.from(scores.keys()))

    return notes.map((note): SearchResult => ({
      noteId: note.id,
      title: note.title,
      content: note.content,
      score: scores.get(note.id) ?? 0,
      matchType: 'related',
      language: note.language
    }))
  }

  /**
   * Generates or refreshes the stored embedding for one note.
   *
   * Does nothing when `embeddingService.shouldRegenerateEmbedding` reports the
   * current embedding is still valid.
   *
   * @param noteId - Note to index.
   * @throws Error when the note does not exist; any error from embedding
   *   generation or the database update is logged and rethrown.
   */
  async indexNote(noteId: string): Promise<void> {
    try {
      const note = await prisma.note.findUnique({
        where: { id: noteId },
        select: { content: true, embedding: true, lastAiAnalysis: true }
      })

      if (!note) {
        throw new Error('Note not found')
      }

      const shouldRegenerate = embeddingService.shouldRegenerateEmbedding(
        note.content,
        note.embedding,
        note.lastAiAnalysis
      )

      if (!shouldRegenerate) {
        return
      }

      const { embedding } = await embeddingService.generateEmbedding(note.content)

      await prisma.note.update({
        where: { id: noteId },
        data: {
          embedding: embeddingService.serialize(embedding),
          lastAiAnalysis: new Date()
        }
      })
    } catch (error) {
      console.error(`Error indexing note ${noteId}:`, error)
      throw error
    }
  }

  /**
   * Indexes many notes, ten at a time (for initial migration or bulk updates).
   *
   * Notes within a batch are indexed concurrently. A failure for one note does
   * not stop the others: `indexNote` logs it and the batch continues.
   *
   * @param noteIds - Notes to index.
   */
  async indexBatchNotes(noteIds: string[]): Promise<void> {
    const BATCH_SIZE = 10 // Process in batches to avoid overwhelming

    for (let start = 0; start < noteIds.length; start += BATCH_SIZE) {
      const batch = noteIds.slice(start, start + BATCH_SIZE)

      await Promise.allSettled(
        batch.map(noteId => this.indexNote(noteId))
      )
    }
  }
}
|
|
|
|
// Singleton instance
|
|
// Exported module-level instance of SemanticSearchService, created once when this module is evaluated.
export const semanticSearchService = new SemanticSearchService()
|