Keep/keep-notes/lib/ai/services/embedding.service.ts
sepehr 7fb486c9a4 feat: Complete internationalization and code cleanup
## Translation Files
- Add 11 new language files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl)
- Add 100+ missing translation keys across all 15 languages
- New sections: notebook, pagination, ai.batchOrganization, ai.autoLabels
- Update nav section with workspace, quickAccess, myLibrary keys

## Component Updates
- Update 15+ components to use translation keys instead of hardcoded text
- Components: notebook dialogs, sidebar, header, note-input, ghost-tags, etc.
- Replace 80+ hardcoded English/French strings with t() calls
- Ensure consistent UI across all supported languages

## Code Quality
- Remove 77+ console.log statements from codebase
- Clean up API routes, components, hooks, and services
- Keep only essential error handling (no debugging logs)

## UI/UX Improvements
- Update Keep logo to yellow post-it style (from-yellow-400 to-amber-500)
- Change selection colors to #FEF3C6 (notebooks) and #EFB162 (nav items)
- Make "+" button permanently visible in notebooks section
- Fix grammar and syntax errors in multiple components

## Bug Fixes
- Fix JSON syntax errors in it.json, nl.json, pl.json, zh.json
- Fix syntax errors in notebook-suggestion-toast.tsx
- Fix syntax errors in use-auto-tagging.ts
- Fix syntax errors in paragraph-refactor.service.ts
- Fix duplicate "fusion" section in nl.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Or a shorter version if you prefer:

feat(i18n): Add 15 languages, remove logs, update UI components

- Create 11 new translation files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl)
- Add 100+ translation keys: notebook, pagination, AI features
- Update 15+ components to use translations (80+ strings)
- Remove 77+ console.log statements from codebase
- Fix JSON syntax errors in 4 translation files
- Fix component syntax errors (toast, hooks, services)
- Update logo to yellow post-it style
- Change selection colors (#FEF3C6, #EFB162)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-11 22:26:13 +01:00

225 lines
6.1 KiB
TypeScript

/**
* Embedding Service
* Generates vector embeddings for semantic search and similarity analysis
* Uses text-embedding-3-small model via OpenAI (or Ollama alternatives)
*/
import { getAIProvider } from '../factory'
/**
 * Result of a single embedding generation call.
 */
export interface EmbeddingResult {
/** Dense vector representation of the input text */
embedding: number[]
/** Identifier of the model that produced the embedding */
model: string
/** Actual length of `embedding` — may differ from the OpenAI default when another provider is used */
dimension: number
}
/**
* Service for generating and managing text embeddings
*/
/**
 * Service for generating and managing text embeddings.
 *
 * Generation goes through the configured AI provider (OpenAI's
 * text-embedding-3-small by default, or an alternative such as Ollama);
 * the similarity/averaging/serialization helpers are pure and provider-free.
 */
export class EmbeddingService {
  private readonly EMBEDDING_MODEL = 'text-embedding-3-small'
  // OpenAI's embedding dimension for text-embedding-3-small. Other providers
  // may legitimately return a different size, so mismatches warn, not throw.
  private readonly EMBEDDING_DIMENSION = 1536

  /**
   * Generate an embedding for a single text.
   *
   * @param text - Non-empty input text
   * @returns The embedding vector, model name, and actual dimension
   * @throws Error when `text` is empty/whitespace or the provider call fails
   */
  async generateEmbedding(text: string): Promise<EmbeddingResult> {
    if (!text || text.trim().length === 0) {
      throw new Error('Cannot generate embedding for empty text')
    }
    try {
      const provider = getAIProvider()
      // Use the existing getEmbeddings method from AIProvider
      const embedding = await provider.getEmbeddings(text)
      // Validate embedding dimension. (Fix: the original check had an empty
      // body and silently did nothing.) Warn rather than throw, since
      // non-OpenAI providers can return a different dimension.
      if (embedding.length !== this.EMBEDDING_DIMENSION) {
        console.warn(
          `Embedding dimension mismatch: expected ${this.EMBEDDING_DIMENSION}, got ${embedding.length}`
        )
      }
      return {
        embedding,
        model: this.EMBEDDING_MODEL,
        dimension: embedding.length
      }
    } catch (error) {
      console.error('Error generating embedding:', error)
      throw new Error(`Failed to generate embedding: ${error}`)
    }
  }

  /**
   * Generate embeddings for multiple texts in batch.
   * More efficient than awaiting generateEmbedding sequentially — provider
   * calls are issued in parallel.
   *
   * NOTE: empty/whitespace texts are filtered out first, so the result array
   * aligns with the *valid* inputs, not necessarily index-for-index with
   * `texts`.
   *
   * @param texts - Input texts; empty entries are skipped
   * @returns One EmbeddingResult per non-empty input (empty array if none)
   */
  async generateBatchEmbeddings(texts: string[]): Promise<EmbeddingResult[]> {
    if (!texts || texts.length === 0) {
      return []
    }
    // Filter out empty texts
    const validTexts = texts.filter(t => t && t.trim().length > 0)
    if (validTexts.length === 0) {
      return []
    }
    try {
      const provider = getAIProvider()
      // Issue all provider calls concurrently
      const embeddings = await Promise.all(
        validTexts.map(text => provider.getEmbeddings(text))
      )
      return embeddings.map(embedding => ({
        embedding,
        model: this.EMBEDDING_MODEL,
        dimension: embedding.length
      }))
    } catch (error) {
      console.error('Error generating batch embeddings:', error)
      throw error
    }
  }

  /**
   * Calculate cosine similarity between two embeddings.
   *
   * @returns Value in [-1, 1], where 1 means identical direction; 0 when
   *   either vector has zero magnitude
   * @throws Error when the vectors differ in length
   */
  calculateCosineSimilarity(embedding1: number[], embedding2: number[]): number {
    if (embedding1.length !== embedding2.length) {
      throw new Error('Embeddings must have the same dimension')
    }
    let dotProduct = 0
    let magnitude1 = 0
    let magnitude2 = 0
    for (let i = 0; i < embedding1.length; i++) {
      dotProduct += embedding1[i] * embedding2[i]
      magnitude1 += embedding1[i] * embedding1[i]
      magnitude2 += embedding2[i] * embedding2[i]
    }
    magnitude1 = Math.sqrt(magnitude1)
    magnitude2 = Math.sqrt(magnitude2)
    // Guard against division by zero for degenerate (all-zero) vectors
    if (magnitude1 === 0 || magnitude2 === 0) {
      return 0
    }
    return dotProduct / (magnitude1 * magnitude2)
  }

  /**
   * Calculate similarity between one query embedding and many targets.
   *
   * @returns Similarities in the same order as `targetEmbeddings`
   */
  calculateSimilarities(
    queryEmbedding: number[],
    targetEmbeddings: number[][]
  ): number[] {
    return targetEmbeddings.map(embedding =>
      this.calculateCosineSimilarity(queryEmbedding, embedding)
    )
  }

  /**
   * Find the embeddings most similar to a query.
   *
   * @param queryEmbedding - Vector to compare against
   * @param targetEmbeddings - Candidates, each with an id
   * @param topK - Maximum number of results (default 10)
   * @returns Up to `topK` `{ id, similarity }` pairs, best first
   */
  findMostSimilar(
    queryEmbedding: number[],
    targetEmbeddings: Array<{ id: string; embedding: number[] }>,
    topK: number = 10
  ): Array<{ id: string; similarity: number }> {
    const similarities = targetEmbeddings.map(({ id, embedding }) => ({
      id,
      similarity: this.calculateCosineSimilarity(queryEmbedding, embedding)
    }))
    // Sort by similarity descending and return top-k
    return similarities
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, topK)
  }

  /**
   * Get the element-wise average of multiple embeddings.
   * Useful for clustering or centroid calculation.
   *
   * @throws Error when the array is empty or dimensions are inconsistent
   */
  averageEmbeddings(embeddings: number[][]): number[] {
    if (embeddings.length === 0) {
      throw new Error('Cannot average empty embeddings array')
    }
    const dimension = embeddings[0].length
    const average = new Array(dimension).fill(0)
    for (const embedding of embeddings) {
      if (embedding.length !== dimension) {
        throw new Error('All embeddings must have the same dimension')
      }
      for (let i = 0; i < dimension; i++) {
        average[i] += embedding[i]
      }
    }
    // Divide by number of embeddings
    return average.map(val => val / embeddings.length)
  }

  /**
   * Serialize an embedding to a JSON string (for storage).
   */
  serialize(embedding: number[]): string {
    return JSON.stringify(embedding)
  }

  /**
   * Deserialize an embedding from a JSON string.
   *
   * @throws Error when the payload is not valid JSON or not a number array
   */
  deserialize(jsonString: string): number[] {
    try {
      const parsed: unknown = JSON.parse(jsonString)
      // Hardened: previously any array passed through, so a corrupt payload
      // (e.g. an array of strings) would be returned silently.
      if (!Array.isArray(parsed) || !parsed.every(v => typeof v === 'number')) {
        throw new Error('Invalid embedding format')
      }
      return parsed
    } catch (error) {
      console.error('Error deserializing embedding:', error)
      throw new Error('Failed to deserialize embedding')
    }
  }

  /**
   * Check if a note needs embedding regeneration
   * (e.g., if content has changed significantly).
   *
   * @param noteContent - Current note text
   * @param lastEmbeddingContent - Text the stored embedding was computed from
   * @param lastAnalysis - When the stored embedding was computed
   * @returns true when no prior embedding exists, the length changed by more
   *   than 20%, or the last analysis is older than 7 days
   */
  shouldRegenerateEmbedding(
    noteContent: string,
    lastEmbeddingContent: string | null,
    lastAnalysis: Date | null
  ): boolean {
    // If no previous embedding, generate one. (Empty-string previous content
    // also lands here, which avoids a divide-by-zero below.)
    if (!lastEmbeddingContent || !lastAnalysis) {
      return true
    }
    // If content length has changed more than 20% (simple heuristic)
    const contentChanged =
      Math.abs(noteContent.length - lastEmbeddingContent.length) / lastEmbeddingContent.length > 0.2
    // If last analysis is more than 7 days old
    const daysSinceAnalysis = (Date.now() - lastAnalysis.getTime()) / (1000 * 60 * 60 * 24)
    const isStale = daysSinceAnalysis > 7
    return contentChanged || isStale
  }
}
// Shared singleton instance — import this rather than constructing EmbeddingService directly
export const embeddingService = new EmbeddingService()