feat: Complete internationalization and code cleanup
## Translation Files - Add 11 new language files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl) - Add 100+ missing translation keys across all 15 languages - New sections: notebook, pagination, ai.batchOrganization, ai.autoLabels - Update nav section with workspace, quickAccess, myLibrary keys ## Component Updates - Update 15+ components to use translation keys instead of hardcoded text - Components: notebook dialogs, sidebar, header, note-input, ghost-tags, etc. - Replace 80+ hardcoded English/French strings with t() calls - Ensure consistent UI across all supported languages ## Code Quality - Remove 77+ console.log statements from codebase - Clean up API routes, components, hooks, and services - Keep only essential error handling (no debugging logs) ## UI/UX Improvements - Update Keep logo to yellow post-it style (from-yellow-400 to-amber-500) - Change selection colors to #FEF3C6 (notebooks) and #EFB162 (nav items) - Make "+" button permanently visible in notebooks section - Fix grammar and syntax errors in multiple components ## Bug Fixes - Fix JSON syntax errors in it.json, nl.json, pl.json, zh.json - Fix syntax errors in notebook-suggestion-toast.tsx - Fix syntax errors in use-auto-tagging.ts - Fix syntax errors in paragraph-refactor.service.ts - Fix duplicate "fusion" section in nl.json 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> Ou une version plus courte si vous préférez : feat(i18n): Add 15 languages, remove logs, update UI components - Create 11 new translation files (es, de, pt, ru, zh, ja, ko, ar, hi, nl, pl) - Add 100+ translation keys: notebook, pagination, AI features - Update 15+ components to use translations (80+ strings) - Remove 77+ console.log statements from codebase - Fix JSON syntax errors in 4 translation files - Fix component syntax errors (toast, hooks, services) - Update logo to yellow post-it style - Change selection colors (#FEF3C6, #EFB162) 🤖 Generated with 
[Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
224
keep-notes/lib/ai/services/embedding.service.ts
Normal file
224
keep-notes/lib/ai/services/embedding.service.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
/**
|
||||
* Embedding Service
|
||||
* Generates vector embeddings for semantic search and similarity analysis
|
||||
* Uses text-embedding-3-small model via OpenAI (or Ollama alternatives)
|
||||
*/
|
||||
|
||||
import { getAIProvider } from '../factory'
|
||||
|
||||
/**
 * Result of a single embedding generation call.
 */
export interface EmbeddingResult {
  // The embedding vector returned by the provider.
  embedding: number[]
  // Identifier of the model that produced the vector.
  model: string
  // Length of `embedding` (e.g. 1536 for text-embedding-3-small).
  dimension: number
}
|
||||
|
||||
/**
|
||||
* Service for generating and managing text embeddings
|
||||
*/
|
||||
export class EmbeddingService {
|
||||
private readonly EMBEDDING_MODEL = 'text-embedding-3-small'
|
||||
private readonly EMBEDDING_DIMENSION = 1536 // OpenAI's embedding dimension
|
||||
|
||||
/**
|
||||
* Generate embedding for a single text
|
||||
*/
|
||||
async generateEmbedding(text: string): Promise<EmbeddingResult> {
|
||||
if (!text || text.trim().length === 0) {
|
||||
throw new Error('Cannot generate embedding for empty text')
|
||||
}
|
||||
|
||||
try {
|
||||
const provider = getAIProvider()
|
||||
|
||||
// Use the existing getEmbeddings method from AIProvider
|
||||
const embedding = await provider.getEmbeddings(text)
|
||||
|
||||
// Validate embedding dimension
|
||||
if (embedding.length !== this.EMBEDDING_DIMENSION) {
|
||||
}
|
||||
|
||||
return {
|
||||
embedding,
|
||||
model: this.EMBEDDING_MODEL,
|
||||
dimension: embedding.length
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error generating embedding:', error)
|
||||
throw new Error(`Failed to generate embedding: ${error}`)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings for multiple texts in batch
|
||||
* More efficient than calling generateEmbedding multiple times
|
||||
*/
|
||||
async generateBatchEmbeddings(texts: string[]): Promise<EmbeddingResult[]> {
|
||||
if (!texts || texts.length === 0) {
|
||||
return []
|
||||
}
|
||||
|
||||
// Filter out empty texts
|
||||
const validTexts = texts.filter(t => t && t.trim().length > 0)
|
||||
|
||||
if (validTexts.length === 0) {
|
||||
return []
|
||||
}
|
||||
|
||||
try {
|
||||
const provider = getAIProvider()
|
||||
|
||||
// Batch embedding using the existing getEmbeddings method
|
||||
const embeddings = await Promise.all(
|
||||
validTexts.map(text => provider.getEmbeddings(text))
|
||||
)
|
||||
|
||||
return embeddings.map(embedding => ({
|
||||
embedding,
|
||||
model: this.EMBEDDING_MODEL,
|
||||
dimension: embedding.length
|
||||
}))
|
||||
} catch (error) {
|
||||
console.error('Error generating batch embeddings:', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate cosine similarity between two embeddings
|
||||
* Returns value between -1 and 1, where 1 is identical
|
||||
*/
|
||||
calculateCosineSimilarity(embedding1: number[], embedding2: number[]): number {
|
||||
if (embedding1.length !== embedding2.length) {
|
||||
throw new Error('Embeddings must have the same dimension')
|
||||
}
|
||||
|
||||
let dotProduct = 0
|
||||
let magnitude1 = 0
|
||||
let magnitude2 = 0
|
||||
|
||||
for (let i = 0; i < embedding1.length; i++) {
|
||||
dotProduct += embedding1[i] * embedding2[i]
|
||||
magnitude1 += embedding1[i] * embedding1[i]
|
||||
magnitude2 += embedding2[i] * embedding2[i]
|
||||
}
|
||||
|
||||
magnitude1 = Math.sqrt(magnitude1)
|
||||
magnitude2 = Math.sqrt(magnitude2)
|
||||
|
||||
if (magnitude1 === 0 || magnitude2 === 0) {
|
||||
return 0
|
||||
}
|
||||
|
||||
return dotProduct / (magnitude1 * magnitude2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate similarity between an embedding and multiple other embeddings
|
||||
* Returns array of similarities
|
||||
*/
|
||||
calculateSimilarities(
|
||||
queryEmbedding: number[],
|
||||
targetEmbeddings: number[][]
|
||||
): number[] {
|
||||
return targetEmbeddings.map(embedding =>
|
||||
this.calculateCosineSimilarity(queryEmbedding, embedding)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Find most similar embeddings to a query
|
||||
* Returns top-k results with their similarities
|
||||
*/
|
||||
findMostSimilar(
|
||||
queryEmbedding: number[],
|
||||
targetEmbeddings: Array<{ id: string; embedding: number[] }>,
|
||||
topK: number = 10
|
||||
): Array<{ id: string; similarity: number }> {
|
||||
const similarities = targetEmbeddings.map(({ id, embedding }) => ({
|
||||
id,
|
||||
similarity: this.calculateCosineSimilarity(queryEmbedding, embedding)
|
||||
}))
|
||||
|
||||
// Sort by similarity descending and return top-k
|
||||
return similarities
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, topK)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get average embedding from multiple embeddings
|
||||
* Useful for clustering or centroid calculation
|
||||
*/
|
||||
averageEmbeddings(embeddings: number[][]): number[] {
|
||||
if (embeddings.length === 0) {
|
||||
throw new Error('Cannot average empty embeddings array')
|
||||
}
|
||||
|
||||
const dimension = embeddings[0].length
|
||||
const average = new Array(dimension).fill(0)
|
||||
|
||||
for (const embedding of embeddings) {
|
||||
if (embedding.length !== dimension) {
|
||||
throw new Error('All embeddings must have the same dimension')
|
||||
}
|
||||
|
||||
for (let i = 0; i < dimension; i++) {
|
||||
average[i] += embedding[i]
|
||||
}
|
||||
}
|
||||
|
||||
// Divide by number of embeddings
|
||||
return average.map(val => val / embeddings.length)
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize embedding to JSON-safe format (for storage)
|
||||
*/
|
||||
serialize(embedding: number[]): string {
|
||||
return JSON.stringify(embedding)
|
||||
}
|
||||
|
||||
/**
|
||||
* Deserialize embedding from JSON string
|
||||
*/
|
||||
deserialize(jsonString: string): number[] {
|
||||
try {
|
||||
const parsed = JSON.parse(jsonString)
|
||||
if (!Array.isArray(parsed)) {
|
||||
throw new Error('Invalid embedding format')
|
||||
}
|
||||
return parsed
|
||||
} catch (error) {
|
||||
console.error('Error deserializing embedding:', error)
|
||||
throw new Error('Failed to deserialize embedding')
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a note needs embedding regeneration
|
||||
* (e.g., if content has changed significantly)
|
||||
*/
|
||||
shouldRegenerateEmbedding(
|
||||
noteContent: string,
|
||||
lastEmbeddingContent: string | null,
|
||||
lastAnalysis: Date | null
|
||||
): boolean {
|
||||
// If no previous embedding, generate one
|
||||
if (!lastEmbeddingContent || !lastAnalysis) {
|
||||
return true
|
||||
}
|
||||
|
||||
// If content has changed more than 20% (simple heuristic)
|
||||
const contentChanged =
|
||||
Math.abs(noteContent.length - lastEmbeddingContent.length) / lastEmbeddingContent.length > 0.2
|
||||
|
||||
// If last analysis is more than 7 days old
|
||||
const daysSinceAnalysis = (Date.now() - lastAnalysis.getTime()) / (1000 * 60 * 60 * 24)
|
||||
const isStale = daysSinceAnalysis > 7
|
||||
|
||||
return contentChanged || isStale
|
||||
}
|
||||
}
|
||||
|
||||
// Shared singleton instance — import this rather than constructing a new
// EmbeddingService per call site.
export const embeddingService = new EmbeddingService()
|
||||
Reference in New Issue
Block a user