import { detect } from 'tinyld'

/**
 * Language Detection Service
 *
 * Designed as a hybrid detector:
 * - TinyLD for notes shorter than 50 words
 * - an AI provider for notes of 50 words or more
 *
 * NOTE: the AI path is not implemented yet; `detectLanguageWithAI` currently
 * delegates to TinyLD, so every result is produced by TinyLD even when the
 * reported method is 'ai'.
 */
export class LanguageDetectionService {
  /** Word count at which detection switches from TinyLD to the AI path. */
  private readonly MIN_WORDS_FOR_AI = 50

  /** Confidence threshold; declared for callers-to-be, not consulted by this class yet. */
  private readonly MIN_CONFIDENCE = 0.7

  /** Language code reported when no language could be determined. */
  private static readonly UNKNOWN_LANGUAGE = 'unknown'

  /**
   * ISO 639-3 -> ISO 639-1 lookup table.
   * A Map (rather than an object literal) so that inherited keys such as
   * "constructor" can never be returned as a mapping.
   */
  private static readonly ISO3_TO_ISO1: ReadonlyMap<string, string> = new Map<string, string>([
    ['fra', 'fr'],
    ['eng', 'en'],
    ['spa', 'es'],
    ['deu', 'de'],
    ['fas', 'fa'],
    ['pes', 'fa'], // Persian (Farsi)
    ['por', 'pt'],
    ['ita', 'it'],
    ['rus', 'ru'],
    ['zho', 'zh'],
    ['jpn', 'ja'],
    ['kor', 'ko'],
    ['ara', 'ar'],
    ['hin', 'hi'],
    ['nld', 'nl'],
    ['pol', 'pl'],
    ['tur', 'tr'],
    ['vie', 'vi'],
    ['tha', 'th'],
    ['ind', 'id']
  ])

  /**
   * ISO 639-1 codes that `isLanguageSupported` recognises.
   * This is a partial list: it does not enumerate all 62 languages reported by
   * `getSupportedLanguagesCount`.
   */
  private static readonly SUPPORTED_CODES: ReadonlySet<string> = new Set<string>([
    'fr', 'en', 'es', 'de', 'fa', 'pt', 'it', 'ru', 'zh', 'ja',
    'ko', 'ar', 'hi', 'nl', 'pl', 'tr', 'vi', 'th', 'id'
  ])

  /**
   * Detect the language of `content`.
   *
   * @param content - Text to analyse.
   * @returns The detected language code ('unknown' when the input is blank or
   *   the detector finds no match), a fixed confidence value for the path that
   *   produced the result, and the name of that path.
   */
  async detectLanguage(content: string): Promise<{
    language: string // ISO 639-1 code such as 'fr' | 'en' | 'fa', or 'unknown'
    confidence: number // 0.0-1.0
    method: 'tinyld' | 'ai' | 'unknown'
  }> {
    if (!content || content.trim().length === 0) {
      return this.unknownResult()
    }

    // Trim before splitting: leading/trailing whitespace would otherwise add
    // empty tokens and inflate the word count.
    const wordCount = content.trim().split(/\s+/).length

    // Short notes: TinyLD
    if (wordCount < this.MIN_WORDS_FOR_AI) {
      return this.detectWithTinyLD(content, 0.8)
    }

    // Long notes: AI path
    try {
      const detected = await this.detectLanguageWithAI(content)
      if (detected === LanguageDetectionService.UNKNOWN_LANGUAGE) {
        return this.unknownResult()
      }
      return { language: detected, confidence: 0.9, method: 'ai' }
    } catch (error: unknown) {
      console.error('Language detection error:', error)
      // Fall back to TinyLD with a lower confidence
      return this.detectWithTinyLD(content, 0.6)
    }
  }

  /**
   * Build the result returned when no language could be determined.
   */
  private unknownResult(): { language: string; confidence: number; method: 'unknown' } {
    return {
      language: LanguageDetectionService.UNKNOWN_LANGUAGE,
      confidence: 0.0,
      method: 'unknown'
    }
  }

  /**
   * Run TinyLD on `content` and wrap the outcome in a detection result.
   *
   * @param content - Text to analyse.
   * @param confidence - Confidence value to report when a language is found.
   * @returns A 'tinyld' result, or the unknown result when TinyLD finds no match.
   */
  private detectWithTinyLD(
    content: string,
    confidence: number
  ): { language: string; confidence: number; method: 'tinyld' | 'unknown' } {
    const language = this.mapToISO(detect(content))
    if (language === LanguageDetectionService.UNKNOWN_LANGUAGE) {
      return this.unknownResult()
    }
    return { language, confidence, method: 'tinyld' }
  }

  /**
   * Detect language using an AI provider.
   *
   * AI detection is not implemented yet (planned for Phase 2); until then this
   * delegates to TinyLD.
   *
   * @param content - Text to analyse.
   * @returns An ISO 639-1 code, or 'unknown' when nothing was detected.
   */
  private async detectLanguageWithAI(content: string): Promise<string> {
    const result = detect(content)
    return this.mapToISO(result)
  }

  /**
   * Map a detector language code to ISO 639-1.
   *
   * @param code - Language code as returned by the detector.
   * @returns The two-letter code; 'unknown' for an empty code. Codes missing
   *   from the lookup table fall back to their first two letters, which is a
   *   heuristic and not guaranteed to be a valid ISO 639-1 code.
   */
  private mapToISO(code: string): string {
    // Normalise first so that upper-case or padded codes hit the lookup table
    // instead of being truncated.
    const normalized = (code ?? '').trim().toLowerCase()

    if (normalized.length === 0) {
      return LanguageDetectionService.UNKNOWN_LANGUAGE
    }

    // Already a two-letter code
    if (/^[a-z]{2}$/.test(normalized)) {
      return normalized
    }

    // Use mapping or fall back to the first two letters
    return LanguageDetectionService.ISO3_TO_ISO1.get(normalized) ?? normalized.substring(0, 2)
  }

  /**
   * Get supported languages count.
   *
   * @returns 62, the number of languages TinyLD supports.
   */
  getSupportedLanguagesCount(): number {
    return 62 // TinyLD supports 62 languages
  }

  /**
   * Check if a language code is supported.
   *
   * The check is case-insensitive and runs against a partial list of ISO 639-1
   * codes, so some languages the detector can return are reported as
   * unsupported here.
   *
   * @param languageCode - ISO 639-1 code to look up.
   * @returns `true` when the code is in the known list.
   */
  isLanguageSupported(languageCode: string): boolean {
    return LanguageDetectionService.SUPPORTED_CODES.has(languageCode.toLowerCase())
  }
}