Files
Momento/memento-note/lib/ai/services/language-detection.service.ts
Sepehr Ramezani e4d4e23dc7 chore: clean up repo for public release
- Remove BMAD framework, IDE configs, dev screenshots, test files,
  internal docs, and backup files
- Rename keep-notes/ to memento-note/
- Update all references from keep-notes to memento-note
- Add Apache 2.0 license with Commons Clause (non-commercial restriction)
- Add clean .gitignore and .env.docker.example
2026-04-20 22:48:06 +02:00

134 lines
3.2 KiB
TypeScript

import { detect } from 'tinyld'
/**
 * Language Detection Service
 *
 * Designed as a hybrid:
 * - TinyLD for notes < 50 words (fast, ~8ms)
 * - AI for notes ≥ 50 words (planned; the AI step is currently a TinyLD
 *   placeholder, so both paths run the same detector today)
 *
 * Supports 62 languages including Persian (fa)
 */
export class LanguageDetectionService {
  /** Language value reported when nothing could be detected. */
  private static readonly UNKNOWN_LANGUAGE = 'unknown'

  /**
   * ISO 639-3 → ISO 639-1 lookup.
   *
   * Kept in a Map (not an object literal) so arbitrary input such as
   * 'constructor' can never resolve to an inherited property, and built once
   * instead of on every call.
   */
  private static readonly ISO_639_3_TO_1: ReadonlyMap<string, string> = new Map<string, string>([
    ['fra', 'fr'],
    ['eng', 'en'],
    ['spa', 'es'],
    ['deu', 'de'],
    ['fas', 'fa'],
    ['pes', 'fa'], // Persian (Farsi)
    ['por', 'pt'],
    ['ita', 'it'],
    ['rus', 'ru'],
    ['zho', 'zh'],
    ['cmn', 'zh'], // Mandarin Chinese
    ['jpn', 'ja'],
    ['kor', 'ko'],
    ['ara', 'ar'],
    ['hin', 'hi'],
    ['nld', 'nl'],
    ['pol', 'pl'],
    ['tur', 'tr'],
    ['vie', 'vi'],
    ['tha', 'th'],
    ['ind', 'id'],
    // Codes whose first two letters are NOT their ISO 639-1 equivalent
    // (or are listed for clarity next to them).
    ['swe', 'sv'],
    ['dan', 'da'],
    ['ces', 'cs'],
    ['ell', 'el'],
    ['ukr', 'uk'],
    ['heb', 'he'],
    ['ben', 'bn']
  ])

  /** ISO 639-1 codes that isLanguageSupported() recognises. */
  private static readonly KNOWN_CODES: ReadonlySet<string> = new Set<string>([
    'fr', 'en', 'es', 'de', 'fa', 'pt', 'it', 'ru', 'zh',
    'ja', 'ko', 'ar', 'hi', 'nl', 'pl', 'tr', 'vi', 'th', 'id'
  ])

  /** Notes with at least this many words take the "long note" path. */
  private readonly MIN_WORDS_FOR_AI = 50

  /** Confidence threshold; not read by any method in this class at present. */
  private readonly MIN_CONFIDENCE = 0.7

  /**
   * Detect the language of a note.
   *
   * - Empty / whitespace-only content → `unknown` with confidence 0.
   * - Fewer than MIN_WORDS_FOR_AI words → TinyLD, confidence 0.8.
   * - Otherwise → detectLanguageWithAI(), confidence 0.9, method `'ai'`;
   *   if that throws, the error is logged and TinyLD is used with
   *   confidence 0.6.
   * - If the detector yields no language code at all, the `unknown` result
   *   is returned instead of an empty language string.
   *
   * @param content Raw note text.
   * @returns The ISO 639-1 language code (or `'unknown'`), a fixed
   *          per-path confidence, and the path that produced the result.
   */
  async detectLanguage(content: string): Promise<{
    language: string // 'fr' | 'en' | 'es' | 'de' | 'fa' | 'unknown'
    confidence: number // 0.0-1.0
    method: 'tinyld' | 'ai' | 'unknown'
  }> {
    if (!content || content.trim().length === 0) {
      return this.unknownResult()
    }

    // Trim first: splitting untrimmed text on whitespace yields empty
    // leading/trailing tokens that would inflate the word count.
    const wordCount = content.trim().split(/\s+/).length

    // Short notes: TinyLD (fast, TypeScript native)
    if (wordCount < this.MIN_WORDS_FOR_AI) {
      const language = this.mapToISO(detect(content))
      return language
        ? { language, confidence: 0.8, method: 'tinyld' }
        : this.unknownResult()
    }

    // Long notes: AI for better accuracy
    try {
      const language = await this.detectLanguageWithAI(content)
      return language
        ? { language, confidence: 0.9, method: 'ai' }
        : this.unknownResult()
    } catch (error) {
      console.error('Language detection error:', error)
      // Fallback to TinyLD
      const language = this.mapToISO(detect(content))
      return language
        ? { language, confidence: 0.6, method: 'tinyld' }
        : this.unknownResult()
    }
  }

  /**
   * Detect language for long content.
   *
   * AI detection is not implemented yet: this currently delegates to TinyLD,
   * so results reported with method `'ai'` are TinyLD results.
   * In Phase 2, AI-based detection can be added here for better accuracy.
   */
  private async detectLanguageWithAI(content: string): Promise<string> {
    return this.mapToISO(detect(content))
  }

  /**
   * Normalise a detector language code to ISO 639-1.
   *
   * The code is trimmed and lower-cased first. Two-letter codes are returned
   * as-is, known three-letter codes are translated through the lookup table,
   * and anything else falls back to its first two characters (a best-effort
   * guess that may not be a valid ISO 639-1 code).
   *
   * @returns The two-letter code, or `''` when `code` is empty.
   */
  private mapToISO(code: string): string {
    const normalized = code.trim().toLowerCase()

    // Direct mapping for ISO codes
    if (/^[a-z]{2}$/.test(normalized)) {
      return normalized
    }

    // Use mapping or fallback
    return (
      LanguageDetectionService.ISO_639_3_TO_1.get(normalized) ??
      normalized.substring(0, 2)
    )
  }

  /** Fresh "nothing detected" result (new object per call). */
  private unknownResult(): { language: string; confidence: number; method: 'unknown' } {
    return {
      language: LanguageDetectionService.UNKNOWN_LANGUAGE,
      confidence: 0.0,
      method: 'unknown'
    }
  }

  /**
   * Get supported languages count
   */
  getSupportedLanguagesCount(): number {
    return 62 // TinyLD supports 62 languages
  }

  /**
   * Check if a language code is supported.
   *
   * The comparison is case-insensitive. Only the 19 codes enumerated in this
   * class are recognised; the remaining languages counted by
   * getSupportedLanguagesCount() are not listed here and report `false`.
   */
  isLanguageSupported(languageCode: string): boolean {
    return LanguageDetectionService.KNOWN_CODES.has(languageCode.toLowerCase())
  }
}