Add translation cache for faster repeated translations (5000 entry LRU cache with hit rate tracking)
This commit is contained in:
parent
d2b820c6f1
commit
b65e683d32
4
main.py
4
main.py
@ -21,6 +21,7 @@ import time
|
||||
from config import config
|
||||
from translators import excel_translator, word_translator, pptx_translator
|
||||
from utils import file_handler, handle_translation_error, DocumentProcessingError
|
||||
from services.translation_service import _translation_cache
|
||||
|
||||
# Import auth routes
|
||||
from routes.auth_routes import router as auth_router
|
||||
@ -228,7 +229,8 @@ async def health_check():
|
||||
"rate_limits": {
|
||||
"requests_per_minute": rate_limit_config.requests_per_minute,
|
||||
"translations_per_minute": rate_limit_config.translations_per_minute,
|
||||
}
|
||||
},
|
||||
"translation_cache": _translation_cache.stats()
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
"""
|
||||
Translation Service Abstraction
|
||||
Provides a unified interface for different translation providers
|
||||
Optimized for high performance with parallel processing
|
||||
Optimized for high performance with parallel processing and caching
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
@ -13,12 +13,77 @@ import threading
|
||||
import asyncio
|
||||
from functools import lru_cache
|
||||
import time
|
||||
import hashlib
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
# Global thread pool for parallel translations
|
||||
_executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)
|
||||
|
||||
|
||||
class TranslationCache:
    """Thread-safe LRU cache for translations to avoid redundant API calls.

    Entries are stored in an ``OrderedDict`` ordered from least to most
    recently used and are keyed by a digest of the provider, source language,
    target language and text.  Every read and write happens while holding a
    re-entrant lock, and hit/miss counters are maintained for ``stats()``.
    """

    def __init__(self, maxsize: int = 5000):
        """Create an empty cache holding at most ``maxsize`` entries."""
        self.cache: OrderedDict = OrderedDict()
        self.maxsize = maxsize
        self.lock = threading.RLock()
        self.hits = 0
        self.misses = 0

    def _make_key(self, text: str, target_language: str, source_language: str, provider: str) -> str:
        """Create a unique cache key for one translation request.

        Each field is written to the hash prefixed with its encoded length.
        A plain ``":"``-joined string is ambiguous when a field itself
        contains a colon (``"x:b" + "c"`` vs ``"x" + "b:c"``), which would let
        two different requests share one cache entry.
        """
        digest = hashlib.sha256()
        for field in (provider, source_language, target_language, text):
            # str() keeps the behaviour of the previous f-string formatting
            # for non-string values such as None.
            encoded = str(field).encode('utf-8')
            digest.update(f"{len(encoded)}:".encode('ascii'))
            digest.update(encoded)
        return digest.hexdigest()

    def get(self, text: str, target_language: str, source_language: str, provider: str) -> Optional[str]:
        """Get a cached translation if available.

        Returns the stored translation and marks the entry as most recently
        used, or returns ``None`` (and counts a miss) when nothing is cached.
        """
        key = self._make_key(text, target_language, source_language, provider)
        with self.lock:
            if key in self.cache:
                self.hits += 1
                # Move to end (most recently used)
                self.cache.move_to_end(key)
                return self.cache[key]
            self.misses += 1
            return None

    def set(self, text: str, target_language: str, source_language: str, provider: str, translation: str):
        """Cache a translation result, evicting the oldest entries if full."""
        key = self._make_key(text, target_language, source_language, provider)
        with self.lock:
            if key in self.cache:
                # Refresh recency before overwriting the existing value.
                self.cache.move_to_end(key)
            self.cache[key] = translation
            # Remove oldest if exceeding maxsize
            while len(self.cache) > self.maxsize:
                self.cache.popitem(last=False)

    def clear(self):
        """Clear the cache and reset the hit/miss counters."""
        with self.lock:
            self.cache.clear()
            self.hits = 0
            self.misses = 0

    def stats(self) -> Dict:
        """Get cache statistics.

        Returns a dict with ``size``, ``maxsize``, ``hits``, ``misses`` and
        ``hit_rate`` (a percentage string with one decimal, e.g. ``"75.0%"``).
        """
        with self.lock:
            total = self.hits + self.misses
            # Avoid dividing by zero before the first lookup.
            hit_rate = (self.hits / total * 100) if total > 0 else 0
            return {
                "size": len(self.cache),
                "maxsize": self.maxsize,
                "hits": self.hits,
                "misses": self.misses,
                "hit_rate": f"{hit_rate:.1f}%"
            }
|
||||
|
||||
|
||||
# Global translation cache
|
||||
_translation_cache = TranslationCache(maxsize=5000)
|
||||
|
||||
|
||||
class TranslationProvider(ABC):
|
||||
"""Abstract base class for translation providers"""
|
||||
|
||||
@ -63,10 +128,11 @@ class TranslationProvider(ABC):
|
||||
|
||||
|
||||
class GoogleTranslationProvider(TranslationProvider):
|
||||
"""Google Translate implementation with batch support"""
|
||||
"""Google Translate implementation with batch support and caching"""
|
||||
|
||||
def __init__(self):
|
||||
self._local = threading.local()
|
||||
self.provider_name = "google"
|
||||
|
||||
def _get_translator(self, source_language: str, target_language: str) -> GoogleTranslator:
|
||||
"""Get or create a translator instance for the current thread"""
|
||||
@ -81,9 +147,17 @@ class GoogleTranslationProvider(TranslationProvider):
|
||||
if not text or not text.strip():
|
||||
return text
|
||||
|
||||
# Check cache first
|
||||
cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
try:
|
||||
translator = self._get_translator(source_language, target_language)
|
||||
return translator.translate(text)
|
||||
result = translator.translate(text)
|
||||
# Cache the result
|
||||
_translation_cache.set(text, target_language, source_language, self.provider_name, result)
|
||||
return result
|
||||
except Exception as e:
|
||||
print(f"Translation error: {e}")
|
||||
return text
|
||||
@ -91,7 +165,7 @@ class GoogleTranslationProvider(TranslationProvider):
|
||||
def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto', batch_size: int = 50) -> List[str]:
|
||||
"""
|
||||
Translate multiple texts using batch processing for speed.
|
||||
Uses deep_translator's batch capability when possible.
|
||||
Uses caching to avoid redundant translations.
|
||||
"""
|
||||
if not texts:
|
||||
return []
|
||||
@ -100,15 +174,24 @@ class GoogleTranslationProvider(TranslationProvider):
|
||||
results = [''] * len(texts)
|
||||
non_empty_indices = []
|
||||
non_empty_texts = []
|
||||
texts_to_translate = []
|
||||
indices_to_translate = []
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
if text and text.strip():
|
||||
# Check cache first
|
||||
cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
|
||||
if cached is not None:
|
||||
results[i] = cached
|
||||
else:
|
||||
non_empty_indices.append(i)
|
||||
non_empty_texts.append(text)
|
||||
texts_to_translate.append(text)
|
||||
indices_to_translate.append(i)
|
||||
else:
|
||||
results[i] = text if text else ''
|
||||
|
||||
if not non_empty_texts:
|
||||
if not texts_to_translate:
|
||||
return results
|
||||
|
||||
try:
|
||||
@ -116,8 +199,8 @@ class GoogleTranslationProvider(TranslationProvider):
|
||||
|
||||
# Process in batches
|
||||
translated_texts = []
|
||||
for i in range(0, len(non_empty_texts), batch_size):
|
||||
batch = non_empty_texts[i:i + batch_size]
|
||||
for i in range(0, len(texts_to_translate), batch_size):
|
||||
batch = texts_to_translate[i:i + batch_size]
|
||||
try:
|
||||
# Use translate_batch if available
|
||||
if hasattr(translator, 'translate_batch'):
|
||||
@ -145,16 +228,19 @@ class GoogleTranslationProvider(TranslationProvider):
|
||||
except:
|
||||
translated_texts.append(text)
|
||||
|
||||
# Map back to original positions
|
||||
for idx, translated in zip(non_empty_indices, translated_texts):
|
||||
results[idx] = translated if translated else texts[idx]
|
||||
# Map back to original positions and cache results
|
||||
for idx, (original, translated) in zip(indices_to_translate, zip(texts_to_translate, translated_texts)):
|
||||
result = translated if translated else texts[idx]
|
||||
results[idx] = result
|
||||
# Cache successful translations
|
||||
_translation_cache.set(texts[idx], target_language, source_language, self.provider_name, result)
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
print(f"Batch translation failed: {e}")
|
||||
# Fallback to individual translations
|
||||
for idx, text in zip(non_empty_indices, non_empty_texts):
|
||||
for idx, text in zip(indices_to_translate, texts_to_translate):
|
||||
try:
|
||||
results[idx] = GoogleTranslator(source=source_language, target=target_language).translate(text) or text
|
||||
except:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user