"""
|
|
Translation Service Abstraction
|
|
Provides a unified interface for different translation providers
|
|
Optimized for high performance with parallel processing and caching
|
|
"""
|
|
from abc import ABC, abstractmethod
from typing import Optional, List, Dict, Tuple
import requests
from deep_translator import GoogleTranslator, DeeplTranslator, LibreTranslator
import concurrent.futures
import threading
import hashlib
from collections import OrderedDict


# Global thread pool for parallel translations
_executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)


class TranslationCache:
    """Thread-safe LRU cache for translations to avoid redundant API calls"""

    def __init__(self, maxsize: int = 5000):
        self.cache: OrderedDict = OrderedDict()
        self.maxsize = maxsize
        self.lock = threading.RLock()
        self.hits = 0
        self.misses = 0

    def _make_key(self, text: str, target_language: str, source_language: str, provider: str) -> str:
        """Create a unique cache key"""
        content = f"{provider}:{source_language}:{target_language}:{text}"
        return hashlib.md5(content.encode('utf-8')).hexdigest()

    def get(self, text: str, target_language: str, source_language: str, provider: str) -> Optional[str]:
        """Get a cached translation if available"""
        key = self._make_key(text, target_language, source_language, provider)
        with self.lock:
            if key in self.cache:
                self.hits += 1
                # Move to end (most recently used)
                self.cache.move_to_end(key)
                return self.cache[key]
            self.misses += 1
            return None

    def set(self, text: str, target_language: str, source_language: str, provider: str, translation: str):
        """Cache a translation result"""
        key = self._make_key(text, target_language, source_language, provider)
        with self.lock:
            if key in self.cache:
                self.cache.move_to_end(key)
            self.cache[key] = translation
            # Remove oldest if exceeding maxsize
            while len(self.cache) > self.maxsize:
                self.cache.popitem(last=False)

    def clear(self):
        """Clear the cache"""
        with self.lock:
            self.cache.clear()
            self.hits = 0
            self.misses = 0

    def stats(self) -> Dict:
        """Get cache statistics"""
        with self.lock:
            total = self.hits + self.misses
            hit_rate = (self.hits / total * 100) if total > 0 else 0
            return {
                "size": len(self.cache),
                "maxsize": self.maxsize,
                "hits": self.hits,
                "misses": self.misses,
                "hit_rate": f"{hit_rate:.1f}%"
            }


# Global translation cache
_translation_cache = TranslationCache(maxsize=5000)


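# Illustrative sketch (not executed at import time): a basic round-trip
# through the cache. The key is derived from provider, language pair, and
# text, so identical requests hit the cache regardless of caller.
#
#   cache = TranslationCache(maxsize=100)
#   cache.get("hello", "es", "auto", "google")            # None -> miss
#   cache.set("hello", "es", "auto", "google", "hola")
#   cache.get("hello", "es", "auto", "google")            # "hola" -> hit
#   cache.stats()  # {'size': 1, 'maxsize': 100, 'hits': 1, 'misses': 1, 'hit_rate': '50.0%'}

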
class TranslationProvider(ABC):
    """Abstract base class for translation providers"""

    @abstractmethod
    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        """Translate text from source to target language"""
        pass

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
        """Translate multiple texts at once - default sequential implementation"""
        return [self.translate(text, target_language, source_language) for text in texts]

    def translate_batch_parallel(self, texts: List[str], target_language: str, source_language: str = 'auto', max_workers: int = 4) -> List[str]:
        """Parallel batch translation using a thread pool, preserving order"""
        if not texts:
            return []

        results = [''] * len(texts)
        non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]

        if not non_empty:
            return [t if t else '' for t in texts]

        def translate_one(item: Tuple[int, str]) -> Tuple[int, str]:
            idx, text = item
            try:
                return (idx, self.translate(text, target_language, source_language))
            except Exception as e:
                print(f"Translation error at index {idx}: {e}")
                return (idx, text)

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for idx, translated in executor.map(translate_one, non_empty):
                results[idx] = translated

        # Fill empty positions
        for i, text in enumerate(texts):
            if not text or not text.strip():
                results[i] = text if text else ''

        return results


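# Illustrative sketch (hypothetical, for documentation only): a provider
# only needs to implement translate(); translate_batch() and
# translate_batch_parallel() are inherited from the base class.
#
#   class UppercaseProvider(TranslationProvider):
#       """Toy provider that 'translates' by uppercasing."""
#       def translate(self, text, target_language, source_language='auto'):
#           return text.upper()
#
#   provider = UppercaseProvider()
#   provider.translate_batch_parallel(["hola", "", "mundo"], "en")
#   # -> ['HOLA', '', 'MUNDO']  (order and empty slots preserved)

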
class GoogleTranslationProvider(TranslationProvider):
    """Google Translate implementation with batch support and caching"""

    def __init__(self):
        self._local = threading.local()
        self.provider_name = "google"

    def _get_translator(self, source_language: str, target_language: str) -> GoogleTranslator:
        """Get or create a translator instance for the current thread"""
        key = f"{source_language}_{target_language}"
        if not hasattr(self._local, 'translators'):
            self._local.translators = {}
        if key not in self._local.translators:
            self._local.translators[key] = GoogleTranslator(source=source_language, target=target_language)
        return self._local.translators[key]

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        # Check cache first
        cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
        if cached is not None:
            return cached

        try:
            translator = self._get_translator(source_language, target_language)
            result = translator.translate(text)
            # Cache the result
            _translation_cache.set(text, target_language, source_language, self.provider_name, result)
            return result
        except Exception as e:
            print(f"Translation error: {e}")
            return text

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto', batch_size: int = 50) -> List[str]:
        """
        Translate multiple texts using batch processing for speed.
        Uses caching to avoid redundant translations.
        """
        if not texts:
            return []

        # Filter out empty texts and resolve cache hits up front
        results = [''] * len(texts)
        texts_to_translate = []
        indices_to_translate = []

        for i, text in enumerate(texts):
            if text and text.strip():
                # Check cache first
                cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
                if cached is not None:
                    results[i] = cached
                else:
                    texts_to_translate.append(text)
                    indices_to_translate.append(i)
            else:
                results[i] = text if text else ''

        if not texts_to_translate:
            return results

        try:
            translator = GoogleTranslator(source=source_language, target=target_language)

            # Process in batches
            translated_texts = []
            for i in range(0, len(texts_to_translate), batch_size):
                batch = texts_to_translate[i:i + batch_size]
                try:
                    # Use translate_batch if available
                    if hasattr(translator, 'translate_batch'):
                        batch_result = translator.translate_batch(batch)
                    else:
                        # Fallback: join with separator, translate, split
                        separator = "\n|||SPLIT|||\n"
                        combined = separator.join(batch)
                        translated_combined = translator.translate(combined)
                        if translated_combined:
                            batch_result = translated_combined.split("|||SPLIT|||")
                            # Clean up results
                            batch_result = [t.strip() for t in batch_result]
                            # If the split didn't line up, fall back to individual calls
                            if len(batch_result) != len(batch):
                                batch_result = [translator.translate(t) for t in batch]
                        else:
                            batch_result = batch
                    translated_texts.extend(batch_result)
                except Exception as e:
                    print(f"Batch translation error, falling back to individual: {e}")
                    for text in batch:
                        try:
                            translated_texts.append(translator.translate(text))
                        except Exception:
                            translated_texts.append(text)

            # Map back to original positions and cache results
            for idx, translated in zip(indices_to_translate, translated_texts):
                result = translated if translated else texts[idx]
                results[idx] = result
                # Cache successful translations
                _translation_cache.set(texts[idx], target_language, source_language, self.provider_name, result)

            return results

        except Exception as e:
            print(f"Batch translation failed: {e}")
            # Fallback to individual translations
            for idx, text in zip(indices_to_translate, texts_to_translate):
                try:
                    results[idx] = GoogleTranslator(source=source_language, target=target_language).translate(text) or text
                except Exception:
                    results[idx] = text
            return results


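# Illustrative sketch (not executed, requires network access): batch
# translation with cache reuse. The second call is served entirely from
# _translation_cache without touching the API.
#
#   google = GoogleTranslationProvider()
#   google.translate_batch(["Hello", "World"], target_language="fr")
#   google.translate_batch(["Hello", "World"], target_language="fr")  # cache hits
#   _translation_cache.stats()

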
class DeepLTranslationProvider(TranslationProvider):
    """DeepL Translate implementation with batch support"""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self._translator_cache = {}

    def _get_translator(self, source_language: str, target_language: str) -> DeeplTranslator:
        key = f"{source_language}_{target_language}"
        if key not in self._translator_cache:
            self._translator_cache[key] = DeeplTranslator(api_key=self.api_key, source=source_language, target=target_language)
        return self._translator_cache[key]

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        try:
            translator = self._get_translator(source_language, target_language)
            return translator.translate(text)
        except Exception as e:
            print(f"Translation error: {e}")
            return text

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
        """Batch translate using DeepL"""
        if not texts:
            return []

        results = [''] * len(texts)
        non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]

        if not non_empty:
            return [t if t else '' for t in texts]

        try:
            translator = self._get_translator(source_language, target_language)
            non_empty_texts = [t for _, t in non_empty]

            if hasattr(translator, 'translate_batch'):
                translated = translator.translate_batch(non_empty_texts)
            else:
                translated = [translator.translate(t) for t in non_empty_texts]

            for (idx, _), trans in zip(non_empty, translated):
                results[idx] = trans if trans else texts[idx]

            # Fill empty positions
            for i, text in enumerate(texts):
                if not text or not text.strip():
                    results[i] = text if text else ''

            return results
        except Exception as e:
            print(f"DeepL batch error: {e}")
            return [self.translate(t, target_language, source_language) for t in texts]


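# Illustrative sketch (not executed, requires a DeepL API key; the key
# value here is a placeholder):
#
#   deepl = DeepLTranslationProvider(api_key="YOUR_DEEPL_KEY")
#   deepl.translate("Hallo Welt", target_language="en", source_language="de")
#   deepl.translate_batch(["Eins", "Zwei", "Drei"], target_language="en")

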
class LibreTranslationProvider(TranslationProvider):
    """LibreTranslate implementation with batch support"""

    def __init__(self, custom_url: str = "https://libretranslate.com"):
        self.custom_url = custom_url
        self._translator_cache = {}

    def _get_translator(self, source_language: str, target_language: str) -> LibreTranslator:
        key = f"{source_language}_{target_language}"
        if key not in self._translator_cache:
            self._translator_cache[key] = LibreTranslator(source=source_language, target=target_language, custom_url=self.custom_url)
        return self._translator_cache[key]

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        try:
            translator = self._get_translator(source_language, target_language)
            return translator.translate(text)
        except Exception as e:
            print(f"LibreTranslate error: {e}")
            return text

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
        """Batch translate using LibreTranslate"""
        if not texts:
            return []

        results = [''] * len(texts)
        non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]

        if not non_empty:
            return [t if t else '' for t in texts]

        try:
            translator = self._get_translator(source_language, target_language)

            # Translate texts one by one
            for idx, text in non_empty:
                try:
                    results[idx] = translator.translate(text) or text
                except Exception:
                    results[idx] = text

            # Fill empty positions
            for i, text in enumerate(texts):
                if not text or not text.strip():
                    results[i] = text if text else ''

            return results
        except Exception as e:
            print(f"LibreTranslate batch error: {e}")
            return texts


class OllamaTranslationProvider(TranslationProvider):
    """Ollama LLM translation implementation"""

    def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava", system_prompt: str = ""):
        self.base_url = base_url.rstrip('/')
        self.model = model.strip()  # Remove any leading/trailing whitespace
        self.vision_model = vision_model.strip()
        self.custom_system_prompt = system_prompt  # Custom context, glossary, instructions

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        # Skip very short text or numbers only
        if len(text.strip()) < 2 or text.strip().isdigit():
            return text

        try:
            # Build system prompt with custom context if provided
            base_prompt = f"""You are a professional translator. Your ONLY task is to translate text to {target_language}.

CRITICAL RULES:
1. Output ONLY the translated text - no explanations, no comments, no notes
2. Preserve the exact formatting (line breaks, spacing, punctuation)
3. Do NOT add any prefixes like "Here's the translation:" or "Translation:"
4. Do NOT refuse to translate or ask clarifying questions
5. If the text is already in {target_language}, return it unchanged
6. Translate everything literally and accurately
7. NEVER provide comments, opinions, or explanations - you are JUST a translator
8. If you have any doubt about the translation, return the original text unchanged
9. Do not interpret or analyze the content - simply translate word by word
10. Your response must contain ONLY the translated text, nothing else"""

            if self.custom_system_prompt:
                system_content = f"""{base_prompt}

ADDITIONAL CONTEXT AND INSTRUCTIONS:
{self.custom_system_prompt}"""
            else:
                system_content = base_prompt

            # Use /api/chat endpoint (more compatible with all models)
            response = requests.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
                    "messages": [
                        {"role": "system", "content": system_content},
                        {"role": "user", "content": text}
                    ],
                    "stream": False,
                    "options": {
                        "temperature": 0.3,
                        "num_predict": 500
                    }
                },
                timeout=120  # 2-minute timeout
            )
            response.raise_for_status()
            result = response.json()
            translated = result.get("message", {}).get("content", "").strip()
            return translated if translated else text
        except requests.exceptions.ConnectionError:
            print(f"Ollama error: Cannot connect to {self.base_url}. Is Ollama running?")
            return text
        except requests.exceptions.Timeout:
            print("Ollama error: Request timed out after 120s")
            return text
        except Exception as e:
            print(f"Ollama translation error: {e}")
            return text

    def translate_image(self, image_path: str, target_language: str) -> str:
        """Translate text within an image using Ollama vision model"""
        import base64

        try:
            # Read and encode image
            with open(image_path, 'rb') as img_file:
                image_data = base64.b64encode(img_file.read()).decode('utf-8')

            # Use /api/chat for vision models too
            response = requests.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.vision_model,
                    "messages": [
                        {
                            "role": "user",
                            "content": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting.",
                            "images": [image_data]
                        }
                    ],
                    "stream": False
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()
            return result.get("message", {}).get("content", "").strip()
        except Exception as e:
            print(f"Ollama vision translation error: {e}")
            return ""

    @staticmethod
    def list_models(base_url: str = "http://localhost:11434") -> List[str]:
        """List available Ollama models"""
        try:
            response = requests.get(f"{base_url.rstrip('/')}/api/tags", timeout=5)
            response.raise_for_status()
            models = response.json().get("models", [])
            return [model["name"] for model in models]
        except Exception as e:
            print(f"Error listing Ollama models: {e}")
            return []


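# Illustrative sketch (not executed, assumes a local Ollama server on the
# default port):
#
#   models = OllamaTranslationProvider.list_models()   # e.g. ['llama3:latest', ...]
#   ollama = OllamaTranslationProvider(model="llama3")
#   ollama.translate("Guten Morgen", target_language="English")

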
class OpenRouterTranslationProvider(TranslationProvider):
    """
    OpenRouter API translation - access to many cheap & high-quality models.

    Recommended models for translation (by cost/quality):
    - deepseek/deepseek-chat: $0.14/M tokens - Excellent quality, very cheap
    - mistralai/mistral-7b-instruct: $0.06/M tokens - Fast and cheap
    - meta-llama/llama-3.1-8b-instruct: $0.06/M tokens - Good quality
    - google/gemma-2-9b-it: $0.08/M tokens - Good for European languages
    """

    def __init__(self, api_key: str, model: str = "deepseek/deepseek-chat", system_prompt: str = ""):
        self.api_key = api_key
        self.model = model
        self.custom_system_prompt = system_prompt
        self.base_url = "https://openrouter.ai/api/v1"
        self.provider_name = "openrouter"
        self._session = None

    def _get_session(self):
        """Get or create a requests session for connection pooling"""
        if self._session is None:
            self._session = requests.Session()
            self._session.headers.update({
                "Authorization": f"Bearer {self.api_key}",
                "HTTP-Referer": "https://translate-app.local",
                "X-Title": "Document Translator",
                "Content-Type": "application/json"
            })
        return self._session

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        # Skip very short text or numbers only
        if len(text.strip()) < 2 or text.strip().isdigit():
            return text

        # Check cache first
        cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
        if cached is not None:
            return cached

        try:
            session = self._get_session()

            # Compact prompt keeps token usage (and cost) low
            system_prompt = f"Translate to {target_language}. Output ONLY the translation, nothing else. Preserve formatting."

            if self.custom_system_prompt:
                system_prompt = f"{system_prompt}\n\nContext: {self.custom_system_prompt}"

            response = session.post(
                f"{self.base_url}/chat/completions",
                json={
                    "model": self.model,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": text}
                    ],
                    "temperature": 0.2,
                    "max_tokens": 1000
                },
                timeout=30
            )
            response.raise_for_status()
            result = response.json()

            translated = result.get("choices", [{}])[0].get("message", {}).get("content", "").strip()

            if translated:
                # Cache the result
                _translation_cache.set(text, target_language, source_language, self.provider_name, translated)
                return translated
            return text

        except Exception as e:
            print(f"OpenRouter translation error: {e}")
            return text

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
        """
        Batch translate using OpenRouter with parallel requests.
        Uses caching to avoid redundant translations.
        """
        if not texts:
            return []

        results = [''] * len(texts)
        texts_to_translate = []
        indices_to_translate = []

        # Check cache first
        for i, text in enumerate(texts):
            if not text or not text.strip():
                results[i] = text if text else ''
            else:
                cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
                if cached is not None:
                    results[i] = cached
                else:
                    texts_to_translate.append(text)
                    indices_to_translate.append(i)

        if not texts_to_translate:
            return results

        def translate_one(text: str) -> str:
            return self.translate(text, target_language, source_language)

        # Use thread pool for parallel requests
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            translated = list(executor.map(translate_one, texts_to_translate))

        # Map back results
        for idx, trans in zip(indices_to_translate, translated):
            results[idx] = trans

        return results

    @staticmethod
    def list_recommended_models() -> List[dict]:
        """List recommended models for translation with pricing"""
        return [
            {"id": "deepseek/deepseek-chat", "name": "DeepSeek Chat", "price": "$0.14/M tokens", "quality": "Excellent", "speed": "Fast"},
            {"id": "mistralai/mistral-7b-instruct", "name": "Mistral 7B", "price": "$0.06/M tokens", "quality": "Good", "speed": "Very Fast"},
            {"id": "meta-llama/llama-3.1-8b-instruct", "name": "Llama 3.1 8B", "price": "$0.06/M tokens", "quality": "Good", "speed": "Fast"},
            {"id": "google/gemma-2-9b-it", "name": "Gemma 2 9B", "price": "$0.08/M tokens", "quality": "Good", "speed": "Fast"},
            {"id": "anthropic/claude-3-haiku", "name": "Claude 3 Haiku", "price": "$0.25/M tokens", "quality": "Excellent", "speed": "Fast"},
            {"id": "openai/gpt-4o-mini", "name": "GPT-4o Mini", "price": "$0.15/M tokens", "quality": "Excellent", "speed": "Fast"},
        ]


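# Illustrative sketch (not executed, requires an OpenRouter API key; the
# key value is a placeholder):
#
#   openrouter = OpenRouterTranslationProvider(api_key="YOUR_KEY", model="deepseek/deepseek-chat")
#   openrouter.translate_batch(["Bonjour", "Merci"], target_language="en")
#   # Repeated texts are served from _translation_cache instead of the API.

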
class WebLLMTranslationProvider(TranslationProvider):
    """WebLLM browser-based translation (client-side processing)"""

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        # WebLLM translation happens client-side in the browser, driven by
        # JavaScript; this server-side provider is a pass-through placeholder.
        return text


class OpenAITranslationProvider(TranslationProvider):
    """OpenAI GPT translation implementation with vision support"""

    def __init__(self, api_key: str, model: str = "gpt-4o-mini", system_prompt: str = ""):
        self.api_key = api_key
        self.model = model
        self.custom_system_prompt = system_prompt

    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text

        # Skip very short text or numbers only
        if len(text.strip()) < 2 or text.strip().isdigit():
            return text

        try:
            import openai
            client = openai.OpenAI(api_key=self.api_key)

            # Build system prompt with custom context if provided
            base_prompt = f"""You are a professional translator. Your ONLY task is to translate text to {target_language}.

CRITICAL RULES:
1. Output ONLY the translated text - no explanations, no comments, no notes
2. Preserve the exact formatting (line breaks, spacing, punctuation)
3. Do NOT add any prefixes like "Here's the translation:" or "Translation:"
4. Do NOT refuse to translate or ask clarifying questions
5. If the text is already in {target_language}, return it unchanged
6. Translate everything literally and accurately
7. NEVER provide comments, opinions, or explanations - you are JUST a translator
8. If you have any doubt about the translation, return the original text unchanged
9. Do not interpret or analyze the content - simply translate word by word
10. Your response must contain ONLY the translated text, nothing else"""

            if self.custom_system_prompt:
                system_content = f"""{base_prompt}

ADDITIONAL CONTEXT AND INSTRUCTIONS:
{self.custom_system_prompt}"""
            else:
                system_content = base_prompt

            response = client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": text}
                ],
                temperature=0.3,
                max_tokens=500
            )

            translated = response.choices[0].message.content.strip()
            return translated if translated else text
        except Exception as e:
            print(f"OpenAI translation error: {e}")
            return text

    def translate_image(self, image_path: str, target_language: str) -> str:
        """Translate text within an image using OpenAI vision model"""
        import base64

        try:
            import openai
            client = openai.OpenAI(api_key=self.api_key)

            # Read and encode image
            with open(image_path, 'rb') as img_file:
                image_data = base64.b64encode(img_file.read()).decode('utf-8')

            # Determine media type from the file extension ('jpg' is not a
            # valid MIME subtype, so normalize it to 'jpeg')
            ext = image_path.lower().split('.')[-1]
            if ext == 'jpg':
                ext = 'jpeg'
            media_type = f"image/{ext}" if ext in ['png', 'jpeg', 'gif', 'webp'] else "image/png"

            response = client.chat.completions.create(
                model=self.model,  # gpt-4o and gpt-4o-mini support vision
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting."
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{media_type};base64,{image_data}"
                                }
                            }
                        ]
                    }
                ],
                max_tokens=1000
            )

            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"OpenAI vision translation error: {e}")
            return ""


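# Illustrative sketch (not executed, requires an OpenAI API key and the
# `openai` package; the key value and file path are placeholders):
#
#   oai = OpenAITranslationProvider(api_key="YOUR_KEY", model="gpt-4o-mini")
#   oai.translate("Buenos días", target_language="English")
#   oai.translate_image("scan.png", target_language="English")  # OCR + translate

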
class TranslationService:
    """Main translation service that delegates to the configured provider"""

    def __init__(self, provider: Optional[TranslationProvider] = None):
        if provider:
            self.provider = provider
        else:
            # Auto-select provider based on configuration
            self.provider = self._get_default_provider()
        self.translate_images = False  # Flag to enable image translation

    def _get_default_provider(self) -> TranslationProvider:
        """Get the default translation provider"""
        # Always use Google Translate by default to avoid API key issues;
        # the provider is overridden per request in the API endpoint
        return GoogleTranslationProvider()

    def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        """
        Translate a single text string

        Args:
            text: Text to translate
            target_language: Target language code (e.g., 'es', 'fr', 'de')
            source_language: Source language code (default: 'auto' for auto-detection)

        Returns:
            Translated text
        """
        if not text or not text.strip():
            return text

        return self.provider.translate(text, target_language, source_language)

    def translate_image(self, image_path: str, target_language: str) -> str:
        """
        Translate text in an image using a vision model (Ollama or OpenAI)

        Args:
            image_path: Path to image file
            target_language: Target language code

        Returns:
            Translated text from image
        """
        if not self.translate_images:
            return ""

        # Only Ollama and OpenAI support image translation
        if isinstance(self.provider, (OllamaTranslationProvider, OpenAITranslationProvider)):
            return self.provider.translate_image(image_path, target_language)

        return ""

    def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
        """
        Translate multiple text strings efficiently using batch processing.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (default: 'auto')

        Returns:
            List of translated texts
        """
        if not texts:
            return []

        # Delegate to the provider's batch method (the base class provides a
        # sequential default, so every provider has one)
        return self.provider.translate_batch(texts, target_language, source_language)


# Global translation service instance
translation_service = TranslationService()
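

if __name__ == "__main__":
    # Minimal smoke test, illustrative only (network access required for the
    # default Google Translate provider).
    demo = TranslationService()
    print(demo.translate_text("Hello, world!", target_language="es"))
    print(demo.translate_batch(["Good morning", "Good night"], target_language="fr"))
    print(_translation_cache.stats())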