From c0f93501cce6d7784d893283f64509cc2b22e2db Mon Sep 17 00:00:00 2001 From: sepehr Date: Sun, 17 May 2026 12:09:26 +0200 Subject: [PATCH] fix: use Google Cloud API key for classic mode + translation verification Two critical fixes: 1. Provider "google" (default classic mode) now checks for a Google Cloud API key (GOOGLE_CLOUD_API_KEY in env or admin settings). If present, uses LegacyGoogleCloudAdapter from services.providers.google_cloud_provider (official API). Previously it always fell through to deep_translator (free scraper) which gets blocked in production, silently returning untranslated text. 2. Added translation verification: each translator now tracks how many texts were attempted vs actually changed. If 0 texts were translated, the job is marked as FAILED with a clear error message instead of returning the original file as "completed". Co-Authored-By: Claude Opus 4.7 --- routes/translate_routes.py | 40 ++++++++++++++++++++++++++++++++- translators/excel_translator.py | 27 +++++++++++----------- translators/pdf_translator.py | 28 +++++++++++++++++------ translators/pptx_translator.py | 27 +++++++++++----------- translators/word_translator.py | 16 +++++++++++-- 5 files changed, 102 insertions(+), 36 deletions(-) diff --git a/routes/translate_routes.py b/routes/translate_routes.py index 43d5611..1e91504 100644 --- a/routes/translate_routes.py +++ b/routes/translate_routes.py @@ -951,7 +951,21 @@ async def _run_translation_job( translation_provider = None _p = provider.lower() - if _p in ("openrouter", "llm") and api_key: + # "google" (default classic mode): use Google Cloud API key if available, + # otherwise fall back to deep_translator (legacy, no key). 
+ if _p == "google": + gc_key = _cfg( + getattr(_admin_cfg.google_cloud, "api_key", None), + "GOOGLE_CLOUD_API_KEY", + ) + if gc_key: + from services.providers.google_cloud_provider import LegacyGoogleCloudAdapter + translation_provider = LegacyGoogleCloudAdapter(gc_key) + logger.info("google_provider_using_cloud_api", job_id=job_id) + else: + logger.info("google_provider_no_cloud_key_using_legacy", job_id=job_id) + + elif _p in ("openrouter", "llm") and api_key: translation_provider = OpenRouterTranslationProvider( api_key, model, full_prompt ) @@ -1114,6 +1128,30 @@ async def _run_translation_job( else: raise ValueError(f"Unsupported file type: {file_extension}") + # ── Verify translation actually produced results ── + if not output_path.exists() or output_path.stat().st_size == 0: + error_msg = "Translation failed: output file is empty or missing. The translation provider may be unavailable." + logger.error(f"Job {job_id}: {error_msg}") + tracker.set_error(error_msg) + return + + stats = job_translator.get_translation_stats() + attempted = stats.get("attempted", 0) + changed = stats.get("changed", 0) + + if attempted > 0: + ratio = changed / attempted + logger.info(f"Job {job_id}: translation stats — {changed}/{attempted} texts changed ({ratio:.0%})") + if ratio < 0.15 and changed == 0: + error_msg = ( + f"Translation failed: 0 out of {attempted} texts were translated. " + f"The provider ({provider}) may be unavailable or misconfigured. " + f"Check your API keys in admin settings." 
+ ) + logger.error(f"Job {job_id}: {error_msg}") + tracker.set_error(error_msg) + return + if user_id: await tier_quota_service.increment_on_success(user_id) # Persist monthly usage counters in PostgreSQL (docs + pages) diff --git a/translators/excel_translator.py b/translators/excel_translator.py index ca7baa2..9b5f434 100644 --- a/translators/excel_translator.py +++ b/translators/excel_translator.py @@ -107,6 +107,7 @@ class ExcelTranslator: self._provider = provider self.formula_pattern = re.compile(r"=.*") self._custom_prompt: Optional[str] = None + self._translation_stats = {"attempted": 0, "changed": 0} def set_provider(self, provider: TranslationProvider) -> None: """Set the translation provider.""" @@ -387,26 +388,26 @@ class ExcelTranslator: def _batch_translate( self, texts: List[str], target_language: str, source_language: str = "auto" ) -> List[str]: - """ - Batch translate using new provider interface. - - Args: - texts: List of texts to translate - target_language: Target language code - source_language: Source language code - - Returns: - List of translated texts (same order as input) - """ if not texts: return [] + non_empty = [t for t in texts if t and t.strip()] + self._translation_stats["attempted"] += len(non_empty) + if self._provider is not None: - return self._translate_with_provider( + translated = self._translate_with_provider( texts, target_language, source_language ) + else: + translated = self._translate_with_legacy(texts, target_language, source_language) - return self._translate_with_legacy(texts, target_language, source_language) + changed = sum(1 for orig, trans in zip(texts, translated) if orig != trans and trans.strip()) + self._translation_stats["changed"] += changed + + return translated + + def get_translation_stats(self) -> dict: + return dict(self._translation_stats) def _translate_with_provider( self, texts: List[str], target_language: str, source_language: str diff --git a/translators/pdf_translator.py 
b/translators/pdf_translator.py index 03a9561..4c31e04 100644 --- a/translators/pdf_translator.py +++ b/translators/pdf_translator.py @@ -63,6 +63,7 @@ class PDFTranslator: def __init__(self, provider=None): self._provider = provider self._font_path: Optional[str] = None + self._translation_stats = {"attempted": 0, "changed": 0} def _get_font_path(self) -> Optional[str]: """Resolve a Unicode-capable TTF/OTF font file.""" @@ -825,18 +826,31 @@ class PDFTranslator: self, texts: List[str], target_language: str, source_language: str ) -> List[str]: """Translate a batch of texts.""" + non_empty = [t for t in texts if t and t.strip()] + self._translation_stats["attempted"] += len(non_empty) + + translated = None if self._provider is not None: try: - return self._provider.translate_batch(texts, target_language, source_language) + translated = self._provider.translate_batch(texts, target_language, source_language) except Exception as e: logger.warning("provider_translate_failed", error=str(e)) - from services.translation_service import translation_service - try: - return translation_service.translate_batch(texts, target_language, source_language) - except Exception as e: - logger.warning("legacy_translate_failed", error=str(e)) - return texts + if translated is None: + from services.translation_service import translation_service + try: + translated = translation_service.translate_batch(texts, target_language, source_language) + except Exception as e: + logger.warning("legacy_translate_failed", error=str(e)) + translated = texts + + changed = sum(1 for orig, trans in zip(texts, translated) if orig != trans and trans.strip()) + self._translation_stats["changed"] += changed + + return translated + + def get_translation_stats(self) -> dict: + return dict(self._translation_stats) def _validate_file(self, file_path: Path) -> None: if not file_path.exists(): diff --git a/translators/pptx_translator.py b/translators/pptx_translator.py index 93d7d85..76139ce 100644 --- 
a/translators/pptx_translator.py +++ b/translators/pptx_translator.py @@ -152,6 +152,7 @@ class PowerPointTranslator: """ self._provider = provider self._custom_prompt: Optional[str] = None + self._translation_stats = {"attempted": 0, "changed": 0} def set_provider(self, provider: TranslationProvider) -> None: """Set the translation provider.""" @@ -381,26 +382,26 @@ class PowerPointTranslator: def _batch_translate( self, texts: List[str], target_language: str, source_language: str = "auto" ) -> List[str]: - """ - Batch translate using new provider interface. - - Args: - texts: List of texts to translate - target_language: Target language code - source_language: Source language code - - Returns: - List of translated texts (same order as input) - """ if not texts: return [] + non_empty = [t for t in texts if t and t.strip()] + self._translation_stats["attempted"] += len(non_empty) + if self._provider is not None: - return self._translate_with_provider( + translated = self._translate_with_provider( texts, target_language, source_language ) + else: + translated = self._translate_with_legacy(texts, target_language, source_language) - return self._translate_with_legacy(texts, target_language, source_language) + changed = sum(1 for orig, trans in zip(texts, translated) if orig != trans and trans.strip()) + self._translation_stats["changed"] += changed + + return translated + + def get_translation_stats(self) -> dict: + return dict(self._translation_stats) def _translate_with_provider( self, texts: List[str], target_language: str, source_language: str diff --git a/translators/word_translator.py b/translators/word_translator.py index 5d649eb..9816126 100644 --- a/translators/word_translator.py +++ b/translators/word_translator.py @@ -172,6 +172,7 @@ class WordTranslator: """ self._provider = provider self._custom_prompt: Optional[str] = None + self._translation_stats = {"attempted": 0, "changed": 0} def set_provider(self, provider: TranslationProvider) -> None: """Set the 
translation provider.""" @@ -439,12 +440,23 @@ class WordTranslator: if not texts: return [] + non_empty = [t for t in texts if t and t.strip()] + self._translation_stats["attempted"] += len(non_empty) + if self._provider is not None: - return self._translate_with_provider( + translated = self._translate_with_provider( texts, target_language, source_language ) + else: + translated = self._translate_with_legacy(texts, target_language, source_language) - return self._translate_with_legacy(texts, target_language, source_language) + changed = sum(1 for orig, trans in zip(texts, translated) if orig != trans and trans.strip()) + self._translation_stats["changed"] += changed + + return translated + + def get_translation_stats(self) -> dict: + return dict(self._translation_stats) def _translate_with_provider( self, texts: List[str], target_language: str, source_language: str