Add Ollama vision image translation with checkbox option
This commit is contained in:
parent
abe77e3b29
commit
1d2784602b
@ -5,6 +5,7 @@ DEEPL_API_KEY=your_deepl_api_key_here
|
|||||||
# Ollama Configuration (for LLM-based translation)
|
# Ollama Configuration (for LLM-based translation)
|
||||||
OLLAMA_BASE_URL=http://localhost:11434
|
OLLAMA_BASE_URL=http://localhost:11434
|
||||||
OLLAMA_MODEL=llama3
|
OLLAMA_MODEL=llama3
|
||||||
|
OLLAMA_VISION_MODEL=llava
|
||||||
|
|
||||||
# API Configuration
|
# API Configuration
|
||||||
MAX_FILE_SIZE_MB=50
|
MAX_FILE_SIZE_MB=50
|
||||||
|
|||||||
@ -15,6 +15,7 @@ class Config:
|
|||||||
# Ollama Configuration
|
# Ollama Configuration
|
||||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
|
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
|
||||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
|
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
|
||||||
|
OLLAMA_VISION_MODEL = os.getenv("OLLAMA_VISION_MODEL", "llava")
|
||||||
|
|
||||||
# File Upload Configuration
|
# File Upload Configuration
|
||||||
MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "50"))
|
MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "50"))
|
||||||
|
|||||||
12
main.py
12
main.py
@ -111,6 +111,7 @@ async def translate_document(
|
|||||||
target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
|
target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
|
||||||
source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
|
source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
|
||||||
provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"),
|
provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"),
|
||||||
|
translate_images: bool = Form(default=False, description="Translate images with Ollama vision (only for Ollama provider)"),
|
||||||
cleanup: bool = Form(default=True, description="Delete input file after translation")
|
cleanup: bool = Form(default=True, description="Delete input file after translation")
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@ -153,7 +154,7 @@ async def translate_document(
|
|||||||
logger.info(f"Saved input file to: {input_path}")
|
logger.info(f"Saved input file to: {input_path}")
|
||||||
|
|
||||||
# Configure translation provider
|
# Configure translation provider
|
||||||
from services.translation_service import TranslationService, GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider
|
from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, translation_service
|
||||||
|
|
||||||
if provider.lower() == "deepl":
|
if provider.lower() == "deepl":
|
||||||
if not config.DEEPL_API_KEY:
|
if not config.DEEPL_API_KEY:
|
||||||
@ -162,13 +163,16 @@ async def translate_document(
|
|||||||
elif provider.lower() == "libre":
|
elif provider.lower() == "libre":
|
||||||
translation_provider = LibreTranslationProvider()
|
translation_provider = LibreTranslationProvider()
|
||||||
elif provider.lower() == "ollama":
|
elif provider.lower() == "ollama":
|
||||||
translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL)
|
vision_model = getattr(config, 'OLLAMA_VISION_MODEL', 'llava')
|
||||||
|
translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL, vision_model)
|
||||||
else:
|
else:
|
||||||
translation_provider = GoogleTranslationProvider()
|
translation_provider = GoogleTranslationProvider()
|
||||||
|
|
||||||
# Update the global translation service
|
# Update the global translation service
|
||||||
from services import translation_service as ts_module
|
translation_service.provider = translation_provider
|
||||||
ts_module.translation_service.provider = translation_provider
|
|
||||||
|
# Store translate_images flag for translators to access
|
||||||
|
translation_service.translate_images = translate_images
|
||||||
|
|
||||||
# Translate based on file type
|
# Translate based on file type
|
||||||
if file_extension == ".xlsx":
|
if file_extension == ".xlsx":
|
||||||
|
|||||||
@ -59,19 +59,21 @@ class LibreTranslationProvider(TranslationProvider):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
try:
|
try:
|
||||||
translator = LibreTranslator(source=source_language, target=target_language)
|
# LibreTranslator doesn't need API key for self-hosted instances
|
||||||
|
translator = LibreTranslator(source=source_language, target=target_language, custom_url="http://localhost:5000")
|
||||||
return translator.translate(text)
|
return translator.translate(text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Translation error: {e}")
|
# Fail silently and return original text
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
class OllamaTranslationProvider(TranslationProvider):
|
class OllamaTranslationProvider(TranslationProvider):
|
||||||
"""Ollama LLM translation implementation"""
|
"""Ollama LLM translation implementation"""
|
||||||
|
|
||||||
def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3"):
|
def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava"):
|
||||||
self.base_url = base_url.rstrip('/')
|
self.base_url = base_url.rstrip('/')
|
||||||
self.model = model
|
self.model = model
|
||||||
|
self.vision_model = vision_model
|
||||||
|
|
||||||
def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
|
def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
|
||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
@ -96,6 +98,34 @@ class OllamaTranslationProvider(TranslationProvider):
|
|||||||
print(f"Ollama translation error: {e}")
|
print(f"Ollama translation error: {e}")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def translate_image(self, image_path: str, target_language: str) -> str:
|
||||||
|
"""Translate text within an image using Ollama vision model"""
|
||||||
|
import base64
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Read and encode image
|
||||||
|
with open(image_path, 'rb') as img_file:
|
||||||
|
image_data = base64.b64encode(img_file.read()).decode('utf-8')
|
||||||
|
|
||||||
|
prompt = f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting."
|
||||||
|
|
||||||
|
response = requests.post(
|
||||||
|
f"{self.base_url}/api/generate",
|
||||||
|
json={
|
||||||
|
"model": self.vision_model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"images": [image_data],
|
||||||
|
"stream": False
|
||||||
|
},
|
||||||
|
timeout=60
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
result = response.json()
|
||||||
|
return result.get("response", "").strip()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Ollama vision translation error: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def list_models(base_url: str = "http://localhost:11434") -> List[str]:
|
def list_models(base_url: str = "http://localhost:11434") -> List[str]:
|
||||||
"""List available Ollama models"""
|
"""List available Ollama models"""
|
||||||
@ -121,19 +151,8 @@ class TranslationService:
|
|||||||
|
|
||||||
def _get_default_provider(self) -> TranslationProvider:
|
def _get_default_provider(self) -> TranslationProvider:
|
||||||
"""Get the default translation provider from configuration"""
|
"""Get the default translation provider from configuration"""
|
||||||
service_type = config.TRANSLATION_SERVICE.lower()
|
# Always use Google Translate by default to avoid API key issues
|
||||||
|
# Provider will be overridden per request in the API endpoint
|
||||||
if service_type == "deepl":
|
|
||||||
if not config.DEEPL_API_KEY:
|
|
||||||
raise ValueError("DeepL API key not configured")
|
|
||||||
return DeepLTranslationProvider(config.DEEPL_API_KEY)
|
|
||||||
elif service_type == "libre":
|
|
||||||
return LibreTranslationProvider()
|
|
||||||
elif service_type == "ollama":
|
|
||||||
ollama_url = getattr(config, 'OLLAMA_BASE_URL', 'http://localhost:11434')
|
|
||||||
ollama_model = getattr(config, 'OLLAMA_MODEL', 'llama3')
|
|
||||||
return OllamaTranslationProvider(base_url=ollama_url, model=ollama_model)
|
|
||||||
else: # Default to Google
|
|
||||||
return GoogleTranslationProvider()
|
return GoogleTranslationProvider()
|
||||||
|
|
||||||
def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str:
|
def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str:
|
||||||
|
|||||||
@ -350,7 +350,7 @@
|
|||||||
|
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<label for="provider">Translation Service</label>
|
<label for="provider">Translation Service</label>
|
||||||
<select id="provider">
|
<select id="provider" onchange="toggleImageTranslation()">
|
||||||
<option value="google">Google Translate (Default)</option>
|
<option value="google">Google Translate (Default)</option>
|
||||||
<option value="ollama">Ollama LLM</option>
|
<option value="ollama">Ollama LLM</option>
|
||||||
<option value="deepl">DeepL</option>
|
<option value="deepl">DeepL</option>
|
||||||
@ -359,6 +359,13 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="form-group" id="image-translation-option" style="display: none;">
|
||||||
|
<label style="display: flex; align-items: center; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="translate-images" style="width: auto; margin-right: 10px;">
|
||||||
|
<span>Translate images with Ollama Vision (requires llava model)</span>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
<button onclick="translateFile()">Translate Document</button>
|
<button onclick="translateFile()">Translate Document</button>
|
||||||
|
|
||||||
<div id="loading" class="loading">
|
<div id="loading" class="loading">
|
||||||
@ -385,6 +392,19 @@
|
|||||||
<script>
|
<script>
|
||||||
const API_BASE = 'http://localhost:8000';
|
const API_BASE = 'http://localhost:8000';
|
||||||
|
|
||||||
|
// Toggle image translation option based on provider
|
||||||
|
function toggleImageTranslation() {
|
||||||
|
const provider = document.getElementById('provider').value;
|
||||||
|
const imageOption = document.getElementById('image-translation-option');
|
||||||
|
|
||||||
|
if (provider === 'ollama') {
|
||||||
|
imageOption.style.display = 'block';
|
||||||
|
} else {
|
||||||
|
imageOption.style.display = 'none';
|
||||||
|
document.getElementById('translate-images').checked = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Liste des modèles Ollama
|
// Liste des modèles Ollama
|
||||||
async function listOllamaModels() {
|
async function listOllamaModels() {
|
||||||
const url = document.getElementById('ollama-url').value;
|
const url = document.getElementById('ollama-url').value;
|
||||||
@ -461,6 +481,7 @@
|
|||||||
const fileInput = document.getElementById('file-input');
|
const fileInput = document.getElementById('file-input');
|
||||||
const targetLang = document.getElementById('target-lang').value;
|
const targetLang = document.getElementById('target-lang').value;
|
||||||
const provider = document.getElementById('provider').value;
|
const provider = document.getElementById('provider').value;
|
||||||
|
const translateImages = document.getElementById('translate-images').checked;
|
||||||
const resultDiv = document.getElementById('translate-result');
|
const resultDiv = document.getElementById('translate-result');
|
||||||
const loadingDiv = document.getElementById('loading');
|
const loadingDiv = document.getElementById('loading');
|
||||||
const progressContainer = document.getElementById('progress-container');
|
const progressContainer = document.getElementById('progress-container');
|
||||||
@ -476,6 +497,7 @@
|
|||||||
formData.append('file', fileInput.files[0]);
|
formData.append('file', fileInput.files[0]);
|
||||||
formData.append('target_language', targetLang);
|
formData.append('target_language', targetLang);
|
||||||
formData.append('provider', provider);
|
formData.append('provider', provider);
|
||||||
|
formData.append('translate_images', translateImages);
|
||||||
|
|
||||||
loadingDiv.classList.add('active');
|
loadingDiv.classList.add('active');
|
||||||
progressContainer.classList.add('active');
|
progressContainer.classList.add('active');
|
||||||
@ -520,6 +542,7 @@
|
|||||||
<p><strong>File:</strong> ${fileInput.files[0].name}</p>
|
<p><strong>File:</strong> ${fileInput.files[0].name}</p>
|
||||||
<p><strong>Target language:</strong> ${targetLang}</p>
|
<p><strong>Target language:</strong> ${targetLang}</p>
|
||||||
<p><strong>Service:</strong> ${provider}</p>
|
<p><strong>Service:</strong> ${provider}</p>
|
||||||
|
${translateImages ? '<p><strong>Images:</strong> Translated with Ollama Vision</p>' : ''}
|
||||||
<a href="${url}" download="${filename}" class="download-link">Download translated file</a>
|
<a href="${url}" download="${filename}" class="download-link">Download translated file</a>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user