Add WebLLM model selection and cache management
Commit 465cab8a61 (parent 9410b07512)
@@ -368,9 +368,24 @@
         </div>

         <div class="form-group" id="webllm-info" style="display: none; padding: 12px; background: #e0f2ff; border-radius: 6px; border-left: 4px solid #2563eb;">
-            <p style="margin: 0; font-size: 13px; color: #1e40af;">
-                <strong>WebLLM Mode:</strong> First use will download the model (~2GB) to your browser. Translation runs entirely in your browser using WebGPU.
+            <p style="margin: 0 0 10px 0; font-size: 13px; color: #1e40af;">
+                <strong>WebLLM Mode:</strong> Translation runs entirely in your browser using WebGPU. First use downloads the model.
             </p>
+            <div style="display: grid; grid-template-columns: 1fr auto; gap: 10px; align-items: end;">
+                <div>
+                    <label for="webllm-model" style="font-size: 12px; color: #4a5568; margin-bottom: 4px;">Select Model:</label>
+                    <select id="webllm-model" style="width: 100%; padding: 6px; font-size: 13px; border: 1px solid #cbd5e0; border-radius: 4px;">
+                        <option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB)</option>
+                        <option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB)</option>
+                        <option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB)</option>
+                        <option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB)</option>
+                        <option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB)</option>
+                    </select>
+                </div>
+                <button onclick="clearWebLLMCache()" style="background: #dc2626; padding: 6px 12px; font-size: 13px; white-space: nowrap;">
+                    Clear Cache
+                </button>
+            </div>
         </div>

         <button onclick="translateFile()">Translate Document</button>
@@ -399,6 +414,37 @@
     <script>
         const API_BASE = 'http://localhost:8000';

+        // Clear WebLLM cache
+        async function clearWebLLMCache() {
+            if (!confirm('This will delete all downloaded WebLLM models from your browser cache. Continue?')) {
+                return;
+            }
+
+            try {
+                // Clear IndexedDB cache used by WebLLM
+                const databases = await indexedDB.databases();
+                for (const db of databases) {
+                    if (db.name && (db.name.includes('webllm') || db.name.includes('mlc'))) {
+                        indexedDB.deleteDatabase(db.name);
+                    }
+                }
+
+                // Clear Cache API
+                if ('caches' in window) {
+                    const cacheNames = await caches.keys();
+                    for (const name of cacheNames) {
+                        if (name.includes('webllm') || name.includes('mlc')) {
+                            await caches.delete(name);
+                        }
+                    }
+                }
+
+                alert('✅ WebLLM cache cleared successfully! Refresh the page.');
+            } catch (error) {
+                alert('❌ Error clearing cache: ' + error.message);
+            }
+        }
+
         // Toggle image translation option based on provider
         function toggleImageTranslation() {
             const provider = document.getElementById('provider').value;
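Two caveats in clearWebLLMCache() as committed: indexedDB.deleteDatabase() is asynchronous and returns a request that the loop never waits on, and indexedDB.databases() is not implemented in every browser. A sketch of a drop-in for the IndexedDB section of the try block that addresses both:

    // Wrap deleteDatabase() so it can be awaited; 'blocked' still completes
    // once open connections to the database close, so treat it as success.
    function deleteDb(name) {
      return new Promise((resolve, reject) => {
        const req = indexedDB.deleteDatabase(name);
        req.onsuccess = () => resolve();
        req.onblocked = () => resolve();
        req.onerror = () => reject(req.error);
      });
    }

    if (typeof indexedDB.databases === "function") {
      const databases = await indexedDB.databases();
      await Promise.all(
        databases
          .filter((db) => db.name && (db.name.includes("webllm") || db.name.includes("mlc")))
          .map((db) => deleteDb(db.name))
      );
    }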
@@ -10,24 +10,70 @@
 let engine = null;
 const statusDiv = document.getElementById('status');
 const outputDiv = document.getElementById('output');
+let currentModel = null;

 async function initEngine() {
-    statusDiv.textContent = "Initializing WebLLM engine (first time: ~2GB download)...";
+    const modelSelect = document.getElementById('model-select');
+    const selectedModel = modelSelect.value;
+
+    // If already loaded and same model, skip
+    if (engine && currentModel === selectedModel) {
+        statusDiv.textContent = "✅ WebLLM engine already ready!";
+        return;
+    }
+
+    // Clear previous engine
+    if (engine) {
+        engine = null;
+    }
+
+    statusDiv.textContent = `Initializing ${selectedModel} (first time: downloading model)...`;
+    document.getElementById('translate-btn').disabled = true;

     try {
-        engine = await CreateMLCEngine("Llama-3.1-8B-Instruct-q4f32_1-MLC", {
+        engine = await CreateMLCEngine(selectedModel, {
             initProgressCallback: (progress) => {
                 statusDiv.textContent = `Loading: ${progress.text}`;
             }
         });

-        statusDiv.textContent = "✅ WebLLM engine ready!";
+        currentModel = selectedModel;
+        statusDiv.textContent = `✅ ${selectedModel} ready!`;
         document.getElementById('translate-btn').disabled = false;
     } catch (error) {
         statusDiv.textContent = `❌ Error: ${error.message}`;
     }
 }

+async function clearCache() {
+    if (!confirm('This will delete all downloaded WebLLM models (~2-5GB). Continue?')) {
+        return;
+    }
+
+    try {
+        const databases = await indexedDB.databases();
+        for (const db of databases) {
+            if (db.name && (db.name.includes('webllm') || db.name.includes('mlc'))) {
+                indexedDB.deleteDatabase(db.name);
+            }
+        }
+
+        if ('caches' in window) {
+            const cacheNames = await caches.keys();
+            for (const name of cacheNames) {
+                if (name.includes('webllm') || name.includes('mlc')) {
+                    await caches.delete(name);
+                }
+            }
+        }
+
+        alert('✅ Cache cleared! Refresh the page.');
+        location.reload();
+    } catch (error) {
+        alert('❌ Error: ' + error.message);
+    }
+}
+
 async function translateText() {
     const inputText = document.getElementById('input-text').value;
     const targetLang = document.getElementById('target-lang').value;
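One thing the model-swap path above glosses over: setting engine = null drops the JavaScript reference but does not immediately free the previous model's GPU buffers. web-llm's MLCEngine exposes unload() for that, and reload(modelId) can swap models on an existing engine; a sketch of the explicit-release variant, worth verifying against the web-llm version the page pins:

    // Release the old model's GPU memory before loading a different one.
    if (engine) {
      await engine.unload();
      engine = null;
    }

The initProgressCallback report also carries a numeric progress field (0 to 1) alongside progress.text, if the status line should show a percentage.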
@@ -63,8 +109,14 @@
 }

 // Auto-init on page load
-window.addEventListener('DOMContentLoaded', initEngine);
+window.addEventListener('DOMContentLoaded', () => {
+    // Don't auto-init, let user choose model first
+    statusDiv.textContent = "Select a model and click 'Load Model' to start.";
+});

 window.translateText = translateText;
+window.initEngine = initEngine;
+window.clearCache = clearCache;
 </script>
 <style>
     body {
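The window.initEngine / window.clearCache assignments above imply the script is a module (it has to be, to import web-llm); module top-level functions are not globals, so the inline onclick handlers in the markup cannot see them until they are attached to window. A minimal illustration of the pattern, with the CDN URL as an assumption:

    <script type="module">
      import { CreateMLCEngine } from "https://esm.run/@mlc-ai/web-llm";
      async function initEngine() { /* ... */ }
      // Without this line, <button onclick="initEngine()"> throws a
      // ReferenceError: module scope is not the global scope.
      window.initEngine = initEngine;
    </script>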
@@ -126,6 +178,12 @@
         opacity: 0.5;
         cursor: not-allowed;
     }
+    .btn-group {
+        display: grid;
+        grid-template-columns: 1fr 1fr;
+        gap: 10px;
+        margin-bottom: 15px;
+    }
     #status {
         margin-top: 15px;
         padding: 12px;
@@ -149,7 +207,21 @@
     <div class="container">
         <h1>WebLLM Translation Demo</h1>
         <div class="info">
-            <strong>Info:</strong> This demo runs entirely in your browser using WebGPU. First load will download ~2GB model.
+            <strong>Info:</strong> Runs entirely in your browser using WebGPU. Models are cached after first download.
         </div>

+        <label for="model-select">Select Model:</label>
+        <select id="model-select">
+            <option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB) - Fast</option>
+            <option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB) - Accurate</option>
+            <option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB) - Balanced</option>
+            <option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB) - High Quality</option>
+            <option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB) - Lightweight</option>
+        </select>
+
+        <div class="btn-group">
+            <button onclick="initEngine()" style="background: #059669;">Load Model</button>
+            <button onclick="clearCache()" style="background: #dc2626;">Clear Cache</button>
+        </div>
+
         <label for="input-text">Text to translate:</label>