Add WebLLM model selection and cache management

2025-11-30 11:57:58 +01:00
parent 9410b07512
commit 465cab8a61
2 changed files with 125 additions and 7 deletions


@@ -10,24 +10,70 @@
let engine = null;
const statusDiv = document.getElementById('status');
const outputDiv = document.getElementById('output');
+let currentModel = null;
async function initEngine() {
statusDiv.textContent = "Initializing WebLLM engine (first time: ~2GB download)...";
+const modelSelect = document.getElementById('model-select');
+const selectedModel = modelSelect.value;
+// If the same model is already loaded, skip
+if (engine && currentModel === selectedModel) {
+statusDiv.textContent = "✅ WebLLM engine already loaded!";
+return;
+}
+// Unload the previous engine so its GPU memory is released before switching models
+if (engine) {
+await engine.unload();
+engine = null;
+}
+statusDiv.textContent = `Initializing ${selectedModel} (first time: downloading model)...`;
document.getElementById('translate-btn').disabled = true;
try {
engine = await CreateMLCEngine("Llama-3.1-8B-Instruct-q4f32_1-MLC", {
engine = await CreateMLCEngine(selectedModel, {
initProgressCallback: (progress) => {
statusDiv.textContent = `Loading: ${progress.text}`;
}
});
statusDiv.textContent = "✅ WebLLM engine ready!";
currentModel = selectedModel;
statusDiv.textContent = `${selectedModel} ready!`;
document.getElementById('translate-btn').disabled = false;
} catch (error) {
statusDiv.textContent = `❌ Error: ${error.message}`;
}
}
+async function clearCache() {
+if (!confirm('This will delete all downloaded WebLLM models (~2-5GB). Continue?')) {
+return;
+}
+try {
+// indexedDB.databases() is not available in every browser (notably older Firefox)
+if (indexedDB.databases) {
+const databases = await indexedDB.databases();
+for (const db of databases) {
+if (db.name && (db.name.includes('webllm') || db.name.includes('mlc'))) {
+// deleteDatabase is callback-based; wrap it so deletion actually completes
+await new Promise((resolve, reject) => {
+const req = indexedDB.deleteDatabase(db.name);
+req.onsuccess = resolve;
+req.onblocked = resolve;
+req.onerror = () => reject(req.error);
+});
+}
+}
+}
+// web-llm keeps model weights in the Cache Storage API by default
+if ('caches' in window) {
+const cacheNames = await caches.keys();
+for (const name of cacheNames) {
+if (name.includes('webllm') || name.includes('mlc')) {
+await caches.delete(name);
+}
+}
+}
+alert('✅ Cache cleared! The page will now reload.');
+location.reload();
+} catch (error) {
+alert('❌ Error: ' + error.message);
+}
+}
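// (Sketch, not part of this commit.) A companion helper for clearCache():
// the standard StorageManager API reports approximate origin storage usage,
// which makes the effect of clearing the model cache visible.
async function reportStorageUsage() {
if (navigator.storage && navigator.storage.estimate) {
// estimate() resolves with { usage, quota } in bytes
const { usage, quota } = await navigator.storage.estimate();
console.log(`Storage used: ${(usage / 1e9).toFixed(2)} GB of ${(quota / 1e9).toFixed(2)} GB`);
}
}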
async function translateText() {
const inputText = document.getElementById('input-text').value;
const targetLang = document.getElementById('target-lang').value;
@@ -63,8 +109,14 @@
}
-// Auto-init on page load
-window.addEventListener('DOMContentLoaded', initEngine);
+window.addEventListener('DOMContentLoaded', () => {
+// Don't auto-init; let the user choose a model first
+statusDiv.textContent = "Select a model and click 'Load Model' to start.";
+});
window.translateText = translateText;
+window.initEngine = initEngine;
+window.clearCache = clearCache;
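// The window assignments above exist because inline onclick handlers can't
// reach module-scoped functions; this script is presumably loaded with
// type="module" in order to import CreateMLCEngine.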
</script>
<style>
body {
@@ -126,6 +178,12 @@
opacity: 0.5;
cursor: not-allowed;
}
+.btn-group {
+display: grid;
+grid-template-columns: 1fr 1fr;
+gap: 10px;
+margin-bottom: 15px;
+}
#status {
margin-top: 15px;
padding: 12px;
@@ -149,7 +207,21 @@
<div class="container">
<h1>WebLLM Translation Demo</h1>
<div class="info">
-<strong>Info:</strong> This demo runs entirely in your browser using WebGPU. First load will download ~2GB model.
+<strong>Info:</strong> Runs entirely in your browser using WebGPU. Models are cached after first download.
</div>
<label for="model-select">Select Model:</label>
<select id="model-select">
<option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB) - Fast</option>
<option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB) - Accurate</option>
<option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB) - Balanced</option>
<option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB) - High Quality</option>
<option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB) - Lightweight</option>
</select>
<div class="btn-group">
<button onclick="initEngine()" style="background: #059669;">Load Model</button>
<button onclick="clearCache()" style="background: #dc2626;">Clear Cache</button>
</div>
<label for="input-text">Text to translate:</label>