diff --git a/EMBEDDING-VALIDATION-TASK.md b/EMBEDDING-VALIDATION-TASK.md new file mode 100644 index 0000000..d0317ab --- /dev/null +++ b/EMBEDDING-VALIDATION-TASK.md @@ -0,0 +1,42 @@ +# Embedding Model Validation & Search Robustness + +## Context +pgvector supports a maximum of 2000 dimensions for HNSW/IVFFlat indexes. The app must validate embedding models and gracefully handle dimension mismatches. + +## Tasks + +### 1. Revert dimension from 2560 back to 1536 +- All files changed in commit e09ea3a need reverting so that the dimension is 1536 everywhere +- This includes: schema.prisma (both), migration.sql, embedding.service.ts, validate route, scripts, tests + +### 2. Add embedding dimension validation in admin settings +File: `memento-note/lib/ai/services/embedding.service.ts` +- After generating an embedding, check its dimension +- Add a method `validateEmbeddingModel()` that: + - Generates a test embedding + - Checks dimension count + - Returns { valid: boolean, dimensions: number, warning?: string } + - If dimensions > 2000: warning "This model produces {N} dimensions. pgvector indexes support max 2000 dimensions. Semantic search will use sequential scan (slower for large note collections)." + - If dimensions != current DB vector dimension: warning "Dimension mismatch: model produces {N}d but DB stores {M}d. You need to reindex all notes." + +File: `memento-note/app/api/admin/embeddings/validate/route.ts` +- Use the new validateEmbeddingModel() method +- Return dimension info in the API response + +File: `memento-note/app/api/admin/settings/route.ts` (or wherever the embedding model is saved) +- After saving a new embedding model, call validateEmbeddingModel() +- Include the warning in the response so the frontend can display it + +### 3. Make semantic search robust +File: `memento-note/lib/ai/services/semantic-search.service.ts` +- In `vectorSearch()`: after generating the query embedding, check that its dimension matches the DB (1536). 
If not, log a warning and return [] (fall back to FTS) +- In `_doSearch()`: the existing try/catch already calls `_ftsFallback()`. Verify that this fallback still works. + +### 4. Update the migration SQL +File: `memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql` +- Keep vector(1536) as the target type +- Verify that the migration applies correctly + +### 5. Commit +- Git add and commit with a descriptive message +- Do NOT push diff --git a/memento-note/lib/ai/factory.ts b/memento-note/lib/ai/factory.ts index aaf1380..7f084af 100644 --- a/memento-note/lib/ai/factory.ts +++ b/memento-note/lib/ai/factory.ts @@ -117,7 +117,7 @@ function createDeepSeekProvider(config: Record, modelName: strin } function createOpenRouterProvider(config: Record, modelName: string, embeddingModelName: string): CustomOpenAIProvider { - const apiKey = config?.OPENROUTER_API_KEY || process.env.OPENROUTER_API_KEY || ''; + const apiKey = config?.OPENROUTER_API_KEY || process.env.OPENROUTER_API_KEY || config?.CUSTOM_OPENAI_API_KEY || process.env.CUSTOM_OPENAI_API_KEY || ''; if (!apiKey) throw new Error('OPENROUTER_API_KEY is required when using OpenRouter provider'); const defaults = PROVIDER_DEFAULTS.openrouter; return new CustomOpenAIProvider(apiKey, defaults.baseUrl, modelName || defaults.model, embeddingModelName || defaults.embeddingModel);