From e09ea3a14522c7f238c3843b7a591ed990cc7f39 Mon Sep 17 00:00:00 2001 From: Antigravity Date: Tue, 12 May 2026 09:07:55 +0000 Subject: [PATCH] fix: switch embedding dimension from 1536 to 2560 for qwen-embedding-4b --- mcp-server/prisma/schema.prisma | 2 +- .../api/admin/embeddings/validate/route.ts | 2 +- .../lib/ai/providers/custom-openai.ts | 53 ++++++++++++++--- memento-note/lib/ai/providers/openrouter.ts | 57 ++++++++++++++++--- .../lib/ai/services/embedding.service.ts | 4 +- .../migration.sql | 6 +- memento-note/prisma/schema.prisma | 2 +- memento-note/scripts/migrate-embeddings.ts | 2 +- memento-note/tests/ai-provider.spec.ts | 2 +- .../tests/migration/integrity.test.ts | 2 +- 10 files changed, 105 insertions(+), 27 deletions(-) diff --git a/mcp-server/prisma/schema.prisma b/mcp-server/prisma/schema.prisma index b96a831..9ef3638 100644 --- a/mcp-server/prisma/schema.prisma +++ b/mcp-server/prisma/schema.prisma @@ -131,7 +131,7 @@ model Note { model NoteEmbedding { id String @id @default(cuid()) noteId String @unique - embedding Unsupported("vector(1536)") + embedding Unsupported("vector(2560)") createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) diff --git a/memento-note/app/api/admin/embeddings/validate/route.ts b/memento-note/app/api/admin/embeddings/validate/route.ts index d0a60cb..10f3084 100644 --- a/memento-note/app/api/admin/embeddings/validate/route.ts +++ b/memento-note/app/api/admin/embeddings/validate/route.ts @@ -35,7 +35,7 @@ export async function GET() { const invalidResult: Array<{ count: bigint }> = await prisma.$queryRawUnsafe( `SELECT COUNT(*)::bigint as count FROM "NoteEmbedding" e WHERE e."embedding" IS NULL - OR array_length(string_to_array(replace(replace(e."embedding"::text, '[', ''), ']', ''), ','), 1) != 1536` + OR array_length(string_to_array(replace(replace(e."embedding"::text, '[', ''), ']', ''), ','), 1) != 2560` ) const 
invalidCount = Number(invalidResult[0]?.count ?? 0) diff --git a/memento-note/lib/ai/providers/custom-openai.ts b/memento-note/lib/ai/providers/custom-openai.ts index 40d77aa..c71d02b 100644 --- a/memento-note/lib/ai/providers/custom-openai.ts +++ b/memento-note/lib/ai/providers/custom-openai.ts @@ -1,13 +1,13 @@ import { createOpenAI } from '@ai-sdk/openai'; -import { generateObject, generateText as aiGenerateText, embed, stepCountIs } from 'ai'; +import { generateObject, generateText as aiGenerateText, stepCountIs } from 'ai'; import { z } from 'zod'; import { AIProvider, TagSuggestion, TitleSuggestion, ToolUseOptions, ToolCallResult } from '../types'; export class CustomOpenAIProvider implements AIProvider { private model: any; - private embeddingModel: any; private apiKey: string; private baseUrl: string; + private embeddingModelName: string; constructor( apiKey: string, @@ -17,6 +17,7 @@ export class CustomOpenAIProvider implements AIProvider { ) { this.apiKey = apiKey; this.baseUrl = baseUrl.endsWith('/') ? 
baseUrl.slice(0, -1) : baseUrl; + this.embeddingModelName = embeddingModelName; // Create OpenAI-compatible client with custom base URL // Use .chat() to force /chat/completions endpoint (avoids Responses API) const customClient = createOpenAI({ @@ -44,7 +45,16 @@ export class CustomOpenAIProvider implements AIProvider { }); this.model = customClient.chat(modelName); - this.embeddingModel = customClient.embedding(embeddingModelName); + } + + private async fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number = 60_000): Promise { + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), timeoutMs) + try { + return await fetch(url, { ...options, signal: controller.signal }) + } finally { + clearTimeout(timer) + } } async generateTags(content: string): Promise { @@ -70,13 +80,40 @@ export class CustomOpenAIProvider implements AIProvider { async getEmbeddings(text: string): Promise { try { - const { embedding } = await embed({ - model: this.embeddingModel, - value: text, + const response = await this.fetchWithTimeout(`${this.baseUrl}/embeddings`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}`, + 'HTTP-Referer': 'https://localhost:3000', + 'X-Title': 'Memento AI', + }, + body: JSON.stringify({ + model: this.embeddingModelName, + input: text, + }), }); - return embedding; + + if (!response.ok) { + const errText = await response.text(); + throw new Error(`${this.baseUrl}/embeddings error ${response.status}: ${errText}`); + } + + const data = await response.json(); + + // Standard OpenAI-compatible response: { data: [{ embedding: number[] }] } + if (data.data && Array.isArray(data.data) && data.data[0]?.embedding) { + return data.data[0].embedding; + } + + // Fallback: some providers return { embedding: number[] } + if (data.embedding && Array.isArray(data.embedding)) { + return data.embedding; + } + + throw new Error(`Unexpected embeddings response 
shape: ${JSON.stringify(data)}`); } catch (e) { - console.error('Error generating embeddings (Custom OpenAI):', e); + console.error('Error generating embeddings (CustomOpenAI):', e); throw e; } } diff --git a/memento-note/lib/ai/providers/openrouter.ts b/memento-note/lib/ai/providers/openrouter.ts index 5a38c72..f7f3e20 100644 --- a/memento-note/lib/ai/providers/openrouter.ts +++ b/memento-note/lib/ai/providers/openrouter.ts @@ -1,21 +1,35 @@ import { createOpenAI } from '@ai-sdk/openai'; -import { generateObject, generateText as aiGenerateText, embed, stepCountIs } from 'ai'; +import { generateObject, generateText as aiGenerateText, stepCountIs } from 'ai'; import { z } from 'zod'; import { AIProvider, TagSuggestion, TitleSuggestion, ToolUseOptions, ToolCallResult } from '../types'; export class OpenRouterProvider implements AIProvider { private model: any; - private embeddingModel: any; + private apiKey: string; + private baseUrl: string; + private embeddingModelName: string; constructor(apiKey: string, modelName: string = 'anthropic/claude-3-haiku', embeddingModelName: string = 'openai/text-embedding-3-small') { + this.apiKey = apiKey; + this.baseUrl = 'https://openrouter.ai/api/v1'; + this.embeddingModelName = embeddingModelName; // Create OpenAI-compatible client for OpenRouter const openrouter = createOpenAI({ - baseURL: 'https://openrouter.ai/api/v1', + baseURL: this.baseUrl, apiKey: apiKey, }); this.model = openrouter.chat(modelName); - this.embeddingModel = openrouter.embedding(embeddingModelName); + } + + private async fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number = 60_000): Promise { + const controller = new AbortController() + const timer = setTimeout(() => controller.abort(), timeoutMs) + try { + return await fetch(url, { ...options, signal: controller.signal }) + } finally { + clearTimeout(timer) + } } async generateTags(content: string): Promise { @@ -41,11 +55,38 @@ export class OpenRouterProvider implements AIProvider { 
async getEmbeddings(text: string): Promise { try { - const { embedding } = await embed({ - model: this.embeddingModel, - value: text, + const response = await this.fetchWithTimeout(`${this.baseUrl}/embeddings`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}`, + 'HTTP-Referer': 'https://localhost:3000', + 'X-Title': 'Memento AI', + }, + body: JSON.stringify({ + model: this.embeddingModelName, + input: text, + }), }); - return embedding; + + if (!response.ok) { + const errText = await response.text(); + throw new Error(`OpenRouter embeddings error ${response.status}: ${errText}`); + } + + const data = await response.json(); + + // OpenRouter returns { data: [{ embedding: number[] }] } + if (data.data && Array.isArray(data.data) && data.data[0]?.embedding) { + return data.data[0].embedding; + } + + // Fallback: some OpenAI-compatible providers return { embedding: number[] } + if (data.embedding && Array.isArray(data.embedding)) { + return data.embedding; + } + + throw new Error(`Unexpected OpenRouter embeddings response shape: ${JSON.stringify(data)}`); } catch (e) { console.error('Error generating embeddings (OpenRouter):', e); throw e; diff --git a/memento-note/lib/ai/services/embedding.service.ts b/memento-note/lib/ai/services/embedding.service.ts index a000c73..6183072 100644 --- a/memento-note/lib/ai/services/embedding.service.ts +++ b/memento-note/lib/ai/services/embedding.service.ts @@ -1,7 +1,7 @@ /** * Embedding Service * Generates vector embeddings for semantic search and similarity analysis. - * Stores embeddings as native pgvector(1536) in PostgreSQL. + * Stores embeddings as native pgvector(2560) in PostgreSQL. 
*/ import { getAIProvider } from '../factory' @@ -14,7 +14,7 @@ export interface EmbeddingResult { } export class EmbeddingService { - private readonly EMBEDDING_DIMENSION = 1536 + private readonly EMBEDDING_DIMENSION = 2560 async generateEmbedding(text: string): Promise { if (!text || text.trim().length === 0) { diff --git a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql index 745c352..1b338b1 100644 --- a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql +++ b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql @@ -1,7 +1,7 @@ -- Phase 1: Enable pgvector extension CREATE EXTENSION IF NOT EXISTS vector; --- Phase 2: Convert embedding column from text to vector(1536) +-- Phase 2: Convert embedding column from text to vector(2560) -- Idempotent: detects current column type and only converts when needed. 
-- Handles all partial states from previous failed migration attempts: -- A) embedding is text → direct ALTER COLUMN TYPE conversion @@ -25,8 +25,8 @@ BEGIN IF _emb_type IS NOT NULL THEN ALTER TABLE "NoteEmbedding" DROP COLUMN IF EXISTS "_vec_tmp"; ALTER TABLE "NoteEmbedding" - ALTER COLUMN "embedding" TYPE vector(1536) - USING "embedding"::vector(1536); + ALTER COLUMN "embedding" TYPE vector(2560) + USING "embedding"::vector(2560); RETURN; END IF; diff --git a/memento-note/prisma/schema.prisma b/memento-note/prisma/schema.prisma index 61cd7ae..8b0b711 100644 --- a/memento-note/prisma/schema.prisma +++ b/memento-note/prisma/schema.prisma @@ -300,7 +300,7 @@ model UserAISettings { model NoteEmbedding { id String @id @default(cuid()) noteId String @unique - embedding Unsupported("vector(1536)") + embedding Unsupported("vector(2560)") createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) diff --git a/memento-note/scripts/migrate-embeddings.ts b/memento-note/scripts/migrate-embeddings.ts index fad7e5a..217d6e5 100644 --- a/memento-note/scripts/migrate-embeddings.ts +++ b/memento-note/scripts/migrate-embeddings.ts @@ -41,7 +41,7 @@ async function main() { // Embedding will be generated by the indexNote method which handles pgvector format await prisma.$executeRawUnsafe( `INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt") - VALUES (gen_random_uuid(), $1, '[0]'::vector(1536), now(), now()) + VALUES (gen_random_uuid(), $1, '[0]'::vector(2560), now(), now()) ON CONFLICT ("noteId") DO NOTHING`, note.id ) diff --git a/memento-note/tests/ai-provider.spec.ts b/memento-note/tests/ai-provider.spec.ts index a7bf111..b45df28 100644 --- a/memento-note/tests/ai-provider.spec.ts +++ b/memento-note/tests/ai-provider.spec.ts @@ -47,7 +47,7 @@ test.describe('AI Provider Configuration Tests', () => { // Verify embeddings provider is working 
expect(result.embeddingsProvider).toBe('openai'); - expect(result.embeddingLength).toBe(1536); // OpenAI text-embedding-3-small + expect(result.embeddingLength).toBe(2560); // 2560 = qwen-embedding-4b dimension (text-embedding-3-small was 1536) expect(result.details?.provider).toBe('OpenAI'); }); diff --git a/memento-note/tests/migration/integrity.test.ts b/memento-note/tests/migration/integrity.test.ts index cc1a611..db540f4 100644 --- a/memento-note/tests/migration/integrity.test.ts +++ b/memento-note/tests/migration/integrity.test.ts @@ -232,7 +232,7 @@ describe('Data Integrity Tests', () => { const vecStr = '[0.1,0.2,0.3,0.4,0.5]' await prisma.$executeRawUnsafe( `INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt") - VALUES (gen_random_uuid(), $1, $2::vector(1536), now(), now())`, + VALUES (gen_random_uuid(), $1, $2::vector(2560), now(), now())`, note.id, vecStr )