diff --git a/mcp-server/prisma/schema.prisma b/mcp-server/prisma/schema.prisma index 9ef3638..b96a831 100644 --- a/mcp-server/prisma/schema.prisma +++ b/mcp-server/prisma/schema.prisma @@ -131,7 +131,7 @@ model Note { model NoteEmbedding { id String @id @default(cuid()) noteId String @unique - embedding Unsupported("vector(2560)") + embedding Unsupported("vector(1536)") createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) diff --git a/memento-note/app/api/admin/embeddings/validate/route.ts b/memento-note/app/api/admin/embeddings/validate/route.ts index 10f3084..d0a60cb 100644 --- a/memento-note/app/api/admin/embeddings/validate/route.ts +++ b/memento-note/app/api/admin/embeddings/validate/route.ts @@ -35,7 +35,7 @@ export async function GET() { const invalidResult: Array<{ count: bigint }> = await prisma.$queryRawUnsafe( `SELECT COUNT(*)::bigint as count FROM "NoteEmbedding" e WHERE e."embedding" IS NULL - OR array_length(string_to_array(replace(replace(e."embedding"::text, '[', ''), ']', ''), ','), 1) != 2560` + OR array_length(string_to_array(replace(replace(e."embedding"::text, '[', ''), ']', ''), ','), 1) != 1536` ) const invalidCount = Number(invalidResult[0]?.count ?? 0) diff --git a/memento-note/lib/ai/providers/custom-openai.ts b/memento-note/lib/ai/providers/custom-openai.ts index c71d02b..40d77aa 100644 --- a/memento-note/lib/ai/providers/custom-openai.ts +++ b/memento-note/lib/ai/providers/custom-openai.ts @@ -1,13 +1,13 @@ import { createOpenAI } from '@ai-sdk/openai'; -import { generateObject, generateText as aiGenerateText, stepCountIs } from 'ai'; +import { generateObject, generateText as aiGenerateText, embed, stepCountIs } from 'ai'; import { z } from 'zod'; import { AIProvider, TagSuggestion, TitleSuggestion, ToolUseOptions, ToolCallResult } from '../types'; export class CustomOpenAIProvider implements AIProvider { private model: any; + private embeddingModel: any; private apiKey: string; private baseUrl: string; - private embeddingModelName: string; constructor( apiKey: string, @@ -17,7 +17,6 @@ export class CustomOpenAIProvider implements AIProvider { ) { this.apiKey = apiKey; this.baseUrl = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl; - this.embeddingModelName = embeddingModelName; // Create OpenAI-compatible client with custom base URL // Use .chat() to force /chat/completions endpoint (avoids Responses API) const customClient = createOpenAI({ @@ -45,16 +44,7 @@ export class CustomOpenAIProvider implements AIProvider { }); this.model = customClient.chat(modelName); - } - - private async fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number = 60_000): Promise { - const controller = new AbortController() - const timer = setTimeout(() => controller.abort(), timeoutMs) - try { - return await fetch(url, { ...options, signal: controller.signal }) - } finally { - clearTimeout(timer) - } + this.embeddingModel = customClient.embedding(embeddingModelName); } async generateTags(content: string): Promise { @@ -80,40 +70,13 @@ export class CustomOpenAIProvider implements AIProvider { async getEmbeddings(text: string): Promise { try { - const response = await this.fetchWithTimeout(`${this.baseUrl}/embeddings`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, - 'HTTP-Referer': 'https://localhost:3000', - 'X-Title': 'Memento AI', - }, - body: JSON.stringify({ - model: this.embeddingModelName, - input: text, - }), + const { embedding } = await embed({ + model: this.embeddingModel, + value: text, }); - - if (!response.ok) { - const errText = await response.text(); - throw new Error(`${this.baseUrl}/embeddings error ${response.status}: ${errText}`); - } - - const data = await response.json(); - - // Standard OpenAI-compatible response: { data: [{ embedding: number[] }] } - if (data.data && Array.isArray(data.data) && data.data[0]?.embedding) { - return data.data[0].embedding; - } - - // Fallback: some providers return { embedding: number[] } - if (data.embedding && Array.isArray(data.embedding)) { - return data.embedding; - } - - throw new Error(`Unexpected embeddings response shape: ${JSON.stringify(data)}`); + return embedding; } catch (e) { - console.error('Error generating embeddings (CustomOpenAI):', e); + console.error('Error generating embeddings (Custom OpenAI):', e); throw e; } } diff --git a/memento-note/lib/ai/providers/openrouter.ts b/memento-note/lib/ai/providers/openrouter.ts index f7f3e20..5a38c72 100644 --- a/memento-note/lib/ai/providers/openrouter.ts +++ b/memento-note/lib/ai/providers/openrouter.ts @@ -1,35 +1,21 @@ import { createOpenAI } from '@ai-sdk/openai'; -import { generateObject, generateText as aiGenerateText, stepCountIs } from 'ai'; +import { generateObject, generateText as aiGenerateText, embed, stepCountIs } from 'ai'; import { z } from 'zod'; import { AIProvider, TagSuggestion, TitleSuggestion, ToolUseOptions, ToolCallResult } from '../types'; export class OpenRouterProvider implements AIProvider { private model: any; - private apiKey: string; - private baseUrl: string; - private embeddingModelName: string; + private embeddingModel: any; constructor(apiKey: string, modelName: string = 'anthropic/claude-3-haiku', embeddingModelName: string = 'openai/text-embedding-3-small') { - this.apiKey = apiKey; - this.baseUrl = 'https://openrouter.ai/api/v1'; - this.embeddingModelName = embeddingModelName; // Create OpenAI-compatible client for OpenRouter const openrouter = createOpenAI({ - baseURL: this.baseUrl, + baseURL: 'https://openrouter.ai/api/v1', apiKey: apiKey, }); this.model = openrouter.chat(modelName); - } - - private async fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number = 60_000): Promise { - const controller = new AbortController() - const timer = setTimeout(() => controller.abort(), timeoutMs) - try { - return await fetch(url, { ...options, signal: controller.signal }) - } finally { - clearTimeout(timer) - } + this.embeddingModel = openrouter.embedding(embeddingModelName); } async generateTags(content: string): Promise { @@ -55,38 +41,11 @@ export class OpenRouterProvider implements AIProvider { async getEmbeddings(text: string): Promise { try { - const response = await this.fetchWithTimeout(`${this.baseUrl}/embeddings`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, - 'HTTP-Referer': 'https://localhost:3000', - 'X-Title': 'Memento AI', - }, - body: JSON.stringify({ - model: this.embeddingModelName, - input: text, - }), + const { embedding } = await embed({ + model: this.embeddingModel, + value: text, }); - - if (!response.ok) { - const errText = await response.text(); - throw new Error(`OpenRouter embeddings error ${response.status}: ${errText}`); - } - - const data = await response.json(); - - // OpenRouter returns { data: [{ embedding: number[] }] } - if (data.data && Array.isArray(data.data) && data.data[0]?.embedding) { - return data.data[0].embedding; - } - - // Fallback: some OpenAI-compatible providers return { embedding: number[] } - if (data.embedding && Array.isArray(data.embedding)) { - return data.embedding; - } - - throw new Error(`Unexpected OpenRouter embeddings response shape: ${JSON.stringify(data)}`); + return embedding; } catch (e) { console.error('Error generating embeddings (OpenRouter):', e); throw e; diff --git a/memento-note/lib/ai/services/embedding.service.ts b/memento-note/lib/ai/services/embedding.service.ts index 6183072..a000c73 100644 --- a/memento-note/lib/ai/services/embedding.service.ts +++ b/memento-note/lib/ai/services/embedding.service.ts @@ -1,7 +1,7 @@ /** * Embedding Service * Generates vector embeddings for semantic search and similarity analysis. - * Stores embeddings as native pgvector(2560) in PostgreSQL. + * Stores embeddings as native pgvector(1536) in PostgreSQL. */ import { getAIProvider } from '../factory' @@ -14,7 +14,7 @@ export interface EmbeddingResult { } export class EmbeddingService { - private readonly EMBEDDING_DIMENSION = 2560 + private readonly EMBEDDING_DIMENSION = 1536 async generateEmbedding(text: string): Promise { if (!text || text.trim().length === 0) { diff --git a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql index 1b338b1..745c352 100644 --- a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql +++ b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql @@ -1,7 +1,7 @@ -- Phase 1: Enable pgvector extension CREATE EXTENSION IF NOT EXISTS vector; --- Phase 2: Convert embedding column from text to vector(2560) +-- Phase 2: Convert embedding column from text to vector(1536) -- Idempotent: detects current column type and only converts when needed. -- Handles all partial states from previous failed migration attempts: -- A) embedding is text → direct ALTER COLUMN TYPE conversion @@ -25,8 +25,8 @@ BEGIN IF _emb_type IS NOT NULL THEN ALTER TABLE "NoteEmbedding" DROP COLUMN IF EXISTS "_vec_tmp"; ALTER TABLE "NoteEmbedding" - ALTER COLUMN "embedding" TYPE vector(2560) - USING "embedding"::vector(2560); + ALTER COLUMN "embedding" TYPE vector(1536) + USING "embedding"::vector(1536); RETURN; END IF; diff --git a/memento-note/prisma/schema.prisma b/memento-note/prisma/schema.prisma index 8b0b711..61cd7ae 100644 --- a/memento-note/prisma/schema.prisma +++ b/memento-note/prisma/schema.prisma @@ -300,7 +300,7 @@ model UserAISettings { model NoteEmbedding { id String @id @default(cuid()) noteId String @unique - embedding Unsupported("vector(2560)") + embedding Unsupported("vector(1536)") createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) diff --git a/memento-note/scripts/migrate-embeddings.ts b/memento-note/scripts/migrate-embeddings.ts index 217d6e5..fad7e5a 100644 --- a/memento-note/scripts/migrate-embeddings.ts +++ b/memento-note/scripts/migrate-embeddings.ts @@ -41,7 +41,7 @@ async function main() { // Embedding will be generated by the indexNote method which handles pgvector format await prisma.$executeRawUnsafe( `INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt") - VALUES (gen_random_uuid(), $1, '[0]'::vector(2560), now(), now()) + VALUES (gen_random_uuid(), $1, '[0]'::vector(1536), now(), now()) ON CONFLICT ("noteId") DO NOTHING`, note.id ) diff --git a/memento-note/tests/ai-provider.spec.ts b/memento-note/tests/ai-provider.spec.ts index b45df28..a7bf111 100644 --- a/memento-note/tests/ai-provider.spec.ts +++ b/memento-note/tests/ai-provider.spec.ts @@ -47,7 +47,7 @@ test.describe('AI Provider Configuration Tests', () => { // Verify embeddings provider is working expect(result.embeddingsProvider).toBe('openai'); - expect(result.embeddingLength).toBe(2560); // OpenAI text-embedding-3-small + expect(result.embeddingLength).toBe(1536); // OpenAI text-embedding-3-small expect(result.details?.provider).toBe('OpenAI'); }); diff --git a/memento-note/tests/migration/integrity.test.ts b/memento-note/tests/migration/integrity.test.ts index db540f4..cc1a611 100644 --- a/memento-note/tests/migration/integrity.test.ts +++ b/memento-note/tests/migration/integrity.test.ts @@ -232,7 +232,7 @@ describe('Data Integrity Tests', () => { const vecStr = '[0.1,0.2,0.3,0.4,0.5]' await prisma.$executeRawUnsafe( `INSERT INTO "NoteEmbedding" ("id", "noteId", "embedding", "createdAt", "updatedAt") - VALUES (gen_random_uuid(), $1, $2::vector(2560), now(), now())`, + VALUES (gen_random_uuid(), $1, $2::vector(1536), now(), now())`, note.id, vecStr )