From feaeb075cec677e60e15e69f75a480d4f77713d9 Mon Sep 17 00:00:00 2001
From: Antigravity
Date: Tue, 12 May 2026 08:00:37 +0000
Subject: [PATCH] fix: repair pgvector migration to actually convert embedding column from text to vector(1536)

The original migration used a fragile add-copy-drop-rename pattern with
_jsonb casts that silently failed, leaving the embedding column as text.
Replace with a direct ALTER COLUMN TYPE ... USING embedding::vector(1536)
that is fully idempotent and handles all partial states from previous
failed attempts. A leftover _vec_tmp column is dropped in every state, and
a NOTICE is raised when there is nothing to convert.
---
 DEPLOY-ISSUES-3.md                            | 37 ++++++++++
 .../migration.sql                             | 68 +++++++++++--------
 2 files changed, 75 insertions(+), 30 deletions(-)
 create mode 100644 DEPLOY-ISSUES-3.md

diff --git a/DEPLOY-ISSUES-3.md b/DEPLOY-ISSUES-3.md
new file mode 100644
index 0000000..1980bfc
--- /dev/null
+++ b/DEPLOY-ISSUES-3.md
@@ -0,0 +1,37 @@
+# Search Broken — embedding column not converted to vector type
+
+## Date: 2026-05-12
+
+## Problem
+The search fails with this error:
+```
+operator does not exist: text <=> vector
+HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
+```
+
+## Root cause
+The `NoteEmbedding.embedding` column is still type `text` (old JSON string format), NOT `vector(1536)`.
+The Prisma migration marked itself as applied but the actual column type conversion was never executed.
+The SQL query tries to use the `<=>` cosine distance operator on a text column, which fails.
+
+## Current state
+- pgvector extension IS installed (CREATE EXTENSION worked)
+- But the embedding column was NOT converted from text to vector(1536)
+- There are 102 rows in NoteEmbedding with JSON string embeddings
+- The migration SQL needs to: ALTER COLUMN embedding TYPE vector(1536) using proper casting
+
+## What needs to happen
+1. Check the actual column type: SELECT column_name, data_type, udt_name FROM information_schema.columns WHERE table_name = 'NoteEmbedding' AND column_name = 'embedding';
+2. The migration SQL must convert the column. The embedding values are stored as JSON strings like "[0.1, 0.2, ...]" — this is already pgvector's text input format, so keep the brackets and cast the text directly to vector.
+3. The conversion SQL should be something like:
+   ALTER TABLE "NoteEmbedding" ALTER COLUMN embedding TYPE vector(1536) USING embedding::vector(1536);
+   Do NOT strip the brackets first; pgvector rejects input that does not start with "[", so this variant fails:
+   ALTER TABLE "NoteEmbedding" ALTER COLUMN embedding TYPE vector(1536) USING (replace(replace(embedding, '[', ''), ']', ''))::vector(1536);
+4. Also check if the tsvector column and trigger on Note table were created properly.
+5. The semantic-search.service.ts code uses $queryRawUnsafe with <=> operator — make sure the SQL is correct for pgvector.
+
+## Files to check/fix
+- prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql — the actual migration SQL
+- lib/ai/services/semantic-search.service.ts — the search service using vector queries
+- lib/ai/services/embedding.service.ts — embedding service
+- schema.prisma — NoteEmbedding model
diff --git a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql
index c4ad4d9..745c352 100644
--- a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql
+++ b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql
@@ -1,48 +1,56 @@
 -- Phase 1: Enable pgvector extension
 CREATE EXTENSION IF NOT EXISTS vector;
 
--- Phase 2: Convert embedding column from text/JSON to vector(1536) if needed
+-- Phase 2: Convert embedding column from text to vector(1536)
+-- Idempotent: detects current column type and only converts when needed.
+-- Handles all partial states from previous failed migration attempts:
+--   A) embedding is text → direct ALTER COLUMN TYPE conversion
+--   B) embedding already vector → skip conversion, drop leftover _vec_tmp
+--   C) embedding missing, _vec_tmp exists → rename
+--   D) neither column found → nothing to convert, emit a NOTICE
+-- pgvector's text input format is '[v1,v2,...]', so the stored JSON-array
+-- strings are cast directly; the brackets must NOT be stripped.
 DO $$
 DECLARE
-  _udt text;
-  _vec_tmp_exists boolean;
+  _emb_type text;  -- udt_name of "embedding"; NULL when the column is absent
+  _tmp_type text;  -- udt_name of "_vec_tmp"; NULL when the column is absent
 BEGIN
-  SELECT udt_name INTO _udt
+  SELECT udt_name INTO _emb_type
   FROM information_schema.columns
   WHERE table_schema = 'public'
     AND table_name = 'NoteEmbedding'
     AND column_name = 'embedding';
 
-  IF _udt IS NOT NULL AND _udt != 'vector' THEN
-    SELECT EXISTS (
-      SELECT 1 FROM information_schema.columns
-      WHERE table_schema = 'public'
-        AND table_name = 'NoteEmbedding'
-        AND column_name = '_vec_tmp'
-    ) INTO _vec_tmp_exists;
+  -- State B: already converted. Only clean up a temp column left behind by
+  -- an earlier add-copy-drop-rename attempt.
+  IF _emb_type = 'vector' THEN
+    ALTER TABLE "NoteEmbedding" DROP COLUMN IF EXISTS "_vec_tmp";
+    RETURN;
+  END IF;
 
-    IF NOT _vec_tmp_exists THEN
-      ALTER TABLE "NoteEmbedding" ADD COLUMN "_vec_tmp" vector(1536);
-    END IF;
+  -- State A: convert in place. A value that is not a valid 1536-dimension
+  -- vector literal makes the cast raise, which aborts the whole block.
+  IF _emb_type IS NOT NULL THEN
+    ALTER TABLE "NoteEmbedding" DROP COLUMN IF EXISTS "_vec_tmp";
+    ALTER TABLE "NoteEmbedding"
+      ALTER COLUMN "embedding" TYPE vector(1536)
+      USING "embedding"::vector(1536);
+    RETURN;
+  END IF;
 
-    UPDATE "NoteEmbedding"
-    SET "_vec_tmp" = ("embedding"::jsonb)::text::vector(1536)
-    WHERE "embedding" IS NOT NULL
-      AND "_vec_tmp" IS NULL;
+  -- "embedding" is absent: look for the temp column of an interrupted attempt.
+  SELECT udt_name INTO _tmp_type
+  FROM information_schema.columns
+  WHERE table_schema = 'public'
+    AND table_name = 'NoteEmbedding'
+    AND column_name = '_vec_tmp';
 
-    ALTER TABLE "NoteEmbedding" DROP COLUMN "embedding";
+  IF _tmp_type IS NOT NULL THEN
+    -- State C: the temp column holds the data; promote it.
     ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding";
-  ELSIF _udt IS NULL THEN
-    SELECT EXISTS (
-      SELECT 1 FROM information_schema.columns
-      WHERE table_schema = 'public'
-        AND table_name = 'NoteEmbedding'
-        AND column_name = '_vec_tmp'
-    ) INTO _vec_tmp_exists;
-
-    IF _vec_tmp_exists THEN
-      ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding";
-    END IF;
+  ELSE
+    -- State D: make the no-op visible instead of succeeding silently.
+    RAISE NOTICE 'pgvector migration: no "embedding" or "_vec_tmp" column on public."NoteEmbedding"; nothing converted';
   END IF;
 END $$;
 