fix: pgvector deployment — idempotent migration, pgvector image, schema sync
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m21s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m21s
- docker-compose.yml: switch postgres:16-alpine to pgvector/pgvector:pg16
- migration: rewrite with IF NOT EXISTS guards and a DO block for safe
text→vector(1536) conversion; handles partial and re-run states
- schema.prisma (both): add @default(now()) on NoteEmbedding.updatedAt,
sync mcp-server embedding type to Unsupported("vector(1536)")
- deploy.yaml: run `docker compose pull postgres` before the build step
This commit is contained in:
@@ -1,38 +1,71 @@
|
||||
-- Phase 1: Enable pgvector extension
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- Phase 2: Add native vector column to NoteEmbedding
|
||||
-- Convert existing JSON-string embeddings to native vector(1536)
|
||||
ALTER TABLE "NoteEmbedding" ADD COLUMN "vec" vector(1536);
|
||||
-- Phase 2: Convert embedding column from text/JSON to vector(1536) if needed
|
||||
DO $$
|
||||
DECLARE
|
||||
_udt text;
|
||||
_vec_tmp_exists boolean;
|
||||
BEGIN
|
||||
SELECT udt_name INTO _udt
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'NoteEmbedding'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- Migrate existing data: parse JSON arrays into pgvector format
|
||||
UPDATE "NoteEmbedding"
|
||||
SET "vec" = ("embedding"::jsonb)::text::vector(1536)
|
||||
WHERE "embedding" IS NOT NULL;
|
||||
IF _udt IS NOT NULL AND _udt != 'vector' THEN
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'NoteEmbedding'
|
||||
AND column_name = '_vec_tmp'
|
||||
) INTO _vec_tmp_exists;
|
||||
|
||||
-- Drop old string column, rename new one
|
||||
ALTER TABLE "NoteEmbedding" DROP COLUMN "embedding";
|
||||
ALTER TABLE "NoteEmbedding" RENAME COLUMN "vec" TO "embedding";
|
||||
IF NOT _vec_tmp_exists THEN
|
||||
ALTER TABLE "NoteEmbedding" ADD COLUMN "_vec_tmp" vector(1536);
|
||||
END IF;
|
||||
|
||||
-- Add updatedAt column for tracking reindex freshness
|
||||
ALTER TABLE "NoteEmbedding" ADD COLUMN "updatedAt" TIMESTAMP NOT NULL DEFAULT now();
|
||||
UPDATE "NoteEmbedding"
|
||||
SET "_vec_tmp" = ("embedding"::jsonb)::text::vector(1536)
|
||||
WHERE "embedding" IS NOT NULL
|
||||
AND "_vec_tmp" IS NULL;
|
||||
|
||||
ALTER TABLE "NoteEmbedding" DROP COLUMN "embedding";
|
||||
ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding";
|
||||
ELSIF _udt IS NULL THEN
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'NoteEmbedding'
|
||||
AND column_name = '_vec_tmp'
|
||||
) INTO _vec_tmp_exists;
|
||||
|
||||
IF _vec_tmp_exists THEN
|
||||
ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding";
|
||||
END IF;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- Add updatedAt with DEFAULT NOW() if not present
|
||||
ALTER TABLE "NoteEmbedding" ADD COLUMN IF NOT EXISTS "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT NOW();
|
||||
|
||||
-- HNSW index for fast approximate nearest neighbor search (cosine distance)
|
||||
CREATE INDEX "NoteEmbedding_embedding_hnsw_idx" ON "NoteEmbedding"
|
||||
CREATE INDEX IF NOT EXISTS "NoteEmbedding_embedding_hnsw_idx" ON "NoteEmbedding"
|
||||
USING hnsw ("embedding" vector_cosine_ops)
|
||||
WITH (m = 16, ef_construction = 64);
|
||||
|
||||
-- Phase 3: Add full-text search tsvector column to Note
|
||||
ALTER TABLE "Note" ADD COLUMN "tsv" tsvector;
|
||||
ALTER TABLE "Note" ADD COLUMN IF NOT EXISTS "tsv" tsvector;
|
||||
|
||||
-- Populate tsv from existing title + content
|
||||
-- Populate tsv where still NULL
|
||||
UPDATE "Note"
|
||||
SET "tsv" =
|
||||
setweight(to_tsvector('simple', COALESCE("title", '')), 'A') ||
|
||||
setweight(to_tsvector('simple', COALESCE("content", '')), 'B');
|
||||
setweight(to_tsvector('simple', COALESCE("content", '')), 'B')
|
||||
WHERE "tsv" IS NULL;
|
||||
|
||||
-- GIN index for fast FTS queries
|
||||
CREATE INDEX "Note_tsv_gin_idx" ON "Note" USING gin ("tsv");
|
||||
CREATE INDEX IF NOT EXISTS "Note_tsv_gin_idx" ON "Note" USING gin ("tsv");
|
||||
|
||||
-- Trigger function to auto-update tsv on INSERT or UPDATE of title/content
|
||||
CREATE OR REPLACE FUNCTION "note_tsv_trigger"() RETURNS trigger AS $$
|
||||
@@ -44,7 +77,7 @@ BEGIN
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Attach trigger
|
||||
-- Attach trigger (DROP IF EXISTS + CREATE is idempotent)
|
||||
DROP TRIGGER IF EXISTS "note_tsv_update" ON "Note";
|
||||
CREATE TRIGGER "note_tsv_update"
|
||||
BEFORE INSERT OR UPDATE OF "title", "content" ON "Note"
|
||||
|
||||
@@ -302,7 +302,7 @@ model NoteEmbedding {
|
||||
noteId String @unique
|
||||
embedding Unsupported("vector(1536)")
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
updatedAt DateTime @default(now()) @updatedAt
|
||||
note Note @relation(fields: [noteId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@index([noteId])
|
||||
|
||||
Reference in New Issue
Block a user