diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index a50e66c..f381630 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -125,6 +125,7 @@ jobs: git reset --hard origin/main echo "=== Building ===" + docker compose pull postgres docker compose build memento-note docker compose build mcp-server diff --git a/DEPLOY-ISSUES.md b/DEPLOY-ISSUES.md new file mode 100644 index 0000000..6acc415 --- /dev/null +++ b/DEPLOY-ISSUES.md @@ -0,0 +1,67 @@ +# Deployment Issues — Migration pgvector + +## Date: 2026-05-12 + +## Contexte +Le commit `03e6a62` (migrate semantic search to pgvector + full-text search) a été pushé sur `main`. Le pipeline Gitea `deploy.yaml` a déployé automatiquement sur `192.168.1.190`. L'application **ne démarre plus** — erreur 502. + +## Production Environment +- **Serveur**: 192.168.1.190 (ops-user, sudo requires password) +- **docker-compose.yml**: `/opt/memento/docker-compose.yml` (root-owned, ops-user cannot write directly) +- **PostgreSQL image**: `postgres:16-alpine` — **pgvector NOT available** +- **Database**: `memento` on `memento-postgres` container +- **102 embeddings** existent dans la table `NoteEmbedding` + +## Problèmes rencontrés + +### 1. Extension pgvector manquante +- L'image PostgreSQL est `postgres:16-alpine` — pas de pgvector +- Il faut changer pour `pgvector/pgvector:pg16` dans le docker-compose +- **OPS-USER ne peut pas écrire dans `/opt/memento/docker-compose.yml`** (root-owned) +- Le `deploy.yaml` Gitea devrait gérer ce changement d'image + +### 2. Migration Prisma failed (précédente) +- `20260510123000_add_notebook_hierarchy_and_trash` était déjà en échec depuis le 10 mai +- **Résolu manuellement**: `UPDATE _prisma_migrations SET finished_at = NOW() WHERE migration_name = '...' AND finished_at IS NULL;` + +### 3. 
Nouvelle migration pgvector failed +- `20260512120000_pgvector_and_fts_search` échoue car: + - Le type `vector` n'existe pas (pgvector pas installé) + - La colonne `updatedAt` sur `NoteEmbedding` n'a pas de default value (102 rows existants) +- **Partiellement résolu**: `ALTER TABLE "NoteEmbedding" ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT NOW();` + +### 4. La migration doit: +- D'abord installer l'extension pgvector: `CREATE EXTENSION IF NOT EXISTS vector;` +- Puis modifier la colonne `embedding` de `String` (JSON) vers `vector(1536)` +- Ajouter l'index HNSW +- Ajouter le FTS tsvector sur `Note` +- Tout cela doit être fait dans un ordre précis dans le fichier de migration Prisma + +## Ce qui doit être corrigé dans le code + +### docker-compose.yml +```yaml +# AVANT +image: postgres:16-alpine +# APRÈS +image: pgvector/pgvector:pg16 +``` + +### Migration Prisma (fichier dans prisma/migrations/) +La migration doit: +1. `CREATE EXTENSION IF NOT EXISTS vector;` — en raw SQL +2. Ajouter `updatedAt` avec `DEFAULT NOW()` sur `NoteEmbedding` +3. Convertir la colonne `embedding` de text/JSON vers `vector(1536)` — avec conversion des données existantes +4. Créer l'index HNSW +5. Ajouter la colonne tsvector + trigger sur `Note` +6. 
Être **idempotente** — pouvoir tourner plusieurs fois sans erreur + +### schema.prisma +- Vérifier que `updatedAt` a `@default(now())` et `@updatedAt` +- Vérifier que le type `Unsupported("vector(1536)")` est correct + +## État actuel de la prod +- `memento-web`: **Restarting** en boucle (migration failed → app ne démarre pas) +- `memento-mcp`: **unhealthy** +- `memento-postgres`: healthy, mais migration en état incohérent +- Le site retourne **502 Bad Gateway** diff --git a/docker-compose.yml b/docker-compose.yml index d127a36..ca4e12a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: # PostgreSQL - Shared Database # ============================================ postgres: - image: postgres:16-alpine + image: pgvector/pgvector:pg16 container_name: memento-postgres restart: unless-stopped environment: diff --git a/mcp-server/prisma/schema.prisma b/mcp-server/prisma/schema.prisma index 48f67ca..b96a831 100644 --- a/mcp-server/prisma/schema.prisma +++ b/mcp-server/prisma/schema.prisma @@ -129,11 +129,12 @@ model Note { } model NoteEmbedding { - id String @id @default(cuid()) - noteId String @unique - embedding String - createdAt DateTime @default(now()) - note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) + id String @id @default(cuid()) + noteId String @unique + embedding Unsupported("vector(1536)") + createdAt DateTime @default(now()) + updatedAt DateTime @default(now()) @updatedAt + note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) @@index([noteId]) } diff --git a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql index 56b9a10..c4ad4d9 100644 --- a/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql +++ b/memento-note/prisma/migrations/20260512120000_pgvector_and_fts_search/migration.sql @@ -1,38 +1,71 @@ -- Phase 1: Enable pgvector 
extension CREATE EXTENSION IF NOT EXISTS vector; --- Phase 2: Add native vector column to NoteEmbedding --- Convert existing JSON-string embeddings to native vector(1536) -ALTER TABLE "NoteEmbedding" ADD COLUMN "vec" vector(1536); +-- Phase 2: Convert embedding column from text/JSON to vector(1536) if needed +DO $$ +DECLARE + _udt text; + _vec_tmp_exists boolean; +BEGIN + SELECT udt_name INTO _udt + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'NoteEmbedding' + AND column_name = 'embedding'; --- Migrate existing data: parse JSON arrays into pgvector format -UPDATE "NoteEmbedding" -SET "vec" = ("embedding"::jsonb)::text::vector(1536) -WHERE "embedding" IS NOT NULL; + IF _udt IS NOT NULL AND _udt != 'vector' THEN + SELECT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'NoteEmbedding' + AND column_name = '_vec_tmp' + ) INTO _vec_tmp_exists; --- Drop old string column, rename new one -ALTER TABLE "NoteEmbedding" DROP COLUMN "embedding"; -ALTER TABLE "NoteEmbedding" RENAME COLUMN "vec" TO "embedding"; + IF NOT _vec_tmp_exists THEN + ALTER TABLE "NoteEmbedding" ADD COLUMN "_vec_tmp" vector(1536); + END IF; --- Add updatedAt column for tracking reindex freshness -ALTER TABLE "NoteEmbedding" ADD COLUMN "updatedAt" TIMESTAMP NOT NULL DEFAULT now(); + UPDATE "NoteEmbedding" + SET "_vec_tmp" = ("embedding"::jsonb)::text::vector(1536) + WHERE "embedding" IS NOT NULL + AND "_vec_tmp" IS NULL; + + ALTER TABLE "NoteEmbedding" DROP COLUMN "embedding"; + ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding"; + ELSIF _udt IS NULL THEN + SELECT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'NoteEmbedding' + AND column_name = '_vec_tmp' + ) INTO _vec_tmp_exists; + + IF _vec_tmp_exists THEN + ALTER TABLE "NoteEmbedding" RENAME COLUMN "_vec_tmp" TO "embedding"; + END IF; + END IF; +END $$; + +-- Add updatedAt with DEFAULT 
NOW() if not present +ALTER TABLE "NoteEmbedding" ADD COLUMN IF NOT EXISTS "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT NOW(); -- HNSW index for fast approximate nearest neighbor search (cosine distance) -CREATE INDEX "NoteEmbedding_embedding_hnsw_idx" ON "NoteEmbedding" +CREATE INDEX IF NOT EXISTS "NoteEmbedding_embedding_hnsw_idx" ON "NoteEmbedding" USING hnsw ("embedding" vector_cosine_ops) WITH (m = 16, ef_construction = 64); -- Phase 3: Add full-text search tsvector column to Note -ALTER TABLE "Note" ADD COLUMN "tsv" tsvector; +ALTER TABLE "Note" ADD COLUMN IF NOT EXISTS "tsv" tsvector; --- Populate tsv from existing title + content +-- Populate tsv where still NULL UPDATE "Note" SET "tsv" = setweight(to_tsvector('simple', COALESCE("title", '')), 'A') || - setweight(to_tsvector('simple', COALESCE("content", '')), 'B'); + setweight(to_tsvector('simple', COALESCE("content", '')), 'B') +WHERE "tsv" IS NULL; -- GIN index for fast FTS queries -CREATE INDEX "Note_tsv_gin_idx" ON "Note" USING gin ("tsv"); +CREATE INDEX IF NOT EXISTS "Note_tsv_gin_idx" ON "Note" USING gin ("tsv"); -- Trigger function to auto-update tsv on INSERT or UPDATE of title/content CREATE OR REPLACE FUNCTION "note_tsv_trigger"() RETURNS trigger AS $$ @@ -44,7 +77,7 @@ BEGIN END; $$ LANGUAGE plpgsql; --- Attach trigger +-- Attach trigger (DROP IF EXISTS + CREATE is idempotent) DROP TRIGGER IF EXISTS "note_tsv_update" ON "Note"; CREATE TRIGGER "note_tsv_update" BEFORE INSERT OR UPDATE OF "title", "content" ON "Note" diff --git a/memento-note/prisma/schema.prisma b/memento-note/prisma/schema.prisma index 7da0745..61cd7ae 100644 --- a/memento-note/prisma/schema.prisma +++ b/memento-note/prisma/schema.prisma @@ -302,7 +302,7 @@ model NoteEmbedding { noteId String @unique embedding Unsupported("vector(1536)") createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt + updatedAt DateTime @default(now()) @updatedAt note Note @relation(fields: [noteId], references: [id], onDelete: Cascade) 
@@index([noteId])