From 670d3f4376789dc946d774332eec788342b2910c Mon Sep 17 00:00:00 2001 From: sepehr Date: Sun, 7 Jun 2026 09:39:26 +0200 Subject: [PATCH] Documentation: Add French Disaster Recovery Playbook for server failovers --- DISASTER_RECOVERY.md | 106 +++++++++++++ scripts/disaster-recovery.sh | 287 +++++++++++++++++++++++++++++++++++ 2 files changed, 393 insertions(+) create mode 100644 DISASTER_RECOVERY.md create mode 100755 scripts/disaster-recovery.sh diff --git a/DISASTER_RECOVERY.md b/DISASTER_RECOVERY.md new file mode 100644 index 0000000..33e5e97 --- /dev/null +++ b/DISASTER_RECOVERY.md @@ -0,0 +1,106 @@ +# Guide de Reprise d'Activité (Disaster Recovery Playbook) +> Procédure pas-à-pas en cas de crash total du serveur principal (`192.168.1.151`) + +--- + +## 🎯 Objectif +Ce document décrit comment restaurer l'intégralité de la plateforme SaaS **Wordly.art** (base de données active, configurations secrètes `.env` et services Docker) sur un **nouveau serveur** de secours si le serveur principal tombe en panne complète. + +--- + +## 📁 Fonctionnement de la sauvegarde DR (Disaster Recovery) + +Le script de reprise d'activité [disaster-recovery.sh](scripts/disaster-recovery.sh) génère une archive compressée contenant l'intégralité du système à restaurer : +1. **La base de données active** (PostgreSQL ou SQLite). +2. **Le fichier de configuration de production** `.env` (contenant vos clés API Stripe, OpenAI, DeepL, etc.). +3. **Le fichier `docker-compose.yml`** et ses variantes. +4. **Le dossier `docker/`** contenant toutes les configurations Prometheus, Grafana, Nginx, etc. + +Toutes ces archives sont stockées sur votre **NAS** à l'abri des pannes matérielles du serveur local : `/mnt/nas-backups/wordly/dr/`. + +--- + +## 🛠️ Étape 1 : Automatisation de la sauvegarde complète (À faire aujourd'hui) + +Pour que la sauvegarde Disaster Recovery s'exécute automatiquement chaque nuit à 03h30 : + +1. 
Connectez-vous en SSH sur votre serveur principal (`192.168.1.151`). +2. Ouvrez le planificateur de tâches (cron) : + ```bash + crontab -e + ``` +3. Ajoutez la ligne suivante tout à la fin du fichier : + ```cron + 30 3 * * * /opt/wordly/scripts/disaster-recovery.sh --backup >> /var/log/wordly-dr-backup.log 2>&1 + ``` +4. Sauvegardez et quittez. Désormais, une archive complète de restauration sera créée et envoyée sur votre NAS chaque nuit, avec une rétention automatique de **14 jours**. + +--- + +## 🚨 Étape 2 : Procédure de restauration (En cas de crash du serveur) + +Si le serveur `192.168.1.151` est indisponible et que vous devez remonter le SaaS sur une nouvelle machine (ex : **`192.168.1.152`**), suivez ces étapes : + +### 2.1 Préparation de la nouvelle machine +1. Installez Docker et Docker Compose sur le nouveau serveur : + ```bash + curl -fsSL https://get.docker.com | sh + sudo usermod -aG docker $USER && newgrp docker + ``` +2. Créez le dossier du projet et clonez le repository (ou copiez les fichiers depuis le NAS) : + ```bash + git clone -b production-deployment https://gitea.parsanet.org/sepehr/office_translator.git /opt/wordly + cd /opt/wordly + ``` + +### 2.2 Monter le NAS sur le nouveau serveur +Pour que le nouveau serveur accède aux sauvegardes stockées sur votre NAS : +1. Installez l'utilitaire de montage : + ```bash + sudo apt install cifs-utils -y + sudo mkdir -p /mnt/nas-backups/wordly + ``` +2. Créez le fichier de credentials : + ```bash + sudo tee /etc/nas-credentials </dev/null | head -n 1 || true) + + if [ -z "${latest_db_backup}" ]; then + log_error "Could not locate the generated database backup file in ${local_backup_dir}/daily/." + exit 1 + fi + log "Latest database backup located: $(basename "${latest_db_backup}")" + + # 3. Create temp packing folder + local packing_dir="${PROJECT_ROOT}/temp_dr_pack_${TIMESTAMP}" + mkdir -p "${packing_dir}" + + # 4. Copy configurations + log "Packaging configuration files..." 
+ if [ -f "${PROJECT_ROOT}/.env" ]; then + cp "${PROJECT_ROOT}/.env" "${packing_dir}/.env.production" + else + log_warning "No .env file found at project root. Continuing without it." + fi + + # Copy docker-compose files + for f in docker-compose.yml docker-compose.local.yml docker-compose.monitoring.yml docker-compose.dev.yml; do + if [ -f "${PROJECT_ROOT}/${f}" ]; then + cp "${PROJECT_ROOT}/${f}" "${packing_dir}/" + fi + done + + # Copy docker directory (Prometheus, Grafana, Nginx configs, Dockerfiles) + if [ -d "${PROJECT_ROOT}/docker" ]; then + cp -r "${PROJECT_ROOT}/docker" "${packing_dir}/" + fi + + # Copy scripts directory (so restore scripts are present in the package) + if [ -d "${PROJECT_ROOT}/scripts" ]; then + cp -r "${PROJECT_ROOT}/scripts" "${packing_dir}/" + fi + + # Copy the DB backup archive + mkdir -p "${packing_dir}/db_backup" + cp "${latest_db_backup}" "${packing_dir}/db_backup/" + + # 5. Compress Everything + mkdir -p "${DR_BACKUP_DIR}" + local dr_archive_name="wordly_dr_${TIMESTAMP}.tar.gz" + local dr_archive_path="${DR_BACKUP_DIR}/${dr_archive_name}" + + log "Compressing configurations and database into DR package..." + tar -czf "${dr_archive_path}" -C "${packing_dir}" . + + # Clean up temp packaging folder + rm -rf "${packing_dir}" + + if [ -f "${dr_archive_path}" ] && [ -s "${dr_archive_path}" ]; then + local size + size=$(du -h "${dr_archive_path}" | cut -f1) + log_success "Disaster Recovery backup package created: ${dr_archive_name} (${size})" + log_success "Stored securely at: ${dr_archive_path}" + + # 6. Apply retention cleanups + log "Cleaning up old DR packages (retention: ${DR_RETENTION_DAYS} days)..." + find "${DR_BACKUP_DIR}" -name "wordly_dr_*.tar.gz" -mtime +"${DR_RETENTION_DAYS}" -exec rm -f {} \; + log_success "Disaster Recovery backup complete." + else + log_error "DR Archive compression failed." 
# ==============================================================================
# RESTORE ACTION
# ==============================================================================
#######################################
# Restore the full stack from a Disaster Recovery package.
#
# Steps, in order: validate the archive argument, ask for an interactive
# confirmation, check that Docker Compose is available, keep a safety copy of
# the current .env, extract the archive into PROJECT_ROOT, promote the packaged
# .env.production to .env, start the compose services, wait for PostgreSQL,
# hand the embedded database dump to backup-database.sh, then restart the
# backend service.
#
# Globals:
#   PROJECT_ROOT, SCRIPT_DIR, TIMESTAMP, DR_BACKUP_DIR (read)
#   BACKUP_DIR, POSTGRES_CONTAINER (read, optional; defaults applied below)
#   DATABASE_URL (read after sourcing the restored .env)
# Arguments:
#   $1 - path to the DR archive (wordly_dr_*.tar.gz)
# Outputs:
#   Progress messages through the log* helpers and echo.
# Returns:
#   Does not return on failure: exits 1. Exits 0 when the user declines.
#######################################
perform_restore() {
  local dr_package="${1:-}"

  if [[ -z "${dr_package}" ]]; then
    log_error "No DR package archive specified."
    echo "Usage: $0 --restore ARCHIVE.tar.gz"
    echo "Available archives in ${DR_BACKUP_DIR}:"
    # ls is used for human-readable display only; its output is never parsed.
    ls -lh "${DR_BACKUP_DIR}"/wordly_dr_*.tar.gz 2>/dev/null || echo " (none)"
    exit 1
  fi

  if [[ ! -f "${dr_package}" ]]; then
    log_error "Archive file not found: ${dr_package}"
    exit 1
  fi

  echo ""
  log_warning "RESTORE DISASTER RECOVERY PACKAGE - THIS WILL OVERWRITE ENVIRONMENT CONFIGURATIONS AND DATABASES!"
  echo " Archive: ${dr_package}"
  echo " Target : Current Server Host (Workspace)"
  echo ""
  local confirm_val=""
  # -r keeps backslashes literal; EOF on stdin is treated as "not confirmed".
  read -r -p "Type 'RESTORE-ALL' to confirm complete system restore: " confirm_val || true
  if [[ "${confirm_val}" != "RESTORE-ALL" ]]; then
    log "System restore cancelled."
    exit 0
  fi

  # Check the Docker prerequisite BEFORE anything is overwritten, so a machine
  # without Docker is left untouched. The command is kept as an array so it is
  # never subject to word-splitting surprises.
  local -a compose_cmd=()
  if docker compose version &>/dev/null; then
    compose_cmd=(docker compose)
  elif command -v docker-compose &>/dev/null; then
    compose_cmd=(docker-compose)
  else
    log_error "docker-compose is not installed. Please install Docker first."
    exit 1
  fi

  log "Extracting DR archive contents..."

  # Create safety backup of existing .env before overwrite
  if [[ -f "${PROJECT_ROOT}/.env" ]]; then
    cp "${PROJECT_ROOT}/.env" "${PROJECT_ROOT}/.env.bak_before_dr_restore_${TIMESTAMP}"
    log "Created backup of existing .env: .env.bak_before_dr_restore_${TIMESTAMP}"
  fi

  # Extract configs directly into project root
  if ! tar -xzf "${dr_package}" -C "${PROJECT_ROOT}"; then
    log_error "Failed to extract DR archive: ${dr_package}"
    exit 1
  fi

  # Restore .env from packaged .env.production
  if [[ -f "${PROJECT_ROOT}/.env.production" ]]; then
    mv "${PROJECT_ROOT}/.env.production" "${PROJECT_ROOT}/.env"
    log "Restored .env configuration"
  fi

  # Reload variables from the restored .env. The backup side may produce a
  # package without .env, so only source the file when it actually exists.
  if [[ -f "${PROJECT_ROOT}/.env" ]]; then
    set -a
    # shellcheck disable=SC1091 -- file only exists at restore time
    source "${PROJECT_ROOT}/.env"
    set +a
  else
    log_warning "No .env file available after extraction. Continuing with the current environment."
  fi

  log_success "Docker and configurations extracted successfully."

  # Boot Docker Compose Services
  log "Spinning up Docker containers (database, redis, backend, frontend)..."
  # Run compose from PROJECT_ROOT (where the compose files were extracted)
  # inside a subshell, so the caller's working directory is left unchanged.
  if ! (cd "${PROJECT_ROOT}" && "${compose_cmd[@]}" up -d); then
    log_error "Failed to start Docker Compose services."
    exit 1
  fi

  # Locate the embedded database backup: first *.gz in glob (sorted) order.
  local db_backup_archive=""
  local candidate
  for candidate in "${PROJECT_ROOT}/db_backup/"*.gz; do
    if [[ -f "${candidate}" ]]; then
      db_backup_archive="${candidate}"
      break
    fi
  done

  if [[ -z "${db_backup_archive}" ]]; then
    log_error "Database backup archive not found inside the DR package extraction."
    exit 1
  fi

  local db_archive_filename="${db_backup_archive##*/}"
  log "Database backup located: ${db_archive_filename}"

  # Wait for database container to be healthy (PostgreSQL)
  local db_type="sqlite"
  if [[ "${DATABASE_URL:-}" == postgres* ]]; then
    db_type="postgres"
  fi

  if [[ "${db_type}" == "postgres" ]]; then
    local postgres_container="${POSTGRES_CONTAINER:-wordly-postgres}"
    local health_status=""
    local is_healthy="false"
    local attempt
    log "Waiting for PostgreSQL container (${postgres_container}) to be healthy..."
    for ((attempt = 1; attempt <= 30; attempt++)); do
      health_status="$(docker inspect --format='{{.State.Health.Status}}' "${postgres_container}" 2>/dev/null || true)"
      # Exact comparison: a substring match would also accept "unhealthy".
      if [[ "${health_status}" == "healthy" ]]; then
        log_success "Database container is healthy."
        is_healthy="true"
        break
      fi
      echo " Waiting for database... (${attempt}/30)"
      sleep 2
    done
    if [[ "${is_healthy}" != "true" ]]; then
      # Containers without a healthcheck never report "healthy", so this is a
      # warning rather than a hard failure; the restore step reports real errors.
      log_warning "PostgreSQL container (${postgres_container}) did not report healthy after 30 attempts (last status: ${health_status:-unknown}). Attempting the restore anyway."
    fi
  else
    sleep 2
  fi

  # Restore the database using the database backup script
  log "Triggering database restore..."
  # Make sure backups/daily folder exists and copy the db backup there for backup-database.sh to see it
  local local_backup_dir="${BACKUP_DIR:-${PROJECT_ROOT}/backups}"
  if ! mkdir -p "${local_backup_dir}/daily" ||
    ! cp "${db_backup_archive}" "${local_backup_dir}/daily/"; then
    log_error "Could not stage the database backup in ${local_backup_dir}/daily/."
    exit 1
  fi

  # backup-database.sh asks for its own confirmation; it is run interactively.
  log "Restoring DB contents... (You will need to type 'RESTORE' if prompted)"
  if ! bash "${SCRIPT_DIR}/backup-database.sh" --restore "${db_archive_filename}"; then
    log_error "Database restore failed. The extracted dump is kept in ${PROJECT_ROOT}/db_backup for inspection."
    exit 1
  fi

  # Clean up extracted folders (":?" aborts instead of expanding to /db_backup)
  rm -rf "${PROJECT_ROOT:?}/db_backup"

  # Restart app to clear connection caches
  log "Restarting application backend..."
  if ! (cd "${PROJECT_ROOT}" && "${compose_cmd[@]}" restart backend); then
    log_error "Failed to restart the backend service."
    exit 1
  fi

  log_success "=========================================================================="
  log_success "DISASTER RECOVERY SYSTEM RESTORE COMPLETE!"
  log_success "=========================================================================="
  log "Your application has been restored and started."
  log "Next Steps:"
  log "1. Verify the service is online: curl http://localhost:8000/health"
  log "2. Update your Nginx Proxy Manager (NPM) domains to point to this server's IP."
  echo ""
}

# ==============================================================================
# MAIN ENTRY
# ==============================================================================
#######################################
# Command-line dispatcher.
# Arguments:
#   $1 - action: --backup or --restore
#   $2 - archive path (only used with --restore)
# Returns:
#   Exits 1 after printing usage when the action is missing or unknown.
#######################################
main() {
  case "${1:-}" in
    --backup)
      perform_backup
      ;;
    --restore)
      perform_restore "${2:-}"
      ;;
    *)
      echo "Wordly Disaster Recovery Utility"
      echo "Usage:"
      echo " $0 --backup # Package and copy configs + database to NAS"
      echo " $0 --restore ARCHIVE.tar.gz # Extract and restore full stack on new machine"
      exit 1
      ;;
  esac
}

main "$@"