#!/bin/bash
# ==============================================================================
# Wordly.art - Disaster Recovery (DR) Backup & Restore Playbook (V3)
# ==============================================================================
# Archives app configs (.env, docker-compose) and the database backup, and
# exports them to the NAS at 192.168.1.146.
#
# On RESTORE: deploys the app on the new server and automatically updates NPM
# (192.168.1.184) to reroute traffic via API — no manual intervention needed.
#
# Usage:
#   ./disaster-recovery.sh --backup              # Create DR archive → NAS
#   ./disaster-recovery.sh --restore <archive>   # Restore on THIS machine
# ==============================================================================

# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Absolute directory that contains this script, and its parent directory,
# which the rest of the script treats as the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Captured once at start-up; used as the log prefix and in the names of the
# files and directories created by this run.
TIMESTAMP="$(date +"%Y%m%d_%H%M%S")"

# Colors (kept as literal backslash sequences; they are interpreted when the
# log helpers print them).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Logging helpers. Each prints one line to stdout prefixed with
# "[DR <TIMESTAMP>]"; backslash escapes in the text are interpreted (echo -e).
# Arguments: $1 - message text. A missing argument is treated as an empty
#            message instead of aborting the script under `set -u`.
log() { echo -e "[DR ${TIMESTAMP}] ${1-}"; }

# Same as log, with the message wrapped in the GREEN colour sequence.
log_success() { echo -e "[DR ${TIMESTAMP}] ${GREEN}${1-}${NC}"; }

# Same as log, with a "WARNING: " prefix and the YELLOW colour sequence.
log_warning() { echo -e "[DR ${TIMESTAMP}] ${YELLOW}WARNING: ${1-}${NC}"; }

# Same as log, with an "ERROR: " prefix and the RED colour sequence.
log_error() { echo -e "[DR ${TIMESTAMP}] ${RED}ERROR: ${1-}${NC}"; }
# Sourcing .env
# When the project's .env exists, load it before the defaults that follow are
# applied, so values it sets take precedence over those defaults.
ENV_FILE="${PROJECT_ROOT}/.env"
if [ -f "${ENV_FILE}" ]; then
  set -a # auto-export everything the file assigns
  set +u # tolerate references to unset variables inside the file
  # shellcheck disable=SC1090
  source "${ENV_FILE}"
  set -u
  set +a
fi
# NAS SSH settings (same configuration as backup-to-nas.sh). Every value can be
# overridden from the environment or from .env; the right-hand side is the
# fallback used when the variable is unset or empty.
NAS_HOST="${NAS_HOST:-192.168.1.146}"
NAS_USER="${NAS_USER:-wordly-backup}"
NAS_PATH="${NAS_PATH:-/volume1/backups/wordly}"
NAS_SSH_PORT="${NAS_SSH_PORT:-22}"
NAS_SSH_KEY="${NAS_SSH_KEY:-/root/.ssh/wordly_nas_key}"
# Remote directory that receives the DR archives.
BACKUP_DEST_PATH="${NAS_PATH}/snapshots"
# DR archives older than this many days are deleted from the NAS after a backup.
DR_RETENTION_DAYS="${DR_RETENTION_DAYS:-30}"

# IP of THIS server (used during restore to configure NPM failover)
SERVER_IP="${SERVER_IP:-}"

# Telegram (notifications are skipped when either value is empty)
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"
# ==============================================================================
# SEND TELEGRAM NOTIFICATION
# ==============================================================================
# Posts a Markdown-formatted message to the configured Telegram chat.
# Best effort: does nothing when TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID is
# empty, and never fails the caller when the request cannot be sent.
# Globals:   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text (may span several lines)
# Returns:   always 0
send_telegram() {
  local message="${1-}"
  local token="${TELEGRAM_BOT_TOKEN:-}"
  local chat_id="${TELEGRAM_CHAT_ID:-}"

  if [[ -z "${token}" || -z "${chat_id}" ]]; then
    return 0
  fi

  # --data-urlencode keeps characters such as '&', '+', '%' and '=' inside the
  # message from being read as form syntax, which plain -d would allow.
  # The timeouts stop an unreachable Telegram API from stalling the DR run.
  curl -s -X POST "https://api.telegram.org/bot${token}/sendMessage" \
    --connect-timeout 5 \
    --max-time 15 \
    --data-urlencode "chat_id=${chat_id}" \
    --data-urlencode "text=${message}" \
    --data-urlencode "parse_mode=Markdown" \
    >/dev/null 2>&1 || true
}
# ==============================================================================
|
|
# DESTINATION PREPARATION (backup mode)
|
|
# ==============================================================================
|
|
prepare_destination() {
|
|
local ssh_cmd="ssh -i ${NAS_SSH_KEY} -p ${NAS_SSH_PORT} -o BatchMode=yes -o ConnectTimeout=10"
|
|
|
|
log "Vérification de la connectivité SSH vers le NAS ${NAS_HOST}..."
|
|
if ! ${ssh_cmd} "${NAS_USER}@${NAS_HOST}" "echo OK" >/dev/null 2>&1; then
|
|
log_error "Impossible de joindre le NAS ${NAS_HOST} via SSH."
|
|
log_error "Lancez d'abord : sudo bash scripts/setup-nas.sh"
|
|
exit 1
|
|
fi
|
|
|
|
# S'assurer que le dossier snapshots existe sur le NAS
|
|
${ssh_cmd} "${NAS_USER}@${NAS_HOST}" \
|
|
"mkdir -p ${NAS_PATH}/snapshots" 2>/dev/null || true
|
|
|
|
log_success "NAS SSH OK — Destination : ${NAS_USER}@${NAS_HOST}:${NAS_PATH}/snapshots"
|
|
}
|
|
|
|
# ==============================================================================
# BACKUP ACTION
# ==============================================================================

# Copies the config files, the docker/ and scripts/ directories and the DB dump
# into the staging directory. Returns 1 on the first copy that fails.
# Arguments: $1 - staging directory, $2 - database dump to embed
_dr_stage_files() {
  local staging_dir="$1"
  local db_dump="$2"
  local f d

  if [[ -f "${PROJECT_ROOT}/.env" ]]; then
    cp "${PROJECT_ROOT}/.env" "${staging_dir}/.env.production" || return 1
  fi

  for f in docker-compose.yml docker-compose.local.yml docker-compose.monitoring.yml docker-compose.dev.yml; do
    if [[ -f "${PROJECT_ROOT}/${f}" ]]; then
      cp "${PROJECT_ROOT}/${f}" "${staging_dir}/" || return 1
    fi
  done

  for d in docker scripts; do
    if [[ -d "${PROJECT_ROOT}/${d}" ]]; then
      cp -r "${PROJECT_ROOT}/${d}" "${staging_dir}/" || return 1
    fi
  done

  mkdir -p "${staging_dir}/db_backup" || return 1
  cp "${db_dump}" "${staging_dir}/db_backup/" || return 1
}

# Builds a DR archive (configs + fresh database dump) and ships it to the NAS.
# Steps: check the NAS, run backup-database.sh --full, stage and compress the
# files, rsync the archive, prune old archives on the NAS, sync the scripts
# directory, then send a Telegram notification.
# Globals:   SCRIPT_DIR, PROJECT_ROOT, TIMESTAMP, BACKUP_DIR, BACKUP_DEST_PATH,
#            NAS_*, DR_RETENTION_DAYS, BACKUP_DEST_TYPE (all read)
# Exits:     1 when the dump, the packaging or the archive transfer fails.
perform_backup() {
  prepare_destination
  # BACKUP_DEST_TYPE is not defined anywhere in this script; default it so the
  # log line cannot abort the run under `set -u`.
  log "Starting Disaster Recovery backup (Destination Mode: ${BACKUP_DEST_TYPE:-nas-ssh})..."

  # 1. Trigger DB Backup
  log "Triggering database dump..."
  if ! bash "${SCRIPT_DIR}/backup-database.sh" --full; then
    log_error "Database backup failed. Aborting DR packaging."
    exit 1
  fi

  # 2. Locate DB Backup file: newest *.gz by modification time, found with a
  #    glob instead of parsing `ls` output.
  local local_backup_dir="${BACKUP_DIR:-${PROJECT_ROOT}/backups}"
  local latest_db_backup=""
  local candidate
  for candidate in "${local_backup_dir}/daily/"*.gz; do
    [[ -f "${candidate}" ]] || continue
    if [[ -z "${latest_db_backup}" || "${candidate}" -nt "${latest_db_backup}" ]]; then
      latest_db_backup="${candidate}"
    fi
  done

  if [[ -z "${latest_db_backup}" ]]; then
    log_error "Could not find database backup file."
    exit 1
  fi
  log "Database backup file loaded: $(basename "${latest_db_backup}")"

  # 3. Create temp packaging folder
  local packing_dir="${PROJECT_ROOT}/temp_dr_pack_${TIMESTAMP}"
  local dr_archive_name="wordly_dr_${TIMESTAMP}.tar.gz"
  local local_archive_path="${PROJECT_ROOT}/${dr_archive_name}"
  mkdir -p "${packing_dir}"

  # 4. Pack Configurations. On failure the staging directory is removed so a
  #    copy of .env is not left behind.
  log "Packing application configuration (.env & docker-compose)..."
  if ! _dr_stage_files "${packing_dir}" "${latest_db_backup}"; then
    rm -rf -- "${packing_dir}"
    log_error "Failed to stage files for the DR archive."
    exit 1
  fi

  # 5. Note: NPM config is NOT backed up here.
  #    NPM runs on its own dedicated server (192.168.1.184) and is stable.
  #    Only the forward_host IP needs to change during failover, which is
  #    done automatically via the NPM API by npm-failover.sh during restore.
  log "NPM is on dedicated server 192.168.1.184 — no NPM config to backup."

  # 6. Compress DR Archive
  log "Compressing configurations and database into DR archive..."
  local tar_ok=true
  tar -czf "${local_archive_path}" -C "${packing_dir}" . || tar_ok=false
  rm -rf -- "${packing_dir}"

  if [[ "${tar_ok}" != true || ! -s "${local_archive_path}" ]]; then
    rm -f -- "${local_archive_path}"
    log_error "Failed to compress archive."
    exit 1
  fi

  local size
  size=$(du -h "${local_archive_path}" | cut -f1)

  # 7. Send the archive to the NAS via rsync over SSH.
  #    rsync splits the -e string itself and honours single quotes, so the key
  #    path is quoted there; direct ssh calls use an array.
  local rsync_rsh="ssh -i '${NAS_SSH_KEY}' -p ${NAS_SSH_PORT} -o BatchMode=yes -o ConnectTimeout=30"
  local -a ssh_cmd=(
    ssh -i "${NAS_SSH_KEY}" -p "${NAS_SSH_PORT}"
    -o BatchMode=yes -o ConnectTimeout=30
  )
  local nas_target="${NAS_USER}@${NAS_HOST}"
  local dest_path="${BACKUP_DEST_PATH}/${dr_archive_name}"

  log "Transfert de l'archive DR vers le NAS via rsync SSH..."
  if ! rsync -az -e "${rsync_rsh}" "${local_archive_path}" "${nas_target}:${dest_path}"; then
    log_error "rsync SSH vers le NAS a échoué !"
    log_warning "Archive conservée localement : ${local_archive_path}"
    # The path goes inside a code span: its underscores would otherwise be
    # parsed as Markdown emphasis and make Telegram reject the message.
    send_telegram "🚨 *Wordly DR Backup FAILED*
rsync NAS échoué : ${NAS_HOST}
Fichier local : \`${local_archive_path}\`
Date: $(date '+%Y-%m-%d %H:%M:%S')"
    exit 1
  fi

  rm -f -- "${local_archive_path}"
  log_success "Archive DR transférée (${size}) → ${nas_target}:${dest_path}"

  # Retention policy on the NAS (best effort; failures are reported, not fatal).
  log "Rotation des archives (>${DR_RETENTION_DAYS} jours) sur le NAS..."
  if [[ "${DR_RETENTION_DAYS}" =~ ^[0-9]+$ ]]; then
    if ! "${ssh_cmd[@]}" "${nas_target}" \
      "find ${BACKUP_DEST_PATH} -name 'wordly_dr_*.tar.gz' -mtime +${DR_RETENTION_DAYS} -delete" \
      >/dev/null 2>&1; then
      log_warning "Rotation des archives DR sur le NAS échouée (ignorée)."
    fi
  else
    log_warning "DR_RETENTION_DAYS invalide ('${DR_RETENTION_DAYS}') — rotation ignorée."
  fi

  # Sync scripts (best effort; failures are reported, not fatal).
  if ! rsync -az -e "${rsync_rsh}" \
    --exclude="__pycache__" \
    "${SCRIPT_DIR}/" \
    "${nas_target}:${NAS_PATH}/scripts/" 2>/dev/null; then
    log_warning "Synchronisation des scripts vers le NAS échouée (ignorée)."
  fi

  send_telegram "✅ *Wordly.art DR Backup OK*
Archive: \`${dr_archive_name}\`
Taille: ${size}
NAS: \`${dest_path}\`
Date: $(date '+%Y-%m-%d %H:%M:%S')"

  log_success "Disaster Recovery backup complete."
}
# ==============================================================================
# RESTORE ACTION
# ==============================================================================
# Restores a DR archive onto this machine: extracts it into PROJECT_ROOT,
# reinstates .env, starts the Compose stack, restores the database through
# backup-database.sh, waits for the app health endpoint, then runs
# npm-failover.sh to point NPM at this server.
# Globals:   PROJECT_ROOT, SCRIPT_DIR, TIMESTAMP, SERVER_IP (read); variables
#            defined by the restored .env are loaded and exported.
# Arguments: $1 - path to the DR archive (.tar.gz)
# Exits:     1 on a missing/invalid archive, a missing .env or DB dump after
#            extraction, a failed DB restore, or an app that never turns
#            healthy; 0 when the operator declines the confirmation prompt.
perform_restore() {
  local dr_package="${1-}"
  local attempt

  if [[ -z "${dr_package}" ]]; then
    log_error "No DR package archive specified."
    echo "Usage: $0 --restore <path_to_archive.tar.gz>"
    exit 1
  fi

  if [[ ! -f "${dr_package}" ]]; then
    log_error "DR Archive file not found: ${dr_package}"
    exit 1
  fi

  echo ""
  log_warning "RESTORE DISASTER RECOVERY PACKAGE - THIS WILL OVERWRITE ENVIRONMENT CONFIGURATIONS, DATABASES, AND NPM FILES!"
  echo " Archive: ${dr_package}"
  echo ""
  local confirm_val
  read -r -p "Type 'RESTORE-ALL' to confirm complete system restore: " confirm_val
  if [[ "${confirm_val}" != "RESTORE-ALL" ]]; then
    log "System restore cancelled."
    exit 0
  fi

  log "Extracting DR archive contents..."

  # Safety backup of existing .env
  if [[ -f "${PROJECT_ROOT}/.env" ]]; then
    cp "${PROJECT_ROOT}/.env" "${PROJECT_ROOT}/.env.bak_before_dr_restore_${TIMESTAMP}"
    log "Created backup of existing .env: .env.bak_before_dr_restore_${TIMESTAMP}"
  fi

  # Extract all
  tar -xzf "${dr_package}" -C "${PROJECT_ROOT}"

  # Restore .env
  if [[ -f "${PROJECT_ROOT}/.env.production" ]]; then
    mv "${PROJECT_ROOT}/.env.production" "${PROJECT_ROOT}/.env"
    log "Restored .env configuration"
  fi

  if [[ ! -f "${PROJECT_ROOT}/.env" ]]; then
    log_error "No .env found in ${PROJECT_ROOT} after extraction; cannot continue."
    exit 1
  fi

  # Reload variables from restored .env. nounset is relaxed while sourcing, as
  # in the start-up load, because the file may reference unset variables.
  local had_nounset=false
  if [[ "$-" == *u* ]]; then
    had_nounset=true
  fi
  set -a
  set +u
  # shellcheck disable=SC1091
  source "${PROJECT_ROOT}/.env"
  if [[ "${had_nounset}" == true ]]; then
    set -u
  fi
  set +a

  log_success "Docker configurations and env keys restored."

  # Boot Docker Compose Services
  log "Spinning up Docker containers (database, redis, backend, frontend, NPM if configured)..."
  local -a compose_cmd=(docker compose)
  if ! docker compose version &>/dev/null; then
    compose_cmd=(docker-compose)
  fi

  # Run Compose from the project root so it uses the docker-compose.yml that
  # was just extracted there, whatever directory the script was started from.
  (cd "${PROJECT_ROOT}" && "${compose_cmd[@]}" up -d)

  # Locate the embedded database backup (first *.gz in glob order)
  local db_backup_archive=""
  local candidate
  for candidate in "${PROJECT_ROOT}/db_backup/"*.gz; do
    if [[ -f "${candidate}" ]]; then
      db_backup_archive="${candidate}"
      break
    fi
  done

  if [[ -z "${db_backup_archive}" ]]; then
    log_error "Database backup archive not found inside the DR package extraction."
    exit 1
  fi

  log "Database backup located: $(basename "${db_backup_archive}")"

  # Wait for database container to be healthy (PostgreSQL)
  if [[ "${DATABASE_URL:-}" =~ ^postgres ]]; then
    local postgres_container="${POSTGRES_CONTAINER:-wordly-postgres}"
    local db_status=""
    local db_healthy=false
    log "Waiting for PostgreSQL container (${postgres_container}) to be healthy..."
    for ((attempt = 1; attempt <= 30; attempt++)); do
      db_status="$(docker inspect --format='{{.State.Health.Status}}' "${postgres_container}" 2>/dev/null)" || db_status=""
      # Exact comparison: a substring match would also accept "unhealthy".
      if [[ "${db_status}" == "healthy" ]]; then
        db_healthy=true
        log_success "Database container is healthy."
        break
      fi
      echo " Waiting for database... (${attempt}/30)"
      sleep 2
    done
    # Proceeding is kept (the container may define no health check), but the
    # timeout is now visible in the log.
    if [[ "${db_healthy}" != true ]]; then
      log_warning "Database container not reported healthy after 60s (last status: ${db_status:-unknown}); attempting restore anyway."
    fi
  else
    sleep 2
  fi

  # Restore the database using the database backup script, which is handed the
  # dump's file name after the dump is copied into the daily backup directory.
  log "Triggering database restore..."
  local local_backup_dir="${BACKUP_DIR:-${PROJECT_ROOT}/backups}"
  mkdir -p "${local_backup_dir}/daily"
  cp "${db_backup_archive}" "${local_backup_dir}/daily/"

  local db_archive_filename
  db_archive_filename="$(basename "${db_backup_archive}")"

  # Run DB restore
  log "Restoring DB contents..."
  if ! bash "${SCRIPT_DIR}/backup-database.sh" --restore "${db_archive_filename}"; then
    log_error "Database restore failed."
    exit 1
  fi

  # Clean up extracted temporary folder
  rm -rf -- "${PROJECT_ROOT:?}/db_backup"

  # Restart app to clear connection caches
  log "Restarting application backend..."
  (cd "${PROJECT_ROOT}" && "${compose_cmd[@]}" restart backend)

  # HTTP Health check (wait up to 3 minutes)
  log "Waiting for application health check (max 180s)..."
  local app_url="http://localhost:8001/health"
  local health_ok=false
  local http_code
  for ((attempt = 1; attempt <= 36; attempt++)); do
    # On a curl failure use a clean "000" rather than appending it to the
    # "000" that -w has already printed.
    http_code="$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 5 "${app_url}" 2>/dev/null)" || http_code="000"
    if [[ "${http_code}" == "200" ]]; then
      health_ok=true
      log_success "App is healthy (HTTP 200) after $((attempt * 5))s"
      break
    fi
    echo " Health check attempt ${attempt}/36... (HTTP ${http_code})"
    sleep 5
  done

  if [[ "${health_ok}" == "false" ]]; then
    log_error "App did NOT become healthy within 180s!"
    log_error "NPM failover will NOT be triggered automatically."
    log_error "Investigate: docker compose logs backend"
    send_telegram "🚨 *Wordly.art DR FAILED — App unhealthy*
Serveur: \`$(hostname -I | awk '{print $1}')\`
Date: $(date '+%Y-%m-%d %H:%M:%S')
Action: vérifiez les logs Docker"
    exit 1
  fi

  # ============================================================================
  # NPM AUTOMATIC FAILOVER
  # ============================================================================
  log "App is healthy. Triggering NPM failover..."
  local this_server_ip
  this_server_ip="${SERVER_IP:-$(hostname -I | awk '{print $1}')}"

  if bash "${SCRIPT_DIR}/npm-failover.sh" --target-ip "${this_server_ip}"; then
    log_success "NPM now routes traffic to this server (${this_server_ip})"
    send_telegram "✅ *Wordly.art DR COMPLET*
Serveur actif: \`${this_server_ip}\`
NPM redirigé automatiquement
Date: $(date '+%Y-%m-%d %H:%M:%S')"
  else
    log_error "NPM failover script FAILED."
    log_warning "Manual failover required:"
    log_warning " → Go to http://192.168.1.184:81"
    log_warning " → Edit proxy host for ${NPM_PROXY_HOST_DOMAIN:-wordly.art}"
    log_warning " → Change Forward Hostname to: ${this_server_ip}"
    send_telegram "⚠️ *Wordly.art DR — NPM manuel requis*
App OK sur: \`${this_server_ip}\`
NPM failover automatique a échoué
Action: http://192.168.1.184:81 → modifier Forward Host"
  fi

  log_success "=========================================================================="
  log_success "DISASTER RECOVERY SYSTEM RESTORE COMPLETE!"
  log_success "=========================================================================="
  log_success " App: http://${this_server_ip}:8001/health"
  log_success " NPM: http://192.168.1.184:81"
  echo ""
}
# ==============================================================================
# MAIN ENTRY
# ==============================================================================
# Dispatches on the first command-line argument.
# Arguments: $1 - "--backup" or "--restore"
#            $2 - archive path (only with --restore; empty when omitted)
# Exits:     1 after printing the usage text for any other first argument.
main() {
  case "${1:-}" in
    --backup)
      perform_backup
      ;;
    --restore)
      perform_restore "${2:-}"
      ;;
    *)
      # Banner version matches the file header (V3); the --backup description
      # no longer mentions NPM configuration, which the backup does not package.
      echo "Wordly Disaster Recovery Utility (V3)"
      echo "Usage:"
      echo " $0 --backup # Package configs and db dump, then export to the NAS"
      echo " $0 --restore <archive.tar.gz> # Extract and restore full stack on new machine"
      exit 1
      ;;
  esac
}
# Script entry point: forward all command-line arguments to main.
main "$@"