feat: homelab deployment - NPM + IONOS DNS + monitoring + NAS backup
- Restructured docker-compose for Nginx Proxy Manager (no custom nginx)
- Added domain wordly.art configuration
- Added Prometheus + Grafana monitoring stack with pre-configured dashboards
- Added PostgreSQL backup script to NAS (daily/weekly/monthly rotation)
- Added alert rules for backend, system, and Docker metrics
- Updated deployment guide for NPM + IONOS DNS homelab setup
- Added marketing plan document
- PDF translator and watermark support
- Enhanced middleware, routes, and translator modules

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -26,11 +26,15 @@ FROM python:3.12-slim AS production
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install runtime dependencies only
|
||||
# Install runtime dependencies + LibreOffice headless (required for DOCX→PDF)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libmagic1 \
|
||||
libpq5 \
|
||||
curl \
|
||||
fonts-noto \
|
||||
fonts-noto-cjk \
|
||||
fonts-noto-cjk-extra \
|
||||
libreoffice-writer-nogui \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
@@ -38,8 +42,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
COPY --from=builder /opt/venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# Create non-root user for security
|
||||
RUN groupadd -r translator && useradd -r -g translator translator
|
||||
# Create non-root user with a proper home directory (LibreOffice needs it)
|
||||
RUN groupadd -r translator && \
|
||||
useradd -r -g translator -m -d /home/translator translator && \
|
||||
mkdir -p /home/translator/.cache && \
|
||||
chown -R translator:translator /home/translator
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/uploads /app/outputs /app/logs /app/temp \
|
||||
|
||||
@@ -6,19 +6,28 @@ echo "🚀 Starting Document Translation API..."
|
||||
# Wait for database to be ready (if DATABASE_URL is set)
|
||||
if [ -n "$DATABASE_URL" ]; then
|
||||
echo "⏳ Waiting for database to be ready..."
|
||||
|
||||
# Extract host and port from DATABASE_URL
|
||||
# postgresql://user:pass@host:port/db
|
||||
DB_HOST=$(echo $DATABASE_URL | sed -e 's/.*@\([^:]*\):.*/\1/')
|
||||
DB_PORT=$(echo $DATABASE_URL | sed -e 's/.*:\([0-9]*\)\/.*/\1/')
|
||||
|
||||
|
||||
# Extract host and port from DATABASE_URL (handles postgresql+asyncpg:// and postgresql://)
|
||||
# Resolve the database host with urllib rather than pasting the URL into Python
# source: the URL is passed as an argument, so a quote in the password cannot
# break the snippet, and an '@' inside the password is not mistaken for the
# start of the host. Prints 'postgres' when the URL carries no usable host.
DB_HOST=$(python -c "
import sys
from urllib.parse import urlsplit
try:
    host = urlsplit(sys.argv[1]).hostname
except ValueError:
    host = None
print(host or 'postgres')
" "$DATABASE_URL")
|
||||
# Resolve the database port with urllib; the URL is passed as an argument so
# special characters in the credentials cannot corrupt the Python snippet.
# Prints 5432 when the URL has no port or the port is not a valid number.
DB_PORT=$(python -c "
import sys
from urllib.parse import urlsplit
try:
    port = urlsplit(sys.argv[1]).port
except ValueError:
    port = None
print(port or 5432)
" "$DATABASE_URL")
|
||||
|
||||
echo " Connecting to ${DB_HOST}:${DB_PORT}..."
|
||||
|
||||
# Wait up to 30 seconds for database
|
||||
for i in {1..30}; do
|
||||
for i in $(seq 1 30); do
|
||||
if python -c "
|
||||
import socket
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
try:
|
||||
s.connect(('$DB_HOST', $DB_PORT))
|
||||
s.connect(('$DB_HOST', int('$DB_PORT')))
|
||||
s.close()
|
||||
exit(0)
|
||||
except:
|
||||
@@ -30,7 +39,7 @@ except:
|
||||
echo " Waiting for database... ($i/30)"
|
||||
sleep 1
|
||||
done
|
||||
|
||||
|
||||
# Run database migrations
|
||||
echo "📦 Running database migrations..."
|
||||
alembic upgrade head || echo "⚠️ Migration skipped (may already be up to date)"
|
||||
@@ -39,15 +48,23 @@ fi
|
||||
# Wait for Redis if configured
|
||||
if [ -n "$REDIS_URL" ]; then
|
||||
echo "⏳ Waiting for Redis..."
|
||||
REDIS_HOST=$(echo $REDIS_URL | sed -e 's/redis:\/\/\([^:]*\):.*/\1/')
|
||||
REDIS_PORT=$(echo $REDIS_URL | sed -e 's/.*:\([0-9]*\)\/.*/\1/')
|
||||
|
||||
for i in {1..10}; do
|
||||
# Resolve the Redis host with urllib so URLs that carry credentials
# (redis://user:password@host:port/db or redis://:password@host:port/db) yield
# the real host instead of the text that directly follows '://'. The URL is
# passed as an argument, never interpolated into the Python source.
# Prints 'redis' when the URL carries no usable host.
REDIS_HOST=$(python -c "
import sys
from urllib.parse import urlsplit
try:
    host = urlsplit(sys.argv[1]).hostname
except ValueError:
    host = None
print(host or 'redis')
" "$REDIS_URL")
|
||||
# Resolve the Redis port with urllib so a port that follows credentials
# (redis://user:password@host:6380/0) is honoured. The URL is passed as an
# argument, never interpolated into the Python source.
# Prints 6379 when the URL has no port or the port is not a valid number.
REDIS_PORT=$(python -c "
import sys
from urllib.parse import urlsplit
try:
    port = urlsplit(sys.argv[1]).port
except ValueError:
    port = None
print(port or 6379)
" "$REDIS_URL")
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
if python -c "
|
||||
import socket
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
try:
|
||||
s.connect(('$REDIS_HOST', $REDIS_PORT))
|
||||
s.connect(('$REDIS_HOST', int('$REDIS_PORT')))
|
||||
s.close()
|
||||
exit(0)
|
||||
except:
|
||||
|
||||
168
docker/grafana/dashboards/wordly-infrastructure.json
Normal file
168
docker/grafana/dashboards/wordly-infrastructure.json
Normal file
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"annotations": { "list": [] },
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"title": "CPU Usage (%)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 20 },
|
||||
"unit": "percent",
|
||||
"max": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "RAM Usage",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100",
|
||||
"legendFormat": "Used %"
|
||||
},
|
||||
{
|
||||
"expr": "node_memory_Buffers_bytes / node_memory_MemTotal_bytes * 100",
|
||||
"legendFormat": "Buffers %"
|
||||
},
|
||||
{
|
||||
"expr": "node_memory_Cached_bytes / node_memory_MemTotal_bytes * 100",
|
||||
"legendFormat": "Cache %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 15 },
|
||||
"unit": "percent",
|
||||
"max": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Disk Space",
|
||||
"type": "gauge",
|
||||
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(1 - node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * 100",
|
||||
"legendFormat": "Used"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 70 },
|
||||
{ "color": "red", "value": 90 }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Network I/O",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 6, "w": 10, "x": 6, "y": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(node_network_receive_bytes_total{device!=\"lo\"}[5m]) * 8",
|
||||
"legendFormat": "In {{device}}"
|
||||
},
|
||||
{
|
||||
"expr": "-rate(node_network_transmit_bytes_total{device!=\"lo\"}[5m]) * 8",
|
||||
"legendFormat": "Out {{device}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": { "lineWidth": 2 },
|
||||
"unit": "bps"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Container Memory",
|
||||
"type": "barchart",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "container_memory_usage_bytes{name=~\"wordly.*|translate.*\"}",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Container CPU %",
|
||||
"type": "barchart",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(container_cpu_usage_seconds_total{name=~\"wordly.*|translate.*\"}[5m]) * 100",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Service Status (Up/Down)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 22 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "up",
|
||||
"legendFormat": "{{job}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"mappings": [
|
||||
{ "type": "value", "options": { "0": { "text": "DOWN", "color": "red" }, "1": { "text": "UP", "color": "green" } } }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"tags": ["wordly", "infrastructure"],
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"timezone": "Europe/Paris",
|
||||
"title": "Wordly - Infrastructure",
|
||||
"uid": "wordly-infra",
|
||||
"version": 1
|
||||
}
|
||||
206
docker/grafana/dashboards/wordly-overview.json
Normal file
206
docker/grafana/dashboards/wordly-overview.json
Normal file
@@ -0,0 +1,206 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"title": "Traductions (dernières 24h)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(translation_total[24h]))",
|
||||
"legendFormat": "Total"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "blue", "value": null },
|
||||
{ "color": "green", "value": 10 },
|
||||
{ "color": "orange", "value": 50 }
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Temps moyen (secondes)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(rate(translation_duration_seconds_sum[5m]) / rate(translation_duration_seconds_count[5m]))",
|
||||
"legendFormat": "Avg"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 30 },
|
||||
{ "color": "red", "value": 60 }
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Taux d'erreur (%)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
|
||||
"legendFormat": "Error %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
},
|
||||
"unit": "percent",
|
||||
"max": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Utilisateurs actifs (1h)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(increase(http_requests_total{path!=\"/health\",path!=\"/metrics\"}[1h]) > 0)",
|
||||
"legendFormat": "Active"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "blue", "value": null },
|
||||
{ "color": "green", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Requetes par minute (par endpoint)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (path) (rate(http_requests_total[5m]) * 60)",
|
||||
"legendFormat": "{{path}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "req/min"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Temps de traduction (percentiles)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum(rate(translation_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum(rate(translation_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(translation_duration_seconds_bucket[5m])) by (le))",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "mode": "palette-classic" },
|
||||
"custom": {
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 5
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Traductions par provider",
|
||||
"type": "piechart",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (provider) (increase(translation_total[24h]))",
|
||||
"legendFormat": "{{provider}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Taille des fichiers uploades",
|
||||
"type": "histogram",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (le) (increase(file_size_bytes_bucket[24h]))",
|
||||
"legendFormat": "{{le}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Fichiers par type",
|
||||
"type": "piechart",
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (file_type) (increase(translation_total[24h]))",
|
||||
"legendFormat": "{{file_type}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"tags": ["wordly", "application"],
|
||||
"templating": { "list": [] },
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "Europe/Paris",
|
||||
"title": "Wordly - Application",
|
||||
"uid": "wordly-app",
|
||||
"version": 1
|
||||
}
|
||||
13
docker/grafana/provisioning/dashboards/dashboards.yml
Normal file
13
docker/grafana/provisioning/dashboards/dashboards.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'Wordly Dashboards'
|
||||
orgId: 1
|
||||
folder: 'Wordly'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
editable: true
|
||||
updateIntervalSeconds: 30
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
# Keep this false: Grafana only applies this provider's `folder` name when
# folders are not derived from the directory layout, and the dashboard JSON
# files are provisioned as a flat directory.
foldersFromFilesStructure: false
|
||||
12
docker/grafana/provisioning/datasources/datasources.yml
Normal file
12
docker/grafana/provisioning/datasources/datasources.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://wordly-prometheus:9090
|
||||
isDefault: true
|
||||
editable: false
|
||||
jsonData:
|
||||
timeInterval: '15s'
|
||||
httpMethod: POST
|
||||
@@ -1,18 +1,11 @@
|
||||
# Document Translation API - Main Server Block
|
||||
# HTTP to HTTPS redirect and main application routing
|
||||
# Wordly.art - Production Nginx Config
|
||||
# HTTP to HTTPS redirect + main application routing
|
||||
|
||||
# HTTP server - redirect to HTTPS
|
||||
# HTTP server - redirect to HTTPS + Let's Encrypt
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
server_name _;
|
||||
|
||||
# Allow health checks on HTTP
|
||||
location /health {
|
||||
proxy_pass http://backend/health;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
server_name wordly.art www.wordly.art;
|
||||
|
||||
# ACME challenge for Let's Encrypt
|
||||
location /.well-known/acme-challenge/ {
|
||||
@@ -21,7 +14,7 @@ server {
|
||||
|
||||
# Redirect all other traffic to HTTPS
|
||||
location / {
|
||||
return 301 https://$host$request_uri;
|
||||
return 301 https://wordly.art$request_uri;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,19 +22,16 @@ server {
|
||||
server {
|
||||
listen 443 ssl http2;
|
||||
listen [::]:443 ssl http2;
|
||||
server_name _;
|
||||
server_name wordly.art;
|
||||
|
||||
# SSL certificates (replace with your paths)
|
||||
# SSL certificates
|
||||
ssl_certificate /etc/nginx/ssl/fullchain.pem;
|
||||
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
|
||||
ssl_trusted_certificate /etc/nginx/ssl/chain.pem;
|
||||
|
||||
# SSL configuration
|
||||
# SSL hardening
|
||||
ssl_session_timeout 1d;
|
||||
ssl_session_cache shared:SSL:50m;
|
||||
ssl_session_tickets off;
|
||||
|
||||
# Modern SSL configuration
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384;
|
||||
ssl_prefer_server_ciphers off;
|
||||
@@ -58,9 +48,16 @@ server {
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
|
||||
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
|
||||
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; connect-src 'self' ws: wss:;" always;
|
||||
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; connect-src 'self' https://wordly.art ws: wss:;" always;
|
||||
|
||||
# API routes - proxy to backend (preserve full path so FastAPI receives /api/v1/...)
|
||||
# File upload size
|
||||
client_max_body_size 100M;
|
||||
client_body_timeout 300s;
|
||||
proxy_buffer_size 128k;
|
||||
proxy_buffers 4 256k;
|
||||
proxy_busy_buffers_size 256k;
|
||||
|
||||
# API routes -> Backend
|
||||
location /api/ {
|
||||
limit_req zone=api_limit burst=20 nodelay;
|
||||
limit_conn conn_limit 10;
|
||||
@@ -72,8 +69,7 @@ server {
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection "";
|
||||
|
||||
# CORS headers for API (Origin restricted to same-origin/localhost via map in nginx.conf)
|
||||
|
||||
add_header Access-Control-Allow-Origin $cors_origin always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Requested-With, X-API-Key" always;
|
||||
@@ -84,7 +80,7 @@ server {
|
||||
}
|
||||
}
|
||||
|
||||
# File upload endpoint - special handling
|
||||
# Translation endpoint - extended timeouts
|
||||
location /translate {
|
||||
limit_req zone=upload_limit burst=5 nodelay;
|
||||
limit_conn conn_limit 5;
|
||||
@@ -95,21 +91,51 @@ server {
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Increased timeouts for file processing
|
||||
|
||||
proxy_connect_timeout 60s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
}
|
||||
|
||||
# Health check endpoint
|
||||
# Health check
|
||||
location /health {
|
||||
proxy_pass http://backend/health;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
# Admin UI -> Frontend (Next.js page)
|
||||
# Prometheus metrics (internal only - restrict access)
|
||||
location /metrics {
|
||||
# Allow only from Docker network and localhost
|
||||
allow 172.28.0.0/16;
|
||||
allow 127.0.0.1;
|
||||
deny all;
|
||||
|
||||
proxy_pass http://backend/metrics;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
# API docs
|
||||
location /docs {
|
||||
proxy_pass http://backend/docs;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
}
|
||||
|
||||
location /redoc {
|
||||
proxy_pass http://backend/redoc;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
}
|
||||
|
||||
location /openapi.json {
|
||||
proxy_pass http://backend/openapi.json;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
}
|
||||
|
||||
# Admin -> Frontend
|
||||
location /admin {
|
||||
proxy_pass http://frontend;
|
||||
proxy_http_version 1.1;
|
||||
@@ -119,7 +145,13 @@ server {
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# Frontend - Next.js application
|
||||
# Frontend static assets with aggressive caching
|
||||
location /_next/static/ {
|
||||
proxy_pass http://frontend;
|
||||
add_header Cache-Control "public, max-age=31536000, immutable";
|
||||
}
|
||||
|
||||
# Frontend -> Next.js
|
||||
location / {
|
||||
proxy_pass http://frontend;
|
||||
proxy_http_version 1.1;
|
||||
@@ -131,16 +163,21 @@ server {
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
|
||||
# Static files caching
|
||||
location /_next/static/ {
|
||||
proxy_pass http://frontend;
|
||||
proxy_cache_valid 200 365d;
|
||||
add_header Cache-Control "public, max-age=31536000, immutable";
|
||||
}
|
||||
|
||||
# Error pages
|
||||
error_page 500 502 503 504 /50x.html;
|
||||
location = /50x.html {
|
||||
root /usr/share/nginx/html;
|
||||
}
|
||||
}
|
||||
|
||||
# Redirect www to non-www
|
||||
server {
|
||||
listen 443 ssl http2;
|
||||
listen [::]:443 ssl http2;
|
||||
server_name www.wordly.art;
|
||||
|
||||
ssl_certificate /etc/nginx/ssl/fullchain.pem;
|
||||
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
|
||||
|
||||
return 301 https://wordly.art$request_uri;
|
||||
}
|
||||
|
||||
101
docker/prometheus/alerts.yml
Normal file
101
docker/prometheus/alerts.yml
Normal file
@@ -0,0 +1,101 @@
|
||||
# Wordly.art - Prometheus Alert Rules
|
||||
|
||||
groups:
|
||||
# Application alerts
|
||||
- name: wordly_app
|
||||
rules:
|
||||
- alert: BackendDown
|
||||
expr: up{job="wordly-backend"} == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Wordly backend is down"
|
||||
description: "Backend has been down for more than 2 minutes."
|
||||
|
||||
- alert: HighErrorRate
|
||||
# Aggregate both sides with sum() before dividing. Without it the division is
# matched series-by-series on identical labels, so every 5xx series is divided
# by itself (ratio 1) and the alert fires whenever any 5xx traffic exists
# instead of when 5xx responses exceed 10% of all requests.
expr: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High error rate detected"
|
||||
description: "More than 10% of requests are returning 5xx errors."
|
||||
|
||||
- alert: SlowTranslations
|
||||
# Sum the bucket rates by `le` first so the p95 covers all translations
# combined, rather than being evaluated (and alerting) once per label set.
expr: histogram_quantile(0.95, sum by (le) (rate(translation_duration_seconds_bucket[5m]))) > 120
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Translations are slow"
|
||||
description: "95th percentile translation time is over 120 seconds."
|
||||
|
||||
- alert: HighTranslationQueue
|
||||
expr: translation_queue_size > 20
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Translation queue is backing up"
|
||||
description: "More than 20 translations queued."
|
||||
|
||||
# System alerts
|
||||
- name: wordly_system
|
||||
rules:
|
||||
- alert: HighMemoryUsage
|
||||
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage"
|
||||
description: "Server memory usage is above 90%."
|
||||
|
||||
- alert: DiskSpaceLow
|
||||
expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Low disk space"
|
||||
description: "Less than 15% disk space remaining on /."
|
||||
|
||||
- alert: DiskSpaceCritical
|
||||
expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.05
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Critical disk space"
|
||||
description: "Less than 5% disk space remaining on /."
|
||||
|
||||
- alert: HighCPUUsage
|
||||
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage"
|
||||
description: "CPU usage is above 85% for 10 minutes."
|
||||
|
||||
# Docker alerts
|
||||
- name: wordly_docker
|
||||
rules:
|
||||
- alert: ContainerRestarted
|
||||
# NOTE(review): container_restart_count does not appear to be a metric exported
# by cAdvisor, the only container exporter in the scrape config — confirm that
# some target actually exposes it; if none does, this expression returns no
# data and the alert can never fire.
expr: increase(container_restart_count[1h]) > 2
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Container restarting"
|
||||
description: "Container {{ $labels.name }} has restarted more than 2 times in the last hour."
|
||||
|
||||
- alert: ContainerOOM
|
||||
expr: increase(container_oom_events_total[1h]) > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Container OOM killed"
|
||||
description: "Container {{ $labels.name }} was OOM killed."
|
||||
@@ -1,37 +1,34 @@
|
||||
# Prometheus Configuration for Document Translation API
|
||||
# Wordly.art - Prometheus Configuration
|
||||
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
external_labels:
|
||||
monitor: 'translate-api'
|
||||
monitor: 'wordly-homelab'
|
||||
environment: 'production'
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: []
|
||||
|
||||
rule_files: []
|
||||
rule_files:
|
||||
- 'alerts.yml'
|
||||
|
||||
scrape_configs:
|
||||
# Backend API metrics
|
||||
- job_name: 'translate-backend'
|
||||
# Backend FastAPI
|
||||
- job_name: 'wordly-backend'
|
||||
static_configs:
|
||||
- targets: ['backend:8000']
|
||||
metrics_path: /metrics
|
||||
scrape_interval: 10s
|
||||
|
||||
# Nginx metrics (requires nginx-prometheus-exporter)
|
||||
- job_name: 'nginx'
|
||||
static_configs:
|
||||
- targets: ['nginx-exporter:9113']
|
||||
|
||||
# Node exporter for system metrics
|
||||
# Systeme (CPU, RAM, Disk, Reseau)
|
||||
- job_name: 'node'
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
|
||||
# Docker metrics
|
||||
# Containers Docker
|
||||
- job_name: 'docker'
|
||||
static_configs:
|
||||
- targets: ['cadvisor:8080']
|
||||
|
||||
# Prometheus lui-meme
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
Reference in New Issue
Block a user