# Alerting rules file (YAML). Viewer metadata from the original listing: 75 lines, 2.0 KiB.
# Prometheus-style alerting rules: one rule group containing service
# availability checks, host resource checks, PostgreSQL health checks and
# application-level checks. Each rule fires once its `expr` has returned a
# result continuously for the duration given in `for`.
groups:
  # NOTE(review): the group is named "critical" but also holds rules labelled
  # severity "warning". The name is kept unchanged because other tooling may
  # reference it; consider splitting or renaming after confirming.
  - name: critical
    rules:
      # NOTE(review): the three `up ... == 0` rules below only match when an
      # `up` series exists for the job. If the target is missing from the
      # scrape results entirely, the expression returns nothing and the rule
      # stays silent. Confirm whether an `absent(...)` companion is wanted.

      # The memento-app scrape target has reported up == 0 for 2 minutes.
      - alert: MementoAppDown
        expr: up{job="memento-app"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Memento app is DOWN"

      # The postgres scrape target has reported up == 0 for 1 minute.
      - alert: PostgresDown
        expr: up{job="postgres"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "PostgreSQL is DOWN"

      # The redis scrape target has reported up == 0 for 1 minute.
      - alert: RedisDown
        expr: up{job="redis"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Redis is DOWN"

      # Available/size byte ratio of the filesystem mounted at "/" has been
      # below 0.15 for 5 minutes. The folded scalar joins the lines with a
      # single space, so the expression text is unchanged.
      - alert: DiskSpaceLow
        expr: >-
          (node_filesystem_avail_bytes{mountpoint="/"}
          / node_filesystem_size_bytes{mountpoint="/"}) < 0.15
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Disk space below 15%"

      # One minus the MemAvailable/MemTotal ratio has been above 0.90 for
      # 5 minutes.
      - alert: HighMemoryUsage
        expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Memory usage above 90%"

      # A pg_stat_activity_count series has been above 80 for 5 minutes.
      # NOTE(review): the comparison is applied to each series separately. If
      # the exporter splits this metric by labels (e.g. database or state),
      # the total can exceed 80 without any single series doing so. Confirm
      # whether a sum() is intended.
      - alert: PostgresConnectionsHigh
        expr: pg_stat_activity_count > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "PostgreSQL connections above 80"

      # pg_stat_statements_mean_exec_seconds has been above 5 for 5 minutes.
      - alert: PostgresSlowQueries
        expr: pg_stat_statements_mean_exec_seconds > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "PostgreSQL slow queries detected"

      # Share of requests answered with a 5xx status, per job, has been above
      # 0.05 (5%) for 3 minutes. The previous expression compared the raw
      # per-series 5xx request rate (requests per second) with 0.05, which
      # does not match the "above 5%" summary; dividing by the total request
      # rate yields the ratio the summary describes.
      - alert: HighErrorRate
        expr: >-
          sum by (job) (rate(http_requests_total{status=~"5.."}[5m]))
          / sum by (job) (rate(http_requests_total[5m]))
          > 0.05
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "HTTP 5xx error rate above 5%"

      # container_restart_count has increased over the trailing hour, and
      # that has held for 1 minute.
      # NOTE(review): confirm that the exporter in use exposes a metric with
      # this exact name.
      - alert: ContainerRestarted
        expr: increase(container_restart_count[1h]) > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Container restarted in the last hour"