Monitoring stack (Prometheus, AlertManager, Grafana, Loki, Uptime Kuma) moved from main to tools server. Prometheus now scrapes main exporters over network (ip_main:9100/8080). Promtail pushes logs to ip_tools:3100. Traefik routes for dash/status.walava.io updated to ip_tools. discord-bot PROMETHEUS_URL updated to http://ip_tools:9090. Outline S3 fix: remove AWS_S3_ACL=private (Timeweb doesn't support per-object ACLs — caused upload failures). Add CORS configuration task for browser-side presigned uploads. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
157 lines
5 KiB
Django/Jinja
157 lines
5 KiB
Django/Jinja
# Tools stack — generated by Ansible
# Do not edit manually; re-run ansible-playbook playbooks/tools.yml
# Monitoring: Prometheus, Grafana, Loki, AlertManager, Uptime Kuma, node-exporter, cAdvisor

networks:
  # Single bridge network that the services in this stack attach to.
  monitoring:
    driver: bridge

volumes:
  # Named volumes with default settings, one per stateful service.
  prometheus_data: {}
  grafana_data: {}
  loki_data: {}
  uptime_kuma_data: {}

services:

  # ── Prometheus ─────────────────────────────────────────────────────────────
  # Metrics store. The scrape config and rule files are bind-mounted read-only
  # from tools_root; the TSDB lives in the prometheus_data named volume.
  prometheus:
    image: "{{ prometheus_image }}"
    container_name: prometheus
    restart: unless-stopped
    networks:
      - monitoring
    ports:
      # Published on the host's external interfaces so discord-bot on the main
      # server can query it. A 127.0.0.1 binding only accepts connections that
      # originate on this host and is unreachable from main.
      # NOTE(review): Docker-published ports are not filtered by UFW by default;
      # restrict the source to the main server (e.g. via the DOCKER-USER chain).
      - "9090:9090"
    volumes:
      - prometheus_data:/prometheus
      - "{{ tools_root }}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro"
      - "{{ tools_root }}/prometheus/rules:/etc/prometheus/rules:ro"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Keep 30 days of samples before the TSDB drops them.
      - "--storage.tsdb.retention.time=30d"
      - "--web.console.libraries=/usr/share/prometheus/console_libraries"
      - "--web.console.templates=/usr/share/prometheus/consoles"
    healthcheck:
      # Probes Prometheus' own health endpoint from inside the container.
      test: ["CMD", "wget", "-qO-", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 5s
      retries: 3

# ── AlertManager ────────────────────────────────────────────────────────────
  # No host port is published; the service is only attached to the monitoring
  # network.
  alertmanager:
    container_name: alertmanager
    image: "{{ alertmanager_image }}"
    restart: unless-stopped
    networks:
      - monitoring
    volumes:
      - "{{ tools_root }}/prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro"
    # NOTE(review): --storage.path points at /alertmanager, but no named volume
    # is mounted there in this file — confirm whether that state should persist.
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
      - "--storage.path=/alertmanager"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "-qO-"
        - "http://localhost:9093/-/healthy"
      retries: 3
      timeout: 5s
      interval: 30s

# ── Exporters (monitor the tools host itself) ───────────────────────────────
  # Host-level metrics. The host's /proc, /sys and / are bind-mounted read-only
  # and the exporter is pointed at them through the --path.* flags below.
  node-exporter:
    image: "{{ node_exporter_image }}"
    container_name: node-exporter
    restart: unless-stopped
    networks:
      - monitoring
    # Share the host PID namespace so process-related data reflects the host.
    pid: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - "--path.procfs=/host/proc"
      - "--path.sysfs=/host/sys"
      # Resolve mount points from the host's mount table under the /rootfs bind
      # mount; without this flag the filesystem collector stats them against
      # the container's own root instead of the host's.
      - "--path.rootfs=/rootfs"
      # "$$" is Compose escaping for a literal "$" in the regex.
      - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"

# ── cAdvisor ────────────────────────────────────────────────────────────────
  # Runs privileged with read-only views of the host root, /var/run, /sys,
  # the Docker data directory and /dev/disk, plus the /dev/kmsg device.
  cadvisor:
    container_name: cadvisor
    image: "{{ cadvisor_image }}"
    restart: unless-stopped
    privileged: true
    networks:
      - monitoring
    devices:
      - "/dev/kmsg"
    volumes:
      - "/:/rootfs:ro"
      - "/var/run:/var/run:ro"
      - "/sys:/sys:ro"
      - "/var/lib/docker:/var/lib/docker:ro"
      - "/dev/disk:/dev/disk:ro"

# ── Grafana ─────────────────────────────────────────────────────────────────
  # Dashboards. Port 3000 is published on the host; provisioning files are
  # mounted read-only and application data lives in the grafana_data volume.
  grafana:
    container_name: grafana
    image: "{{ grafana_image }}"
    restart: unless-stopped
    depends_on:
      - prometheus
    security_opt:
      - "no-new-privileges:true"
    networks:
      - monitoring
    ports:
      - "3000:3000"
    volumes:
      - "grafana_data:/var/lib/grafana"
      - "{{ tools_root }}/grafana/provisioning:/etc/grafana/provisioning:ro"
    # Variables from .env are loaded in addition to the inline ones below.
    env_file: .env
    environment:
      - "GF_SECURITY_ADMIN_USER=admin"
      - "GF_USERS_ALLOW_SIGN_UP=false"
      - "GF_SERVER_DOMAIN={{ domain_dashboard }}"
      - "GF_SERVER_ROOT_URL=https://{{ domain_dashboard }}"
      - "GF_AUTH_ANONYMOUS_ENABLED=false"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "-qO-"
        - "http://localhost:3000/api/health"
      retries: 3
      timeout: 5s
      interval: 30s

# ── Loki ────────────────────────────────────────────────────────────────────
  # Log store. The config file from tools_root is mounted read-only over the
  # image's local-config.yaml path; data lives in the loki_data volume.
  loki:
    container_name: loki
    image: "{{ loki_image }}"
    restart: unless-stopped
    networks:
      - monitoring
    ports:
      # exposed to main for Promtail log ingestion
      - "3100:3100"
    volumes:
      - "loki_data:/loki"
      - "{{ tools_root }}/loki/loki.yml:/etc/loki/local-config.yaml:ro"
    command:
      - "-config.file=/etc/loki/local-config.yaml"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "-qO-"
        - "http://localhost:3100/ready"
      retries: 3
      timeout: 5s
      interval: 30s

# ── Uptime Kuma ─────────────────────────────────────────────────────────────
  # Port 3001 is published on the host; application data is kept in the
  # uptime_kuma_data volume.
  uptime-kuma:
    container_name: uptime-kuma
    image: "{{ uptime_kuma_image }}"
    restart: unless-stopped
    security_opt:
      - "no-new-privileges:true"
    networks:
      - monitoring
    ports:
      - "3001:3001"
    volumes:
      - "uptime_kuma_data:/app/data"
    healthcheck:
      # Unlike the other services, this check uses curl rather than wget.
      test:
        - "CMD"
        - "curl"
        - "-sf"
        - "http://localhost:3001/"
      retries: 3
      timeout: 5s
      interval: 30s