Reliability:
- Add swap role (2GB, swappiness=10, idempotent via /etc/fstab)
- Add mem_limit to plane-worker (512m) and plane-beat (256m)
- Add health checks to all services (traefik, vaultwarden, forgejo, plane-*, syncthing, prometheus, grafana, loki)

Code quality:
- Remove Traefik Docker labels (file provider used, labels were dead code)
- Add comment explaining file provider architecture

Observability:
- Add AlertManager with Telegram notifications
- Add Prometheus alert rules: CPU, RAM, disk, swap, container health
- Add Loki + Promtail for centralized log aggregation
- Add Loki datasource to Grafana
- Enable Traefik /ping endpoint for health checks

Backups:
- Add backup role: pg_dump for forgejo + plane DBs, tar for vaultwarden and forgejo data
- 7-day retention, daily cron at 03:00
- Backup script at /usr/local/bin/backup-services

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
38 lines
949 B
Django/Jinja
38 lines
949 B
Django/Jinja
# Generated by Ansible — do not edit manually
# Settings for Promtail's own embedded server.
server:
  # Port Promtail's HTTP listener binds to.
  http_listen_port: 9080
  # 0 = do not pin a gRPC port; the server picks a random free one.
  grpc_listen_port: 0

# File in which Promtail records how far it has read in each log source.
positions:
  # NOTE(review): /tmp is usually ephemeral. Confirm this path sits on a
  # persistent volume; otherwise the saved read offsets are lost whenever
  # the container is recreated.
  filename: /tmp/positions.yaml

# Loki push endpoint(s) that receive the collected log entries.
clients:
  - url: http://loki:3100/loki/api/v1/push

# Log sources: one Docker service-discovery job plus two static file jobs.
scrape_configs:
  # Containers discovered through the Docker socket, re-polled every 5s.
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      # The regex captures everything after the leading "/" of the container
      # name; that capture becomes the `container` label.
      - source_labels: [__meta_docker_container_name]
        regex: '/(.*)'
        target_label: container
      # Copy the container's log stream metadata into `stream`.
      - source_labels: [__meta_docker_container_log_stream]
        target_label: stream
      # Copy the Compose service label into `service`.
      - source_labels: [__meta_docker_container_label_com_docker_compose_service]
        target_label: service

  # Host syslog file, labelled job=syslog.
  # NOTE(review): presumably /var/log is mounted into the Promtail
  # container; verify against the service definition.
  - job_name: syslog
    static_configs:
      - targets: [localhost]
        labels:
          job: syslog
          __path__: /var/log/syslog

  # Host authentication log file, labelled job=auth.
  - job_name: auth
    static_configs:
      - targets: [localhost]
        labels:
          job: auth
          __path__: /var/log/auth.log