infra/roles/services/tasks/configs.yml
jack 6ebd237894
Some checks failed
CI/CD / deploy (push) Has been cancelled
CI/CD / syntax-check (push) Successful in 1m7s
feat: major infrastructure improvements
Reliability:
- Add swap role (2GB, swappiness=10, idempotent via /etc/fstab)
- Add mem_limit to plane-worker (512m) and plane-beat (256m)
- Add health checks to all services (traefik, vaultwarden, forgejo,
  plane-*, syncthing, prometheus, grafana, loki)

Code quality:
- Remove Traefik Docker labels (file provider used, labels were dead code)
- Add comment explaining file provider architecture

Observability:
- Add AlertManager with Telegram notifications
- Add Prometheus alert rules: CPU, RAM, disk, swap, container health
- Add Loki + Promtail for centralized log aggregation
- Add Loki datasource to Grafana
- Enable Traefik /ping endpoint for health checks

Backups:
- Add backup role: pg_dump for forgejo + plane DBs, tar for
  vaultwarden and forgejo data
- 7-day retention, daily cron at 03:00
- Backup script at /usr/local/bin/backup-services

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 03:28:16 +07:00

145 lines
4.3 KiB
YAML

---
# Render env.j2 to {{ services_root }}/.env, readable and writable only by
# the deploy user (mode 0600). A change notifies the "Restart stack" handler.
- name: Deploy .env file
  ansible.builtin.template:
    src: env.j2
    dest: "{{ services_root }}/.env"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
    mode: "0600"
  # Keep the rendered content out of --diff output (e.g. CI logs).
  # NOTE(review): the 0600 mode suggests this file holds secrets; confirm
  # against env.j2 and drop this line if it does not.
  diff: false
  notify: Restart stack
# Render the Compose file template into services_root.
# A change notifies the "Restart stack" handler.
- name: Deploy docker-compose.yml
  ansible.builtin.template:
    dest: "{{ services_root }}/docker-compose.yml"
    src: docker-compose.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render Traefik's static configuration (traefik.yml) under
# services_root/traefik. A change notifies the "Restart stack" handler.
- name: Deploy Traefik static config
  ansible.builtin.template:
    dest: "{{ services_root }}/traefik/traefik.yml"
    src: traefik/traefik.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render the dynamic routes file into services_root/traefik/dynamic.
# A change notifies the "Restart stack" handler.
- name: Deploy Traefik dynamic routes
  ansible.builtin.template:
    dest: "{{ services_root }}/traefik/dynamic/routes.yml"
    src: traefik/dynamic/routes.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render the act_runner configuration to services_root/act_runner/config.yaml.
# A change notifies the "Restart stack" handler.
- name: Deploy act_runner config
  ansible.builtin.template:
    dest: "{{ services_root }}/act_runner/config.yaml"
    src: act_runner_config.yaml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render prometheus.yml into services_root/prometheus.
# A change notifies the "Restart stack" handler.
- name: Deploy Prometheus config
  ansible.builtin.template:
    dest: "{{ services_root }}/prometheus/prometheus.yml"
    src: prometheus/prometheus.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render the Prometheus datasource definition into Grafana's
# provisioning/datasources directory under services_root.
# A change notifies the "Restart stack" handler.
- name: Deploy Grafana datasource provisioning
  ansible.builtin.template:
    dest: "{{ services_root }}/grafana/provisioning/datasources/prometheus.yml"
    src: grafana/provisioning/datasources/prometheus.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render dashboards.yml into Grafana's provisioning/dashboards directory
# under services_root. A change notifies the "Restart stack" handler.
- name: Deploy Grafana dashboard provisioning config
  ansible.builtin.template:
    dest: "{{ services_root }}/grafana/provisioning/dashboards/dashboards.yml"
    src: grafana/provisioning/dashboards/dashboards.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Copy the Node Exporter Full dashboard JSON verbatim (copy, not template)
# into provisioning/dashboards/json under services_root.
# A change notifies the "Restart stack" handler.
- name: Deploy Node Exporter Full dashboard JSON
  ansible.builtin.copy:
    dest: "{{ services_root }}/grafana/provisioning/dashboards/json/node-exporter-full.json"
    src: grafana/dashboards/node-exporter-full.json
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Copy the cAdvisor dashboard JSON verbatim (copy, not template) into
# provisioning/dashboards/json under services_root.
# A change notifies the "Restart stack" handler.
- name: Deploy cAdvisor dashboard JSON
  ansible.builtin.copy:
    dest: "{{ services_root }}/grafana/provisioning/dashboards/json/cadvisor.json"
    src: grafana/dashboards/cadvisor.json
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render the alert rules file into services_root/prometheus/rules.
# A change notifies the "Restart stack" handler.
- name: Deploy Prometheus alert rules
  ansible.builtin.template:
    dest: "{{ services_root }}/prometheus/rules/alerts.yml"
    src: prometheus/rules/alerts.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render alertmanager.yml into services_root/prometheus with mode 0640
# (owner read/write, group read, no access for others).
# A change notifies the "Restart stack" handler.
# NOTE(review): with 0640 only deploy_user/deploy_group can read this file;
# confirm the AlertManager container's UID/GID is able to read it.
- name: Deploy AlertManager config
  ansible.builtin.template:
    src: prometheus/alertmanager.yml.j2
    dest: "{{ services_root }}/prometheus/alertmanager.yml"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
    mode: "0640"
  # Keep the rendered content out of --diff output (e.g. CI logs).
  # NOTE(review): presumably this file carries a notification credential,
  # given its tighter mode; confirm against alertmanager.yml.j2.
  diff: false
  notify: Restart stack
# Render loki.yml into services_root/loki.
# A change notifies the "Restart stack" handler.
- name: Deploy Loki config
  ansible.builtin.template:
    dest: "{{ services_root }}/loki/loki.yml"
    src: loki/loki.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render promtail.yml into services_root/loki (it shares the Loki directory).
# A change notifies the "Restart stack" handler.
- name: Deploy Promtail config
  ansible.builtin.template:
    dest: "{{ services_root }}/loki/promtail.yml"
    src: loki/promtail.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Render the Loki datasource definition into Grafana's
# provisioning/datasources directory under services_root.
# A change notifies the "Restart stack" handler.
- name: Deploy Grafana Loki datasource
  ansible.builtin.template:
    dest: "{{ services_root }}/grafana/provisioning/datasources/loki.yml"
    src: grafana/provisioning/datasources/loki.yml.j2
    mode: "0644"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"
  notify: Restart stack
# Ensure services_root/traefik/acme.json exists with mode 0600 and is owned
# by the deploy user. No handler is notified by this task.
# NOTE(review): presumably kept at 0600 because it will hold certificate
# private keys; confirm against the Traefik ACME storage requirements.
- name: Create acme.json for Let's Encrypt certificates
  ansible.builtin.file:
    path: "{{ services_root }}/traefik/acme.json"
    state: touch
    # Preserve both timestamps so touching an existing file does not
    # alter its mtime/atime on every run.
    access_time: preserve
    modification_time: preserve
    mode: "0600"
    owner: "{{ deploy_user }}"
    group: "{{ deploy_group }}"