infra/roles/tools/tasks/main.yml
jack fde51352d7 feat: migrate monitoring to tools server, fix Outline S3 uploads
Monitoring stack (Prometheus, AlertManager, Grafana, Loki, Uptime Kuma)
moved from main to tools server. Prometheus now scrapes main exporters
over network (ip_main:9100/8080). Promtail pushes logs to ip_tools:3100.
Traefik routes for dash/status.walava.io updated to ip_tools. discord-bot
PROMETHEUS_URL updated to http://ip_tools:9090.

Outline S3 fix: remove AWS_S3_ACL=private (Timeweb doesn't support
per-object ACLs — caused upload failures). Add CORS configuration task
for browser-side presigned uploads.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 04:10:28 +07:00

167 lines
4.7 KiB
YAML

---
- name: Create tools root directory
ansible.builtin.file:
path: "{{ tools_root }}"
state: directory
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0750"
- name: Create tools subdirectories
ansible.builtin.file:
path: "{{ tools_root }}/{{ item }}"
state: directory
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0755"
loop:
- prometheus
- prometheus/rules
- grafana/provisioning/datasources
- grafana/provisioning/dashboards
- grafana/provisioning/dashboards/json
- loki
- name: Deploy .env file
ansible.builtin.template:
src: env.j2
dest: "{{ tools_root }}/.env"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0600"
- name: Deploy docker-compose.yml
ansible.builtin.template:
src: docker-compose.yml.j2
dest: "{{ tools_root }}/docker-compose.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0640"
- name: Deploy Prometheus config
ansible.builtin.template:
src: prometheus/prometheus.yml.j2
dest: "{{ tools_root }}/prometheus/prometheus.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Prometheus alert rules
ansible.builtin.template:
src: prometheus/rules/alerts.yml.j2
dest: "{{ tools_root }}/prometheus/rules/alerts.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy AlertManager config
ansible.builtin.template:
src: prometheus/alertmanager.yml.j2
dest: "{{ tools_root }}/prometheus/alertmanager.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Loki config
ansible.builtin.template:
src: loki/loki.yml.j2
dest: "{{ tools_root }}/loki/loki.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Grafana Prometheus datasource
ansible.builtin.template:
src: grafana/provisioning/datasources/prometheus.yml.j2
dest: "{{ tools_root }}/grafana/provisioning/datasources/prometheus.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Grafana Loki datasource
ansible.builtin.template:
src: grafana/provisioning/datasources/loki.yml.j2
dest: "{{ tools_root }}/grafana/provisioning/datasources/loki.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Grafana dashboard provisioning config
ansible.builtin.template:
src: grafana/provisioning/dashboards/dashboards.yml.j2
dest: "{{ tools_root }}/grafana/provisioning/dashboards/dashboards.yml"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy Node Exporter Full dashboard JSON
ansible.builtin.copy:
src: grafana/dashboards/node-exporter-full.json
dest: "{{ tools_root }}/grafana/provisioning/dashboards/json/node-exporter-full.json"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Deploy cAdvisor dashboard JSON
ansible.builtin.copy:
src: grafana/dashboards/cadvisor.json
dest: "{{ tools_root }}/grafana/provisioning/dashboards/json/cadvisor.json"
owner: "{{ deploy_user }}"
group: "{{ deploy_group }}"
mode: "0644"
- name: Pull monitoring images
ansible.builtin.command: docker pull {{ item }}
loop:
- "{{ prometheus_image }}"
- "{{ alertmanager_image }}"
- "{{ node_exporter_image }}"
- "{{ cadvisor_image }}"
- "{{ grafana_image }}"
- "{{ loki_image }}"
- "{{ uptime_kuma_image }}"
register: pull_result
changed_when: "'Status: Downloaded newer image' in pull_result.stdout"
retries: 5
delay: 30
until: pull_result.rc == 0
# ── UFW: allow main server to reach monitoring services ───────────────────────
- name: Allow main server to reach Loki (Promtail log push)
community.general.ufw:
rule: allow
port: "3100"
proto: tcp
src: "{{ ip_main }}"
- name: Allow main server to reach Prometheus (discord-bot metrics)
community.general.ufw:
rule: allow
port: "9090"
proto: tcp
src: "{{ ip_main }}"
- name: Allow main Traefik to reach Grafana
community.general.ufw:
rule: allow
port: "3000"
proto: tcp
src: "{{ ip_main }}"
- name: Allow main Traefik to reach Uptime Kuma
community.general.ufw:
rule: allow
port: "3001"
proto: tcp
src: "{{ ip_main }}"
- name: Start tools stack
community.docker.docker_compose_v2:
project_src: "{{ tools_root }}"
state: present
pull: never
remove_orphans: true
retries: 3
delay: 15
register: compose_result
until: compose_result is succeeded