fix: remove grafana_admin_password from env.j2, delete dead prometheus templates
These files referenced variables that were removed in the previous refactor commit, which caused the deploy to fail (undefined variable: grafana_admin_password).
This commit is contained in:
parent
339f0e8484
commit
5f44441bd1
4 changed files with 0 additions and 156 deletions
|
|
@ -8,7 +8,6 @@ DOMAIN_GIT={{ domain_git }}
|
||||||
DOMAIN_PLANE={{ domain_plane }}
|
DOMAIN_PLANE={{ domain_plane }}
|
||||||
DOMAIN_TRAEFIK={{ domain_traefik }}
|
DOMAIN_TRAEFIK={{ domain_traefik }}
|
||||||
FORGEJO_RUNNER_TOKEN={{ forgejo_runner_token }}
|
FORGEJO_RUNNER_TOKEN={{ forgejo_runner_token }}
|
||||||
GRAFANA_ADMIN_PASSWORD={{ grafana_admin_password }}
|
|
||||||
CROWDSEC_BOUNCER_KEY={{ crowdsec_bouncer_key }}
|
CROWDSEC_BOUNCER_KEY={{ crowdsec_bouncer_key }}
|
||||||
# Cloudflare DNS-01 ACME challenge
|
# Cloudflare DNS-01 ACME challenge
|
||||||
CF_DNS_API_TOKEN={{ cloudflare_dns_api_token }}
|
CF_DNS_API_TOKEN={{ cloudflare_dns_api_token }}
|
||||||
|
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
# Generated by Ansible — do not edit manually
|
|
||||||
global:
|
|
||||||
resolve_timeout: 5m
|
|
||||||
|
|
||||||
route:
|
|
||||||
group_by: [alertname, severity]
|
|
||||||
group_wait: 30s
|
|
||||||
group_interval: 5m
|
|
||||||
repeat_interval: 4h
|
|
||||||
receiver: all
|
|
||||||
|
|
||||||
receivers:
|
|
||||||
- name: all
|
|
||||||
telegram_configs:
|
|
||||||
- bot_token: "{{ alertmanager_telegram_token }}"
|
|
||||||
chat_id: {{ alertmanager_telegram_chat_id }}
|
|
||||||
message: |
|
|
||||||
{{ '{{' }} range .Alerts {{ '}}' }}
|
|
||||||
{{ '{{' }} if eq .Status "firing" {{ '}}' }}🔴{{ '{{' }} else {{ '}}' }}🟢{{ '{{' }} end {{ '}}' }} *{{ '{{' }} .Labels.alertname {{ '}}' }}*
|
|
||||||
{{ '{{' }} .Annotations.summary {{ '}}' }}
|
|
||||||
{{ '{{' }} .Annotations.description {{ '}}' }}
|
|
||||||
{{ '{{' }} end {{ '}}' }}
|
|
||||||
parse_mode: Markdown
|
|
||||||
discord_configs:
|
|
||||||
- webhook_url: "{{ discord_webhook_alerts }}"
|
|
||||||
title: >-
|
|
||||||
{{ '{{' }} if eq (index .Alerts 0).Status "firing" {{ '}}' }}🔴 Alert{{ '{{' }} else {{ '}}' }}🟢 Resolved{{ '{{' }} end {{ '}}' }}
|
|
||||||
message: |
|
|
||||||
{{ '{{' }} range .Alerts {{ '}}' }}
|
|
||||||
**{{ '{{' }} .Labels.alertname {{ '}}' }}**
|
|
||||||
{{ '{{' }} .Annotations.summary {{ '}}' }}
|
|
||||||
{{ '{{' }} .Annotations.description {{ '}}' }}
|
|
||||||
{{ '{{' }} end {{ '}}' }}
|
|
||||||
|
|
||||||
inhibit_rules:
|
|
||||||
- source_matchers: [severity="critical"]
|
|
||||||
target_matchers: [severity="warning"]
|
|
||||||
equal: [alertname]
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
# Generated by Ansible — do not edit manually
|
|
||||||
global:
|
|
||||||
scrape_interval: 15s
|
|
||||||
evaluation_interval: 15s
|
|
||||||
external_labels:
|
|
||||||
instance: "{{ domain_base }}"
|
|
||||||
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- static_configs:
|
|
||||||
- targets: ["alertmanager:9093"]
|
|
||||||
|
|
||||||
rule_files:
|
|
||||||
- /etc/prometheus/rules/*.yml
|
|
||||||
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: prometheus
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:9090"]
|
|
||||||
|
|
||||||
- job_name: node-exporter
|
|
||||||
static_configs:
|
|
||||||
- targets: ["node-exporter:9100"]
|
|
||||||
|
|
||||||
- job_name: cadvisor
|
|
||||||
static_configs:
|
|
||||||
- targets: ["cadvisor:8080"]
|
|
||||||
|
|
||||||
- job_name: alertmanager
|
|
||||||
static_configs:
|
|
||||||
- targets: ["alertmanager:9093"]
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
||||||
# Generated by Ansible — do not edit manually
|
|
||||||
groups:
|
|
||||||
- name: host
|
|
||||||
rules:
|
|
||||||
- alert: HighCPULoad
|
|
||||||
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Высокая нагрузка CPU ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "CPU загружен более 85% на протяжении 5 минут."
|
|
||||||
|
|
||||||
- alert: HighMemoryUsage
|
|
||||||
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Высокое использование RAM ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "Использование RAM превысило 85%."
|
|
||||||
|
|
||||||
- alert: CriticalMemoryUsage
|
|
||||||
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Критическое использование RAM ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "RAM заполнена на 95%+. Возможны OOM kills."
|
|
||||||
|
|
||||||
- alert: DiskSpaceWarning
|
|
||||||
expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|aufs"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|aufs"})) * 100 > 75
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Заканчивается место на диске ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "Диск {{ '{{' }} $labels.mountpoint {{ '}}' }} занят на {{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%."
|
|
||||||
|
|
||||||
- alert: DiskSpaceCritical
|
|
||||||
expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|aufs"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|aufs"})) * 100 > 90
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Критически мало места на диске ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "Диск {{ '{{' }} $labels.mountpoint {{ '}}' }} занят на {{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%."
|
|
||||||
|
|
||||||
- alert: SwapUsageHigh
|
|
||||||
expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 50
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Высокое использование swap ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)"
|
|
||||||
description: "Swap используется более чем на 50% — RAM под давлением."
|
|
||||||
|
|
||||||
- name: containers
|
|
||||||
rules:
|
|
||||||
- alert: ContainerDown
|
|
||||||
expr: absent(container_last_seen{name=~".+"}) or time() - container_last_seen{name=~".+"} > 60
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} недоступен"
|
|
||||||
description: "Контейнер не отвечает более 2 минут."
|
|
||||||
|
|
||||||
- alert: ContainerHighMemory
|
|
||||||
expr: (container_memory_usage_bytes{name=~".+"} / (container_spec_memory_limit_bytes{name=~".+"} > 0)) * 100 > 90
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} использует 90%+ памяти"
|
|
||||||
description: "Контейнер близок к mem_limit — возможен OOM kill."
|
|
||||||
|
|
||||||
- alert: ContainerRestarting
|
|
||||||
expr: increase(container_last_seen{name=~".+"}[5m]) == 0 and rate(container_cpu_usage_seconds_total{name=~".+"}[5m]) == 0
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} возможно перезапускается"
|
|
||||||
description: "Контейнер не активен — проверьте docker ps."
|
|
||||||
Loading…
Reference in a new issue