From 44ccdf48822c8aa64cef7ddb12cfacd344dfb4c4 Mon Sep 17 00:00:00 2001 From: jack Date: Fri, 27 Mar 2026 19:05:19 +0700 Subject: [PATCH] =?UTF-8?q?refactor:=20remove=20tools=20server,=20Vaultwar?= =?UTF-8?q?den,=20monitoring=20stack;=20rename=20plane=E2=86=92hub?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove tools server entirely (roles/tools, playbooks/tools.yml, CI deploy step) - Remove Vaultwarden (already absent from compose, clean up vars) - Remove node-exporter, cadvisor, promtail from main stack - Remove grafana/uptime-kuma Traefik routes (pointed to tools) - Remove monitoring network from docker-compose - Remove tools vault vars (grafana_admin_password, alertmanager telegram) - Rename domain_plane: plane.walava.io → hub.walava.io - Update CI workflow to only deploy main server - Update STATUS.md and BACKLOG.md to reflect current state --- .claude/scheduled_tasks.lock | 1 - .forgejo/workflows/deploy.yml | 5 - docs/BACKLOG.md | 94 +- docs/STATUS.md | 82 +- inventory/group_vars/all/main.yml | 10 +- inventory/group_vars/all/vault.yml | 369 +- inventory/hosts.ini | 15 +- playbooks/site.yml | 7 - playbooks/tools.yml | 14 - roles/services/defaults/main.yml | 8 - roles/services/tasks/configs.yml | 9 - roles/services/tasks/main.yml | 27 - .../templates/authelia/configuration.yml.j2 | 72 - .../services/templates/authelia/users.yml.j2 | 12 - .../services/templates/docker-compose.yml.j2 | 59 +- roles/services/templates/loki/loki.yml.j2 | 36 - roles/services/templates/loki/promtail.yml.j2 | 38 - .../templates/traefik/dynamic/routes.yml.j2 | 26 - roles/tools/defaults/main.yml | 11 - .../files/grafana/dashboards/cadvisor.json | 817 - .../dashboards/node-exporter-full.json | 15766 ---------------- roles/tools/tasks/main.yml | 167 - roles/tools/templates/docker-compose.yml.j2 | 157 - roles/tools/templates/env.j2 | 2 - .../provisioning/dashboards/dashboards.yml.j2 | 13 - .../provisioning/datasources/loki.yml.j2 | 10 - .../datasources/prometheus.yml.j2 | 10 - roles/tools/templates/loki/loki.yml.j2 | 36 - .../templates/prometheus/alertmanager.yml.j2 | 38 - .../templates/prometheus/prometheus.yml.j2 | 49 - .../templates/prometheus/rules/alerts.yml.j2 | 86 - 31 files changed, 248 insertions(+), 17798 deletions(-) delete mode 100644 .claude/scheduled_tasks.lock delete mode 100644 playbooks/tools.yml delete mode 100644 roles/services/templates/authelia/configuration.yml.j2 delete mode 100644 roles/services/templates/authelia/users.yml.j2 delete mode 100644 roles/services/templates/loki/loki.yml.j2 delete mode 100644 roles/services/templates/loki/promtail.yml.j2 delete mode 100644 roles/tools/defaults/main.yml delete mode 100644 roles/tools/files/grafana/dashboards/cadvisor.json delete mode 100644 roles/tools/files/grafana/dashboards/node-exporter-full.json delete mode 100644 roles/tools/tasks/main.yml delete mode 100644 roles/tools/templates/docker-compose.yml.j2 delete mode 100644 roles/tools/templates/env.j2 delete mode 100644 roles/tools/templates/grafana/provisioning/dashboards/dashboards.yml.j2 delete mode 100644 roles/tools/templates/grafana/provisioning/datasources/loki.yml.j2 delete mode 100644 roles/tools/templates/grafana/provisioning/datasources/prometheus.yml.j2 delete mode 100644 roles/tools/templates/loki/loki.yml.j2 delete mode 100644 roles/tools/templates/prometheus/alertmanager.yml.j2 delete mode 100644 roles/tools/templates/prometheus/prometheus.yml.j2 delete mode 100644 roles/tools/templates/prometheus/rules/alerts.yml.j2 diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock deleted file mode 100644 index a1197ab..0000000 --- a/.claude/scheduled_tasks.lock +++ /dev/null @@ -1 +0,0 @@ -{"sessionId":"7fb37d5d-263a-43f6-b0f8-390b858cbf0b","pid":44636,"acquiredAt":1774567037959} \ No newline at end of file diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml index 0aa225b..9269ec8 100644 --- a/.forgejo/workflows/deploy.yml +++ b/.forgejo/workflows/deploy.yml @@ -30,8 +30,6 @@ jobs: - name: Syntax check — main run: ansible-playbook playbooks/deploy.yml --syntax-check - - name: Syntax check — tools - run: ansible-playbook playbooks/tools.yml --syntax-check # ── Deploy (push to master only, after syntax-check passes) ──────────────── deploy: @@ -54,7 +52,6 @@ jobs: chmod 600 ~/.ssh/id_ed25519 # Scan host keys directly (no need for SSH_KNOWN_HOSTS secret) ssh-keyscan -p 22 87.249.49.32 >> ~/.ssh/known_hosts - ssh-keyscan -p 22 85.193.83.9 >> ~/.ssh/known_hosts chmod 600 ~/.ssh/known_hosts - name: Write vault password @@ -65,5 +62,3 @@ jobs: - name: Deploy main server run: ansible-playbook playbooks/deploy.yml -l main - - name: Deploy tools server - run: ansible-playbook playbooks/tools.yml -l tools diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index 273f2f4..23abd99 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -1,100 +1,66 @@ # Бэклог задач > Задачи по приоритету. Обновляй этот файл при добавлении/завершении задач. -> Последнее обновление: 2026-03-23 +> Последнее обновление: 2026-03-27 --- -## 🔴 Критично (сделать как можно скорее) +## 🔴 Критично -- [ ] **Настроить домен csrx.ru в SnappyMail** (admin панель → Domains) - IMAP: `mailserver`, порт 993, SSL - SMTP: `mailserver`, порт 587, STARTTLS, с авторизацией - Без этого вход в mail.csrx.ru невозможен. - -- [ ] **Бэкап tools-сервера** - Добавить скрипт для tools-сервера (роль backup не подключена к tools.yml): - - outline-db (PostgreSQL dump) - - n8n_data (Docker volume) - - mailserver/config (DKIM ключи, аккаунты) - - snappymail/data - -- [ ] **plane-minio не бэкапится** - Вложения и файлы из Plane не входят в текущий бэкап. Нужен dump MinIO bucket `uploads`. +- [ ] **Обновить DNS: hub.walava.io** + Plane переименован с `plane.walava.io` на `hub.walava.io`. + В Cloudflare: добавить A-запись `hub` → 87.249.49.32, удалить старую `plane` (или оставить как алиас). --- ## 🟡 Высокий приоритет -- [ ] **Мониторинг tools-сервера** - Добавить node-exporter на tools-сервер и scrape-конфиг в Prometheus. +- [ ] **Собрать образ discord-bot** + Собрать и запушить `git.walava.io/jack/discord-bot:latest` в Forgejo registry. + После этого раскомментировать сервис в docker-compose.yml.j2. -- [ ] **mem_limit для plane-worker и plane-beat** - Сейчас без ограничений памяти, риск OOM. Добавить `mem_limit: 256m`. +- [ ] **Собрать образ walava-web** + Собрать и запушить `git.walava.io/jack/walava-web:latest` в Forgejo registry. + После этого раскомментировать сервис в docker-compose.yml.j2. -- [ ] **Проверить работу swap** - `roles/base/tasks/swap.yml` должен создавать 2GB swap. Проверить что он реально создан на сервере. - -- [ ] **Верификация бэкапов** - Добавить еженедельную проверку: скачать последний бэкап из S3, сделать test restore в Docker. +- [ ] **Настроить UptimeRobot** + Зарегистрироваться на uptimerobot.com, добавить мониторы: + - git.walava.io + - hub.walava.io + - wiki.walava.io + - auto.walava.io + Настроить уведомления в Telegram/email. --- ## 🟢 Обычный приоритет +- [ ] **Верификация бэкапов** + Добавить еженедельную проверку: скачать последний бэкап из S3, сделать test restore в Docker. + - [ ] **Алерты на сбой бэкапа** - Если `backup-services` завершился с ошибкой — отправить алерт в Telegram. - -- [ ] **Алерты на истечение TLS-сертификатов** - Добавить Prometheus правило: предупреждение за 7 дней до истечения сертификата. - -- [ ] **Prometheus → tools-сервер** - Настроить federated scraping или remote_write для получения метрик tools-сервера. - -- [ ] **Бакет S3 `visual-backup`** - Убедиться что бакет создан в Timeweb как "холодное хранилище" (тип: COLD). - ---- - -## 📋 Бэклог (без срока) + Если backup-services завершился с ошибкой — отправить уведомление в Discord. - [ ] **Обновление образов** - Регулярно проверять новые версии (образы запинены в `defaults/main.yml`): + Регулярно проверять новые версии: - Forgejo (сейчас `:9`) - Traefik (сейчас `v3.3`) - Plane (сейчас `:stable`) - - Authelia (сейчас `4.38`) - -- [ ] **Автоматическое обновление Cloudflare IP-списков** - UFW разрешает только Cloudflare IPs. Список захардкожен — нужен cron для обновления. - -- [ ] **Логи mail-сервера в Loki** - Добавить promtail pipeline для сбора логов mailserver с tools-сервера. - -- [ ] **Двухфакторная аутентификация для n8n** - n8n сейчас доступен только с rate-limit middleware, без 2FA через Authelia. + - Docmost (сейчас `:latest`) + - n8n (сейчас `1.89.2`) --- ## ✅ Сделано -- [x] PTR-запись 85.193.83.9 → mx.csrx.ru (настроена в Timeweb, ожидает propagation 3-24ч) -- [x] DNS Cloudflare: MX→mx, SPF `-all`, DMARC `p=quarantine`, DKIM, autoconfig/autodiscover, A dash/mail/mx -- [x] Бэкап изменён с hourly на каждые 6 часов (00:00, 06:00, 12:00, 18:00) - [x] Traefik с wildcard TLS через Cloudflare DNS-01 -- [x] Vaultwarden (менеджер паролей) - [x] Forgejo + CI/CD через Forgejo Actions - [x] Plane (управление проектами) -- [x] Outline wiki с email magic link авторизацией +- [x] Docmost wiki (заменил Outline) - [x] n8n автоматизация -- [x] docker-mailserver (Postfix + Dovecot), аккаунты: noreply, admin, jack -- [x] SnappyMail вебмейл на mail.csrx.ru -- [x] docker-mailserver на mx.csrx.ru (было mail.csrx.ru) -- [x] Certbot авторотация сертификата (cron 2x/день + deploy-hook) -- [x] DMARC p=quarantine, SPF -all, DKIM -- [x] Мониторинг (Prometheus + Grafana + Loki + AlertManager) - [x] CrowdSec IDS + fail2ban -- [x] Authelia 2FA SSO -- [x] Uptime Kuma статус-страница +- [x] Бэкап каждые 6 часов → S3 - [x] Удаление Syncthing -- [x] cloudflare_zone_id перенесён в vault +- [x] Удаление Vaultwarden +- [x] Удаление tools-сервера (Grafana/Prometheus/Loki/AlertManager/Uptime Kuma) +- [x] Переезд с csrx.ru на walava.io diff --git a/docs/STATUS.md b/docs/STATUS.md index ded3ac5..881223b 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -9,40 +9,30 @@ | Сервер | IP | Роль | Состояние | |--------|----|------|-----------| -| **main** | 87.249.49.32 | Все продуктовые сервисы + мониторинг | ✅ Работает | -| **tools** | 85.193.83.9 | Вспомогательные сервисы (пусто, ожидает мониторинг) | ✅ Работает | - -> mon (188.225.79.34) — планируется к отключению. +| **main** | 87.249.49.32 | Все продуктовые сервисы | ✅ Работает | --- -## Сервисы - -### Основной сервер (main, 87.249.49.32) +## Сервисы (main, 87.249.49.32) | Сервис | Домен | Статус | Заметки | |--------|-------|--------|---------| | Traefik | — | ✅ | Реверс-прокси, TLS wildcard `*.walava.io` через Cloudflare DNS-01 | -| Vaultwarden | vault.walava.io | ✅ | Менеджер паролей | | Forgejo | git.walava.io | ✅ | Git-сервер, SSH на порту 2222 | | Forgejo Actions | — | ✅ | CI/CD runner, деплой через push в master | -| Plane | plane.walava.io | ✅ | Управление проектами | -| Outline Wiki | wiki.walava.io | 🔄 Переезд (CI в процессе) | SMTP: Resend через walava.io | -| n8n | auto.walava.io | 🔄 Переезд (CI в процессе) | Workflow автоматизация | -| outline-mcp | — | 🔄 Переезд | MCP сервер для Claude | -| discord-bot | — | ✅ | Деплой-нотификации в Discord | -| walava-web | walava.io | ✅ | Лендинг (заглушка) | -| Grafana | dash.walava.io | ✅ | Дашборды мониторинга | -| Prometheus | — | ✅ | Сбор метрик, 30 дней хранения | -| Loki + Promtail | — | ✅ | Сбор логов | -| AlertManager | — | ✅ | Алерты в Telegram | +| Plane | hub.walava.io | ✅ | Управление проектами | +| Docmost | wiki.walava.io | ✅ | Wiki. S3: walava-docmost | +| n8n | auto.walava.io | ✅ | Workflow автоматизация | | CrowdSec | — | ✅ | IDS, банит злоумышленников | -| Uptime Kuma | status.walava.io | ✅ | Публичная страница статуса | -| Бэкап | — | ✅ | Каждые 6 часов → S3 `walava-backup/data/`, 7 дней | +| discord-bot | — | ⏳ Ожидает образ | Нужно собрать и запушить в Forgejo registry | +| walava-web | walava.io | ⏳ Ожидает образ | Нужно собрать и запушить в Forgejo registry | -### Tools-сервер (tools, 85.193.83.9) +--- -Outline и n8n **переехали на main**. Сервер ожидает переноса мониторинга. +## Мониторинг + +Используется **UptimeRobot** (внешний, бесплатный) для проверки доступности сервисов. +Grafana/Prometheus/Loki/AlertManager/Uptime Kuma удалены — tools-сервер упразднён. --- @@ -51,27 +41,28 @@ Outline и n8n **переехали на main**. Сервер ожидает п Используется **Resend** (resend.com) для исходящей почты. - Домен `walava.io` верифицирован в Resend - Отправитель: `noreply@walava.io` -- Outline шлёт magic link напрямую через `smtp.resend.com:587` +- Docmost шлёт инвайты/magic links через `smtp.resend.com:587` - API ключ: в vault как `vault_resend_api_key` **Входящая почта не настроена** (нет MX, не нужна). --- -## S3 (Timeweb Object Storage) +## S3 (Timeweb Object Storage, endpoint: s3.twcstorage.ru) | Bucket | Назначение | |--------|-----------| | `walava-backup` | Бэкапы (каждые 6 часов, 7 дней хранения) | -| `walava-outline` | Файлы Outline (вложения, изображения) | +| `walava-docmost` | Файлы Docmost (вложения, изображения) | --- ## CI/CD - Репозиторий: `git.walava.io/jack/infra` -- Триггер: push в `master` запускает `ansible-playbook playbooks/deploy.yml` + `playbooks/tools.yml` +- Триггер: push в `master` запускает `ansible-playbook playbooks/deploy.yml` - Runner: `act_runner` на main-сервере +- SSH ключ: `ci_deploy_pubkey` в `main.yml`, приватный ключ в Forgejo secret `SSH_PRIVATE_KEY` - **Правило**: все изменения только через git, никаких ручных правок на сервере --- @@ -84,35 +75,30 @@ Outline и n8n **переехали на main**. Сервер ожидает п | Forgejo data | tar volume | | Plane DB | pg_dump → gzip | | Plane MinIO | tar volume | -| Outline DB | pg_dump → gzip | +| Docmost DB | pg_dump → gzip | | n8n workflows | tar volume | -| Vaultwarden | tar volume | -| Uptime Kuma | tar volume | -| Traefik acme.json | в составе volumes | Расписание: 00:00, 06:00, 12:00, 18:00 UTC. Хранение: 7 дней. --- +## Сети Docker + +- `proxy` — публичная, только для Traefik (ACME + исходящий интернет) +- `backend` — internal, Traefik ↔ сервисы +- `forgejo-db` — internal, Forgejo ↔ PostgreSQL +- `forgejo-ssh` — публичная, для SSH-клиентов +- `plane-internal` — internal, все компоненты Plane +- `runner-jobs` — публичная, для job-контейнеров CI/CD +- `docmost-internal` — internal, Docmost ↔ DB ↔ Redis +- `n8n-internal` — internal, n8n изоляция + +--- + ## Известные проблемы / TODO | Проблема | Статус | |----------|--------| -| Outline + n8n переезд на main | 🔄 CI задеплоен, ожидаем старт контейнеров | -| Authelia всё ещё запущена | ⚠️ Нужно удалить после деплоя (remove_orphans уберёт) | -| Мониторинг переезд на tools | ⏳ Следующий шаг | -| Отключить mon-сервер | ⏳ После переноса мониторинга | - ---- - -## Сети Docker (main) - -- `proxy` — публичная, только для Traefik (нужна для ACME) -- `backend` — internal, Traefik ↔ сервисы -- `forgejo-db` — internal, Forgejo ↔ PostgreSQL -- `forgejo-ssh` — публичная, для SSH-клиентов -- `plane-internal` — internal, все компоненты Plane -- `runner-jobs` — публичная, для job-контейнеров CI/CD -- `monitoring` — internal, стек мониторинга -- `outline-internal` — internal, Outline ↔ DB ↔ Redis -- `n8n-internal` — internal, n8n изоляция +| discord-bot без образа | ⏳ Собрать и запушить `git.walava.io/jack/discord-bot:latest` | +| walava-web без образа | ⏳ Собрать и запушить `git.walava.io/jack/walava-web:latest` | +| DNS: hub.walava.io | ⚠️ Обновить A-запись в Cloudflare (было plane.walava.io) | diff --git a/inventory/group_vars/all/main.yml b/inventory/group_vars/all/main.yml index 74b3e48..cec8f2e 100644 --- a/inventory/group_vars/all/main.yml +++ b/inventory/group_vars/all/main.yml @@ -4,11 +4,8 @@ domain_base: "walava.io" # Derived domains domain_git: "git.{{ domain_base }}" -domain_plane: "plane.{{ domain_base }}" +domain_plane: "hub.{{ domain_base }}" domain_traefik: "traefik.{{ domain_base }}" -domain_dashboard: "dash.{{ domain_base }}" -domain_auth: "auth.{{ domain_base }}" -domain_status: "status.{{ domain_base }}" domain_wiki: "wiki.{{ domain_base }}" domain_n8n: "auto.{{ domain_base }}" domain_landing: "{{ domain_base }}" @@ -26,9 +23,6 @@ plane_secret_key: "{{ vault_plane_secret_key }}" plane_minio_password: "{{ vault_plane_minio_password }}" traefik_dashboard_htpasswd: "{{ vault_traefik_dashboard_htpasswd }}" forgejo_runner_token: "{{ vault_forgejo_runner_token }}" -grafana_admin_password: "{{ vault_grafana_admin_password }}" -alertmanager_telegram_token: "{{ vault_alertmanager_telegram_token }}" -alertmanager_telegram_chat_id: "{{ vault_alertmanager_telegram_chat_id }}" crowdsec_bouncer_key: "{{ vault_crowdsec_bouncer_key }}" s3_access_key: "{{ vault_s3_access_key }}" s3_secret_key: "{{ vault_s3_secret_key }}" @@ -48,9 +42,7 @@ discord_bot_app_id: "{{ vault_discord_bot_app_id }}" discord_bot_public_key: "{{ vault_discord_bot_public_key }}" timeweb_token: "{{ vault_timeweb_token }}" -# Server IPs (used for cross-server Traefik routing) ip_main: "87.249.49.32" -ip_tools: "85.193.83.9" # CI/CD deploy key (public key — not a secret) ci_deploy_pubkey: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILQSKgqMTT7uGo423Nrb94PgQ8cu2IiH96JOrIKhlvm/ ci-deploy@forgejo-runner" diff --git a/inventory/group_vars/all/vault.yml b/inventory/group_vars/all/vault.yml index cc72b18..8e08b9e 100644 --- a/inventory/group_vars/all/vault.yml +++ b/inventory/group_vars/all/vault.yml @@ -1,190 +1,181 @@ $ANSIBLE_VAULT;1.1;AES256 -34363131346366626337653162663033323865323130636637353265613034383739303463646531 -3333363364363231363438393761343239626538333062620a303233636130376135306666316437 -64393437663837656430383565666337653730393262303036333039626535623134383537373836 -3165643836376665300a356539623533313065316463303963656638323165656337653833633130 -39396639373862323234643362356566393765306366383631316638623931623536646561333965 -33313463376462343063383435393138333236336137363230393462613466366361383761353834 -65666364646164323131313334396164616161386331386364373231363563323835383432346438 -37366664636661343662396231653664383433323533643266323131393363326436643331363831 -66323532623236633337623366343436373731323635376433386433613737303763653563363332 -31313165323364633230386661666135346635633239636363356466666464376236643532643530 -31386264303066646133313836356465363938386565383763303132633961636537303431656231 -36313037666135393733663434353361343630663464346664303261643439373135636436333066 -66343933663466366466633838363438386161353239373165376337373366336231333435306636 -62313762373330343031333064363030373066316662336432306236623439633331663935656432 -33313064323631346363303930333461613963643464363662363830343064303337643238653435 -35643338333134383936303637316232303330626263333434343764366433386139666266616166 -35336131346462336631633432623662633730656662303265396633613333326135353537343965 -37316634376133383632366533626631623134373066653633643266623739623036643238363966 -36353236333161356531326161323433303265336533353539333030303737303837653237376438 -36303562386137646133613864336230306635356233363737313632663963343432386434356237 -62356664623239663562656538386134323563376637323934633535643865643139323166623166 -64356262666464643638626236343439643361366461646131363063613831653337393966383065 -37353063666264343831323861363531646437346438396566343061326236373236346265333237 -65613461393864643539663434316666663235616362363535383334623862323839653666663865 -36383237386464336661616630623535646465623463653939356234363034623038353662396566 -63396435326538663137356338346337323364666136386433663338376562313430373039336333 -61383063613065386132326334663765396535386363623734393130393335393666613265616232 -38336439666166303037353436343063303034323830636135316535386433623031306563353736 -61653733303864383663326664643761326665343363373338333266356464623430393262636465 -62333931643433363262323831646135643237313535303638393763323463386165346335353333 -31613236623739386661376130633736623937333733626163363962303134396231353835343233 -63653533633330333964363839363062323866653236356532636137626236336636623630323036 -39656365306162643535303661396336386265306634363832313764373264636164653966633731 -38313032393164303833363964666238303339313933373262656435396162323462313561623938 -38393230333961643533633264323165356162353738636662326561636535383534373666396334 -63663565343938643035633337633931353333303630323339323533383862363230626533363062 -35656133336532623838383061653737313932656664626630616662626363663432323665363933 -37353130646231343134653834313966383435363532353039393463323536303232396437643161 -35393039656462373436663334353939666163383861336139353263663437363837623664353161 -62346661646463616233653164383638313834626534636638646139653032643935353665653163 -64353733313932656139383030633437333632326261666363626263636438343537636134393961 -66303366376433663737653164616561373938623039383238363264623431633033636466613233 -63343463326264363263643861306561323237633837393233313263386438646136323934636262 -63383531303030643939326134363634646539376530656335316130613963653131633962336638 -30623633666663643762306461356138626634613665306638666135623561666434313539323766 -65623562393734353536306334613430636661333865386238366265663366336239356231626535 -33353837366234666264346137343431386461363162323031313263373533356234393834323535 -65313365363631616365303166343262613139303239363839353638373736363364356535393932 -38336633353732653733323234333263666266303261643836616236363432363937613530646362 -37383234393337613463653537333031343338303465333335666632393630623165383031313535 -38373231633238613331383834333437323962666665333231326539633930356266616636373961 -65336161303236393639653765373834323732666535623330316636346335393064663836343537 -66366336633134653464646137363562376263383662626336323434343330616230323538316439 -63336165396431663037323333316366623537623331626130303633383063646438633931626266 -34356264353732383039363061643733353363366136393964323262633565333862373962393939 -66623966663839306235393765623033636336353639356366306365633637386638316538356333 -35336334383662346234666238633465383765366537623133616465343863626165303465313063 -30353337636432623966656563633332393032323237353336393338613164623831616137396533 -39386631623166663039306262333030623033643832636433353263663636386233386266373732 -61646437303664383434653965353634643365363664336661363863363339383734316262663865 -35303765643536323263323166663061326339396465386134623230383730353539383965356131 -66636534653136323262383166623262346632633231656530653032336136626439373263393933 -33616366393466383462663737613463346561343065333432343962366532373133396462383033 -33666533356239343161313734343234373139653235346130633865646362366433656463646337 -32366265643434383366306364346236393935376263356632346265613032393563646432313734 -31366134643731616366393164353230323138323138303066396235376366663031336466646234 -30386261616530343631303563613530346437623966313933626332663138393732336138353932 -30326531366134663665616432316138383639363766613661633238323165613039396537626433 -37636635646463316636643764306139666264633534666361616538343838373135323161383130 -62653664336662646330666565333632316266643737616436613130373430323935643162366531 -33373061396533316266303334636432663161613635343030373166386463313235633965356161 -63306335376636346238376535613761613539326535303761333863373362383366353936626632 -33383730363162653839623931313936616436633564653835303836663830336337663737353434 -66363964373864306465656366343638386362653836663931653238643136383762326461386366 -36383335393130356666336338346534326432366630633931393934323836653963646132623263 -36346636353334383165323461346536383962303263633438366661656565356539653339316236 -32383163636666346331356435666133646363326539346262393234323863663332623438663336 -65393037333162343334386437646562653735653366393636333338313035373562333731303965 -61313664363861666630336237363037393333643038663537666135363866336432373266656661 -64323065303430653138663330373731616362373433613939343365353065646663613837303265 -33376436653334333762346664613532343765366132356234616566636166336165383339633563 -65393235383135656132343866633866626336666663333230616263383862653539393731666665 -65356537396438333031313835333865613465626236396339313433396537343964623234663033 -37636165636137346432616666653333383162633932363238626238613361623932363632383363 -39316238393939363637633333396137346463393363666236323561323764323065376136336439 -65343534663538616565613130636435643132316437386538343531663130626162623435633834 -37643766356335356535353363386465303963323465313765306334633066356538396464313735 -66313062363739353865336631613935366465373832613461633530316437366235666266313134 -35373730303230306330626630653637333964366130366230363864653662656662346365323739 -63636539306561326131383139306136653833383461363833343034323234623463653462363039 -61363332366237613962356530626165373731343136373032636235353633653130663061343666 -63643339383635303233363435306466326135636539373862396365643232323139656261633030 -39316235346561663439373361626665383030333562623562316665346262376363663830663737 -37386135386535363863353638383035303933333336336234306166386236303339366666663066 -38336330363235616230333438313433323034343863633139326139373436393531373963636636 -61393439346539383661386236613262616366353366373033333365353235383861303735303330 -35393138666232653364643539316333393464626361386630396631663330303561356336353334 -39303063643261633361333137303837336433393363316364633534393064636634303231383339 -37333235666137306438343233663566323931376437663730383938383163613732326130633038 -63626635353665303265316264613263666365363331613231623132383237396663316465663562 -35623563393239333866663433326337326365653030346137363464313632393865393932636637 -34643133366132343130353330366261363031346336663233393132316432376132313361666637 -31643634623435636134383139656131626361663263383762643131356430616632343633313064 -31656365326361336133326462656161666139383630366266383265656239383432653530363930 -34306638306266326361306134656535646266376261386632393763666536383533333530616236 -64643362666335636335303030646437386563313137323630653563633362376165383935396164 -61623765396463323063363931626162303662626563303937373035656138386639383436373135 -64313730653530656562656562343664396234323638633732333031613035663961343663653166 -66653030633435663766663066373635626537316464343766653435313835633861663637323035 -32343932353961386636333230303166666533636336333632316661613939323961393235643436 -38356633646633336261363338663836616237616634636336633631616166633962636338383639 -64383837363634666135653066336363353037316230366639363930663239373232306264386539 -62393861333230333663646138393439613536353461623164626462396363613838623332353530 -64303637613530613736346135626139653861636637373833333766396331323738346334353164 -38313533663230643766313339363233383338616130306334316136343134646339353332303130 -30353861336562613166653431666562326530353162633036346265393439326231313261303866 -36346162643231383962326365383739623936636536396631663639623165363232303638653436 -38623032323739316134653232633262336332343037316161353938343237623238623165313534 -64623430333233353738383763303030363965373137363732343531346665633738353639613165 -32663734303666643365646364353535386265613531613733643732653537393231396230623765 -35616235646232393765343062636639626564393934383462323138376633383032663635643335 -30396534386139663431353935333765366362383539633131653835333933323937643564343166 -64356262633933396637616563383063363666396437306263616233623066343265303138336330 -38336464336663373539343332663562336638656437643063323435373334353030653132653932 -65386232383466363830613030363233373430373636643763633430653233333939333131653665 -30643437643731326435393631303835666138356564393662653165363238376165383532633130 -37333565373330343631366333386636376166323864343334376530656331643831303063326138 -65346263633039363630616538373932373031303363356339623036386630383838373037323536 -37643965326564643435303137323665363735643730393136373439373564306632376166363864 -33346132313331636230386135306465303338303162333465303533306335363638613766373139 -36353737353835313435383439363162363234383864643661313466643262326136363332656666 -61386432653733613632323530353939656632396166373361363061366531303231333036323161 -62636535313439656531373163376563303231616534306238613966653333306233383265633134 -36366266326361636635663037393563353231613362383561333062623731633361636561396335 -62316362373232336531653432613732393938626562646132383232396438643539633731323930 -63306563396132393932343037616566323338656234326537326539623861623065343237653766 -37633230396430343063303565363034393332653261306431636134623764646361396238633364 -31323732363432316134656664333232333838333130373065656633633235653832643061633133 -36373835636536383964663039393838623932646237643966303436343361633732366162613836 -65393930346435376330646337383631626632666638623536393363646335343035613832303732 -62323562653732333830333039646466326531613230336536623864303935346330346331363338 -63626237316633333963303861613366643465316435643364653631333662386331653461653238 -34383335616438613363623737376130306631636230613637303631666339626431313666376530 -38336664666330353636383565653162363735316566333362356261653264303866366462626235 -65613563363634393739633035303230396537366161623832636364356363353261663362376234 -66326164653066396434383534326639346634306138396464393666396363653161613461386631 -63303938393837613136393133316466336634656536333931343839303366666664666162313236 -31663632623633626637373839316537316166323633313063353436663466386535316462653338 -39383734616133633064663631346364613436383634393934643633346231343236643035326237 -62336432323064656633353633326539316638653036363665376263383965656333383533616366 -38646433386636383865343162353633636533333464333564323365373461373665643136663664 -32366366373133613532306434363436643337373235656362623265653435643634613533653236 -36326234666134623165313964313038313863316433373261306138666231396632646630616532 -62613334383230363532333137333933306336656166643831666535663838656363623962663739 -31303533346631323365336431373738393837353239373031376134623464343561333261323763 -38343861313437646539653033636137363036356564363963363037323533393535363231343566 -31353065613731366163303062343131383531303635333266376339393237366662343532363537 -32393066333962313835623165376531313532656264303964353062383537623966613562633039 -39643239343336356130613666626433323938343937373564303738376430313534323632356232 -39336432373161383663313534323237626466346164303862313931356663373432303738343931 -66376465666564616235383339663263393733363066626663646532366165336235323934373732 -66313138623331373239323839373735343162323439656633303233333464613662303833633133 -66663233306537643239636564626564613231636664376133313930636261656438383931376230 -64663138346136383265363938393532636438363333653332373235313638373730623032653064 -34366139303865663338363862313032326635646130376364333565623238616537316236343364 -61363762393661656639393632663064303731373030393538323031313166663232616538376163 -39396363303066646263383733306236616663383066373532363664376137393566323866303836 -39636362396335303938393836633632346264383930363365376430323661396537393261613834 -31636262623235343338303466303733313337303132616130653937356464663738393930656466 -35393831396331396463376461373066303234323330393861373736623139383466353062323434 -32646264346666393434613739353939343566373165383636303531306331663062303938643562 -30616661306539313833646364646535323333623839386535663863353037626661346331353666 -39646264303533666265666539383262626663633234353163393539323266343763336665383732 -65326335316639353566326266306561366230356135666435326637663434323637626539393535 -62316634663262303365333966643564356638373832373566373331663064323765646330343434 -30346336353566626136613264336634626230363664643834373334663331643938343932326465 -65313561653761376635363963303136376566393730363630633965623935363931383864656433 -63323136336562353636643730313661616634666464376334356161326265633463663165303664 -33616630633331343535333761623936613963356630646631623232663161323435623533663830 -35356432663133653831613464333433663330323335393562316335303066633132623264343538 -30356532366661623730366134646337356430323232643661636666346635663437333738383332 -66646638363539613939613332316633366462376264663165356535323233393334366432383738 -39623238623961643632663533306330316265313038653230333861626663336565643961343933 -30356366366332363864306362373835303938653737663531353736323332393232386364353334 -32383939396134663438306233636637366235623437626434313630386563393161646130646635 -66613039353435313062393930663437383732386138323232623063383364393465366532623132 -31636436326234313534633234303261363866336561343630376531346336633232616364383162 -63313835393035313938336238363331623937653438343737346135336237623564383534363434 -33346135303932323065323762643731306666303935336662323939393135323337 +36633364396561303333666464303364636135333533356136613233653230626239353935356135 +3938613863303862333531333832613834383330316666370a613833633230383661376661303435 +36373864356135316238373733386536376539323365306161643330613233313230616233336565 +3838383831643930660a343733356362633661633535663561643661336231386363383437646632 +65356462323933643161313935663463396461383438346530623663323463653037313537313933 +66633233653434353237633036386537643866623631616232346165396231643837626536383563 +34396338386533393731656439633031613065386662633437643937373565663930343933356137 +32623936646261636363653562636338366531633635323330613436643462376665366165313261 +33663564643330363838353564343837643230663433613165616633373563623430653963623734 +30323130353431666666316232633366643532373161383939323638663761393831303666666538 +63356337393064626339653935653762646638376161356139626562643463306234643262303366 +38626461306634366463623866613935303137376334333364613866366364656530383630613864 +30626234306530386133353861663737373462333331363566646433613939313961656336383733 +33323232386633353866636663623230393065663333363539326462336132396234366536353533 +38623164616466396164303138333335633664323861343139663530346262303666643833336132 +64353432646463353861623630326330316432613835376462306262626434343539303263376439 +62343536306236663761313865313236326163663665336135336639633835623738396363396236 +61653639663436353932373561613566633831633139616137306232313261333663326432376333 +39663433643237313338306162663530623861623736373662346136626562383436383439626264 +63333565613531636234643565356339663938303332366233326463306439316564643631643962 +30346230356532353064633364326236323762343261396238383235376431333962613030363439 +31313934373264316162316438613431343131643134613731396566656666656364643766346232 +63333463613661623565323034313464303630323238326363323866643964353932326463633066 +30373435643061376135326339343762366636363032343162653465316132633934636564633231 +64616564373232666366336632666534373430353962383536306230663266333766336435613631 +63643866376264363534656563306461393536666437316135313836343764613431633634623461 +35653439336266323466383432656461636539306432626332613638353264663936666633306331 +35636238366362366238396366396332356433393635613433363665376631393964326430303539 +34373966626134306237346261323234303362383430323738343839616639623265343532353332 +62636338613730353537343463316531343565313861353336393264656361613036366238393735 +36643138343864343233663564303832313064323631326238376438313365616365323765343464 +36356636323437386537623436373230333762656334626330343433363331313835323430646237 +39316464613238656665323561633737616462343531313739353434663734643165396131663363 +36343763363738646131663236366262393834653230666332303031626365323438326332343631 +31313533313562333932626136383033663935326632356334306162343039643562383665376664 +34333636356634313933376238633166323762653132623338653934333231303462376534363833 +64393364613566343236346464303138346365636562356339626134346631633032646631376234 +61303261313335353764646564333334616263666439663339646230353162323533626231613062 +61386162333564613432353231383265366262393764666139623230333135336634633938356236 +32366161643530643563363534386563383338343538326433383436653966636332316139353539 +65636334336531366561666466333038393831393439373365666164373739343933656561303538 +34353436313836656662346435353538376565313765646465613464396164653766343165636533 +31323563333332313833316161306534643664333934303136623961643837376435316233316362 +65623536666466666432643466636566363437383532663133363030363338643331656666323764 +38323733613138303461396330396164623132633536666539643334613163633734646137336666 +61356464356639353035353131303333353532663861633633626536623963613432393366326463 +62323963626261653333383031333766336535366539653036626265626463333763346435656234 +37303833373863623762323062336563323533383437306337663733353730313736633166633033 +31326664386633316437356131633434383161346664306432663034386464623630633366616231 +35373437623630613165646562633562386135376261663762646234303365396661623330313038 +64353263323331323961353031376566643162633334333865393164636132366465356336393836 +39313634363734663935393938353539623165333165373938366333303362316364313466323130 +63353630383333326461663838636534393337666630613333393839363433373035333862616633 +31333538653464396265623663343436353532613334356362373936326364336432393465663239 +35653036616363623333353664646165623137386365326134643733383832373363303737366431 +64383065396362346262636334346236353466396434626365383439386134633439616561383136 +65373534313638326132306134386337623465346162396239643733626238666439623562326637 +30363461613462663931316537323930353337353936633436343035383833333362653666653330 +66616331643235663038323736303231383337373037613361646562653830326238396639383532 +34613762373566313962303537613035633961636138623936336635386331653135633338376533 +31323162323530326562376234373262633734316135303132336236343762353932626330616162 +65396337653163613834353161356530613938323532363133363839386463633161653231313965 +30356361323036313163623962623335393864396136313462616266353330313937376533666136 +65656262356266393765383934383166636262663035666332313431666534626538666638623961 +31623639623261653861353231343536613135613637316237346339383333376134643464636639 +33623937663139366363653266373135336266396262663864623430333764383363393435333135 +36616139346538643434636233633331643535326334393231353338643333626264313632613230 +30623233393835386136663835303634393236366137383238666233636663363965323131386266 +37326439646165613264396631363431313234646538363531326232386266323335383665333062 +66393461363465386330306430613764623462373235616633373134393161666165306636396430 +35396634363132373761306463646535333662393266653334323864633836373336303334336630 +65653636376532623938356666656363616161353562303139386661326136313333386434363931 +63656334373335323133343661353266306239653234333964346139356461623638363566346166 +35633764333764333662333138623439383663353662373465633937376664363130616532326466 +34633734356365636633636561626363396238653739393635656666663530396363373463386630 +30343666393762343763663562333335366436616463343438313932356462363066633535306137 +38393766663534326561646635663333313636653430653239396631343934323639313336373437 +36373966336232323664353330626131633437373937633961323232636166363461636639646539 +66393530663063613834663738333238386631376138363863366533396135613465626639313836 +65646131306535633335353937353362366533343561383539316266373637663065363734363662 +35613437653232356236626337653930316130303933396563613737653765613030643436343534 +63623966356364646463383064626466643337653730616666333066663031653233616537363732 +32336531363830653032663639323662643663633731666634353365653766353538303634633532 +35643363366634623764343766646233343732333535326335323931353939313961623361643336 +62383262383438343761356436303637336262326637386137646436373536383139393033366638 +63386465616661363337633232653531323137373237366663653861373131376162363365623138 +30616435343466653438613863353164633464396435353461336261376661313762336337653963 +61376339643165393539386465633737636236623066363364303863663634643963633139643564 +36653261646462616462626634646262373638323162373736393463323063646237653266373962 +34316432313338363231346235386134333334353733666337323335633330633962323930656637 +30643031383338396338636566613036383031303331313664356165393433393635323333666533 +62653964636265326133333339643433663362376337383533373939386465363438663239313566 +30636463316132623631393135656439643264613830633134386135383931653761313666393938 +64613130616361306663353038386337373033383330336564356332353530636233613635343063 +63393264386234663135333533626162386335353463646535613361653764363165303230386262 +34386261326639636461653035346239663861303662303865373032313361386262366636343037 +34313738376335663734393062666336346665393961343634366237633765386430653063623834 +63333265356664393337663138303466653833343664363262316565653933653039666561393138 +39623366393437316261376436646630623662343933653930653766326461306266646430653761 +36373337613765373438326339356161353661386335363039656430363563616563383832353863 +36613634663063343934623533313337353635393535393530336636386138323330373235663139 +34396538623838363564386132636361386266663762363163616331383938323139613132333263 +62373037316434376531636661646366326163323735373966386133306539383533373161646139 +32623866633434343363623866376232333434653561653664666661343936333932396664366634 +35346234313830353165336437363365376436376431356538303038366466356366393661303835 +35336231643934613739666461656332663530396437313262376138333734613932346436316635 +38326335323531356164666232396633623331646530353166656432373336663035613832396438 +37663030643964633861613337383931636238623864636363336631666539376237306564653930 +34383536363363323937356666353931346562303035396539393038323830666337363664313137 +37326331346139393533643530303231316432356333613063363666653832316431303735363464 +62623130653566646235653436386433666263323561363732343764396439356363643336326137 +61313833353066316138616365623332616564366435363561363864616366336636636563306234 +62626566363663333761393563343037323936323530303930666364626661383965323135383330 +33323339643234653563373661646430313462616634363431633436386334316333353263656636 +34343061363233646238646563623237303231636632336462386565653533313330383362643235 +31303939653664663531626566653439363331616661646231326232306238306663366332396466 +38623834383339346533393664323561396137376539643135383138386630643737623739313964 +33663839336366366537356561646335643236633832663630626337653138623830646661383635 +39646438356533386239336234643065393338316664666435666330633765353633656133626239 +34323962656238363731656161393530643662366337653939366435393738653832613031386230 +32306134623164616365353735386434613739343265303031386163626135326332326666346138 +65656239386332336637646464383530633137386363396531336464366465663036663331653230 +65633464663234353765666337346537363639633833323239373264643837386533393131613165 +30643138656666666639336637393631343464316536663539366333666535306566616362336364 +37663934373462346231353434653632646435373934643831616636306438363363643266663933 +31633364653364363061653566366336333264623238383130313738303837303163316639616361 +65623736653133373266383735363930393263343161343966613233353030316336306238353831 +34356432313563393635633235316238643638623437643135643262376533383463666138653539 +36393130643663646333323365303733303764396330313236353836376366353364316130643036 +38363734353363303262623638656663343037633135653333373265346637363835663936323836 +39376235313962633632353366616531393365376662373265313433666364346532653834636535 +39396531656266376635323833636530353761616362643064343130303833356339613863646234 +32346365323162646561653638666236346166623165323031633961653635613062623631653731 +64613265666236626466373635376565313533616431313963666135376637336633613663383965 +66343231363738656164633164343139303166633362363464386235333437643262646634643063 +64393364333936386565656465653236313062643035663565643537666138393938316565306136 +35346539623539323866653632323136343535626634313633643135336664663166363639313333 +62363866323833313162366466323130666366343534376462636431396433373035643662366236 +61393730366332373666383837613534636166643761323334636430383531333362373632623833 +35626566383231643938386565613461343564313863336337306264323863353362316265393266 +39336462653464386432646137396633653931616565336139383632386133633135623833316666 +36336639666134386636343730383962383564336333646239626532343663666338363031323034 +30396635366563613164363332393933633334616261326533343461323862306539393939353539 +30313832646432366431396566656266363139343861363962626664353762303533306664346162 +31366337376363343762376333386437613330393665383861393465663761613939663039373165 +32323964636462336336643436616563363533613738346230666566643339656630326639386566 +31303638393162306434376238613933316230376334393838336139323762623532366136626663 +65373930353862383633646335363763643637393335643966396464333630356663333832333330 +35346236353337373732653466636364623931323863323439643962643034373765663636616666 +65393632316138343131303030626231653164623661633662663732616336303137326231343334 +36646535356236336537383637393864643566363438366163656134353931666464376332363933 +62623362323539346336643266303035616333366666373437346132343734663763626233663666 +61623061306336373165633365363335383838653534373434323132653465313838366239363338 +62343765336266303661313635623237666263323938636335316133376134303539653661633637 +62326232326235646365643037656138343930653233393833653565306636393063613830336235 +62656132363566396665636633326538336339353638336538363630666662333362376564366136 +61653537653238626538666664303963643463653234316662336162613831376231333038313964 +65656330303936313339336464353433353532613363633432393630303337663765336432643466 +31623437353431666338663036336530313638663862653261326561353864376562666361643230 +63656665653862353534376364653266363032316236373161343934353765623334396331383638 +38613030613830633662373865653036623331356131346464646639663732333135326638656163 +33646263366462616531343066616637346562373533316561646535336164323332653631303232 +63323631383032623466643638363364646136633936366366336530616232326339313036633037 +37643536636264316537386666663663306161666166656662643531633766373839646465393933 +63623933336135383931613539633066656665313733323065613937303366653031343837653037 +30356331396532613935303736343565306430643663393037653932343465326636646664306462 +64336336313737613337373763623562643532373533396435633739303161613433316465643238 +34613733363939653963353165313331326537396336633833306130396266313139356330636132 +64383536646566393530663430376331663630363366313962343433326662616238346234343761 +35623630626265383138353533383561653065333535346263616164396431363663656638646534 +66313537376334666631313061656633393238303536636263346663313066323864333836353262 +39383063653831303066656265393165653764316130356633633539356335306637373735663762 +31653864636632383661613931383836373131396532306637316533386466303663396365666462 +39373034333165636535323963653639333339373637616464653437646334356635366334643735 +66663166616133363334663366393238633963653734383535333737326536326331313830376336 +31383765313931383237303637316230656436636566646139353631336363636632356534323835 +31363338633466653336383265623233623731646534316466636431336433363663366433376430 +31636461626663633639636633623364343066316636313433396533326536646166386562646237 +39313333353135303430336130626531656365303437643230396161356539346161393635383833 +33616232303262303761 diff --git a/inventory/hosts.ini b/inventory/hosts.ini index a962417..068e8ad 100644 --- a/inventory/hosts.ini +++ b/inventory/hosts.ini @@ -1,23 +1,12 @@ # ── VISUAL Infrastructure ───────────────────────────────────────────────────── -# main 87.249.49.32 — все сервисы (Traefik, Forgejo, Plane, Vaultwarden, Outline, n8n, CI/CD) -# tools 85.193.83.9 — мониторинг (Grafana, Prometheus, Loki, AlertManager, Uptime Kuma) +# main 87.249.49.32 — все сервисы (Traefik, Forgejo, Plane, Docmost, n8n, CI/CD) [main] main ansible_host=87.249.49.32 -[tools] -tools ansible_host=85.193.83.9 - -[all_servers:children] -main -tools - -[all_servers:vars] -ansible_python_interpreter=/usr/bin/python3 -ansible_user=deploy - [servers] main ansible_host=87.249.49.32 [servers:vars] ansible_python_interpreter=/usr/bin/python3 +ansible_user=deploy diff --git a/playbooks/site.yml b/playbooks/site.yml index 65d4e8f..8a2e53e 100644 --- a/playbooks/site.yml +++ b/playbooks/site.yml @@ -6,12 +6,5 @@ # # ALL SUBSEQUENT DEPLOYS (idempotent, run as deploy user): # ansible-playbook playbooks/site.yml -# -# Per-server: -# ansible-playbook playbooks/deploy.yml # main server only -# ansible-playbook playbooks/tools.yml # tools server only (Outline + n8n) -# -# Secrets required in ~/.vault-password-file (see CLAUDE.md) - import_playbook: deploy.yml -- import_playbook: tools.yml diff --git a/playbooks/tools.yml b/playbooks/tools.yml deleted file mode 100644 index 5fcd3c4..0000000 --- a/playbooks/tools.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -# Deploy tools stack (Outline wiki) on visual-tools server -# ansible-playbook playbooks/tools.yml -- name: Deploy tools stack - hosts: tools - become: true - - roles: - - role: base - tags: base - - role: docker - tags: docker - - role: tools - tags: tools diff --git a/roles/services/defaults/main.yml b/roles/services/defaults/main.yml index 73276d4..cab281d 100644 --- a/roles/services/defaults/main.yml +++ b/roles/services/defaults/main.yml @@ -18,16 +18,8 @@ plane_redis_image: "redis:7-alpine" # Рекомендуется перейти на alpine/minio или собирать из исходников. plane_minio_image: "minio/minio:RELEASE.2025-04-22T22-12-26Z" # https://hub.docker.com/r/minio/minio/tags act_runner_image: "gitea/act_runner:0.3.0" # https://hub.docker.com/r/gitea/act_runner/tags -prometheus_image: "prom/prometheus:v3.4.0" # https://hub.docker.com/r/prom/prometheus/tags -node_exporter_image: "prom/node-exporter:v1.9.1" # https://hub.docker.com/r/prom/node-exporter/tags -cadvisor_image: "gcr.io/cadvisor/cadvisor:v0.52.1" # https://github.com/google/cadvisor/releases -grafana_image: "grafana/grafana:11.6.1" # https://hub.docker.com/r/grafana/grafana/tags -alertmanager_image: "prom/alertmanager:v0.28.1" # https://hub.docker.com/r/prom/alertmanager/tags -loki_image: "grafana/loki:3.4.3" # https://hub.docker.com/r/grafana/loki/tags -promtail_image: "grafana/promtail:3.4.3" # https://hub.docker.com/r/grafana/promtail/tags crowdsec_image: "crowdsecurity/crowdsec:v1.6.8" # https://hub.docker.com/r/crowdsecurity/crowdsec/tags redis_image: "redis:7-alpine" -uptime_kuma_image: "louislam/uptime-kuma:1" # https://hub.docker.com/r/louislam/uptime-kuma/tags docmost_image: "docmost/docmost:latest" # https://hub.docker.com/r/docmost/docmost/tags docmost_db_image: "postgres:16-alpine" docmost_redis_image: "redis:7-alpine" diff --git a/roles/services/tasks/configs.yml b/roles/services/tasks/configs.yml index 18eed3a..f3efded 100644 --- a/roles/services/tasks/configs.yml +++ b/roles/services/tasks/configs.yml @@ -59,15 +59,6 @@ changed_when: false ignore_errors: true -- name: Deploy Promtail config - ansible.builtin.template: - src: loki/promtail.yml.j2 - dest: "{{ services_root }}/loki/promtail.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - notify: Restart stack - - name: Deploy CrowdSec acquisition config ansible.builtin.template: src: crowdsec/acquis.yaml.j2 diff --git a/roles/services/tasks/main.yml b/roles/services/tasks/main.yml index 38b3b1d..38ff159 100644 --- a/roles/services/tasks/main.yml +++ b/roles/services/tasks/main.yml @@ -16,9 +16,6 @@ - "{{ plane_redis_image }}" - "{{ plane_minio_image }}" - "{{ act_runner_image }}" - - "{{ node_exporter_image }}" - - "{{ cadvisor_image }}" - - "{{ promtail_image }}" - "{{ crowdsec_image }}" - "{{ docmost_image }}" - "{{ docmost_db_image }}" @@ -30,30 +27,6 @@ delay: 30 until: pull_result.rc == 0 -# ── UFW: allow tools Prometheus to scrape exporters on main ────────────────── -- name: Allow tools server to scrape node-exporter - community.general.ufw: - rule: allow - port: "9100" - proto: tcp - src: "{{ ip_tools }}" - -- name: Allow tools server to scrape cAdvisor - community.general.ufw: - rule: allow - port: "8080" - proto: tcp - src: "{{ ip_tools }}" - -- name: Remove legacy SMTP relay UFW rule (port 1025) - community.general.ufw: - rule: allow - port: "1025" - proto: tcp - src: "{{ ip_tools }}" - delete: true - failed_when: false - - name: Deploy Docker Compose stack community.docker.docker_compose_v2: project_src: "{{ services_root }}" diff --git a/roles/services/templates/authelia/configuration.yml.j2 b/roles/services/templates/authelia/configuration.yml.j2 deleted file mode 100644 index f413a65..0000000 --- a/roles/services/templates/authelia/configuration.yml.j2 +++ /dev/null @@ -1,72 +0,0 @@ -# Generated by Ansible — do not edit manually -# Authelia v4 configuration - -theme: dark - -server: - host: 0.0.0.0 - port: 9091 - -log: - level: warn - -jwt_secret: "{{ authelia_jwt_secret }}" - -default_redirection_url: "https://{{ domain_auth }}" - -session: - name: authelia_session - secret: "{{ authelia_session_secret }}" - expiration: 12h - inactivity: 30m - domain: "{{ domain_base }}" - redis: - host: authelia-redis - port: 6379 - -regulation: - max_retries: 3 - find_time: 2m - ban_time: 10m - -storage: - encryption_key: "{{ authelia_storage_key }}" - local: - path: /config/db.sqlite3 - -notifier: - disable_startup_check: true - filesystem: - filename: /config/notifications.txt - -authentication_backend: - password_reset: - disable: false - file: - path: /config/users.yml - password: - algorithm: argon2id - iterations: 3 - memory: 65536 - parallelism: 4 - key_length: 32 - salt_length: 16 - -access_control: - default_policy: deny - rules: - # Authelia portal itself — всегда доступен - - domain: "{{ domain_auth }}" - policy: bypass - - # Traefik dashboard — только admin, требует 2FA - - domain: "{{ domain_traefik }}" - policy: two_factor - subject: "group:admins" - - # Plane god-mode — только admin, требует 2FA - - domain: "{{ domain_plane }}" - resources: - - "^/god-mode/.*$" - policy: two_factor - subject: "group:admins" diff --git a/roles/services/templates/authelia/users.yml.j2 b/roles/services/templates/authelia/users.yml.j2 deleted file mode 100644 index f5f657d..0000000 --- a/roles/services/templates/authelia/users.yml.j2 +++ /dev/null @@ -1,12 +0,0 @@ -# Generated by Ansible — do not edit manually -# Authelia users database -# To update password hash: docker exec authelia authelia crypto hash generate argon2 --password 'yourpassword' -# To set up TOTP: visit https://{{ domain_auth }} and login — QR code will appear on first use - -users: - {{ authelia_admin_user }}: - displayname: "Admin" - password: "{{ authelia_admin_password_hash }}" - email: "{{ acme_email }}" - groups: - - admins diff --git a/roles/services/templates/docker-compose.yml.j2 b/roles/services/templates/docker-compose.yml.j2 index 1971ef9..22850ba 100644 --- a/roles/services/templates/docker-compose.yml.j2 +++ b/roles/services/templates/docker-compose.yml.j2 @@ -23,9 +23,6 @@ networks: internal: true runner-jobs: driver: bridge - monitoring: - driver: bridge - internal: true docmost-internal: driver: bridge internal: true @@ -396,58 +393,6 @@ services: - backend - runner-jobs - # ── Monitoring exporters (metrics scraped by tools Prometheus over network) ── - # Ports exposed: tools server must have UFW rules allowing ip_main:9100/8080 - node-exporter: - image: {{ node_exporter_image }} - container_name: node-exporter - restart: unless-stopped - networks: - - monitoring - ports: - - "9100:9100" - pid: host - volumes: - - /proc:/host/proc:ro - - /sys:/host/sys:ro - - /:/rootfs:ro - command: - - "--path.procfs=/host/proc" - - "--path.sysfs=/host/sys" - - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" - - cadvisor: - image: {{ cadvisor_image }} - container_name: cadvisor - restart: unless-stopped - networks: - - monitoring - ports: - - "8080:8080" - privileged: true - devices: - - /dev/kmsg - volumes: - - /:/rootfs:ro - - /var/run:/var/run:ro - - /sys:/sys:ro - - /var/lib/docker:/var/lib/docker:ro - - /dev/disk:/dev/disk:ro - - # ── Logging (Promtail pushes to Loki on tools server) ───────────────────── - promtail: - image: {{ promtail_image }} - container_name: promtail - restart: unless-stopped - networks: - - monitoring - volumes: - - /var/log:/var/log:ro - - /var/lib/docker/containers:/var/lib/docker/containers:ro - - /var/run/docker.sock:/var/run/docker.sock:ro - - {{ services_root }}/loki/promtail.yml:/etc/promtail/config.yml:ro - command: -config.file=/etc/promtail/config.yml - # ── Security Stack ───────────────────────────────────────────────────────── # CrowdSec: анализирует логи Traefik, банит злоумышленников по IP # Использует community-репутацию + локальный анализ поведения @@ -456,8 +401,7 @@ services: container_name: crowdsec restart: unless-stopped networks: - - monitoring - - proxy # needs internet for hub/threat-intel downloads + - proxy environment: - COLLECTIONS=crowdsecurity/traefik crowdsecurity/http-cve crowdsecurity/linux - GID=1000 @@ -481,7 +425,6 @@ services: # FORGEJO_TOKEN: "${FORGEJO_RUNNER_TOKEN}" # FORGEJO_URL: "https://{{ domain_git }}" # FORGEJO_REPO: "jack/infra" - # PROMETHEUS_URL: "http://{{ ip_tools }}:9090" # volumes: # - /var/run/docker.sock:/var/run/docker.sock:ro # networks: diff --git a/roles/services/templates/loki/loki.yml.j2 b/roles/services/templates/loki/loki.yml.j2 deleted file mode 100644 index 0d801a7..0000000 --- a/roles/services/templates/loki/loki.yml.j2 +++ /dev/null @@ -1,36 +0,0 @@ -# Generated by Ansible — do not edit manually -auth_enabled: false - -server: - http_listen_port: 3100 - grpc_listen_port: 9096 - -common: - instance_addr: 127.0.0.1 - path_prefix: /loki - storage: - filesystem: - chunks_directory: /loki/chunks - rules_directory: /loki/rules - replication_factor: 1 - ring: - kvstore: - store: inmemory - -schema_config: - configs: - - from: 2020-10-24 - store: tsdb - object_store: filesystem - schema: v13 - index: - prefix: index_ - period: 24h - -limits_config: - retention_period: 30d - -compactor: - working_directory: /loki/retention - delete_request_store: filesystem - retention_enabled: true diff --git a/roles/services/templates/loki/promtail.yml.j2 b/roles/services/templates/loki/promtail.yml.j2 deleted file mode 100644 index ff7d3b7..0000000 --- a/roles/services/templates/loki/promtail.yml.j2 +++ /dev/null @@ -1,38 +0,0 @@ -# Generated by Ansible — do not edit manually -server: - http_listen_port: 9080 - grpc_listen_port: 0 - -positions: - filename: /tmp/positions.yaml - -clients: - - url: http://{{ ip_tools }}:3100/loki/api/v1/push - -scrape_configs: - - job_name: docker - docker_sd_configs: - - host: unix:///var/run/docker.sock - refresh_interval: 5s - relabel_configs: - - source_labels: [__meta_docker_container_name] - regex: /(.*) - target_label: container - - source_labels: [__meta_docker_container_log_stream] - target_label: stream - - source_labels: [__meta_docker_container_label_com_docker_compose_service] - target_label: service - - - job_name: syslog - static_configs: - - targets: [localhost] - labels: - job: syslog - __path__: /var/log/syslog - - - job_name: auth - static_configs: - - targets: [localhost] - labels: - job: auth - __path__: /var/log/auth.log diff --git a/roles/services/templates/traefik/dynamic/routes.yml.j2 b/roles/services/templates/traefik/dynamic/routes.yml.j2 index 104332e..34aba20 100644 --- a/roles/services/templates/traefik/dynamic/routes.yml.j2 +++ b/roles/services/templates/traefik/dynamic/routes.yml.j2 @@ -65,22 +65,6 @@ http: middlewares: [rate-limit-default] priority: 10 - grafana: - rule: "Host(`{{ domain_dashboard }}`)" - entrypoints: [websecure] - tls: - certresolver: letsencrypt - service: grafana - middlewares: [rate-limit-default] - - uptime-kuma: - rule: "Host(`{{ domain_status }}`)" - entrypoints: [websecure] - tls: - certresolver: letsencrypt - service: uptime-kuma - middlewares: [rate-limit-default] - walava-landing: rule: "Host(`{{ domain_landing }}`)" entrypoints: [websecure] @@ -131,16 +115,6 @@ http: servers: - url: "http://plane-space:3000" - grafana: - loadBalancer: - servers: - - url: "http://{{ ip_tools }}:3000" - - uptime-kuma: - loadBalancer: - servers: - - url: "http://{{ ip_tools }}:3001" - walava-landing: loadBalancer: servers: diff --git a/roles/tools/defaults/main.yml b/roles/tools/defaults/main.yml deleted file mode 100644 index c482860..0000000 --- a/roles/tools/defaults/main.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -tools_root: /opt/tools - -# Image versions (mirrors services role — keep in sync) -prometheus_image: "prom/prometheus:v3.4.0" -node_exporter_image: "prom/node-exporter:v1.9.1" -cadvisor_image: "gcr.io/cadvisor/cadvisor:v0.52.1" -grafana_image: "grafana/grafana:11.6.1" -alertmanager_image: "prom/alertmanager:v0.28.1" -loki_image: "grafana/loki:3.4.3" -uptime_kuma_image: "louislam/uptime-kuma:1" diff --git a/roles/tools/files/grafana/dashboards/cadvisor.json b/roles/tools/files/grafana/dashboards/cadvisor.json deleted file mode 100644 index fd0a30c..0000000 --- a/roles/tools/files/grafana/dashboards/cadvisor.json +++ /dev/null @@ -1,817 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "Prometheus as the datasource is obligatory", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "7.4.5" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": 14282, - "graphTooltip": 0, - "id": null, - "iteration": 1617715580880, - "links": [], - "panels": [ - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 8, - "panels": [], - "title": "CPU", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 1 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name) *100", - "hide": false, - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:606", - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:607", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 11, - "panels": [], - "title": "Memory", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_rss{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)", - "hide": false, - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:606", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:607", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_cache{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)", - "hide": false, - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Cached", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:606", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:607", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 2, - "panels": [], - "title": "Network", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 18 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)", - "hide": false, - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Received Network Traffic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:674", - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:675", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 18 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)", - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sent Network Traffic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:832", - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:833", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 19, - "panels": [], - "title": "Misc", - "type": "row" - }, - { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": { - "align": null, - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "id" - }, - "properties": [ - { - "id": "custom.width", - "value": 260 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Running" - }, - "properties": [ - { - "id": "unit", - "value": "d" - }, - { - "id": "decimals", - "value": 1 - }, - { - "id": "custom.displayMode", - "value": "color-text" - }, - { - "id": "color", - "value": { - "fixedColor": "dark-green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 17, - "options": { - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "7.4.5", - "targets": [ - { - "expr": "(time() - container_start_time_seconds{instance=~\"$host\",name=~\"$container\",name=~\".+\"})/86400", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Containers Info", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "container_label_com_docker_compose_project", - "container_label_com_docker_compose_project_working_dir", - "image", - "instance", - "name", - "Value", - "container_label_com_docker_compose_service" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "Value": "Running", - "container_label_com_docker_compose_project": "Label", - "container_label_com_docker_compose_project_working_dir": "Working dir", - "container_label_com_docker_compose_service": "Service", - "image": "Registry Image", - "instance": "Instance", - "name": "Name" - } - } - } - ], - "type": "table" - } - ], - "schemaVersion": 27, - "style": "dark", - "tags": [ - "cadvisor", - "docker" - ], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values({__name__=~\"container.*\"},instance)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Host", - "multi": false, - "name": "host", - "options": [], - "query": { - "query": "label_values({__name__=~\"container.*\"},instance)", - "refId": "Prometheus-host-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 5, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [], - "query": { - "query": "label_values({__name__=~\"container.*\", instance=~\"$host\"},name)", - "refId": "Prometheus-container-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Cadvisor exporter", - "uid": "pMEd7m0Mz", - "version": 1, - "description": "Simple exporter for cadvisor only" -} \ No newline at end of file diff --git a/roles/tools/files/grafana/dashboards/node-exporter-full.json b/roles/tools/files/grafana/dashboards/node-exporter-full.json deleted file mode 100644 index fdc3a00..0000000 --- a/roles/tools/files/grafana/dashboards/node-exporter-full.json +++ /dev/null @@ -1,15766 +0,0 @@ -{ - "__requires": [ - { - "type": "panel", - "id": "bargauge", - "name": "Bar gauge", - "version": "" - }, - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "11.6.1" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "id": null, - "links": [ - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "GitHub", - "type": "link", - "url": "https://github.com/rfmoz/grafana-dashboards" - }, - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "Grafana", - "type": "link", - "url": "https://grafana.com/grafana/dashboards/1860" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "title": "Quick CPU / Mem / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Resource pressure via PSI", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green" - }, - { - "color": "dark-yellow", - "value": 70 - }, - { - "color": "dark-red", - "value": 90 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 323, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 300, - "minVizHeight": 10, - "minVizWidth": 0, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "text": {}, - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "CPU", - "range": false, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Mem", - "range": false, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "I/O", - "range": false, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "legendFormat": "Irq", - "range": false, - "refId": "D", - "step": 240 - } - ], - "title": "Pressure", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Overall CPU busy percentage (averaged across all cores)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 20, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$node\"}[$__rate_interval])))", - "instant": true, - "legendFormat": "", - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "System load over all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 155, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "scalar(node_load1{instance=\"$node\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Sys Load", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Real RAM usage excluding cache and reclaimable memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "id": 16, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{instance=\"$node\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"})) * 100, 0)", - "format": "time_series", - "instant": true, - "range": false, - "refId": "B", - "step": 240 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of swap space currently used by the system", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 25 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 21, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"})) * 100", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Used Root FS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "(\n (node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n - node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"})\n / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n) * 100\n", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 1 - }, - "id": 75, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RAM Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 1 - }, - "id": 18, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)" - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 70 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 23, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RootFS Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 20, - "y": 3 - }, - "id": 15, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", - "instant": true, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 263, - "panels": [], - "title": "Basic CPU / Mem / Net / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "CPU time spent busy vs idle, split by activity type", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 77, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "exemplar": false, - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "instant": false, - "legendFormat": "Busy System", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy User", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy Iowait", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy IRQs", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Busy Other", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Idle", - "range": true, - "refId": "F", - "step": 240 - } - ], - "title": "CPU Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "RAM and swap usage overview, including caches", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Swap used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache + Buffer" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 78, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Total", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Used", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Cache + Buffer", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Free", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Swap used", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "Memory Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-interface network traffic (receive and transmit) in bits per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Tx.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 74, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "Rx {{device}}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "Tx {{device}} ", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of filesystem space used for each mounted device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 152, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "((node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"} - node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) * 100", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used Basic", - "type": "timeseries" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 265, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "CPU time usage split by state, normalized across all CPU cores", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Idle - Waiting for something to happen" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Iowait - Waiting for I/O to complete" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Irq - Servicing interrupts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Nice - Niced processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Softirq - Servicing softirqs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Steal - Time spent in other operating systems when running in a virtualized environment" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCE2DE", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "System - Processes executing in kernel mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "User - Normal processes executing in user mode" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195CE", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Guest CPU usage" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "custom.stacking", - "value": { - "group": "A", - "mode": "none" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "interval": "", - "legendFormat": "System - Processes executing in kernel mode", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"user\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "User - Normal processes executing in user mode", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"nice\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Nice - Niced processes executing in user mode", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"iowait\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Iowait - Waiting for I/O to complete", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"irq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Irq - Servicing interrupts", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"softirq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Softirq - Servicing softirqs", - "range": true, - "refId": "F", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"steal\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", - "range": true, - "refId": "G", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "legendFormat": "Idle - Waiting for something to happen", - "range": true, - "refId": "H", - "step": 240 - }, - { - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))) > 0", - "format": "time_series", - "legendFormat": "Guest CPU usage", - "range": true, - "refId": "I", - "step": 240 - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breakdown of physical memory and swap usage. Hardware-detected memory errors are also displayed", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap - Swap memory usage" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused - Free memory unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Hardware Corrupted - *./" - }, - "properties": [ - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Apps - Memory used by user-space applications", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Cache - Parked file data (file content) cache", - "range": true, - "refId": "E", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Buffers - Block device (e.g. harddisk) cache", - "range": true, - "refId": "F", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Unused - Free memory unassigned", - "range": true, - "refId": "G", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Swap - Swap space used", - "range": true, - "refId": "H", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", - "range": true, - "refId": "I", - "step": 240 - } - ], - "title": "Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Incoming and outgoing network traffic per interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 433 - }, - "id": 84, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Network interface utilization as a percentage of its maximum capacity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 433 - }, - "id": 338, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"}", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"})", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Saturation", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk I/O operations per second for each device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 445 - }, - "id": 229, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk I/O throughput per device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read*./" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 445 - }, - "id": 42, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Throughput", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Amount of available disk space per mounted filesystem, excluding rootfs. Based on block availability to non-root users", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 457 - }, - "id": 43, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "metric": "", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "legendFormat": "{{mountpoint}} - Free", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "legendFormat": "{{mountpoint}} - Size", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Filesystem Space Available", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Disk usage (used = total - available) per mountpoint", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 457 - }, - "id": 156, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Filesystem Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of time the disk was actively processing I/O operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 469 - }, - "id": 127, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"} [$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk I/O Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How often tasks experience CPU, memory, or I/O delays. “Some” indicates partial slowdown; “Full” indicates all tasks are stalled. Based on Linux PSI metrics:\nhttps://docs.kernel.org/accounting/psi.html", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "some (-) / full (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Some.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Some.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 469 - }, - "id": 322, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "CPU - Some", - "range": true, - "refId": "CPU some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Memory - Some", - "range": true, - "refId": "Memory some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Memory - Full", - "range": true, - "refId": "Memory full", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "I/O - Some", - "range": true, - "refId": "I/O some", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "I/O - Full", - "range": true, - "refId": "I/O full", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "IRQ - Full", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Pressure Stall Information", - "type": "timeseries" - } - ], - "title": "CPU / Memory / Net / Disk", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 266, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays committed memory usage versus the system's commit limit. Exceeding the limit is allowed under Linux overcommit policies but may increase OOM risks under high load", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*CommitLimit - *./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 732 - }, - "id": 135, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Committed_AS – Memory promised to processes (not necessarily used)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "CommitLimit - Max allowable committed memory", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Committed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory currently dirty (modified but not yet written to disk), being actively written back, or held by writeback buffers. High dirty or writeback memory may indicate disk I/O pressure or delayed flushing", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 732 - }, - "id": 130, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Writeback – Memory currently being flushed to disk", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "WritebackTmp – FUSE temporary writeback buffers", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Dirty – Memory marked dirty (pending write to disk)", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NFS Unstable – Pages sent to NFS server, awaiting storage commit", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory Writeback and Dirty", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Kernel slab memory usage, separated into reclaimable and non-reclaimable categories. Reclaimable memory can be freed under memory pressure (e.g., caches), while unreclaimable memory is locked by the kernel for core functions", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 932 - }, - "id": 131, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SUnreclaim – Non-reclaimable slab memory (kernel objects)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "SReclaimable – Potentially reclaimable slab memory (e.g., inode cache)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Slab", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory used for mapped files (such as libraries) and shared memory (shmem and tmpfs), including variants backed by huge pages", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 932 - }, - "id": 138, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Mapped – Memory mapped from files (e.g., libraries, mmap)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Shmem – Shared memory used by processes and tmpfs", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "ShmemHugePages – Shared memory (shmem/tmpfs) allocated with HugePages", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PMD Mapped – Shmem/tmpfs backed by Transparent HugePages (PMD)", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory Shared and Mapped", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Proportion of memory pages in the kernel's active and inactive LRU lists relative to total RAM. Active pages have been recently used, while inactive pages are less recently accessed but still resident in memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Active.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Inactive.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 942 - }, - "id": 136, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "(node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "legendFormat": "Inactive – Less recently used memory, more likely to be reclaimed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "(node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})\n", - "format": "time_series", - "legendFormat": "Active – Recently used memory, retained unless under pressure", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory LRU Active / Inactive (%)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breakdown of memory pages in the kernel's active and inactive LRU lists, separated by anonymous (heap, tmpfs) and file-backed (caches, mmap) pages.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 942 - }, - "id": 191, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Inactive_anon – Anonymous memory on inactive LRU (incl. tmpfs & swap cache)", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Active_file - File-backed memory on active LRU list", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Active_anon – Anonymous memory on active LRU (incl. tmpfs & swap cache)", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory LRU Active / Inactive Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks kernel memory used for CPU-local structures, per-thread stacks, and bounce buffers used for I/O on DMA-limited devices. These areas are typically small but critical for low-level operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 952 - }, - "id": 160, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "KernelStack – Kernel stack memory (per-thread, non-reclaimable)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PerCPU – Dynamically allocated per-CPU memory (used by kernel modules)", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Bounce Memory – I/O buffer for DMA-limited devices", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Kernel / CPU / IO", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Usage of the kernel's vmalloc area, which provides virtual memory allocations for kernel modules and drivers. Includes total, used, and largest free block sizes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Total.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 952 - }, - "id": 70, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Free Chunk – Largest available block in vmalloc area", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Total – Total size of the vmalloc memory area", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Vmalloc Used – Portion of vmalloc area currently in use", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Vmalloc", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory used by anonymous pages (not backed by files), including standard and huge page allocations. Includes heap, stack, and memory-mapped anonymous regions", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 962 - }, - "id": 129, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "AnonHugePages – Anonymous memory using HugePages", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "AnonPages – Anonymous memory (non-file-backed)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Anonymous", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Memory that is locked in RAM and cannot be swapped out. Includes both kernel-unevictable memory and user-level memory locked with mlock()", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 962 - }, - "id": 137, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Unevictable – Kernel-pinned memory (not swappable)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Mlocked – Application-locked memory via mlock()", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Unevictable and MLocked", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How much memory is directly mapped in the kernel using different page sizes (4K, 2M, 1G). Helps monitor large page utilization in the direct map region", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 972 - }, - "id": 128, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "DirectMap 1G – Memory mapped with 1GB pages", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "DirectMap 2M – Memory mapped with 2MB pages", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "DirectMap 4K – Memory mapped with 4KB pages", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory DirectMap", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays HugePages memory usage in bytes, including allocated, free, reserved, and surplus memory. All values are calculated based on the number of huge pages multiplied by their configured size", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 972 - }, - "id": 140, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Used – Currently allocated", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Reserved – Promised but unused", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Surplus – Dynamic pool extension", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "HugePages Total – Reserved memory", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Memory HugePages", - "type": "timeseries" - } - ], - "title": "Memory Meminfo", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 267, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of memory pages being read from or written to disk (page-in and page-out operations). High page-out may indicate memory pressure or swapping activity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 733 - }, - "id": 176, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pagesin - Page in ops", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pagesout - Page out ops", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Pages In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate at which memory pages are being swapped in from or out to disk. High swap-out activity may indicate memory pressure", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 733 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pswpin - Pages swapped in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pswpout - Pages swapped out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Memory Pages Swap In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of memory page faults, split into total, major (disk-backed), and derived minor (non-disk) faults. High major fault rates may indicate memory pressure or insufficient RAM", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Pgfault - Page major and minor fault ops" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "none" - } - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 913 - }, - "id": 175, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgfault - Page major and minor fault ops", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgmajfault - Major page fault ops", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Pgminfault - Minor page fault ops", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Memory Page Faults", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of Out-of-Memory (OOM) kill events. A non-zero value indicates the kernel has terminated one or more processes due to memory exhaustion", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "OOM Kills" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 913 - }, - "id": 307, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "OOM Kills", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "OOM Killer", - "type": "timeseries" - } - ], - "title": "Memory Vmstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 23 - }, - "id": 293, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the system clock's estimated and maximum error, as well as its offset from the reference clock (e.g., via NTP). Useful for detecting synchronization drift", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 734 - }, - "id": 260, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Estimated error", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Offset local vs reference", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Maximum error", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Time Synchronized Drift", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "NTP phase-locked loop (PLL) time constant used by the kernel to control time adjustments. Lower values mean faster correction but less stability", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 734 - }, - "id": 291, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PLL Time Constant", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Time PLL Adjust", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows whether the system clock is synchronized to a reliable time source, and the current frequency correction ratio applied by the kernel to maintain synchronization", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 884 - }, - "id": 168, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Sync status (1 = ok)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Frequency Adjustment", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "Tick Interval", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "TAI Offset", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Time Synchronized Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays the PPS signal's frequency offset and stability (jitter) in hertz. Useful for monitoring high-precision time sources like GPS or atomic clocks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "rothz" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 884 - }, - "id": 333, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_pps_frequency_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Frequency Offset", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_pps_stability_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Frequency Stability", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PPS Frequency / Stability", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks PPS signal timing jitter and shift compared to system clock", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 894 - }, - "id": 334, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_timex_pps_jitter_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Jitter", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_timex_pps_shift_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Shift", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PPS Time Accuracy", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of PPS synchronization diagnostics including calibration events, jitter violations, errors, and frequency stability exceedances", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 894 - }, - "id": 335, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_timex_pps_calibration_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Calibrations/sec", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_error_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Errors/sec", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_stability_exceeded_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Stability Exceeded/sec", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_timex_pps_jitter_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "PPS Jitter Events/sec", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "PPS Sync Events", - "type": "timeseries" - } - ], - "title": "System Timesync", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 312, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Processes currently in runnable or blocked states. Helps identify CPU contention or I/O wait bottlenecks.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 735 - }, - "id": 62, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Blocked (I/O Wait)", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Runnable (Ready for CPU)", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Processes Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of processes in each state (e.g., running, sleeping, zombie). Requires --collector.processes to be enabled in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "D" - }, - "properties": [ - { - "id": "displayName", - "value": "Uninterruptible Sleeping" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "I" - }, - "properties": [ - { - "id": "displayName", - "value": "Idle Kernel Thread" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "R" - }, - "properties": [ - { - "id": "displayName", - "value": "Running" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "S" - }, - "properties": [ - { - "id": "displayName", - "value": "Interruptible Sleeping" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "T" - }, - "properties": [ - { - "id": "displayName", - "value": "Stopped" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "X" - }, - "properties": [ - { - "id": "displayName", - "value": "Dead" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Z" - }, - "properties": [ - { - "id": "displayName", - "value": "Zombie" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 735 - }, - "id": 315, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ state }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Processes Detailed States", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of new processes being created on the system (forks/sec).", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 765 - }, - "id": 148, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Process Forks per second", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Processes Forks", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows CPU saturation per core, calculated as the proportion of time spent waiting to run relative to total time demanded (running + waiting).", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*waiting.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 765 - }, - "id": 305, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "CPU {{ cpu }} - Running", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "CPU {{cpu}} - Waiting Queue", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n/\n(irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) + irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))\n", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}}", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "CPU Saturation per Core", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of active PIDs on the system and the configured maximum allowed. Useful for detecting PID exhaustion risk. Requires --collector.processes in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "PIDs limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 775 - }, - "id": 313, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Number of PIDs", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "PIDs limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "PIDs Number and Limit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of active threads on the system and the configured thread limit. Useful for monitoring thread pressure. Requires --collector.processes in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Threads limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 775 - }, - "id": 314, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Allocated threads", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Threads limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Threads Number and Limit", - "type": "timeseries" - } - ], - "title": "System Processes", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 269, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-second rate of context switches and hardware interrupts. High values may indicate intense CPU or I/O activity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 816 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Context switches", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "Interrupts", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Context Switches / Interrupts", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "System load average over 1, 5, and 15 minutes. Reflects the number of active or waiting processes. Values above CPU core count may indicate overload", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "CPU Core Count" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 816 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_load1{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 1m", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_load5{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 5m", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_load15{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Load 15m", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "legendFormat": "CPU Core Count", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "System Load", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Real-time CPU frequency scaling per core, including average minimum and maximum allowed scaling frequencies", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "hertz" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Max" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Min" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 826 - }, - "id": 321, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_cpu_scaling_frequency_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{ cpu }}", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_max_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "interval": "", - "legendFormat": "Max", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_min_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "interval": "", - "legendFormat": "Min", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "CPU Frequency Scaling", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of scheduling timeslices executed per CPU. Reflects how frequently the scheduler switches tasks on each core", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 826 - }, - "id": 306, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{ cpu }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Schedule Timeslices", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Breaks down hardware interrupts by type and device. Useful for diagnosing IRQ load on network, disk, or CPU interfaces. Requires --collector.interrupts to be enabled in node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 836 - }, - "id": 259, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ type }} - {{ info }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "IRQ Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of bits of entropy currently available to the system's random number generators (e.g., /dev/random). Low values may indicate that random number generation could block or degrade performance of cryptographic operations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "decbits" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Entropy pool max" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 836 - }, - "id": 151, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Entropy available", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_entropy_pool_size_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Entropy pool max", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Entropy", - "type": "timeseries" - } - ], - "title": "System Misc", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 304, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Monitors hardware sensor temperatures and critical thresholds as exposed by Linux hwmon. Includes CPU, GPU, and motherboard sensors where available", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Critical*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 737 - }, - "id": 158, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Alarm", - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical", - "range": true, - "refId": "C", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Historical", - "refId": "D", - "step": 240 - }, - { - "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} Max", - "refId": "E", - "step": 240 - } - ], - "title": "Hardware Temperature Monitor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows how hard each cooling device (fan/throttle) is working relative to its maximum capacity", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 737 - }, - "id": 300, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "100 * node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"} / node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }} - {{ type }} ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Cooling Device Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows the online status of power supplies (e.g., AC, battery). A value of 1-Yes indicates the power supply is active/online", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 747 - }, - "id": 302, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ power_supply }} online", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Power Supply", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Displays the current fan speeds (RPM) from hardware sensors via the hwmon interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "rotrpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 747 - }, - "id": 325, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_hwmon_fan_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_hwmon_fan_min_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "legendFormat": "{{ chip_name }} {{ sensor }} rpm min", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Hardware Fan Speed", - "type": "timeseries" - } - ], - "title": "Hardware Misc", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 296, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of systemd units in each operational state, such as active, failed, inactive, or transitioning", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Activating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C8F2C2", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Deactivating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 4228 - }, - "id": 298, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Activating", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Active", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Deactivating", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Failed", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Inactive", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "Systemd Units State", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current number of active connections per systemd socket, as reported by the Node Exporter systemd collector", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 4228 - }, - "id": 331, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_systemd_socket_current_connections{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Current", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of accepted connections per second for each systemd socket", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 4238 - }, - "id": 297, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Accepted", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of systemd socket connection refusals per second, typically due to service unavailability or backlog overflow", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 4238 - }, - "id": 332, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_systemd_socket_refused_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{ name }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets Refused", - "type": "timeseries" - } - ], - "title": "Systemd", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 270, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of I/O operations completed per second for the device (after merges), including both reads and writes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (–) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Read/Write IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of bytes read from or written to the device per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (–) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 29 - }, - "id": 33, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Read/Write Data", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (–) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 389 - }, - "id": 37, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk Average Wait Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average queue length of the requests that were issued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda_*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 389 - }, - "id": 35, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Average Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of read and write requests merged per second that were queued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "read (–) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 399 - }, - "id": 133, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Read", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "legendFormat": "{{device}} - Write", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Merged", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Percentage of time the disk spent actively processing I/O operations, including general I/O, discards (TRIM), and write cache flushes", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 399 - }, - "id": 36, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - General IO", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discard/TRIM", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_flush_requests_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Flush (write cache)", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Time Spent Doing I/Os", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Per-second rate of discard (TRIM) and flush (write cache) operations. Useful for monitoring low-level disk activity on SSDs and advanced storage", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 409 - }, - "id": 301, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discards completed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Discards merged", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_disk_flush_requests_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}} - Flush", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Disk Ops Discards / Flush", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows how many disk sectors are discarded (TRIMed) per second. Useful for monitoring SSD behavior and storage efficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 409 - }, - "id": 326, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_disk_discarded_sectors_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Disk Sectors Discarded Successfully", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of in-progress I/O requests at the time of sampling (active requests in the disk queue)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sda.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 419 - }, - "id": 34, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "{{device}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Instantaneous Queue Size", - "type": "timeseries" - } - ], - "title": "Storage Disk", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 271, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file descriptors currently allocated system-wide versus the system limit. Important for detecting descriptor exhaustion risks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max.*/" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 30 - }, - "id": 28, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Max open files", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "Open files", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "File Descriptor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of free file nodes (inodes) available per mounted filesystem. A low count may prevent file creation even if disk space is available", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 30 - }, - "id": 41, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Free", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Indicates filesystems mounted in read-only mode or reporting device-level I/O errors.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 370 - }, - "id": 44, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}} - ReadOnly", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", - "format": "time_series", - "interval": "", - "legendFormat": "{{mountpoint}} - Device error", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Filesystem in ReadOnly / Error", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file nodes (inodes) available per mounted filesystem. Reflects maximum file capacity regardless of disk size", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 370 - }, - "id": 219, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "legendFormat": "{{mountpoint}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Size", - "type": "timeseries" - } - ], - "title": "Storage Filesystem", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 272, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of network packets received and transmitted per second, by interface.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 60, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic by Packets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of packet-level errors for each network interface. Receive errors may indicate physical or driver issues; transmit errors may reflect collisions or hardware faults", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 142, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of dropped packets per network interface. Receive drops can indicate buffer overflow or driver issues; transmit drops may result from outbound congestion or queuing limits", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 251 - }, - "id": 143, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Drop", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of compressed network packets received and transmitted per interface. These are common in low-bandwidth or special interfaces like PPP or SLIP", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 251 - }, - "id": 141, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Compressed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of incoming multicast packets received per network interface. Multicast is used by protocols such as mDNS, SSDP, and some streaming or cluster services", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 261 - }, - "id": 146, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Multicast", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of received packets that could not be processed due to missing protocol or handler in the kernel. May indicate unsupported traffic or misconfiguration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 261 - }, - "id": 327, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_nohandler_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic NoHandler", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of frame errors on received packets, typically caused by physical layer issues such as bad cables, duplex mismatches, or hardware problems", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 271 - }, - "id": 145, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Frame", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks FIFO buffer overrun errors on network interfaces. These occur when incoming or outgoing packets are dropped due to queue or buffer overflows, often indicating congestion or hardware limits", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 271 - }, - "id": 144, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "rate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Fifo", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of packet collisions detected during transmission. Mostly relevant on half-duplex or legacy Ethernet networks", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 281 - }, - "id": 232, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Collision", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of carrier errors during transmission. These typically indicate physical layer issues like faulty cabling or duplex mismatches", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 281 - }, - "id": 231, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "rate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "{{device}} - Tx out", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Network Traffic Carrier Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of ARP entries per interface. Useful for detecting excessive ARP traffic or table growth due to scanning or misconfiguration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 291 - }, - "id": 230, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "{{ device }} ARP Table", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "ARP Entries", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Current and maximum connection tracking entries used by Netfilter (nf_conntrack). High usage approaching the limit may cause packet drops or connection issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "NF conntrack limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 291 - }, - "id": 61, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NF conntrack entries", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "NF conntrack limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "NF Conntrack", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Operational and physical link status of each network interface. Values are Yes for 'up' or link present, and No for 'down' or no carrier.\"", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bool_yes_no" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 301 - }, - "id": 309, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "legendFormat": "{{interface}} - Operational state UP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Physical link", - "refId": "B" - } - ], - "title": "Network Operational Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Maximum speed of each network interface as reported by the operating system. This is a static hardware capability, not current throughput", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "fieldMinMax": false, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 301 - }, - "id": 280, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 30, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "manual", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"} * 8", - "format": "time_series", - "legendFormat": "{{ device }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Speed", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "MTU (Maximum Transmission Unit) in bytes for each network interface. Affects packet size and transmission efficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 301 - }, - "id": 288, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 30, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "manual", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "legendFormat": "{{ device }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "MTU", - "type": "bargauge" - } - ], - "title": "Network Traffic", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 273, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks TCP socket usage and memory per node", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 63, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Allocated Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "In-Use Sockets", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Orphaned Sockets", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TIME_WAIT Sockets", - "range": true, - "refId": "D", - "step": 240 - } - ], - "title": "Sockstat TCP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of UDP and UDPLite sockets currently in use", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 124, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDPLite - In-Use Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP - In-Use Sockets", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Sockstat UDP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Total number of sockets currently in use across all protocols (TCP, UDP, UNIX, etc.), as reported by /proc/net/sockstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 126, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Total sockets", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Sockstat Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of FRAG and RAW sockets currently in use. RAW sockets are used for custom protocols or tools like ping; FRAG sockets are used internally for IP packet defragmentation", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 125, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "FRAG - In-Use Sockets", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "RAW - In-Use Sockets", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "Sockstat FRAG / RAW", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Kernel memory used by TCP, UDP, and IP fragmentation buffers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 52 - }, - "id": 220, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Fragmentation", - "range": true, - "refId": "C" - } - ], - "title": "Sockstat Memory Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Average memory used per socket (TCP/UDP). Helps tune net.ipv4.tcp_rmem / tcp_wmem", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 52 - }, - "id": 339, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Sockstat Average Socket Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "TCP/UDP socket memory usage in kernel (in pages)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "id": 336, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TCP", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP/UDP Kernel Buffer Memory Pages", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Packets processed and dropped by the softnet network stack per CPU. Drops may indicate CPU saturation or network driver limitations", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "drop (-) / process (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Dropped.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 62 - }, - "id": 290, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Processed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Dropped", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Softnet Packets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "How often the kernel was unable to process all packets in the softnet queue before time ran out. Frequent squeezes may indicate CPU contention or driver inefficiency", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 72 - }, - "id": 310, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Times Squeezed", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Softnet Out of Quota", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the number of packets processed or dropped by Receive Packet Steering (RPS), a mechanism to distribute packet processing across CPUs", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Dropped.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 72 - }, - "id": 330, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_softnet_received_rps_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Processed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_softnet_flow_limit_count_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "CPU {{cpu}} - Dropped", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Softnet RPS", - "type": "timeseries" - } - ], - "title": "Network Sockstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 274, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of octets sent and received at the IP layer, as reported by /proc/net/netstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 163 - }, - "id": 221, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "IP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "legendFormat": "IP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Netstat IP In / Out Octets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP segments sent and received per second, including data and control segments", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Snd.*/" - }, - "properties": [] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 163 - }, - "id": 299, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "TCP Rx in", - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "TCP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of UDP datagrams sent and received per second, based on /proc/net/netstat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 193 - }, - "id": 55, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "UDP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of ICMP messages sent and received per second, including error and control messages", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 193 - }, - "id": 115, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Rx in", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Tx out", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "ICMP In / Out", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks various TCP error and congestion-related events, including retransmissions, timeouts, dropped connections, and buffer issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 203 - }, - "id": 104, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Listen Overflows", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Listen Drops", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Retransmits", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Segment Retransmits", - "range": true, - "refId": "D" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Receive Errors", - "range": true, - "refId": "E" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_OutRsts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "RST Sent", - "range": true, - "refId": "F" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Receive Queue Drops", - "range": true, - "refId": "G" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPOFOQueue{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "Out-of-order Queued", - "range": true, - "refId": "H" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPTimeouts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "TCP Timeouts", - "range": true, - "refId": "I" - } - ], - "title": "TCP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of UDP and UDPLite datagram delivery errors, including missing listeners, buffer overflows, and protocol-specific issues", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 203 - }, - "id": 109, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Errors", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP No Listener", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "UDPLite Rx in Errors", - "range": true, - "refId": "C" - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Buffer Errors", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out Buffer Errors", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "UDP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of incoming ICMP messages that contained protocol-specific errors, such as bad checksums or invalid lengths", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 213 - }, - "id": 50, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "ICMP Rx In", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "ICMP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP SYN cookies sent, validated, and failed. These are used to protect against SYN flood attacks and manage TCP handshake resources under load", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Failed.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 213 - }, - "id": 91, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Failed", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Validated", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "SYN Cookies Sent", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "TCP SynCookie", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of currently established TCP connections and the system's max supported limit. On Linux, MaxConn may return -1 to indicate a dynamic/unlimited configuration", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 223 - }, - "id": 85, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Current Connections", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Max Connections", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP Connections", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of UDP packets currently queued in the receive (RX) and transmit (TX) buffers. A growing queue may indicate a bottleneck", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 223 - }, - "id": 337, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"rx\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Rx in Queue", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"tx\"}", - "format": "time_series", - "interval": "", - "legendFormat": "UDP Tx out Queue", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "UDP Queue", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of TCP connection initiations per second. 'Active' opens are initiated by this host. 'Passive' opens are accepted from incoming connections", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "eps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 233 - }, - "id": 82, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Active Opens", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Passive Opens", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "TCP Direct Transition", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of TCP sockets in key connection states. Requires the --collector.tcpstat flag on node_exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 233 - }, - "id": 320, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"established\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Established", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"fin_wait2\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "FIN_WAIT2", - "range": true, - "refId": "B", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"listen\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "Listen", - "range": true, - "refId": "C", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"time_wait\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "TIME_WAIT", - "range": true, - "refId": "D", - "step": 240 - }, - { - "editorMode": "code", - "expr": "node_tcp_connection_states{state=\"close_wait\", instance=\"$node\", job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "CLOSE_WAIT", - "range": true, - "refId": "E", - "step": 240 - } - ], - "title": "TCP Stat", - "type": "timeseries" - } - ], - "title": "Network Netstat", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 279, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Duration of each individual collector executed during a Node Exporter scrape. Useful for identifying slow or failing collectors", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 164 - }, - "id": 40, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{collector}}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Node Exporter Scrape Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Rate of CPU time used by the process exposing this metric (user + system mode)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 164 - }, - "id": 308, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "legendFormat": "Process CPU Usage", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Exporter Process CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Tracks the memory usage of the process exposing this metric (e.g., node_exporter), including current virtual memory and maximum virtual memory limit", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Virtual Memory Limit" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Virtual Memory" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 10, - "x": 0, - "y": 174 - }, - "id": 149, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Virtual Memory", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Virtual Memory Limit", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Exporter Processes Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Number of file descriptors used by the exporter process versus its configured limit", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - } - ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "Open file descriptors" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 10, - "x": 10, - "y": 174 - }, - "id": 64, - "options": { - "legend": { - "calcs": [ - "min", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Maximum open file descriptors", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "legendFormat": "Open file descriptors", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Exporter File Descriptor Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "description": "Shows whether each Node Exporter collector scraped successfully (1 = success, 0 = failure), and whether the textfile collector returned an error.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "dark-red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "bool" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 4, - "x": 20, - "y": 174 - }, - "id": 157, - "options": { - "displayMode": "basic", - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "maxVizHeight": 300, - "minVizHeight": 16, - "minVizWidth": 8, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "sizing": "auto", - "valueMode": "color" - }, - "pluginVersion": "11.6.1", - "targets": [ - { - "editorMode": "code", - "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{collector}}", - "range": true, - "refId": "A", - "step": 240 - }, - { - "editorMode": "code", - "expr": "1 - node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "legendFormat": "textfile", - "range": true, - "refId": "B", - "step": 240 - } - ], - "title": "Node Exporter Scrape", - "type": "bargauge" - } - ], - "title": "Node Exporter", - "type": "row" - } - ], - "refresh": "1m", - "schemaVersion": 41, - "tags": [ - "linux" - ], - "templating": { - "list": [ - { - "current": {}, - "includeAll": false, - "label": "Datasource", - "name": "ds_prometheus", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "", - "includeAll": false, - "label": "Job", - "name": "job", - "options": [], - "query": { - "query": "label_values(node_uname_info, job)", - "refId": "Prometheus-job-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "includeAll": false, - "label": "Nodename", - "name": "nodename", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\"}, nodename)", - "refId": "Prometheus-nodename-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "definition": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "includeAll": false, - "label": "Instance", - "name": "node", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", - "refId": "Prometheus-node-Variable-Query" - }, - "refresh": 1, - "regex": "", - "sort": 1, - "type": "query" - } - ] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Node Exporter Full", - "uid": "rYdddlPWk", - "version": 98, - "weekStart": "", - "gnetId": 1860 -} \ No newline at end of file diff --git a/roles/tools/tasks/main.yml b/roles/tools/tasks/main.yml deleted file mode 100644 index 3bb0487..0000000 --- a/roles/tools/tasks/main.yml +++ /dev/null @@ -1,167 +0,0 @@ ---- -- name: Create tools root directory - ansible.builtin.file: - path: "{{ tools_root }}" - state: directory - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0750" - -- name: Create tools subdirectories - ansible.builtin.file: - path: "{{ tools_root }}/{{ item }}" - state: directory - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0755" - loop: - - prometheus - - prometheus/rules - - grafana/provisioning/datasources - - grafana/provisioning/dashboards - - grafana/provisioning/dashboards/json - - loki - -- name: Deploy .env file - ansible.builtin.template: - src: env.j2 - dest: "{{ tools_root }}/.env" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0600" - -- name: Deploy docker-compose.yml - ansible.builtin.template: - src: docker-compose.yml.j2 - dest: "{{ tools_root }}/docker-compose.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0640" - -- name: Deploy Prometheus config - ansible.builtin.template: - src: prometheus/prometheus.yml.j2 - dest: "{{ tools_root }}/prometheus/prometheus.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Prometheus alert rules - ansible.builtin.template: - src: prometheus/rules/alerts.yml.j2 - dest: "{{ tools_root }}/prometheus/rules/alerts.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy AlertManager config - ansible.builtin.template: - src: prometheus/alertmanager.yml.j2 - dest: "{{ tools_root }}/prometheus/alertmanager.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Loki config - ansible.builtin.template: - src: loki/loki.yml.j2 - dest: "{{ tools_root }}/loki/loki.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Grafana Prometheus datasource - ansible.builtin.template: - src: grafana/provisioning/datasources/prometheus.yml.j2 - dest: "{{ tools_root }}/grafana/provisioning/datasources/prometheus.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Grafana Loki datasource - ansible.builtin.template: - src: grafana/provisioning/datasources/loki.yml.j2 - dest: "{{ tools_root }}/grafana/provisioning/datasources/loki.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Grafana dashboard provisioning config - ansible.builtin.template: - src: grafana/provisioning/dashboards/dashboards.yml.j2 - dest: "{{ tools_root }}/grafana/provisioning/dashboards/dashboards.yml" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy Node Exporter Full dashboard JSON - ansible.builtin.copy: - src: grafana/dashboards/node-exporter-full.json - dest: "{{ tools_root }}/grafana/provisioning/dashboards/json/node-exporter-full.json" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Deploy cAdvisor dashboard JSON - ansible.builtin.copy: - src: grafana/dashboards/cadvisor.json - dest: "{{ tools_root }}/grafana/provisioning/dashboards/json/cadvisor.json" - owner: "{{ deploy_user }}" - group: "{{ deploy_group }}" - mode: "0644" - -- name: Pull monitoring images - ansible.builtin.command: docker pull {{ item }} - loop: - - "{{ prometheus_image }}" - - "{{ alertmanager_image }}" - - "{{ node_exporter_image }}" - - "{{ cadvisor_image }}" - - "{{ grafana_image }}" - - "{{ loki_image }}" - - "{{ uptime_kuma_image }}" - register: pull_result - changed_when: "'Status: Downloaded newer image' in pull_result.stdout" - retries: 5 - delay: 30 - until: pull_result.rc == 0 - -# ── UFW: allow main server to reach monitoring services ─────────────────────── -- name: Allow main server to reach Loki (Promtail log push) - community.general.ufw: - rule: allow - port: "3100" - proto: tcp - src: "{{ ip_main }}" - -- name: Allow main server to reach Prometheus (discord-bot metrics) - community.general.ufw: - rule: allow - port: "9090" - proto: tcp - src: "{{ ip_main }}" - -- name: Allow main Traefik to reach Grafana - community.general.ufw: - rule: allow - port: "3000" - proto: tcp - src: "{{ ip_main }}" - -- name: Allow main Traefik to reach Uptime Kuma - community.general.ufw: - rule: allow - port: "3001" - proto: tcp - src: "{{ ip_main }}" - -- name: Start tools stack - community.docker.docker_compose_v2: - project_src: "{{ tools_root }}" - state: present - pull: never - remove_orphans: true - retries: 3 - delay: 15 - register: compose_result - until: compose_result is succeeded diff --git a/roles/tools/templates/docker-compose.yml.j2 b/roles/tools/templates/docker-compose.yml.j2 deleted file mode 100644 index ce20f72..0000000 --- a/roles/tools/templates/docker-compose.yml.j2 +++ /dev/null @@ -1,157 +0,0 @@ -# Tools stack — generated by Ansible -# Do not edit manually; re-run ansible-playbook playbooks/tools.yml -# Monitoring: Prometheus, Grafana, Loki, AlertManager, Uptime Kuma, node-exporter, cAdvisor - -networks: - monitoring: - driver: bridge - -volumes: - prometheus_data: - grafana_data: - loki_data: - uptime_kuma_data: - -services: - - # ── Prometheus ───────────────────────────────────────────────────────────── - prometheus: - image: {{ prometheus_image }} - container_name: prometheus - restart: unless-stopped - networks: - - monitoring - ports: - - "127.0.0.1:9090:9090" # exposed to main via UFW rule for discord-bot - volumes: - - prometheus_data:/prometheus - - {{ tools_root }}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - - {{ tools_root }}/prometheus/rules:/etc/prometheus/rules:ro - command: - - "--config.file=/etc/prometheus/prometheus.yml" - - "--storage.tsdb.path=/prometheus" - - "--storage.tsdb.retention.time=30d" - - "--web.console.libraries=/usr/share/prometheus/console_libraries" - - "--web.console.templates=/usr/share/prometheus/consoles" - healthcheck: - test: ["CMD", "wget", "-qO-", "http://localhost:9090/-/healthy"] - interval: 30s - timeout: 5s - retries: 3 - - alertmanager: - image: {{ alertmanager_image }} - container_name: alertmanager - restart: unless-stopped - networks: - - monitoring - volumes: - - {{ tools_root }}/prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro - command: - - "--config.file=/etc/alertmanager/alertmanager.yml" - - "--storage.path=/alertmanager" - healthcheck: - test: ["CMD", "wget", "-qO-", "http://localhost:9093/-/healthy"] - interval: 30s - timeout: 5s - retries: 3 - - # ── Exporters (monitor the tools host itself) ─────────────────────────────── - node-exporter: - image: {{ node_exporter_image }} - container_name: node-exporter - restart: unless-stopped - networks: - - monitoring - pid: host - volumes: - - /proc:/host/proc:ro - - /sys:/host/sys:ro - - /:/rootfs:ro - command: - - "--path.procfs=/host/proc" - - "--path.sysfs=/host/sys" - - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" - - cadvisor: - image: {{ cadvisor_image }} - container_name: cadvisor - restart: unless-stopped - networks: - - monitoring - privileged: true - devices: - - /dev/kmsg - volumes: - - /:/rootfs:ro - - /var/run:/var/run:ro - - /sys:/sys:ro - - /var/lib/docker:/var/lib/docker:ro - - /dev/disk:/dev/disk:ro - - # ── Grafana ───────────────────────────────────────────────────────────────── - grafana: - image: {{ grafana_image }} - container_name: grafana - restart: unless-stopped - security_opt: - - no-new-privileges:true - depends_on: - - prometheus - networks: - - monitoring - ports: - - "3000:3000" - volumes: - - grafana_data:/var/lib/grafana - - {{ tools_root }}/grafana/provisioning:/etc/grafana/provisioning:ro - env_file: .env - environment: - - GF_SECURITY_ADMIN_USER=admin - - GF_USERS_ALLOW_SIGN_UP=false - - GF_SERVER_DOMAIN={{ domain_dashboard }} - - GF_SERVER_ROOT_URL=https://{{ domain_dashboard }} - - GF_AUTH_ANONYMOUS_ENABLED=false - healthcheck: - test: ["CMD", "wget", "-qO-", "http://localhost:3000/api/health"] - interval: 30s - timeout: 5s - retries: 3 - - # ── Loki ──────────────────────────────────────────────────────────────────── - loki: - image: {{ loki_image }} - container_name: loki - restart: unless-stopped - networks: - - monitoring - ports: - - "3100:3100" # exposed to main for Promtail log ingestion - volumes: - - loki_data:/loki - - {{ tools_root }}/loki/loki.yml:/etc/loki/local-config.yaml:ro - command: -config.file=/etc/loki/local-config.yaml - healthcheck: - test: ["CMD", "wget", "-qO-", "http://localhost:3100/ready"] - interval: 30s - timeout: 5s - retries: 3 - - # ── Uptime Kuma ───────────────────────────────────────────────────────────── - uptime-kuma: - image: {{ uptime_kuma_image }} - container_name: uptime-kuma - restart: unless-stopped - security_opt: - - no-new-privileges:true - networks: - - monitoring - ports: - - "3001:3001" - volumes: - - uptime_kuma_data:/app/data - healthcheck: - test: ["CMD", "curl", "-sf", "http://localhost:3001/"] - interval: 30s - timeout: 5s - retries: 3 diff --git a/roles/tools/templates/env.j2 b/roles/tools/templates/env.j2 deleted file mode 100644 index 490c649..0000000 --- a/roles/tools/templates/env.j2 +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by Ansible — do not edit manually -GF_SECURITY_ADMIN_PASSWORD={{ grafana_admin_password }} diff --git a/roles/tools/templates/grafana/provisioning/dashboards/dashboards.yml.j2 b/roles/tools/templates/grafana/provisioning/dashboards/dashboards.yml.j2 deleted file mode 100644 index fc7264f..0000000 --- a/roles/tools/templates/grafana/provisioning/dashboards/dashboards.yml.j2 +++ /dev/null @@ -1,13 +0,0 @@ -# Generated by Ansible — do not edit manually -apiVersion: 1 - -providers: - - name: default - orgId: 1 - folder: "" - type: file - disableDeletion: false - updateIntervalSeconds: 30 - allowUiUpdates: false - options: - path: /etc/grafana/provisioning/dashboards/json diff --git a/roles/tools/templates/grafana/provisioning/datasources/loki.yml.j2 b/roles/tools/templates/grafana/provisioning/datasources/loki.yml.j2 deleted file mode 100644 index 4de25a0..0000000 --- a/roles/tools/templates/grafana/provisioning/datasources/loki.yml.j2 +++ /dev/null @@ -1,10 +0,0 @@ -# Generated by Ansible — do not edit manually -apiVersion: 1 - -datasources: - - name: Loki - type: loki - access: proxy - url: http://loki:3100 - isDefault: false - editable: false diff --git a/roles/tools/templates/grafana/provisioning/datasources/prometheus.yml.j2 b/roles/tools/templates/grafana/provisioning/datasources/prometheus.yml.j2 deleted file mode 100644 index e695f24..0000000 --- a/roles/tools/templates/grafana/provisioning/datasources/prometheus.yml.j2 +++ /dev/null @@ -1,10 +0,0 @@ -# Generated by Ansible — do not edit manually -apiVersion: 1 - -datasources: - - name: Prometheus - type: prometheus - access: proxy - url: http://prometheus:9090 - isDefault: true - editable: false diff --git a/roles/tools/templates/loki/loki.yml.j2 b/roles/tools/templates/loki/loki.yml.j2 deleted file mode 100644 index 0d801a7..0000000 --- a/roles/tools/templates/loki/loki.yml.j2 +++ /dev/null @@ -1,36 +0,0 @@ -# Generated by Ansible — do not edit manually -auth_enabled: false - -server: - http_listen_port: 3100 - grpc_listen_port: 9096 - -common: - instance_addr: 127.0.0.1 - path_prefix: /loki - storage: - filesystem: - chunks_directory: /loki/chunks - rules_directory: /loki/rules - replication_factor: 1 - ring: - kvstore: - store: inmemory - -schema_config: - configs: - - from: 2020-10-24 - store: tsdb - object_store: filesystem - schema: v13 - index: - prefix: index_ - period: 24h - -limits_config: - retention_period: 30d - -compactor: - working_directory: /loki/retention - delete_request_store: filesystem - retention_enabled: true diff --git a/roles/tools/templates/prometheus/alertmanager.yml.j2 b/roles/tools/templates/prometheus/alertmanager.yml.j2 deleted file mode 100644 index 877527c..0000000 --- a/roles/tools/templates/prometheus/alertmanager.yml.j2 +++ /dev/null @@ -1,38 +0,0 @@ -# Generated by Ansible — do not edit manually -global: - resolve_timeout: 5m - -route: - group_by: [alertname, severity] - group_wait: 30s - group_interval: 5m - repeat_interval: 4h - receiver: all - -receivers: - - name: all - telegram_configs: - - bot_token: "{{ alertmanager_telegram_token }}" - chat_id: {{ alertmanager_telegram_chat_id }} - message: | - {{ '{{' }} range .Alerts {{ '}}' }} - {{ '{{' }} if eq .Status "firing" {{ '}}' }}🔴{{ '{{' }} else {{ '}}' }}🟢{{ '{{' }} end {{ '}}' }} *{{ '{{' }} .Labels.alertname {{ '}}' }}* - {{ '{{' }} .Annotations.summary {{ '}}' }} - {{ '{{' }} .Annotations.description {{ '}}' }} - {{ '{{' }} end {{ '}}' }} - parse_mode: Markdown - discord_configs: - - webhook_url: "{{ discord_webhook_alerts }}" - title: >- - {{ '{{' }} if eq (index .Alerts 0).Status "firing" {{ '}}' }}🔴 Alert{{ '{{' }} else {{ '}}' }}🟢 Resolved{{ '{{' }} end {{ '}}' }} - message: | - {{ '{{' }} range .Alerts {{ '}}' }} - **{{ '{{' }} .Labels.alertname {{ '}}' }}** - {{ '{{' }} .Annotations.summary {{ '}}' }} - {{ '{{' }} .Annotations.description {{ '}}' }} - {{ '{{' }} end {{ '}}' }} - -inhibit_rules: - - source_matchers: [severity="critical"] - target_matchers: [severity="warning"] - equal: [alertname] diff --git a/roles/tools/templates/prometheus/prometheus.yml.j2 b/roles/tools/templates/prometheus/prometheus.yml.j2 deleted file mode 100644 index 605518e..0000000 --- a/roles/tools/templates/prometheus/prometheus.yml.j2 +++ /dev/null @@ -1,49 +0,0 @@ -# Generated by Ansible — do not edit manually -global: - scrape_interval: 15s - evaluation_interval: 15s - external_labels: - instance: "{{ domain_base }}" - -alerting: - alertmanagers: - - static_configs: - - targets: ["alertmanager:9093"] - -rule_files: - - /etc/prometheus/rules/*.yml - -scrape_configs: - - job_name: prometheus - static_configs: - - targets: ["localhost:9090"] - - # tools server metrics - - job_name: node-exporter-tools - static_configs: - - targets: ["node-exporter:9100"] - labels: - host: tools - - - job_name: cadvisor-tools - static_configs: - - targets: ["cadvisor:8080"] - labels: - host: tools - - - job_name: alertmanager - static_configs: - - targets: ["alertmanager:9093"] - - # main server metrics (scraped over network) - - job_name: node-exporter-main - static_configs: - - targets: ["{{ ip_main }}:9100"] - labels: - host: main - - - job_name: cadvisor-main - static_configs: - - targets: ["{{ ip_main }}:8080"] - labels: - host: main diff --git a/roles/tools/templates/prometheus/rules/alerts.yml.j2 b/roles/tools/templates/prometheus/rules/alerts.yml.j2 deleted file mode 100644 index 4e8a5c2..0000000 --- a/roles/tools/templates/prometheus/rules/alerts.yml.j2 +++ /dev/null @@ -1,86 +0,0 @@ -# Generated by Ansible — do not edit manually -groups: - - name: host - rules: - - alert: HighCPULoad - expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85 - for: 5m - labels: - severity: warning - annotations: - summary: "Высокая нагрузка CPU ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "CPU загружен более 85% на протяжении 5 минут." - - - alert: HighMemoryUsage - expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85 - for: 5m - labels: - severity: warning - annotations: - summary: "Высокое использование RAM ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "Использование RAM превысило 85%." - - - alert: CriticalMemoryUsage - expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95 - for: 2m - labels: - severity: critical - annotations: - summary: "Критическое использование RAM ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "RAM заполнена на 95%+. Возможны OOM kills." - - - alert: DiskSpaceWarning - expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|aufs"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|aufs"})) * 100 > 75 - for: 5m - labels: - severity: warning - annotations: - summary: "Заканчивается место на диске ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "Диск {{ '{{' }} $labels.mountpoint {{ '}}' }} занят на {{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%." - - - alert: DiskSpaceCritical - expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|aufs"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|aufs"})) * 100 > 90 - for: 2m - labels: - severity: critical - annotations: - summary: "Критически мало места на диске ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "Диск {{ '{{' }} $labels.mountpoint {{ '}}' }} занят на {{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%." - - - alert: SwapUsageHigh - expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 50 - for: 5m - labels: - severity: warning - annotations: - summary: "Высокое использование swap ({{ '{{' }} $value | printf \"%.0f\" {{ '}}' }}%)" - description: "Swap используется более чем на 50% — RAM под давлением." - - - name: containers - rules: - - alert: ContainerDown - expr: absent(container_last_seen{name=~".+"}) or time() - container_last_seen{name=~".+"} > 60 - for: 2m - labels: - severity: critical - annotations: - summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} недоступен" - description: "Контейнер не отвечает более 2 минут." - - - alert: ContainerHighMemory - expr: (container_memory_usage_bytes{name=~".+"} / (container_spec_memory_limit_bytes{name=~".+"} > 0)) * 100 > 90 - for: 5m - labels: - severity: warning - annotations: - summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} использует 90%+ памяти" - description: "Контейнер близок к mem_limit — возможен OOM kill." - - - alert: ContainerRestarting - expr: increase(container_last_seen{name=~".+"}[5m]) == 0 and rate(container_cpu_usage_seconds_total{name=~".+"}[5m]) == 0 - for: 0m - labels: - severity: warning - annotations: - summary: "Контейнер {{ '{{' }} $labels.name {{ '}}' }} возможно перезапускается" - description: "Контейнер не активен — проверьте docker ps."