From 24e5cbf3d9ca6c88532e2118bdf064695d32816c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6rz=2C=20Friedrich?=
Date: Wed, 5 Oct 2022 12:36:51 +0000
Subject: [PATCH] DEV-616: increased vol_count to mitigate disk size problem

Also add a disk-usage warning alert for filesystems whose device matches
/dev/mapper/.* so the mounted volumes are monitored explicitly.

---
NOTE(review): this patch was recovered from a whitespace-flattened copy.
Line breaks and indentation (including the position of the blank context
line in prometheus.yml) are reconstructed, not original. Verify the
context lines against the target files before applying.

NOTE(review): in the new alert rule the redundant device!="/dev/loop1"
matcher was dropped (the anchored device=~"/dev/mapper/.*" matcher already
excludes it) and the summary now names the device so it can be told apart
from the generic disk-usage alert. The post-image blob id on the
alert.rules.j2 "index" line is therefore stale; regenerate the patch with
git format-patch after applying.

 group_vars/stage_prodnso/prometheus.yml               | 2 +-
 templates/prometheus/config/prometheus/alert.rules.j2 | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/group_vars/stage_prodnso/prometheus.yml b/group_vars/stage_prodnso/prometheus.yml
index bbd90e1..cb266ef 100644
--- a/group_vars/stage_prodnso/prometheus.yml
+++ b/group_vars/stage_prodnso/prometheus.yml
@@ -1,5 +1,5 @@
 ---
 
 prometheus_lvm_hcloudvol_size: 30
-prometheus_lvm_hcloudvol_count: 2
+prometheus_lvm_hcloudvol_count: 3
 prometheus_tsdb_rentention_time: '90d'
diff --git a/templates/prometheus/config/prometheus/alert.rules.j2 b/templates/prometheus/config/prometheus/alert.rules.j2
index 8a81e12..12a91a0 100644
--- a/templates/prometheus/config/prometheus/alert.rules.j2
+++ b/templates/prometheus/config/prometheus/alert.rules.j2
@@ -90,6 +90,15 @@ groups:
       summary: "Disk Space Usage (instance {{ '{{' }} $labels.instance {{ '}}' }})"
       description: "Disk Space on Drive is used more than {{ prometheus_alert_diskspaceusage_warning }}%\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS: {{ '{{' }} $labels {{ '}}' }}"
 
+  - alert: DiskSpaceUsage mounted volumes
+    expr: 100.0 - 100 * (node_filesystem_free_bytes{env="{{ stage }}",device=~"/dev/mapper/.*"} / node_filesystem_size_bytes{env="{{ stage }}",device=~"/dev/mapper/.*"}) > {{ prometheus_alert_diskspaceusage_warning }}
+    for: 10m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Disk Space Usage on mounted volume (instance {{ '{{' }} $labels.instance {{ '}}' }}, device {{ '{{' }} $labels.device {{ '}}' }})"
+      description: "Disk Space on Drive is used more than {{ prometheus_alert_diskspaceusage_warning }}%\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS: {{ '{{' }} $labels {{ '}}' }}"
+
   - alert: software_raid_disks_active
     expr: node_md_disks_active != 2
     for: 2m