diff --git a/group_vars/all/plain.yml b/group_vars/all/plain.yml
index 4438dfe..cef035b 100644
--- a/group_vars/all/plain.yml
+++ b/group_vars/all/plain.yml
@@ -211,3 +211,5 @@ k8s_basic_services:
 
   selfsigned_ca_private_key_passphrase: '{{ selfsigned_ca_private_key_passphrase_vault }}'
 
+# Filesystem usage (percent of size) above which the DiskSpaceUsage warning alert fires.
+prometheus_alert_diskspaceusage_warning: 85
diff --git a/templates/prometheus/config/prometheus/alert.rules.j2 b/templates/prometheus/config/prometheus/alert.rules.j2
index 336e3dd..92f21e7 100644
--- a/templates/prometheus/config/prometheus/alert.rules.j2
+++ b/templates/prometheus/config/prometheus/alert.rules.j2
@@ -81,15 +81,16 @@ groups:
       summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> needs a new certificate until next 50 days."
       description: "needs a new certificate until next 50 days."
 
-  - alert: disk_space
-    expr: predict_linear(node_filesystem_avail_bytes{device!="/dev/loop1",device!="veeamagent",device!="/dev/veeamimage1",env="{{ stage }}",fstype!="cifs",mountpoint=~"/|/rootfs"}[1d], 24 * 3600 * 14) < 1024 * 1024 * 1024 * 5
-    for: 2m
+  # Warns when a root filesystem is fuller than the configured percentage for 10 minutes.
+  # Uses avail_bytes (not free_bytes) so that root-reserved blocks count as used space.
+  - alert: DiskSpaceUsage
+    expr: 100 - 100 * (node_filesystem_avail_bytes{device!="/dev/loop1",env="{{ stage }}",mountpoint=~"/|/rootfs"} / node_filesystem_size_bytes{device!="/dev/loop1",env="{{ stage }}",mountpoint=~"/|/rootfs"}) > {{ prometheus_alert_diskspaceusage_warning }}
+    for: 10m
     labels:
       severity: warning
     annotations:
-      identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
-      summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> disk space falls below 5GB in the next 14 days."
-      description: "disk space falls below 5GB in the next 14 days."
+      summary: "Disk Space Usage (instance {{ '{{' }} $labels.instance {{ '}}' }})"
+      description: "Disk Space on Drive is used more than {{ prometheus_alert_diskspaceusage_warning }}%\n VALUE = {{ '{{' }} $value {{ '}}' }}\n LABELS: {{ '{{' }} $labels {{ '}}' }}"
 
   - alert: software_raid_disks_active
     expr: node_md_disks_active != 2