|
|
|
@ -367,7 +367,7 @@ groups:
|
|
|
|
annotations:
|
|
|
|
annotations:
|
|
|
|
summary: "awx job failed with status failed"
|
|
|
|
summary: "awx job failed with status failed"
|
|
|
|
description: "Alert awx jobs failed"
|
|
|
|
description: "Alert awx jobs failed"
|
|
|
|
|
|
|
|
|
|
|
|
- alert: postgres backup zombies
|
|
|
|
- alert: postgres backup zombies
|
|
|
|
expr: 100 - ((node_filesystem_avail_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'} * 100) / node_filesystem_size_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'}) > 10
|
|
|
|
expr: 100 - ((node_filesystem_avail_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'} * 100) / node_filesystem_size_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'}) > 10
|
|
|
|
for: 2h
|
|
|
|
for: 2h
|
|
|
|
@ -378,17 +378,6 @@ groups:
|
|
|
|
summary: "postgres backup zombies, have not been deleted"
|
|
|
|
summary: "postgres backup zombies, have not been deleted"
|
|
|
|
description: "postgres backup zombies, have not been deleted"
|
|
|
|
description: "postgres backup zombies, have not been deleted"
|
|
|
|
|
|
|
|
|
|
|
|
- alert: hetzner unattached volumes
|
|
|
|
|
|
|
|
expr: hetzner_api_unattached_volumes > 0 or absent(hetzner_api_unattached_volumes)
|
|
|
|
|
|
|
|
for: 2h
|
|
|
|
|
|
|
|
labels:
|
|
|
|
|
|
|
|
severity: critical
|
|
|
|
|
|
|
|
annotations:
|
|
|
|
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
|
|
|
|
summary: "unattached volumes in hetzner"
|
|
|
|
|
|
|
|
description: "unattached volumes in hetzner"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
- alert: hetzner ratelimit_remaining low
|
|
|
|
- alert: hetzner ratelimit_remaining low
|
|
|
|
expr: (hetzner_api_ratelimit_remaining * 100)/ hetzner_api_ratelimit_limit < 50 or absent(hetzner_api_ratelimit_remaining)
|
|
|
|
expr: (hetzner_api_ratelimit_remaining * 100)/ hetzner_api_ratelimit_limit < 50 or absent(hetzner_api_ratelimit_remaining)
|
|
|
|
for: 10m
|
|
|
|
for: 10m
|
|
|
|
@ -399,6 +388,16 @@ groups:
|
|
|
|
summary: "hetzner ratelimit_remaining below 50%"
|
|
|
|
summary: "hetzner ratelimit_remaining below 50%"
|
|
|
|
description: "hetzner ratelimit_remaining below 50%"
|
|
|
|
description: "hetzner ratelimit_remaining below 50%"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
- alert: hetzner unattached volumes
|
|
|
|
|
|
|
|
expr: hetzner_api_unattached_volumes > 0 or absent(hetzner_api_unattached_volumes)
|
|
|
|
|
|
|
|
for: 2h
|
|
|
|
|
|
|
|
labels:
|
|
|
|
|
|
|
|
severity: critical
|
|
|
|
|
|
|
|
annotations:
|
|
|
|
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
|
|
|
|
summary: "unattached volumes in hetzner"
|
|
|
|
|
|
|
|
description: "unattached volumes in hetzner"
|
|
|
|
|
|
|
|
|
|
|
|
- alert: hetzner locked server exists
|
|
|
|
- alert: hetzner locked server exists
|
|
|
|
expr: hetzner_api_locked_servers > 0 or absent(hetzner_api_locked_servers)
|
|
|
|
expr: hetzner_api_locked_servers > 0 or absent(hetzner_api_locked_servers)
|
|
|
|
for: 1h
|
|
|
|
for: 1h
|
|
|
|
@ -408,5 +407,3 @@ groups:
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
summary: "hetzner locked server exists"
|
|
|
|
summary: "hetzner locked server exists"
|
|
|
|
description: "hetzner locked server exists"
|
|
|
|
description: "hetzner locked server exists"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|