|
|
|
@ -350,6 +350,24 @@ groups:
|
|
|
|
summary: "Elasticsearch health status is not green. Please Check"
|
|
|
|
summary: "Elasticsearch health status is not green. Please Check"
|
|
|
|
description: "Alert for Elasticsearch health status"
|
|
|
|
description: "Alert for Elasticsearch health status"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fires when the awx_status_total{status="error"} series changed value at
# least once within the last 2 minutes.
# NOTE(review): presumably a value change means a new AWX job ended in
# status "error" - confirm against the exporter that produces this metric.
- alert: awx job failed with status error
  expr: changes(awx_status_total{status="error"}[2m]) > 0
  # No hold duration: after a single change the expression stays true for
  # less than the 2m range window (both samples around the change must lie
  # inside it), so the previous `for: 2m` kept a one-off failure in
  # "pending" until it resolved and the alert never fired.
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: "awx job failed with status error"
    description: "Alert awx jobs has an error"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fires when the awx_status_total{status="failed"} series changed value at
# least once within the last 2 minutes.
# NOTE(review): presumably a value change means a new AWX job ended in
# status "failed" - confirm against the exporter that produces this metric.
- alert: awx job failed with status failed
  expr: changes(awx_status_total{status="failed"}[2m]) > 0
  # No hold duration: after a single change the expression stays true for
  # less than the 2m range window (both samples around the change must lie
  # inside it), so the previous `for: 2m` kept a one-off failure in
  # "pending" until it resolved and the alert never fired.
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: "awx job failed with status failed"
    description: "Alert awx jobs failed"
|
|
|
|
|
|
|
|
|
|
|
|
- alert: postgres backup zombies
|
|
|
|
- alert: postgres backup zombies
|
|
|
|
expr: 100 - ((node_filesystem_avail_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'} * 100) / node_filesystem_size_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'}) > 10
|
|
|
|
expr: 100 - ((node_filesystem_avail_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'} * 100) / node_filesystem_size_bytes{instance=~"{{ stage }}-postgres-01.smardigo.digital",job=~"node-exporter",device='/dev/mapper/vg.postgres_backup-lv.postgres_backup'}) > 10
|
|
|
|
for: 2h
|
|
|
|
for: 2h
|
|
|
|
|