|
|
|
@ -174,6 +174,30 @@ groups:
|
|
|
|
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly backup failed."
|
|
|
|
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly backup failed."
|
|
|
|
description: "PostgreSQL backup failed."
|
|
|
|
description: "PostgreSQL backup failed."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Alert: nightly PostgreSQL restore test is stale or missing.
# Fires (severity: critical) once the condition below has held for 2 minutes:
#   * time() minus nightly_restore_successful_generic{job="restore_test",
#     database_engine="postgres"} exceeds 30 * 3600 seconds (30 hours), or
#   * no series with those label matchers exists at all (absent()).
# The metric is subtracted from time(), so it is presumably the Unix timestamp
# of the last successful restore test - TODO confirm against the exporter.
# NOTE(review): the absent() branch produces a series carrying only the labels
# from the equality matchers (job, database_engine), so $labels.instance will
# be empty in identifier/summary when the metric is missing - confirm that
# downstream routing tolerates an empty identifier.
# NOTE(review): the {{ '{{' }} ... {{ '}}' }} sequences look like template
# escaping so that a literal "{{ $labels.instance }}" reaches Prometheus -
# confirm this file is rendered by a templating step before being loaded.
- alert: nightly_restoretest_failed_postgres
|
|
|
|
|
|
|
|
expr: |
|
|
|
|
|
|
|
|
(time() - nightly_restore_successful_generic{job="restore_test",database_engine="postgres"}) > 30 * 3600 or
|
|
|
|
|
|
|
|
absent(nightly_restore_successful_generic{job="restore_test",database_engine="postgres"})
|
|
|
|
|
|
|
|
for: 2m
|
|
|
|
|
|
|
|
labels:
|
|
|
|
|
|
|
|
severity: critical
|
|
|
|
|
|
|
|
annotations:
|
|
|
|
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
|
|
|
|
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly restore test failed."
|
|
|
|
|
|
|
|
description: "PostgreSQL restore test failed."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Alert: nightly MariaDB restore test is stale or missing.
# Fires (severity: critical) once the condition below has held for 2 minutes:
#   * time() minus nightly_restore_successful_generic{job="restore_test",
#     database_engine="maria"} exceeds 30 * 3600 seconds (30 hours), or
#   * no series with those label matchers exists at all (absent()).
# The metric is subtracted from time(), so it is presumably the Unix timestamp
# of the last successful restore test - TODO confirm against the exporter.
# NOTE(review): the absent() branch produces a series carrying only the labels
# from the equality matchers (job, database_engine), so $labels.instance will
# be empty in identifier/summary when the metric is missing - confirm that
# downstream routing tolerates an empty identifier.
# NOTE(review): the {{ '{{' }} ... {{ '}}' }} sequences look like template
# escaping so that a literal "{{ $labels.instance }}" reaches Prometheus -
# confirm this file is rendered by a templating step before being loaded.
- alert: nightly_restoretest_failed_maria
|
|
|
|
|
|
|
|
expr: |
|
|
|
|
|
|
|
|
(time() - nightly_restore_successful_generic{job="restore_test",database_engine="maria"}) > 30 * 3600 or
|
|
|
|
|
|
|
|
absent(nightly_restore_successful_generic{job="restore_test",database_engine="maria"})
|
|
|
|
|
|
|
|
for: 2m
|
|
|
|
|
|
|
|
labels:
|
|
|
|
|
|
|
|
severity: critical
|
|
|
|
|
|
|
|
annotations:
|
|
|
|
|
|
|
|
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
|
|
|
|
|
|
|
|
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly restore test failed."
|
|
|
|
|
|
|
|
description: "MariaDB restore test failed."
|
|
|
|
|
|
|
|
|
|
|
|
- alert: megaraid_smart_errors
|
|
|
|
- alert: megaraid_smart_errors
|
|
|
|
expr: megaraid_smart_errors > 1
|
|
|
|
expr: megaraid_smart_errors > 1
|
|
|
|
for: 2m
|
|
|
|
for: 2m
|
|
|
|
|