DEV-442: added threshold for pg_repl_lag to avoid false positives on DEV-stage

feature/DEV-380
Görz, Friedrich 4 years ago
parent ff9c0d94a1
commit 6c6dd5c1ae

@@ -214,3 +214,4 @@ k8s_basic_services:
selfsigned_ca_private_key_passphrase: '{{ selfsigned_ca_private_key_passphrase_vault }}'
prometheus_alert_diskspaceusage_warning: 85
+prometheus_alert_pg_replication_lag: 120

@@ -374,3 +374,5 @@ webdav_opentracing_jaeger_enabled: true
webdav_opentracing_jaeger_http_sender_url: "http://{{ shared_service_kube_jaeger_collector_hostname }}/api/traces"
connect_opentracing_jaeger_enabled: true
connect_opentracing_jaeger_http_sender_url: "http://{{ shared_service_kube_jaeger_collector_hostname }}/api/traces"
+prometheus_alert_pg_replication_lag: 300

@@ -367,3 +367,5 @@ management_oidc_client_secret: "{{ management_oidc_client_secret_vault }}"
# https://git.dev-at.de/smardigo-hetzner/communication-keys/
# push mirror: https://prodnso-gitea-01.smardigo.digital/gitea-admin/communication-keys/
gpg_key_smardigo_automation__private: '{{ gpg_key_smardigo_automation__private__vault }}'
+prometheus_alert_pg_replication_lag: 60

@@ -271,8 +271,8 @@ groups:
description: "checks if postgres service is running.."
- alert: postgres replication broken
-expr: pg_replication_lag > 120 or absent(pg_replication_lag)
-for: 1m
+expr: pg_replication_lag > {{ prometheus_alert_pg_replication_lag }} or absent(pg_replication_lag)
+for: 5m
labels:
severity: critical
annotations:

Loading…
Cancel
Save