DEV-471: added push metrics part to restore playbook

qa
Görz, Friedrich 3 years ago
parent 8374ae0d2a
commit 3905dff581

@ -247,6 +247,37 @@
vars: vars:
record_to_remove: '{{ inventory_hostname }}' record_to_remove: '{{ inventory_hostname }}'
#############################################################
# Sending metric to prometheus push gateway
#############################################################
# Play: push a "restore succeeded" timestamp metric to the Prometheus
# Pushgateway. Runs locally (no remote connection, no fact gathering).
- hosts: "{{ stage }}-virtual-host-to-read-groups-vars"
  connection: local
  gather_facts: false
  run_once: true
  serial: "{{ serial_number | default(1) }}"
  vars:
    # Pushgateway endpoint within the k8s cluster running AWX.
    prometheus_pushgw_host: 'prometheus-pushgateway.monitoring:9091'
  tasks:
    # Build the metric line: metric name followed by the current epoch
    # seconds as its value.
    # Attention: keep the explicit newline escape at the end of the line.
    # Without it the push request is rejected with HTTP 400 (bad request).
    - name: "Set Fact"
      set_fact:
        metric: "nightly_restore_successful_generic {{ lookup('pipe', 'date +%s') }}\n"

    # POST the metric line to the gateway; the grouping labels (job,
    # creator, database_engine) are carried in the URL path.
    # The body is the metric fact passed through unmodified.
    # NOTE(review): an earlier comment here mentioned a trim step, but no
    # trim filter is applied to the body - confirm none is intended.
    # The request is repeated up to 5 times, 5 seconds apart, until the
    # gateway answers with HTTP 200.
    - name: "Send metric to prometheus pushgw"
      uri:
        url: "http://{{ prometheus_pushgw_host }}/metrics/job/restore_test/creator/awx-run/database_engine/{{ database_engine }}"
        method: POST
        body: "{{ metric }}"
        status_code: 200
      register: send_prome_metric
      until: send_prome_metric.status == 200
      retries: 5
      delay: 5
############################################################# #############################################################
# Sending smardigo management message to process # Sending smardigo management message to process
############################################################# #############################################################

@ -174,6 +174,30 @@ groups:
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly backup failed." summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly backup failed."
description: "PostgreSQL backup failed." description: "PostgreSQL backup failed."
# Critical alert for the PostgreSQL restore test. The condition is true when
# the value of nightly_restore_successful_generic lies more than 30 * 3600
# seconds behind time(), or when no matching series exists at all; it has
# to hold for 2 minutes before the alert fires.
# NOTE(review): the series produced by absent() presumably carries only the
# labels named in the selector (job, database_engine), so $labels.instance
# is likely empty in that branch - confirm and consider another label.
- alert: nightly_restoretest_failed_postgres
  expr: |
    (time() - nightly_restore_successful_generic{job="restore_test",database_engine="postgres"}) > 30 * 3600 or
    absent(nightly_restore_successful_generic{job="restore_test",database_engine="postgres"})
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly restore test failed."
    description: "PostgreSQL restore test failed."
    identifier: "{{ '{{' }} $labels.instance {{ '}}' }}"
# Critical alert for the MariaDB restore test. The condition is true when
# the value of nightly_restore_successful_generic lies more than 30 * 3600
# seconds behind time(), or when no matching series exists at all; it has
# to hold for 2 minutes before the alert fires.
# NOTE(review): the series produced by absent() presumably carries only the
# labels named in the selector (job, database_engine), so $labels.instance
# is likely empty in that branch - confirm and consider another label.
- alert: nightly_restoretest_failed_maria
  expr: |
    (time() - nightly_restore_successful_generic{job="restore_test",database_engine="maria"}) > 30 * 3600 or
    absent(nightly_restore_successful_generic{job="restore_test",database_engine="maria"})
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> nightly restore test failed."
    description: "MariaDB restore test failed."
    identifier: "{{ '{{' }} $labels.instance {{ '}}' }}"
- alert: megaraid_smart_errors - alert: megaraid_smart_errors
expr: megaraid_smart_errors > 1 expr: megaraid_smart_errors > 1
for: 2m for: 2m

Loading…
Cancel
Save