diff --git a/roles/backup/tasks/main.yml b/roles/backup/tasks/main.yml
index d81b2cd..f2f83cc 100644
--- a/roles/backup/tasks/main.yml
+++ b/roles/backup/tasks/main.yml
@@ -37,3 +37,25 @@
   with_items:
     - pull_remote_backups.sh
     - push_backups_to_restore_server.sh
+
+# Make sure the metrics file exists so the symlink created below has a target.
+# Preserving both timestamps keeps "state: touch" idempotent (otherwise the task
+# reports "changed" on every run); 0644 because a data file needs no execute bit.
+- name: Create metrics.prom if it does not exist
+  file:
+    path: "/home/{{ system_user }}/metrics.prom"
+    state: touch
+    mode: '0644'
+    owner: '{{ system_user }}'
+    group: '{{ system_user }}'
+    modification_time: preserve
+    access_time: preserve
+
+# Link the metrics file into the node-exporter directory; presumably this is the
+# textfile collector directory configured on the target hosts (confirm).
+- name: Create symbolic link for node_exporter text metrics
+  file:
+    src: "/home/{{ system_user }}/metrics.prom"
+    dest: "/var/lib/prometheus/node-exporter/offsite-metrics.prom"
+    state: link
+
diff --git a/templates/prometheus/config/prometheus/alert.rules.j2 b/templates/prometheus/config/prometheus/alert.rules.j2
index defac25..e2db780 100644
--- a/templates/prometheus/config/prometheus/alert.rules.j2
+++ b/templates/prometheus/config/prometheus/alert.rules.j2
@@ -299,3 +299,36 @@ groups:
       identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
       summary: "ssh root login on Instance <{{ '{{' }} $labels.instance {{ '}}' }}> detected. plz check"
       description: "unexpected ssh root login detected."
+
+  - alert: offsite backup pending  # any backup phase timestamp is older than 25 hours
+    expr: |
+      (time() - offsite_backup_archive_started_seconds) > 25 * 3600 or
+      (time() - offsite_backup_archive_ended_seconds) > 25 * 3600 or
+      (time() - offsite_backup_transfer_started_seconds) > 25 * 3600 or
+      (time() - offsite_backup_transfer_ended_seconds) > 25 * 3600 or
+      (time() - offsite_backup_forget_started_seconds) > 25 * 3600 or
+      (time() - offsite_backup_forget_ended_seconds) > 25 * 3600
+    for: 1m  # the condition must hold for one minute before the alert fires
+    labels:
+      severity: critical
+    annotations:
+      identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
+      summary: "one or more offsite backup metric timestamps for Instance <{{ '{{' }} $labels.instance {{ '}}' }}> older than 25h"
+      description: "offsite backups older than 25h."
+
+  - alert: offsite backup metrics unavailable  # fires only when a metric has no series at all, not per instance
+    expr: |
+      absent(offsite_backup_archive_started_seconds) or
+      absent(offsite_backup_archive_ended_seconds) or
+      absent(offsite_backup_transfer_started_seconds) or
+      absent(offsite_backup_transfer_ended_seconds) or
+      absent(offsite_backup_forget_started_seconds) or
+      absent(offsite_backup_forget_ended_seconds)
+    for: 5m  # the metrics must stay absent for five minutes before the alert fires
+    labels:
+      severity: critical
+    annotations:
+      identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'  # NOTE(review): absent() results carry no instance label, so this presumably renders empty
+      summary: "one or more offsite backup metrics are not exported by any instance"
+      description: "offsite metrics unavailable."
+
diff --git a/users/backuphamster/ssh.pub b/users/backuphamster/ssh.pub
index 70139a1..0cd58aa 100644
--- a/users/backuphamster/ssh.pub
+++ b/users/backuphamster/ssh.pub
@@ -1 +1 @@
-ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFRlmqgkIJxBC45cbVX25P1Uam/+Ct7XFvgMm60TDOWkQiTuVp5vd1sHq2HCRRfGxPrsKmwSQS5wMYIjeiclTag= friedrich@friedrich-HP-ZBook
+ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILth9bbKnrSAgaZdvWor2OihrOVfxi0TSO6EuNUhQnND backupuser@offsite1.dev-at.de