DEV-721: exclude restore-servers from patchday - avoiding broken...

qa
Görz, Friedrich 3 years ago committed by Ketelsen, Sven
parent 2fc032f047
commit e1d05f5e81

@ -235,7 +235,7 @@
state: present
loop: '{{ docker_compose_services.files }}'
- hosts: all,!elastic,!postgres,!k8s_cluster,!iam
- hosts: all,!elastic,!postgres,!k8s_cluster,!iam,!restore
serial: 10
become: yes
tasks:

@ -14,7 +14,6 @@ DATABASE_SERVER_IP=$1
STAGE=$2
DATABASE_ENGINE=$3
DEST_DIR=${HOME}/backups/${STAGE}/${DATABASE_ENGINE}
BACKUP_STATUS_FILE=${DEST_DIR}/${DATE}/backup_finished_${DATE}_*
METRICS_FILE=${HOME}/backup_status_${DATABASE_ENGINE}.prom
LOG_FILE=${DEST_DIR}/backup_${DATE_TIME}.log
@ -31,19 +30,31 @@ mkdir -p ${DEST_DIR}
find $DEST_DIR -type d -mtime +1 -print0 | xargs -I OLD_DIR -0 rm -rf "OLD_DIR"
[ "$?" != "0" ] && exit 1
echo "Removing logfiles older than 7d ..."
find $DEST_DIR -type f -mtime +7 -name "backup_*.log" -print0 | xargs -I OLD_FILES -0 rm -rf "OLD_FILES"
# Start rsync job from ${DATABASE_SERVER_IP} to ${DEST_DIR}/
rsync -av --remove-source-files -e "ssh -o StrictHostKeyChecking=no" ${REMOTE_SYSTEM_USER}@${DATABASE_SERVER_IP}:/backups/${DATABASE_ENGINE}/ ${DEST_DIR}/
[ "$?" -eq "0" ] && NIGHTLY_BACKUP_SUCCESSFUL="0" || NIGHTLY_BACKUP_SUCCESSFUL="1"
# Pick the most recently modified status marker for today's backup (empty if none exists).
BACKUP_STATUS_FILE=$(ls -t1 "${DEST_DIR}/${DATE}/backup_finished_${DATE}_"* | head -n1)
# Check existence of current ${BACKUP_STATUS_FILE}, which is created by AWX, in case of successful database backup only.
# The quotes are essential: when no marker was found the variable is empty, and
# an unquoted `[ -f ]` collapses to a one-argument test that is always true,
# which would report a missing backup as successful.
if [ -f "${BACKUP_STATUS_FILE}" ]; then
  NIGHTLY_BACKUP_SUCCESSFUL="0"
else
  NIGHTLY_BACKUP_SUCCESSFUL="1"
fi
# Add backup status to Prometheus metrics file.
# On success the file is (re)written with the status gauge and a completion
# timestamp; on failure it is removed so that the absent()-based alert fires.
if [ "$NIGHTLY_BACKUP_SUCCESSFUL" -eq "0" ]; then
  echo "NIGHTLY_BACKUP_SUCCESSFUL=0 - writing METRICS_FILE"
  # Write to a temporary file and rename it into place, so a reader of
  # ${METRICS_FILE} never sees a partially written file.
  cat <<EOF > "${METRICS_FILE}.$$"
# HELP nightly_backup_successful_${DATABASE_ENGINE}
# TYPE nightly_backup_successful_${DATABASE_ENGINE} gauge
nightly_backup_successful_${DATABASE_ENGINE}{stage="$STAGE"} $NIGHTLY_BACKUP_SUCCESSFUL
nightly_backup_successful_${DATABASE_ENGINE}_finished_seconds{stage="$STAGE"} $(date +%s)
EOF
  mv "${METRICS_FILE}.$$" "${METRICS_FILE}"
else
  echo "NIGHTLY_BACKUP_SUCCESSFUL=1 - removing METRICS_FILE to trigger alert"
  # -f: the file is already gone after a previous failed run; do not error on that.
  rm -f "${METRICS_FILE}"
fi
# Log backup sync end time
echo "----- End backup Sync - ${DATE_TIME} -----"

@ -130,7 +130,9 @@ groups:
description: "backup failed."
- alert: nightly_backup_failed_maria
expr: nightly_backup_successful_maria > 0 or absent(nightly_backup_successful_maria)
expr: |
(time() - nightly_backup_successful_maria_finished_seconds) > 30 * 3600 or
absent(nightly_backup_successful_maria_finished_seconds)
for: 2m
labels:
severity: critical
@ -140,7 +142,9 @@ groups:
description: "MariaDB backup failed."
- alert: nightly_backup_failed_postgres
expr: nightly_backup_successful_postgres > 0 or absent(nightly_backup_successful_postgres)
expr: |
(time() - nightly_backup_successful_postgres_finished_seconds) > 30 * 3600 or
absent(nightly_backup_successful_postgres_finished_seconds)
for: 2m
labels:
severity: critical

Loading…
Cancel
Save