From e4b93d9ee8316db58b07a014aef0477b6cfbefa1 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 13:04:31 +0200 Subject: [PATCH 1/7] DEV-1029: added debugging stuff to hopefully get more insights --- templates/cm_pg_backup_scripts.yaml | 84 +++++++++++++++++++++++++++++ templates/cm_postgres_bkp.yaml | 4 +- values_nsodev.yaml | 37 +++++++++++++ 3 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 templates/cm_pg_backup_scripts.yaml diff --git a/templates/cm_pg_backup_scripts.yaml b/templates/cm_pg_backup_scripts.yaml new file mode 100644 index 0000000..32d3382 --- /dev/null +++ b/templates/cm_pg_backup_scripts.yaml @@ -0,0 +1,84 @@ +apiVersion: v1 +data: + nsodevops_postgres_backup_wrapper.sh: | + #!/bin/bash + + + + envdir "/run/etc/wal-e.d/env" bash -x /nsoscripts/postgres_backup.sh "/home/postgres/pgdata/pgroot/data" >> /tmp/backup_cron_`date +%F`.log + postgres_backup.sh: | + #!/bin/bash + + # fgoerz + + function log + { + echo "$(date "+%Y-%m-%d %H:%M:%S.%3N") - $0 - $*" + } + + [[ -z $1 ]] && echo "Usage: $0 PGDATA" && exit 1 + + log "I was called as: $0 $*" + + + readonly PGDATA=$1 + DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN + + readonly IN_RECOVERY=$(psql -tXqAc "select pg_is_in_recovery()") + if [[ $IN_RECOVERY == "f" ]]; then + [[ "$WALG_BACKUP_FROM_REPLICA" == "true" ]] && log "Cluster is not in recovery, not running backup" && exit 0 + elif [[ $IN_RECOVERY == "t" ]]; then + [[ "$WALG_BACKUP_FROM_REPLICA" != "true" ]] && log "Cluster is in recovery, not running backup" && exit 0 + else + log "ERROR: Recovery state unknown: $IN_RECOVERY" && exit 1 + fi + + # leave at least 2 days base backups before creating a new one + [[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2 + + if [[ "$USE_WALG_BACKUP" == "true" ]]; then + readonly WAL_E="wal-g" + [[ -z $WALG_BACKUP_COMPRESSION_METHOD ]] || export WALG_COMPRESSION_METHOD=$WALG_BACKUP_COMPRESSION_METHOD + export PGHOST=/var/run/postgresql + else + readonly WAL_E="wal-e" + + # 
Ensure we don't have more workers than CPUs (fall back to 1 if the count cannot be read) + POOL_SIZE=$(grep -c ^processor /proc/cpuinfo 2>/dev/null) || POOL_SIZE=1 + [ "$POOL_SIZE" -gt 4 ] && POOL_SIZE=4 + POOL_SIZE=(--pool-size "$POOL_SIZE") + fi + + BEFORE="" + LEFT=0 + + readonly NOW=$(date +%s -u) + while read -r name last_modified rest; do + last_modified=$(date +%s -ud "$last_modified") + if [ $(((NOW-last_modified)/86400)) -ge $DAYS_TO_RETAIN ]; then + if [ -z "$BEFORE" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then + BEFORE_TIME=$last_modified + BEFORE=$name + fi + else + # count how many backups will remain after we remove everything up to certain date + ((LEFT=LEFT+1)) + fi + done < <($WAL_E backup-list 2> /dev/null | sed '0,/^name\s*\(last_\)\?modified\s*/d') + + # we want keep at least N backups even if the number of days exceeded + if [ ! -z "$BEFORE" ] && [ $LEFT -ge $DAYS_TO_RETAIN ]; then + if [[ "$USE_WALG_BACKUP" == "true" ]]; then + $WAL_E delete before FIND_FULL "$BEFORE" --confirm + else + $WAL_E delete --confirm before "$BEFORE" + fi + fi + + # push a new base backup + log "producing a new backup" + # We reduce the priority of the backup for CPU consumption + exec nice -n 5 $WAL_E backup-push "$PGDATA" "${POOL_SIZE[@]}" +kind: ConfigMap +metadata: + name: pg-backup-script diff --git a/templates/cm_postgres_bkp.yaml b/templates/cm_postgres_bkp.yaml index d59b35d..fb33881 100644 --- a/templates/cm_postgres_bkp.yaml +++ b/templates/cm_postgres_bkp.yaml @@ -10,7 +10,7 @@ data: AWS_REGION: "" AWS_S3_FORCE_PATH_STYLE: "true" # needed for MinIO BACKUP_NUM_TO_RETAIN: "7" - BACKUP_SCHEDULE: "00 2 * * *" +# BACKUP_SCHEDULE: "00 2 * * *" CLONE_USE_WALG_RESTORE: "true" USE_WALG_BACKUP: "true" USE_WALG_RESTORE: "true" @@ -18,4 +18,4 @@ data: WAL_S3_BUCKET: postgres WAL_BUCKET_SCOPE_PREFIX: "" WAL_BUCKET_SCOPE_SUFFIX: "" - CRONTAB: "['* * * * * /nso_scripts/backup-monitoring.sh']" + CRONTAB: "['* * * * * /nso_scripts/backup-monitoring.sh','00 2 * * * /nsoscripts/nsodevops_postgres_backup_wrapper.sh']" diff 
--git a/values_nsodev.yaml b/values_nsodev.yaml index 3bf404d..c5d6a1e 100644 --- a/values_nsodev.yaml +++ b/values_nsodev.yaml @@ -70,6 +70,43 @@ smardigo-connect: spec: volume: size: 11Gi + additionalVolumes: + - + name: backup-monitoring-script + mountPath: /nso_scripts/backup-monitoring.sh + volumeSource: + configMap: + name: backup-monitoring-script + items: + - key: backup-monitoring.sh + path: backup-monitoring.sh + defaultMode: 0777 + targetContainers: + - postgres + - + name: pg-backup-script + mountPath: /nsoscripts/postgres_backup.sh + volumeSource: + configMap: + name: pg-backup-script + items: + - key: postgres_backup.sh + path: postgres_backup.sh + defaultMode: 0777 + targetContainers: + - postgres + - + name: pg-backup-wrapper-script + mountPath: /nsoscripts/nsodevops_postgres_backup_wrapper.sh + volumeSource: + configMap: + name: pg-backup-script + items: + - key: nsodevops_postgres_backup_wrapper.sh + path: nsodevops_postgres_backup_wrapper.sh + defaultMode: 0777 + targetContainers: + - postgres monitoring: alerts: postgres: From 1c8796ae15d60778db2ad26465382a2f3b677fd2 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 13:07:19 +0200 Subject: [PATCH 2/7] DEV-1029: added debugging stuff to hopefully get more insights --- values_nsodev.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/values_nsodev.yaml b/values_nsodev.yaml index c5d6a1e..944b46f 100644 --- a/values_nsodev.yaml +++ b/values_nsodev.yaml @@ -73,13 +73,10 @@ smardigo-connect: additionalVolumes: - name: backup-monitoring-script - mountPath: /nso_scripts/backup-monitoring.sh + mountPath: /nso_scripts volumeSource: configMap: name: backup-monitoring-script - items: - - key: backup-monitoring.sh - path: backup-monitoring.sh defaultMode: 0777 targetContainers: - postgres From fc0366629f911676148830b45d47f7835664f017 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 13:27:35 +0200 Subject: [PATCH 3/7] DEV-1029: added debugging 
stuff to hopefully get more insights - bugfix --- values_nsodev.yaml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/values_nsodev.yaml b/values_nsodev.yaml index 944b46f..f68dbf9 100644 --- a/values_nsodev.yaml +++ b/values_nsodev.yaml @@ -86,21 +86,6 @@ smardigo-connect: volumeSource: configMap: name: pg-backup-script - items: - - key: postgres_backup.sh - path: postgres_backup.sh - defaultMode: 0777 - targetContainers: - - postgres - - - name: pg-backup-wrapper-script - mountPath: /nsoscripts/nsodevops_postgres_backup_wrapper.sh - volumeSource: - configMap: - name: pg-backup-script - items: - - key: nsodevops_postgres_backup_wrapper.sh - path: nsodevops_postgres_backup_wrapper.sh defaultMode: 0777 targetContainers: - postgres From 955c708d10f99db82900c06758b94ce59a7dde63 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 13:34:21 +0200 Subject: [PATCH 4/7] DEV-1029: added debugging stuff to hopefully get more insights - bugfix2 --- values_nsodev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/values_nsodev.yaml b/values_nsodev.yaml index f68dbf9..f268417 100644 --- a/values_nsodev.yaml +++ b/values_nsodev.yaml @@ -82,7 +82,7 @@ smardigo-connect: - postgres - name: pg-backup-script - mountPath: /nsoscripts/postgres_backup.sh + mountPath: /nsoscripts volumeSource: configMap: name: pg-backup-script From e3dfecf4e8aee2135a6c441ea850279aac9a2364 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 22:08:27 +0200 Subject: [PATCH 5/7] DEV-1029: added debugging stuff to hopefully get more insights - bugfix3 --- templates/cm_pg_backup_scripts.yaml | 12 +++++------- templates/cm_postgres_bkp.yaml | 2 +- values_nsodev.yaml | 3 ++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/templates/cm_pg_backup_scripts.yaml b/templates/cm_pg_backup_scripts.yaml index 32d3382..7bfe3a9 100644 --- a/templates/cm_pg_backup_scripts.yaml +++ b/templates/cm_pg_backup_scripts.yaml @@ -1,15 +1,13 @@ 
apiVersion: v1 data: - nsodevops_postgres_backup_wrapper.sh: | - #!/bin/bash - - - - envdir "/run/etc/wal-e.d/env" bash -x /nsoscripts/postgres_backup.sh "/home/postgres/pgdata/pgroot/data" >> /tmp/backup_cron_`date +%F`.log postgres_backup.sh: | #!/bin/bash - # fgoerz + DEBUG_LOG="/tmp/pg_backup_`date +%F`.log" + echo "Plz check $DEBUG_LOG for debugging purpose. EVERY output will be redirected!" + # fgoerz DEV-1029 + # pipe all output to file for debugging purpose + exec 2>&1 1>$DEBUG_LOG function log { diff --git a/templates/cm_postgres_bkp.yaml b/templates/cm_postgres_bkp.yaml index fb33881..ae2d9b6 100644 --- a/templates/cm_postgres_bkp.yaml +++ b/templates/cm_postgres_bkp.yaml @@ -10,7 +10,7 @@ data: AWS_REGION: "" AWS_S3_FORCE_PATH_STYLE: "true" # needed for MinIO BACKUP_NUM_TO_RETAIN: "7" -# BACKUP_SCHEDULE: "00 2 * * *" + BACKUP_SCHEDULE: "00 2 * * *" CLONE_USE_WALG_RESTORE: "true" USE_WALG_BACKUP: "true" USE_WALG_RESTORE: "true" diff --git a/values_nsodev.yaml b/values_nsodev.yaml index f268417..72af017 100644 --- a/values_nsodev.yaml +++ b/values_nsodev.yaml @@ -82,7 +82,8 @@ smardigo-connect: - postgres - name: pg-backup-script - mountPath: /nsoscripts + mountPath: /scripts/postgres_backup.sh + subPath: postgres_backup.sh volumeSource: configMap: name: pg-backup-script From 1278c452919b6388fc3d367aa0072770811e3606 Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 22:09:15 +0200 Subject: [PATCH 6/7] DEV-1029: added debugging stuff to hopefully get more insights - bugfix3 --- templates/cm_postgres_bkp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/cm_postgres_bkp.yaml b/templates/cm_postgres_bkp.yaml index ae2d9b6..d59b35d 100644 --- a/templates/cm_postgres_bkp.yaml +++ b/templates/cm_postgres_bkp.yaml @@ -18,4 +18,4 @@ data: WAL_S3_BUCKET: postgres WAL_BUCKET_SCOPE_PREFIX: "" WAL_BUCKET_SCOPE_SUFFIX: "" - CRONTAB: "['* * * * * /nso_scripts/backup-monitoring.sh','00 2 * * * 
/nsoscripts/nsodevops_postgres_backup_wrapper.sh']" + CRONTAB: "['* * * * * /nso_scripts/backup-monitoring.sh']" From 3221fa16efa272d94d63d8ba831573e515bd6dbe Mon Sep 17 00:00:00 2001 From: friedrich goerz Date: Wed, 3 May 2023 22:34:50 +0200 Subject: [PATCH 7/7] DEV-1029: added debugging stuff to hopefully get more insights - bugfix4 --- templates/cm_pg_backup_scripts.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/cm_pg_backup_scripts.yaml b/templates/cm_pg_backup_scripts.yaml index 7bfe3a9..6354fdb 100644 --- a/templates/cm_pg_backup_scripts.yaml +++ b/templates/cm_pg_backup_scripts.yaml @@ -3,11 +3,11 @@ data: postgres_backup.sh: | #!/bin/bash - DEBUG_LOG="/tmp/pg_backup_`date +%F`.log" - echo "Plz check $DEBUG_LOG for debugging purpose. EVERY output will be redirected!" + echo "Plz check /tmp/pg_backup_$(date +%F).log for debugging purpose. EVERY output will be redirected!" # fgoerz DEV-1029 # pipe all output to file for debugging purpose - exec 2>&1 1>$DEBUG_LOG + exec 1>>"/tmp/pg_backup_$(date +%F).log" + exec 2>&1 function log {