From c875ef3ecda0e2e9ec6a26552c74aa444fc2787b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4hnel?= Date: Thu, 14 Sep 2023 17:16:46 +0000 Subject: [PATCH] Rollout KW 37 --- group_vars/stage_prodwork01/plain.yml | 3 + host_vars/demompmx-postgres-restore.yml | 3 + host_vars/demompmx-postgres01-01.yml | 3 + host_vars/demompmx-postgres01-02.yml | 3 + roles/postgres/tasks/_create_backup.yml | 124 ++++++++++++------ .../postgres/tasks/_update_database_state.yml | 33 +++-- stage-prodwork01 | 4 + 7 files changed, 114 insertions(+), 59 deletions(-) create mode 100644 host_vars/demompmx-postgres-restore.yml diff --git a/group_vars/stage_prodwork01/plain.yml b/group_vars/stage_prodwork01/plain.yml index 9578440..226ba67 100644 --- a/group_vars/stage_prodwork01/plain.yml +++ b/group_vars/stage_prodwork01/plain.yml @@ -7,6 +7,9 @@ shared_service_network: "10.3.0.0/16" filebeat_enabled: false shared_service_hostname_harbor: "prodnso-harbor-01.{{ domain }}" +shared_service_mail_hostname: "{{ stage }}-mail-01.{{ domain_env }}" +shared_service_hostname_keycloak: "keycloak-prodwork01.{{ domain_env }}" +shared_service_url_keycloak: "https://{{ shared_service_hostname_keycloak }}" netgo_msteams_hook_cd: "{{ netgo_msteams_hook_cd_vault }}" netgo_msteams_hook_alerting: "{{ netgo_msteams_hook_alerting_vault }}" diff --git a/host_vars/demompmx-postgres-restore.yml b/host_vars/demompmx-postgres-restore.yml new file mode 100644 index 0000000..7e4e7ef --- /dev/null +++ b/host_vars/demompmx-postgres-restore.yml @@ -0,0 +1,3 @@ +--- +postgres_pgdatadir_lvm_hcloudvol_size: 20 +postgres_pgdatadir_lvm_hcloudvol_count: 2 diff --git a/host_vars/demompmx-postgres01-01.yml b/host_vars/demompmx-postgres01-01.yml index bc65616..cf4b320 100644 --- a/host_vars/demompmx-postgres01-01.yml +++ b/host_vars/demompmx-postgres01-01.yml @@ -4,4 +4,7 @@ server_type: "master" shared_service_postgres_primary: "{{ stage }}-postgres01-01" shared_service_postgres_secondary: "{{ stage }}-postgres01-02" +postgres_pgdatadir_lvm_hcloudvol_size: 20 +postgres_pgdatadir_lvm_hcloudvol_count: 2 + hetzner_server_type: cpx21 diff --git a/host_vars/demompmx-postgres01-02.yml b/host_vars/demompmx-postgres01-02.yml index 55c6013..5933629 100644 --- a/host_vars/demompmx-postgres01-02.yml +++ b/host_vars/demompmx-postgres01-02.yml @@ -4,4 +4,7 @@ server_type: "slave" shared_service_postgres_primary: "{{ stage }}-postgres01-01" shared_service_postgres_secondary: "{{ stage }}-postgres01-02" +postgres_pgdatadir_lvm_hcloudvol_size: 20 +postgres_pgdatadir_lvm_hcloudvol_count: 2 + hetzner_server_type: cpx21 diff --git a/roles/postgres/tasks/_create_backup.yml b/roles/postgres/tasks/_create_backup.yml index 838e558..dbabeca 100644 --- a/roles/postgres/tasks/_create_backup.yml +++ b/roles/postgres/tasks/_create_backup.yml @@ -1,20 +1,20 @@ --- - name: "Ensure needed packages" - become: yes + become: true package: name: pigz - name: "Create destination backup directory" - become: yes + become: true ansible.builtin.file: - path: '{{ backup_dest_dir }}' + path: "{{ backup_dest_dir }}" state: directory - mode: '0755' + mode: "0755" owner: postgres group: postgres - name: "Block: gpg stuff" - become: yes + become: true become_user: postgres block: - name: Create temp dir @@ -30,44 +30,84 @@ dest: "{{ tempdir.path }}" version: master -# there is no ansible gpg module already in place -# linting violation needs to be whitelisted + # there is no ansible gpg module already in place + # linting violation needs to be whitelisted - name: "Importing stage specific automation gpg-key" # noqa command-instead-of-shell - shell: 'gpg --import {{ tempdir.path }}/{{ backup_communication_keys_stage_gpg_key }}' + shell: "gpg --import {{ tempdir.path }}/{{ backup_communication_keys_stage_gpg_key }}" -# there is no ansible module already in place for (pg_basebackup|gpg) -# so using shell module -- name: "Creating pg_basebackup ... + doing async check if successful or not" - become: yes - become_user: postgres - vars: - backup_file: '{{ backup_dest_dir }}/basebackup_{{ current_date_time }}.tar.gz' - shell: | - set -o pipefail - /usr/bin/pg_basebackup -Ft -X fetch -D - | nice -n {{ postgres_backup_niceness_pigz | default(8) }} pigz -p 2 > {{ backup_file }} && \ - nice -n {{ postgres_backup_niceness_gpg | default(10) }} gpg --encrypt --recipient "{{ backup_gpg_recipient }}" --trust-model always {{ backup_file }} && \ - rm {{ backup_file }} - args: - executable: /bin/bash - async: 3600 # allows duration for task up to 3600sec - poll: 30 # rechecks every 30sec if task has finished yet - changed_when: false +- name: "Block: Creating pg_basebackup" + become: true + block: + - name: "Set common variables" + set_fact: + backup_file: "{{ backup_dest_dir }}/basebackup_{{ current_date_time }}.tar.gz" + backup_status_file: "{{ backup_status_file }}_{{ current_date_time }}" + # there is no ansible module already in place for (pg_basebackup|gpg) + # so using shell module + - name: "Creating pg_basebackup ... + doing async check if successful or not" + become: true + become_user: postgres + shell: | + set -o pipefail + /usr/bin/pg_basebackup -Ft -X fetch -D - | nice -n {{ postgres_backup_niceness_pigz | default(8) }} pigz -p 2 > {{ backup_file }} && \ + nice -n {{ postgres_backup_niceness_gpg | default(10) }} gpg --encrypt --recipient "{{ backup_gpg_recipient }}" --trust-model always {{ backup_file }} && \ + rm {{ backup_file }} + args: + executable: /bin/bash + async: 3600 # allows duration for task up to 3600sec + poll: 30 # rechecks every 30sec if task has finished yet + register: backup_result + changed_when: false -# just to make it easier to detect potential failures. -# maybe: can be removed later -- name: "Create STATUS file for successful backup" - become: yes - file: - path: '{{ backup_status_file }}_{{ current_date_time }}' - state: touch - mode: '0644' - owner: postgres - group: postgres + - name: "Save output to {{ backup_status_file }}" + copy: + content: "{{ backup_result }}" + dest: "{{ backup_status_file }}" -- name: "Prepare backup dir..." - become: yes - ansible.builtin.file: - path: '{{ backup_dest_dir }}' - owner: '{{ backupuser_user_name }}' - group: '{{ backupuser_user_name }}' - recurse: yes + - name: "Change ownership of {{ backup_dest_dir }} to {{ backupuser_user_name }}:{{ backupuser_user_name }}" + ansible.builtin.file: + path: "{{ backup_dest_dir }}" + owner: "{{ backupuser_user_name }}" + group: "{{ backupuser_user_name }}" + recurse: yes + rescue: + - name: "Rescue: Save output to {{ backup_status_file }}_with_failures" + copy: + content: "{{ backup_result }}" + dest: "{{ backup_status_file }}_with_failures" + when: backup_result.failed + + - name: "Rescue: Delete {{ backup_file }} on failure" + file: + path: "{{ backup_file }}" + state: absent + when: backup_result.failed + + - name: "Rescue: Sending e-mail to devops team" + delegate_to: "{{ stage }}-mail-01.smardigo.digital" + community.general.mail: + host: "{{ stage }}-mail-01.smardigo.digital" + port: 25 + from: "noreply@smardigo.digital" + to: "{{ devops_email_address }}" + subject: "Backup Postgresql on {{ inventory_hostname }} ( {{ lookup('pipe','date +%Y-%m-%d_%H:%M') }} ) problem report for failed postgresql basebackup" + body: | + Dear Sir or Madam + creation of postgresql basebackup failed on host {{ inventory_hostname }} + Plz check what happened/ fix it little padawan ; + kind regards + your automation-bofh + + Error report below + + --- + + {{ backup_result.stderr }} + + --- + when: backup_result.failed + + - name: "Rescue: Stop backup because of failure" + fail: + msg: "Postgres backup failed. See the status file for more information about what happened." + when: backup_result.failed diff --git a/roles/postgres/tasks/_update_database_state.yml b/roles/postgres/tasks/_update_database_state.yml index e34834a..a29b73f 100644 --- a/roles/postgres/tasks/_update_database_state.yml +++ b/roles/postgres/tasks/_update_database_state.yml @@ -78,22 +78,6 @@ - item.stdout == '0' - server_type == 'master' -- name: "Grant CREATE privilege on public schema if necessary" - community.postgresql.postgresql_privs: - role: "{{ item.item.name }}" - type: schema - priv: ALL - objs: public - login_user: "{{ postgres_admin_user }}" - database: "{{ item.item.name }}" - state: present - loop: "{{ role_check.results }}" - become: true - become_user: "{{ postgres_admin_user }}" - when: - - database_state == 'present' - - server_type == 'master' - - name: "Deleting Databases if necessary" shell: '/usr/bin/psql -c "DROP DATABASE {{ item.item.name }} WITH (FORCE);"' with_items: "{{ database_check.results }}" @@ -145,13 +129,28 @@ - name: "Get list of all databases" community.postgresql.postgresql_query: - query: "SELECT datname FROM pg_database WHERE datistemplate = false" + query: "SELECT datname FROM pg_database WHERE datname LIKE '{{ stage }}_%' AND datistemplate = false" login_user: "{{ postgres_admin_user }}" db: "{{ postgres_admin_user }}" register: database_list become: true become_user: "{{ postgres_admin_user }}" +- name: "Grant ALL privileges on public schema to application specific roles if necessary" + community.postgresql.postgresql_privs: + role: "{{ item.datname }}" + type: schema + priv: ALL + objs: public + login_user: "{{ postgres_admin_user }}" + database: "{{ item.datname }}" + state: present + loop: "{{ database_list.query_result }}" + become: true + become_user: "{{ postgres_admin_user }}" + when: + - server_type == 'master' + - name: "Revoke CREATE privilege on public schema for group postgres_readonly" community.postgresql.postgresql_privs: role: "postgres_readonly" diff --git a/stage-prodwork01 b/stage-prodwork01 index 6dcd041..e1a8ed6 100644 --- a/stage-prodwork01 +++ b/stage-prodwork01 @@ -1,3 +1,6 @@ +[postfix] +prodwork01-mail-01 + [backup_minio] prodwork01-backup-01 @@ -25,6 +28,7 @@ kube_control_plane kube_node [stage_prodwork01:children] +postfix k8s_cluster backup_minio