DEV-421: refactored installation for postgres-exporter + installed newer...

feature/DEV-380
Görz, Friedrich 4 years ago committed by Ketelsen, Sven
parent a2fa12ef40
commit f0eab6d3ae

@ -128,21 +128,235 @@ postgres_ssl_config:
# Effective postgres settings: the base config followed by the SSL entries
# (an empty list is used when postgres_ssl_config is not defined).
postgres_config: '{{ postgres_base_config + ( postgres_ssl_config | default([]) ) }}'
# Settings for the distribution-packaged exporter. Each entry pairs a regex that
# selects a line with the full replacement line; the lineinfile task that edits
# /etc/default/prometheus-postgres-exporter loops over this list.
prometheus_postgres_exporter_config:
  - regex: '^ARGS='
    line: "ARGS=\"--web.listen-address='{{ stage_private_server_ip }}:{{ monitor_port_postgres }}'\""
  - regex: '^DATA_SOURCE_URI'
    line: "DATA_SOURCE_URI='postgres@:5432/postgres?host=/var/run/postgresql'"
  - regex: '^PG_EXPORTER_AUTO_DISCOVER_DATABASES'
    line: 'PG_EXPORTER_AUTO_DISCOVER_DATABASES=true'
  - regex: '^PG_EXPORTER_EXTEND_QUERY_PATH'
    line: 'PG_EXPORTER_EXTEND_QUERY_PATH=/metrics/queries.yaml'
# Settings for the postgres_exporter release that is downloaded from GitHub.
# Account the service runs under; the group follows the user.
postgres_exporter_user: postgres
postgres_exporter_group: '{{ postgres_exporter_user }}'
# Directory the release tarball is unpacked into (value ends with a slash).
postgres_exporter_dir: '/opt/'
postgres_exporter_version: '0.10.1'
# NOTE(review): no task visible in this change references the checksum;
# confirm whether the download is supposed to be verified against it.
postgres_exporter_checksum: 'sha256:5344afe06a90c3cbd52803d56031bfcbcff78b56448e16c9228697ea0a2577b7'
postgres_exporter_dist: 'postgres_exporter-{{ postgres_exporter_version }}.linux-amd64'
postgres_exporter_download_url: 'https://github.com/prometheus-community/postgres_exporter/releases/download/v{{ postgres_exporter_version }}/{{ postgres_exporter_dist }}.tar.gz'
# Full path of the extracted binary; used as ExecStart in the systemd unit.
postgres_exporter_binary: '{{ postgres_exporter_dir }}{{ postgres_exporter_dist }}/postgres_exporter'
postgres_exporter_datasource_uri: 'postgres@:5432/postgres?host=/var/run/postgresql'
# Home directory that receives queries.yml; differs between RedHat and other OS families.
postgres_exporter_home: "{{ '/var/lib/pgsql' if ansible_os_family == 'RedHat' else '/var/lib/postgresql' }}"
# Command-line flags; joined with spaces into FLAGS by the config template.
postgres_exporter_flags:
  - "--web.listen-address='{{ stage_private_server_ip }}:{{ monitor_port_postgres }}'"
  - "--auto-discover-databases"
  - "--extend.query-path={{ postgres_exporter_home }}/queries.yml"
# Environment file read by the systemd unit (EnvironmentFile=).
postgres_exporter_config_file: '/etc/default/postgres_exporter'
# got several queries from here:
# https://raw.githubusercontent.com/bdellegrazie/ansible-role-postgres_exporter/b01ae2aae53e02a0778ce6c06361cfb6af2a50c2/files/queries.yml
#
# Custom queries for postgres_exporter. The install tasks dump this mapping with
# to_nice_yaml into {{ postgres_exporter_home }}/queries.yml, which the exporter
# loads through --extend.query-path. Each top-level key is a metric namespace;
# every selected column must be listed under `metrics` with its usage.
postgres_exporter_additional_queries:
  pg_replication:
    # pg_last_xact_replay_timestamp() is NULL on a server that is not in
    # recovery, so report 0 there; GREATEST guards against negative values.
    query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST(0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
    master: true
    metrics:
      - lag:
          usage: "GAUGE"
          description: "Replication lag behind master in seconds"
  pg_postmaster:
    query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
    master: true
    metrics:
      - start_time_seconds:
          usage: "GAUGE"
          description: "Time at which postmaster started"
  pg_stat_user_tables:
    # The original statement selected COALESCE(last_vacuum, ...) twice, once
    # without an alias; the unaliased duplicate has been removed so that every
    # column maps to a metric declared below.
    query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
    metrics:
      - datname:
          usage: "LABEL"
          description: "Name of current database"
      - schemaname:
          usage: "LABEL"
          description: "Name of the schema that this table is in"
      - relname:
          usage: "LABEL"
          description: "Name of this table"
      - seq_scan:
          usage: "COUNTER"
          description: "Number of sequential scans initiated on this table"
      - seq_tup_read:
          usage: "COUNTER"
          description: "Number of live rows fetched by sequential scans"
      - idx_scan:
          usage: "COUNTER"
          description: "Number of index scans initiated on this table"
      - idx_tup_fetch:
          usage: "COUNTER"
          description: "Number of live rows fetched by index scans"
      - n_tup_ins:
          usage: "COUNTER"
          description: "Number of rows inserted"
      - n_tup_upd:
          usage: "COUNTER"
          description: "Number of rows updated"
      - n_tup_del:
          usage: "COUNTER"
          description: "Number of rows deleted"
      - n_tup_hot_upd:
          usage: "COUNTER"
          description: "Number of rows HOT updated (i.e., with no separate index update required)"
      - n_live_tup:
          usage: "GAUGE"
          description: "Estimated number of live rows"
      - n_dead_tup:
          usage: "GAUGE"
          description: "Estimated number of dead rows"
      - n_mod_since_analyze:
          usage: "GAUGE"
          description: "Estimated number of rows changed since last analyze"
      - last_vacuum:
          usage: "GAUGE"
          description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
      - last_autovacuum:
          usage: "GAUGE"
          description: "Last time at which this table was vacuumed by the autovacuum daemon"
      - last_analyze:
          usage: "GAUGE"
          description: "Last time at which this table was manually analyzed"
      - last_autoanalyze:
          usage: "GAUGE"
          description: "Last time at which this table was analyzed by the autovacuum daemon"
      - vacuum_count:
          usage: "COUNTER"
          description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
      - autovacuum_count:
          usage: "COUNTER"
          description: "Number of times this table has been vacuumed by the autovacuum daemon"
      - analyze_count:
          usage: "COUNTER"
          description: "Number of times this table has been manually analyzed"
      - autoanalyze_count:
          usage: "COUNTER"
          description: "Number of times this table has been analyzed by the autovacuum daemon"
  pg_statio_user_tables:
    query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
    metrics:
      - datname:
          usage: "LABEL"
          description: "Name of current database"
      - schemaname:
          usage: "LABEL"
          description: "Name of the schema that this table is in"
      - relname:
          usage: "LABEL"
          description: "Name of this table"
      - heap_blks_read:
          usage: "COUNTER"
          description: "Number of disk blocks read from this table"
      - heap_blks_hit:
          usage: "COUNTER"
          description: "Number of buffer hits in this table"
      - idx_blks_read:
          usage: "COUNTER"
          description: "Number of disk blocks read from all indexes on this table"
      - idx_blks_hit:
          usage: "COUNTER"
          description: "Number of buffer hits in all indexes on this table"
      - toast_blks_read:
          usage: "COUNTER"
          description: "Number of disk blocks read from this table's TOAST table (if any)"
      - toast_blks_hit:
          usage: "COUNTER"
          description: "Number of buffer hits in this table's TOAST table (if any)"
      - tidx_blks_read:
          usage: "COUNTER"
          description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
      - tidx_blks_hit:
          usage: "COUNTER"
          description: "Number of buffer hits in this table's TOAST table indexes (if any)"
  pg_database:
    query: "SELECT pg_database.datname, pg_database_size(pg_database.datname) as size_bytes FROM pg_database"
    master: true
    cache_seconds: 30
    metrics:
      - datname:
          usage: "LABEL"
          description: "Name of the database"
      - size_bytes:
          usage: "GAUGE"
          description: "Disk space used by the database"
  pg_stat_statements:
    # All *_time columns are divided by 1000 in the query, so the exported
    # values (and the descriptions below) are in seconds, not milliseconds.
    query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'"
    master: true
    metrics:
      - rolname:
          usage: "LABEL"
          description: "Name of user"
      - datname:
          usage: "LABEL"
          description: "Name of database"
      - queryid:
          usage: "LABEL"
          description: "Query ID"
      - calls:
          usage: "COUNTER"
          description: "Number of times executed"
      - total_time_seconds:
          usage: "COUNTER"
          description: "Total time spent in the statement, in seconds"
      - min_time_seconds:
          usage: "GAUGE"
          description: "Minimum time spent in the statement, in seconds"
      - max_time_seconds:
          usage: "GAUGE"
          description: "Maximum time spent in the statement, in seconds"
      - mean_time_seconds:
          usage: "GAUGE"
          description: "Mean time spent in the statement, in seconds"
      - stddev_time_seconds:
          usage: "GAUGE"
          description: "Population standard deviation of time spent in the statement, in seconds"
      - rows:
          usage: "COUNTER"
          description: "Total number of rows retrieved or affected by the statement"
      - shared_blks_hit:
          usage: "COUNTER"
          description: "Total number of shared block cache hits by the statement"
      - shared_blks_read:
          usage: "COUNTER"
          description: "Total number of shared blocks read by the statement"
      - shared_blks_dirtied:
          usage: "COUNTER"
          description: "Total number of shared blocks dirtied by the statement"
      - shared_blks_written:
          usage: "COUNTER"
          description: "Total number of shared blocks written by the statement"
      - local_blks_hit:
          usage: "COUNTER"
          description: "Total number of local block cache hits by the statement"
      - local_blks_read:
          usage: "COUNTER"
          description: "Total number of local blocks read by the statement"
      - local_blks_dirtied:
          usage: "COUNTER"
          description: "Total number of local blocks dirtied by the statement"
      - local_blks_written:
          usage: "COUNTER"
          description: "Total number of local blocks written by the statement"
      - temp_blks_read:
          usage: "COUNTER"
          description: "Total number of temp blocks read by the statement"
      - temp_blks_written:
          usage: "COUNTER"
          description: "Total number of temp blocks written by the statement"
      - blk_read_time_seconds:
          usage: "COUNTER"
          description: "Total time the statement spent reading blocks, in seconds (if track_io_timing is enabled, otherwise zero)"
      - blk_write_time_seconds:
          usage: "COUNTER"
          description: "Total time the statement spent writing blocks, in seconds (if track_io_timing is enabled, otherwise zero)"
# Engine name; it forms the middle path segment of the backup destination below.
database_engine: postgres
# Backup target: <backup_directory>/<database_engine>/<get_current_date>.
backup_dest_dir: "{{ backup_directory }}/{{ database_engine }}/{{ get_current_date }}"

@ -4,7 +4,7 @@
name: postgresql
state: restarted
# Handler that restarts the exporter service. This hunk lists the previous
# handler/service name (prometheus-postgres-exporter) immediately followed by
# its replacement (postgres_exporter); tasks notify it as "restart postgres_exporter".
- name: "restart prometheus-postgres-exporter"
- name: "restart postgres_exporter"
service:
name: prometheus-postgres-exporter
name: postgres_exporter
state: restarted

@ -83,54 +83,11 @@
group: postgres
mode: "g+s"
# Install the exporter from the distribution package.
- name: "Install prometheus postgres exporter..."
  apt:
    name: "prometheus-postgres-exporter"
    state: present
    update_cache: true
    cache_valid_time: 900

# Rewrite the package defaults file one line at a time; each entry of
# prometheus_postgres_exporter_config supplies the regex and the new line.
- name: "Ensure config for prometheus-postgres-exporter via evil lineinfile..."
  lineinfile:
    path: /etc/default/prometheus-postgres-exporter
    regex: "{{ item.regex }}"
    line: "{{ item.line }}"
    state: present
  loop: '{{ prometheus_postgres_exporter_config }}'
  notify: restart prometheus-postgres-exporter

# Location of the extra query definitions for the packaged exporter.
- name: "Ensure /metrics directory exists"
  file:
    path: /metrics
    state: directory
    mode: '0755'

- name: "Ensure /metrics/queries.yaml exists"
  copy:
    src: pg-exporter-queries.yml
    dest: /metrics/queries.yaml
    mode: '0755'
# Create the "prometheus" database role once, on the master only.
# The check is read-only, so it runs through `command` and never reports a change.
- name: Check role prometheus exists
  become: true
  become_user: postgres
  command:
    argv:
      - /usr/bin/psql
      - -Atc
      - "SELECT count(rolname) FROM pg_roles where rolname='prometheus'"
  register: role_check
  changed_when: false
  # Best effort: a failing psql leaves stdout empty/undefined and the
  # follow-up tasks are skipped instead of aborting the play.
  ignore_errors: true

- name: "Copy prometheus_postgres_exporter init script"
  copy:
    src: init.sql
    dest: /tmp/prometheus_postgres_exporter.sql
    # Plain SQL file: readable by the postgres user, no execute bit needed.
    mode: '0644'
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'

- name: "Execute prometheus_postgres_exporter init script"
  become: true
  become_user: postgres
  command: psql -f /tmp/prometheus_postgres_exporter.sql
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'

- name: "Delete prometheus_postgres_exporter init script"
  file:
    path: /tmp/prometheus_postgres_exporter.sql
    state: absent
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'
# Pull in the exporter installation. The tag is set twice on purpose:
# on the include statement itself and, via `apply`, on every included task.
- name: "Install postgres exporter via include_task"
  tags:
    - postgres-exporter
  include_tasks: install_postgres_exporter.yml
  args:
    apply:
      tags:
        - postgres-exporter

@ -0,0 +1,82 @@
---
# Remove the distribution package so only the release binary remains.
# NOTE(review): `apt` is Debian-specific while postgres_exporter_home also
# handles RedHat - confirm whether RedHat hosts ever run this file.
- name: "Delete package <prometheus postgres exporter>"
  apt:
    state: absent
    name: "prometheus-postgres-exporter"
# Download and unpack the release only when the binary for the configured
# version is missing. Both tasks reference postgres_exporter_binary, the same
# path the systemd unit executes, instead of re-assembling it (the previous
# "{{ postgres_exporter_dir }}/..." form produced "/opt//..." because the
# directory variable already ends with a slash).
- name: "Check if version is already installed"
  ansible.builtin.stat:
    path: "{{ postgres_exporter_binary }}"
  register: check_pg_exp

# NOTE(review): postgres_exporter_checksum is defined in the vars but not
# verified here - confirm whether the archive should be checked.
- name: "Download and extract pg_exporter"
  unarchive:
    src: "{{ postgres_exporter_download_url }}"
    dest: "{{ postgres_exporter_dir }}"
    owner: root
    group: root
    mode: "u=rwx,g=rx,o=rx"
    remote_src: true
    creates: "{{ postgres_exporter_binary }}"
  when:
    - not check_pg_exp.stat.exists
# Render the unit file; any change triggers the restart handler.
- name: "Create systemd service file"
  become: true
  template:
    dest: "/etc/systemd/system/postgres_exporter.service"
    src: "postgres_exporter.systemd.j2"
    mode: "u=rw,go=r"
    owner: root
    group: root
  notify:
    - restart postgres_exporter
# Render the environment file holding the data source settings and FLAGS.
# The destination uses postgres_exporter_config_file, the same variable the
# systemd unit passes to EnvironmentFile=, so both always point to one file.
- name: "Create Config for postgres_exporter"
  template:
    src: "postgres_exporter.default.conf.j2"
    dest: "{{ postgres_exporter_config_file }}"
    owner: root
    group: "{{ postgres_exporter_group }}"
    # Readable by the exporter's group only; not world-readable.
    mode: "u=rw,g=r,o="
  notify: restart postgres_exporter
# Serialize postgres_exporter_additional_queries as YAML into the file that
# --extend.query-path points to; a changed file restarts the exporter.
- name: "Create file for additional queries"
  copy:
    content: "{{ lookup('vars','postgres_exporter_additional_queries') | to_nice_yaml }}"
    dest: '{{ postgres_exporter_home }}/queries.yml'
    mode: '0644'
    owner: root
    group: '{{ postgres_exporter_group }}'
  notify: restart postgres_exporter
# Reload unit definitions, then make sure the service is enabled and started.
- name: "Ensure postgres_exporter up and running"
  service:
    name: postgres_exporter
    daemon_reload: true
    enabled: true
    state: started
# Create the "prometheus" database role once, on the master only.
# The check is read-only, so it runs through `command` and never reports a change.
- name: Check role prometheus exists
  become: true
  become_user: postgres
  command:
    argv:
      - /usr/bin/psql
      - -Atc
      - "SELECT count(rolname) FROM pg_roles where rolname='prometheus'"
  register: role_check
  changed_when: false
  # Best effort: a failing psql leaves stdout empty/undefined and the
  # follow-up tasks are skipped instead of aborting the play.
  ignore_errors: true

- name: "Copy prometheus_postgres_exporter init script"
  copy:
    src: init.sql
    dest: /tmp/prometheus_postgres_exporter.sql
    # Plain SQL file: readable by the postgres user, no execute bit needed.
    mode: '0644'
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'

- name: "Execute prometheus_postgres_exporter init script"
  become: true
  become_user: postgres
  command: psql -f /tmp/prometheus_postgres_exporter.sql
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'

- name: "Delete prometheus_postgres_exporter init script"
  file:
    path: /tmp/prometheus_postgres_exporter.sql
    state: absent
  when:
    - role_check.stdout | default('') == '0'
    - server_type == 'master'

@ -5,6 +5,8 @@
# Minimal requirements for postgres.
# Tagged postgres-exporter so a run limited to that tag still processes this
# include, which in turn contains the exporter installation include.
- name: Include Base Requirements
  tags:
    - postgres-exporter
  include_tasks: base-requirements.yml
# Master requirements for postgres
- name: Include Master Requirements

@ -0,0 +1,7 @@
{# Environment file for postgres_exporter: emits DATA_SOURCE_NAME and/or DATA_SOURCE_URI only when the matching variable is defined, and always emits FLAGS as the space-joined postgres_exporter_flags list. #}
{% if postgres_exporter_datasource_name is defined %}
DATA_SOURCE_NAME="{{ postgres_exporter_datasource_name }}"
{% endif %}
{% if postgres_exporter_datasource_uri is defined %}
DATA_SOURCE_URI="{{ postgres_exporter_datasource_uri }}"
{% endif %}
FLAGS="{{ postgres_exporter_flags | join(' ') }}"

@ -0,0 +1,16 @@
{# systemd unit for postgres_exporter. User, group, environment file and binary path come from the postgres_exporter_* variables; $FLAGS is defined in the environment file. #}
[Unit]
Description=postgres_exporter - Prometheus exporter for PostgreSQL server metrics.
Documentation=https://github.com/prometheus-community/postgres_exporter
After=network.target

[Service]
User={{ postgres_exporter_user }}
Group={{ postgres_exporter_group }}
EnvironmentFile={{ postgres_exporter_config_file }}
ExecStart={{ postgres_exporter_binary }} $FLAGS
SyslogIdentifier=postgres_exporter
Restart=always

[Install]
WantedBy=multi-user.target

@ -259,3 +259,23 @@ groups:
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> is down. plz check."
description: "checks if endpoint is reachable or not"
# Critical alert: fires when pg_replication_lag has stayed above 120 for one minute.
# Presumably fed by the `lag` column of the pg_replication custom exporter query
# (seconds) - verify the metric name against the exporter output.
# The {{ '{{' }} ... {{ '}}' }} sequences are escapes so the outer template
# leaves literal Prometheus template braces in the rendered rule.
- alert: postgres replication broken
expr: pg_replication_lag > 120
for: 1m
labels:
severity: critical
annotations:
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
summary: "Postgres replication on instance <{{ '{{' }} $labels.instance {{ '}}' }}> broken. plz. check"
description: "checks if postgres replication works well, if lag is higher than X - something bad happened."
# Critical alert based on inbound traffic rather than a direct health probe.
# The expression takes the receive bit rate (1m rate * 8) of device enp7s0 on
# instances matching .*postgres.*, computes its change over the last 60 minutes
# with delta(), and expresses the drop as a percentage of the rate one minute
# ago; it fires when that drop stays above 80 for one minute.
- alert: postgres down
expr: (-100 * delta((rate(node_network_receive_bytes_total{instance=~".*postgres.*",job=~"node-exporter",device=~"enp7s0"}[1m])*8)[60m:])) / (rate(node_network_receive_bytes_total{instance=~".*postgres.*",job=~"node-exporter",device=~"enp7s0"}[1m] offset 1m)*8) > 80
for: 1m
labels:
severity: critical
annotations:
identifier: '{{ '{{' }} $labels.instance {{ '}}' }}'
summary: "Instance <{{ '{{' }} $labels.instance {{ '}}' }}> has less traffic than expected"
description: "checks if postgres receives traffic on internal interface."

Loading…
Cancel
Save