DEV-1060 Prometheus Setup für DEMO MPMX anpassen (Metriken/Alerts)

qa
Michael Hähnel 3 years ago committed by Ketelsen, Sven
parent 67c6a4de58
commit fb1ead8a1a

@ -0,0 +1,2 @@
---
# NOTE(review): presumably the max-shards limit applied through the
# Elasticsearch cluster settings (e.g. cluster.max_shards_per_node) -
# confirm against the role that reads this variable.
elastic_cluster_settings_max_shards: 1000

@ -1,5 +1,4 @@
--- ---
ansible_ssh_host: "{{ stage_server_domain }}" ansible_ssh_host: "{{ stage_server_domain }}"
ssh_macs: ssh_macs:
@ -59,8 +58,6 @@ stage_server_domain: "{{ inventory_hostname }}.{{ domain }}"
stage_server_url: "{{ http_s }}://{{ stage_server_domain }}" stage_server_url: "{{ http_s }}://{{ stage_server_domain }}"
stage_kube_load_balancer: "{{ stage_kube }}-ingress" stage_kube_load_balancer: "{{ stage_kube }}-ingress"
alertmanager_channel_smardigo: "#monitoring-{{ stage }}"
hetzner_server_type: cx11 hetzner_server_type: cx11
hetzner_server_image: ubuntu-20.04 hetzner_server_image: ubuntu-20.04
hetzner_location: nbg1 hetzner_location: nbg1
@ -99,11 +96,11 @@ sudo_groups:
{ id: "Ubuntu", sudo_group: "sudo" }, { id: "Ubuntu", sudo_group: "sudo" },
] ]
sudo_group: "{{ sudo_groups sudo_group: "{{ sudo_groups
| selectattr('id', 'match', '' + ansible_distribution + '' ) | selectattr('id', 'match', '' + ansible_distribution + '')
| map(attribute='sudo_group') | map(attribute='sudo_group')
| list | list
| first | first
| replace('.','-') }}" | replace('.', '-') }}"
# whitelist for outdated user detection - they won't be deleted at all # whitelist for outdated user detection - they won't be deleted at all
default_users: default_users:
@ -191,8 +188,8 @@ logstash_certificate: "{{ stage }}-elastic-stack-logstash-01"
backup_directory: "/backups" backup_directory: "/backups"
get_current_date: "{{ lookup('pipe','date +%Y-%m-%d') }}" get_current_date: "{{ lookup('pipe', 'date +%Y-%m-%d') }}"
get_current_date_time: "{{ lookup('pipe','date +%Y-%m-%d_%H:%M') }}" get_current_date_time: "{{ lookup('pipe', 'date +%Y-%m-%d_%H:%M') }}"
hetzner_authentication_ansible: "{{ hetzner_authentication_ansible_vault }}" hetzner_authentication_ansible: "{{ hetzner_authentication_ansible_vault }}"
hetzner_authentication_ccm: "{{ hetzner_authentication_ccm_vault }}" hetzner_authentication_ccm: "{{ hetzner_authentication_ccm_vault }}"

@ -6,15 +6,33 @@ node_exporter_listen_address: "{{ stage_private_server_ip }}"
blackbox_exporter_fqdn: "dev-blackbox-01.{{ domain }}" blackbox_exporter_fqdn: "dev-blackbox-01.{{ domain }}"
blackbox_http_2xx_targets: blackbox_http_2xx_targets:
- 'https://{{ stage }}-keycloak-01.smardigo.digital/auth/' - "https://{{ stage }}-keycloak-01.smardigo.digital/auth/"
- 'https://{{ stage_kube }}-awx.smardigo.digital' - "https://{{ stage_kube }}-awx.smardigo.digital"
#- 'https://{{ stage }}-management-01-connect.smardigo.digital/' # - 'https://{{ stage }}-management-01-connect.smardigo.digital/'
blackbox_http_2xx_additional_targets: [] blackbox_http_2xx_additional_targets: []
prometheus_tsdb_rentention_time: "4w"
prometheus_federation_enabled: true prometheus_federation_enabled: true
kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}" prometheus_remote_write_enabled: true
prometheus_alert_diskspaceusage_warning: 85 prometheus_alert_diskspaceusage_warning: 85
prometheus_alert_pg_replication_lag: 120 prometheus_alert_pg_replication_lag: 120
elastic_cluster_settings_max_shards: 1000 alertmanager_channel_smardigo: "#monitoring-{{ stage }}"
prometheus_enabled: true
prometheus_alertmanager_enabled: true
prometheus_grafana_enabled: true
prometheus_prom2teams_enabled: true
prometheus_service_names:
- "{{ (prometheus_enabled | default(true)) | ternary(prometheus_id, '') }}"
- "{{ (prometheus_alertmanager_enabled | default(true)) | ternary(alertmanager_id, '') }}"
- "{{ (prometheus_grafana_enabled | default(true)) | ternary(grafana_id, '') }}"
- "{{ (prometheus_prom2teams_enabled | default(true)) | ternary(prom2teams_id, '') }}"
prometheus_docker_volume_names:
- "{{ (prometheus_alertmanager_enabled | default(true)) | ternary(alertmanager_id + '-data', '') }}"
- "{{ (prometheus_grafana_enabled | default(true)) | ternary(grafana_id + '-data', '') }}"

@ -7,6 +7,9 @@ alertmanager_admin_username: "alertmanager-admin"
alertmanager_admin_password: "{{ alertmanager_admin_password_vault }}" alertmanager_admin_password: "{{ alertmanager_admin_password_vault }}"
alertmanager_admin_password_htpasswd: "{{ alertmanager_admin_password_htpasswd_vault }}" alertmanager_admin_password_htpasswd: "{{ alertmanager_admin_password_htpasswd_vault }}"
prometheus_tsdb_rentention_time: '2w'
# federation for k8s prometheus -> stage prometheus # federation for k8s prometheus -> stage prometheus
prometheus_federation_enabled: false prometheus_federation_enabled: false
prometheus_alertmanager_enabled: false
prometheus_prom2teams_enabled: false
prometheus_grafana_enabled: false

@ -1,3 +1,2 @@
--- ---
prometheus_tsdb_rentention_time: '2w' prometheus_remote_write_enabled: false
kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}"

@ -0,0 +1,2 @@
---
# NOTE(review): presumably the max-shards limit applied through the
# Elasticsearch cluster settings (e.g. cluster.max_shards_per_node) -
# confirm against the role that reads this variable.
elastic_cluster_settings_max_shards: 1500

@ -2,7 +2,7 @@
prometheus_lvm_hcloudvol_size: 30 prometheus_lvm_hcloudvol_size: 30
prometheus_lvm_hcloudvol_count: 10 prometheus_lvm_hcloudvol_count: 10
prometheus_tsdb_rentention_time: '90d' prometheus_tsdb_rentention_time: "90d"
# check firewall settings # check firewall settings
smardigo_connect_extra_servers: smardigo_connect_extra_servers:
@ -12,6 +12,4 @@ smardigo_connect_extra_servers:
node_exporter_extra_servers: node_exporter_extra_servers:
- ext-bdev-mpmexec-02.smardigo.digital - ext-bdev-mpmexec-02.smardigo.digital
prometheus_remote_write_enabled: false
kubernetes_prometheus_endpoint: "prodnso-prometheus.{{ domain }}"
elastic_cluster_settings_max_shards: 1500

@ -1,3 +1,2 @@
--- ---
prometheus_tsdb_rentention_time: '2w' prometheus_remote_write_enabled: false
kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}"

@ -1,7 +1,7 @@
--- ---
prometheus_lvm_hcloudvol_size: 30 prometheus_lvm_hcloudvol_size: 30
prometheus_lvm_hcloudvol_count: 1 prometheus_lvm_hcloudvol_count: 1
prometheus_lvm_hcloudvol_mountpath: '/prometheus_datadir' prometheus_lvm_hcloudvol_mountpath: "/prometheus_datadir"
prometheus_datadir: "{{ prometheus_lvm_hcloudvol_mountpath }}" prometheus_datadir: "{{ prometheus_lvm_hcloudvol_mountpath }}"

@ -0,0 +1,29 @@
---
### tags:
###   update_config

# Poll the Prometheus web endpoint (with the admin credentials) until it
# answers with HTTP 200. Up to 10 attempts, 60 seconds apart; the response is
# registered as `prometheus_stats` and its status drives the `until` check.
- name: "Wait for <{{ http_s }}://{{ prometheus_id }}.{{ domain }}>"
  uri:
    url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}"
    url_username: "{{ prometheus_admin_username }}"
    url_password: "{{ prometheus_admin_password }}"
    method: GET
    status_code: 200
    return_content: true
  register: prometheus_stats
  until: prometheus_stats.status == 200
  retries: 10
  delay: 60
  tags:
    - update_config

# POST to the `/-/reload` endpoint so Prometheus re-reads its configuration.
# The task only succeeds when the endpoint answers with HTTP 200.
- name: "Reload prometheus configuration"
  uri:
    url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}/-/reload"
    url_username: "{{ prometheus_admin_username }}"
    url_password: "{{ prometheus_admin_password }}"
    method: POST
    timeout: 300  # the request blocks until the configuration reload is complete
    status_code: 200
  tags:
    - update_config

@ -1,43 +1,32 @@
--- ---
### tags: ### tags:
### update_config ### update_config
- name: "Deploy service templates for {{ inventory_hostname }}" - name: "Get enabled Prometheus services"
set_fact:
prometheus_services_enabled: "{{ prometheus_services_enabled | default([]) + [item] | flatten }}"
loop_control:
extended: yes
loop:
- "{{ ['alertmanager'] if prometheus_alertmanager_enabled|bool else omit }}"
- "{{ ['grafana'] if prometheus_grafana_enabled|bool else omit }}"
- "{{ ['prom2teams'] if prometheus_prom2teams_enabled|bool else omit }}"
- "{{ ['prometheus'] if prometheus_enabled|bool else omit }}"
tags:
- update_config
- name: "Deploy Prometheus service templates for {{ inventory_hostname }}"
include_role: include_role:
name: hetzner-ansible-sma-deploy name: hetzner-ansible-sma-deploy
tasks_from: templates tasks_from: templates
vars: vars:
current_config: "prometheus" current_config: "prometheus/{{ service }}_config"
current_base_path: "{{ service_base_path }}" current_base_path: "{{ service_base_path }}"
current_destination: "{{ inventory_hostname }}" current_destination: "{{ inventory_hostname }}"
current_owner: "{{ docker_owner }}" current_owner: "{{ docker_owner }}"
current_group: "{{ docker_group }}" current_group: "{{ docker_group }}"
tags: loop: "{{ prometheus_services_enabled }}"
- update_config loop_control:
loop_var: service
- name: "Wait for <{{ http_s }}://{{ prometheus_id }}.{{ domain }}>"
uri:
url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}"
url_username: "{{ prometheus_admin_username }}"
url_password: "{{ prometheus_admin_password }}"
method: GET
status_code: 200
return_content: yes
register: prometheus_stats
until: prometheus_stats.status == 200
retries: 10
delay: 60
tags:
- update_config
- name: "Reload prometheus configuration"
uri:
url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}/-/reload"
url_username: "{{ prometheus_admin_username }}"
url_password: "{{ prometheus_admin_password }}"
method: POST
timeout: 300 # blocks until reload configuration is complete
status_code: 200
tags: tags:
- update_config - update_config

@ -21,10 +21,7 @@
vars: vars:
record_data: "{{ stage_server_ip }}" record_data: "{{ stage_server_ip }}"
record_name: "{{ item }}" record_name: "{{ item }}"
loop: loop: "{{ prometheus_service_names | reject('match', '^$') | list }}"
- "{{ prometheus_id }}"
- "{{ grafana_id }}"
- "{{ alertmanager_id }}"
- name: "Check if {{ inventory_hostname }}/docker-compose.yml exists" - name: "Check if {{ inventory_hostname }}/docker-compose.yml exists"
stat: stat:
@ -56,6 +53,11 @@
tags: tags:
- update_deployment - update_deployment
- name: "Updating prometheus configuration"
include_tasks: _update_config.yml
tags:
- update_config
- name: "Update {{ inventory_hostname }}" - name: "Update {{ inventory_hostname }}"
community.docker.docker_compose: community.docker.docker_compose:
project_src: "{{ service_base_path }}/{{ inventory_hostname }}" project_src: "{{ service_base_path }}/{{ inventory_hostname }}"
@ -64,8 +66,8 @@
tags: tags:
- update_deployment - update_deployment
- name: "Updating prometheus configuration" - name: "Reload prometheus configuration"
include_tasks: _update_config.yml include_tasks: _reload_config.yml
tags: tags:
- update_config - update_config
@ -83,6 +85,8 @@
loop: "{{ grafana_users }}" loop: "{{ grafana_users }}"
tags: tags:
- grafana-user-update - grafana-user-update
when:
- prometheus_grafana_enabled
- name: "Get all Dashboard uids from {{ http_s }}://{{ inventory_hostname }}-grafana.{{ domain }}" - name: "Get all Dashboard uids from {{ http_s }}://{{ inventory_hostname }}-grafana.{{ domain }}"
uri: uri:
@ -99,10 +103,12 @@
delay: 60 delay: 60
tags: tags:
- grafana-user-update - grafana-user-update
when:
- prometheus_grafana_enabled
- name: "Get all existing Dashboard uids" - name: "Get all existing Dashboard uids"
set_fact: set_fact:
grafana_dashboards: "{{ grafana_dashboards_plain.json | json_query('[].{uid: uid, type: type, title: title}') }}" grafana_dashboards: "{{ grafana_dashboards_plain.json | json_query('[].{uid: uid, type: type, title: title}') if prometheus_grafana_enabled else [] }}"
tags: tags:
- grafana-user-update - grafana-user-update
@ -113,6 +119,7 @@
- grafana-user-update - grafana-user-update
when: when:
- debug - debug
- prometheus_grafana_enabled
- name: Restrict admin dashboard permissions - name: Restrict admin dashboard permissions
uri: uri:
@ -133,6 +140,7 @@
when: when:
- item.type == 'dash-db' - item.type == 'dash-db'
- item.uid not in grafana_dashboard_whitelist - item.uid not in grafana_dashboard_whitelist
- prometheus_grafana_enabled
tags: tags:
- grafana-user-update - grafana-user-update
@ -155,6 +163,7 @@
when: when:
- item.type == 'dash-db' - item.type == 'dash-db'
- item.uid in grafana_dashboard_whitelist - item.uid in grafana_dashboard_whitelist
- prometheus_grafana_enabled
tags: tags:
- grafana-user-update - grafana-user-update

@ -1,5 +1,4 @@
--- ---
service_port_grafana: 3000 service_port_grafana: 3000
service_port_prometheus: 9090 service_port_prometheus: 9090
service_port_alertmanager: 9093 service_port_alertmanager: 9093
@ -9,140 +8,112 @@ alertmanager_id: "{{ inventory_hostname }}-alertmanager"
grafana_id: "{{ inventory_hostname }}-grafana" grafana_id: "{{ inventory_hostname }}-grafana"
prom2teams_id: "{{ inventory_hostname }}-prom2teams" prom2teams_id: "{{ inventory_hostname }}-prom2teams"
prometheus_docker: { prometheus_docker:
networks: [ networks:
{ - name: back-tier
name: back-tier, external: true
external: true,
}, - name: front-tier
{ external: true
name: front-tier,
external: true, volumes: "{{ prometheus_docker_volumes | selectattr('name', 'in', prometheus_docker_volume_names | join(',')) }}"
},
], services: "{{ prometheus_services | selectattr('name', 'in', prometheus_service_names | join(',')) }}"
volumes: [
{ prometheus_docker_volumes:
name: "{{ alertmanager_id }}-data" - name: "{{ alertmanager_id }}-data"
}, - name: "{{ grafana_id }}-data"
{
name: "{{ grafana_id }}-data" prometheus_services:
}, - name: "{{ prometheus_id }}"
], image_name: "{{ prometheus_image_name }}"
services: [ image_version: "{{ prom_prometheus_version }}"
{ user: "root"
name: "{{ prometheus_id }}", labels:
image_name: "{{ prometheus_image_name }}", - "traefik.enable=true"
image_version: "{{ prom_prometheus_version }}", - "traefik.http.routers.{{ prometheus_id }}.service={{ prometheus_id }}"
user: "root", - "traefik.http.routers.{{ prometheus_id }}.rule=Host(`{{ inventory_hostname }}-prometheus.{{ domain }}`)"
labels: [ - "traefik.http.routers.{{ prometheus_id }}.entrypoints=websecure"
'"traefik.enable=true"', - "traefik.http.routers.{{ prometheus_id }}.tls=true"
'"traefik.http.routers.{{ prometheus_id }}.service={{ prometheus_id }}"', - "traefik.http.routers.{{ prometheus_id }}.tls.certresolver=letsencrypt"
'"traefik.http.routers.{{ prometheus_id }}.rule=Host(`{{ inventory_hostname }}-prometheus.{{ domain }}`)"', - "traefik.http.services.{{ prometheus_id }}.loadbalancer.server.port={{ service_port_prometheus }}"
'"traefik.http.routers.{{ prometheus_id }}.entrypoints=websecure"', - "traefik.http.routers.{{ prometheus_id }}.middlewares={{ prometheus_id }}-basicauth"
'"traefik.http.routers.{{ prometheus_id }}.tls=true"', - "traefik.http.middlewares.{{ prometheus_id }}-basicauth.basicauth.users={{ prometheus_admin_username }}:{{ prometheus_admin_password_htpasswd }}"
'"traefik.http.routers.{{ prometheus_id }}.tls.certresolver=letsencrypt"', command:
'"traefik.http.services.{{ prometheus_id }}.loadbalancer.server.port={{ service_port_prometheus }}"', - "--config.file=/etc/prometheus/prometheus.yml"
'"traefik.http.routers.{{ prometheus_id }}.middlewares={{ prometheus_id }}-basicauth"', - "--storage.tsdb.path=/prometheus"
'"traefik.http.middlewares.{{ prometheus_id }}-basicauth.basicauth.users={{ prometheus_admin_username }}:{{ prometheus_admin_password_htpasswd }}"', - "--web.console.libraries=/usr/share/prometheus/console_libraries"
], - "--web.console.templates=/usr/share/prometheus/consoles"
command: [ - "--web.external-url={{ http_s }}://{{ inventory_hostname }}-prometheus.{{ domain }}"
'"--config.file=/etc/prometheus/prometheus.yml"', - "--web.enable-lifecycle"
'"--storage.tsdb.path=/prometheus"', - "--storage.tsdb.retention.time={{ prometheus_tsdb_rentention_time }}"
'"--web.console.libraries=/usr/share/prometheus/console_libraries"', volumes:
'"--web.console.templates=/usr/share/prometheus/consoles"', - "./config/prometheus/:/etc/prometheus/:ro"
'"--web.external-url={{ http_s}}://{{ inventory_hostname }}-prometheus.{{ domain }}"', - "{{ prometheus_datadir + '/' + 'data' | default(prometheus_id + '-data') }}:/prometheus"
'"--web.enable-lifecycle"', networks:
'"--storage.tsdb.retention.time={{ prometheus_tsdb_rentention_time | default("4w")}}"', - back-tier
], - front-tier
volumes: [ extra_hosts: "{{ prometheus_extra_hosts | default([]) }}"
'"./config/prometheus/:/etc/prometheus/:ro"',
'"{{ prometheus_datadir + "/" +"_data" | default(prometheus_id + "-data") }}:/prometheus"', - name: "{{ grafana_id }}"
], image_name: "{{ grafana_image_name }}"
networks: [ image_version: "{{ prom_grafana_version }}"
'"back-tier"', user: "472"
'"front-tier"', labels:
], - "traefik.enable=true"
extra_hosts: "{{ prometheus_extra_hosts | default([]) }}", - "traefik.http.routers.{{ grafana_id }}.service={{ grafana_id }}"
}, - "traefik.http.routers.{{ grafana_id }}.rule=Host(`{{ inventory_hostname }}-grafana.{{ domain }}`)"
{ - "traefik.http.routers.{{ grafana_id }}.entrypoints=websecure"
name: "{{ alertmanager_id }}", - "traefik.http.routers.{{ grafana_id }}.tls=true"
image_name: "{{ alertmanager_image_name }}", - "traefik.http.routers.{{ grafana_id }}.tls.certresolver=letsencrypt"
image_version: "{{ prom_alertmanager_version }}", - "traefik.http.services.{{ grafana_id }}.loadbalancer.server.port={{ service_port_grafana }}"
labels: [ volumes:
'"traefik.enable=true"', - "./config/grafana/provisioning/:/etc/grafana/provisioning/"
'"traefik.http.routers.{{ alertmanager_id }}.service={{ alertmanager_id }}"', - "./config/grafana/conf/defaults.ini:/usr/share/grafana/conf/defaults.ini"
'"traefik.http.routers.{{ alertmanager_id }}.rule=Host(`{{ inventory_hostname }}-alertmanager.{{ domain }}`)"', - "{{ grafana_id }}-data:/var/lib/grafana"
'"traefik.http.routers.{{ alertmanager_id }}.entrypoints=websecure"', networks:
'"traefik.http.routers.{{ alertmanager_id }}.tls=true"', - back-tier
'"traefik.http.routers.{{ alertmanager_id }}.tls.certresolver=letsencrypt"', - front-tier
'"traefik.http.services.{{ alertmanager_id }}.loadbalancer.server.port={{ service_port_alertmanager }}"', env_file:
'"traefik.http.routers.{{ alertmanager_id }}.middlewares={{ alertmanager_id }}-basicauth"', - "./config/grafana/config.monitoring"
'"traefik.http.middlewares.{{ alertmanager_id }}-basicauth.basicauth.users={{ alertmanager_admin_username }}:{{ alertmanager_admin_password_htpasswd }}"',
], - name: "{{ alertmanager_id }}"
command: [ image_name: "{{ alertmanager_image_name }}"
'"--config.file=/etc/alertmanager/config.yml"', image_version: "{{ prom_alertmanager_version }}"
'"--storage.path=/alertmanager"', labels:
'"--web.external-url={{ http_s}}://{{ inventory_hostname }}-alertmanager.{{ domain }}"', - "traefik.enable=true"
], - "traefik.http.routers.{{ alertmanager_id }}.service={{ alertmanager_id }}"
environment: [ - "traefik.http.routers.{{ alertmanager_id }}.rule=Host(`{{ inventory_hostname }}-alertmanager.{{ domain }}`)"
'LS_JAVA_OPTS: "-Xmx1G -Xms1G"', - "traefik.http.routers.{{ alertmanager_id }}.entrypoints=websecure"
], - "traefik.http.routers.{{ alertmanager_id }}.tls=true"
volumes: [ - "traefik.http.routers.{{ alertmanager_id }}.tls.certresolver=letsencrypt"
'"./config/alertmanager/:/etc/alertmanager/:ro"', - "traefik.http.services.{{ alertmanager_id }}.loadbalancer.server.port={{ service_port_alertmanager }}"
'"{{ alertmanager_id }}-data:/alertmanager"', - "traefik.http.routers.{{ alertmanager_id }}.middlewares={{ alertmanager_id }}-basicauth"
], - "traefik.http.middlewares.{{ alertmanager_id }}-basicauth.basicauth.users={{ alertmanager_admin_username }}:\
networks: [ {{ alertmanager_admin_password_htpasswd }}"
'"back-tier"', command:
'"front-tier"', - "--config.file=/etc/alertmanager/config.yml"
], - "--storage.path=/alertmanager"
}, - "--web.external-url={{ http_s }}://{{ inventory_hostname }}-alertmanager.{{ domain }}"
{ environment:
name: "{{ grafana_id }}", - 'LS_JAVA_OPTS: "-Xmx1G -Xms1G"'
image_name: "{{ grafana_image_name }}", volumes:
image_version: "{{ prom_grafana_version }}", - "./config/alertmanager/:/etc/alertmanager/:ro"
user: '"472"', - "{{ alertmanager_id }}-data:/alertmanager"
labels: [ networks:
'"traefik.enable=true"', - back-tier
'"traefik.http.routers.{{ grafana_id }}.service={{ grafana_id }}"', - front-tier
'"traefik.http.routers.{{ grafana_id }}.rule=Host(`{{ inventory_hostname }}-grafana.{{ domain }}`)"',
'"traefik.http.routers.{{ grafana_id }}.entrypoints=websecure"', - name: "{{ prom2teams_id }}"
'"traefik.http.routers.{{ grafana_id }}.tls=true"', image_name: "{{ prom2teams_image_name }}"
'"traefik.http.routers.{{ grafana_id }}.tls.certresolver=letsencrypt"', image_version: "{{ prom_prom2teams_version }}"
'"traefik.http.services.{{ grafana_id }}.loadbalancer.server.port={{ service_port_grafana }}"', environment:
], - 'PROM2TEAMS_PROMETHEUS_METRICS: "true"'
volumes: [ - 'DEBUG_METRICS: "true"'
'"./config/grafana/provisioning/:/etc/grafana/provisioning/"', ports:
'"./config/grafana/conf/defaults.ini:/usr/share/grafana/conf/defaults.ini"', - external: 8089
'"{{ grafana_id }}-data:/var/lib/grafana"', internal: 8089
], volumes:
networks: [ - "./config/prom2teams/config.ini:/opt/prom2teams/config.ini"
'"back-tier"', networks:
'"front-tier"', - back-tier
],
env_file: [
'"./config/grafana/config.monitoring"',
],
},
{
name: "{{ prom2teams_id }}",
image_name: "{{ prom2teams_image_name }}",
image_version: "{{ prom_prom2teams_version }}",
environment: [
"PROM2TEAMS_PROMETHEUS_METRICS: \"true\"",
"DEBUG_METRICS: \"true\"",
],
ports: [
{
"external": "8089",
"internal": "8089",
},
],
volumes: [
'"./config/prom2teams/config.ini:/opt/prom2teams/config.ini"',
],
networks: [
'"back-tier"',
],
},
],
}

@ -20,19 +20,21 @@ networks:
{% endfor %} {% endfor %}
{% endif %} {% endif %}
{# ################################################## networks #} {# ################################################## networks #}
{# ################################################### volumes #} {# ################################################### volumes #}
{% if {% if
current_docker.volumes is defined current_docker.volumes is defined
and (current_docker.volumes|length>0) and (current_docker.volumes|length>0)
%} %}
volumes: volumes:
{% for volume in current_docker.volumes %} {% for volume in current_docker.volumes %}
{{ volume.name }}: {} {{ volume.name }}: {}
{% endfor %} {% endfor %}
{% else %}
{% endif %} {% endif %}
{# ################################################### volumes #} {# ################################################### volumes #}
{# ################################################## services #} {# ################################################## services #}
services: services:
{% for service in current_docker.services %} {% for service in current_docker.services %}
@ -43,7 +45,7 @@ services:
{% if {% if
service.user is defined service.user is defined
%} %}
user: {{ service.user }} user: "{{ service.user }}"
{% endif %} {% endif %}
{# ########################################## command #} {# ########################################## command #}
{% if {% if

@ -9,6 +9,15 @@ global:
external_labels: external_labels:
monitor: '{{ inventory_hostname }}' monitor: '{{ inventory_hostname }}'
{% if prometheus_remote_write_enabled | default(true) %}
remote_write:
- url: "https://{{ shared_service_kube_hostname_prometheus }}/api/v1/write"
basic_auth:
username: "{{ prometheus_admin_username }}"
password: "{{ prometheus_admin_password }}"
{% endif %}
{% if prometheus_alertmanager_enabled | default(true) %}
# Load and evaluate rules in this file every 'evaluation_interval' seconds. # Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files: rule_files:
- 'alert.rules' - 'alert.rules'
@ -20,6 +29,7 @@ alerting:
static_configs: static_configs:
- targets: - targets:
- "{{ inventory_hostname }}-alertmanager:9093" - "{{ inventory_hostname }}-alertmanager:9093"
{% endif %}
# A scrape configuration containing exactly one endpoint to scrape: # A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself. # Here it's Prometheus itself.
@ -43,6 +53,7 @@ scrape_configs:
target_label: instance target_label: instance
replacement: '{{ inventory_hostname }}-prometheus.{{ domain }}' replacement: '{{ inventory_hostname }}-prometheus.{{ domain }}'
{% if prometheus_alertmanager_enabled | default(true) %}
- job_name: 'alertmanager' - job_name: 'alertmanager'
static_configs: static_configs:
- targets: [ - targets: [
@ -56,7 +67,9 @@ scrape_configs:
- source_labels: [__address__] - source_labels: [__address__]
target_label: instance target_label: instance
replacement: '{{ inventory_hostname }}-alertmanager.{{ domain }}' replacement: '{{ inventory_hostname }}-alertmanager.{{ domain }}'
{% endif %}
{% if prometheus_prom2teams_enabled | default(true) %}
- job_name: 'prom2teams' - job_name: 'prom2teams'
scheme: http scheme: http
metrics_path: '/metrics' metrics_path: '/metrics'
@ -68,6 +81,7 @@ scrape_configs:
env: {{ stage }} env: {{ stage }}
project: monitoring project: monitoring
application: prom2teams application: prom2teams
{% endif %}
- job_name: 'blackbox' - job_name: 'blackbox'
metrics_path: /probe metrics_path: /probe
@ -534,6 +548,6 @@ scrape_configs:
- '{job=~".*"}' - '{job=~".*"}'
static_configs: static_configs:
- targets: ['{{ kubernetes_prometheus_endpoint }}'] - targets: ['{{ shared_service_kube_hostname_prometheus }}']
{% endif %} {% endif %}
Loading…
Cancel
Save