diff --git a/group_vars/all/elastic.yml b/group_vars/all/elastic.yml new file mode 100644 index 0000000..2c968bf --- /dev/null +++ b/group_vars/all/elastic.yml @@ -0,0 +1,2 @@ +--- +elastic_cluster_settings_max_shards: 1000 diff --git a/group_vars/all/plain.yml b/group_vars/all/plain.yml index d825f39..d563224 100644 --- a/group_vars/all/plain.yml +++ b/group_vars/all/plain.yml @@ -1,5 +1,4 @@ --- - ansible_ssh_host: "{{ stage_server_domain }}" ssh_macs: @@ -59,8 +58,6 @@ stage_server_domain: "{{ inventory_hostname }}.{{ domain }}" stage_server_url: "{{ http_s }}://{{ stage_server_domain }}" stage_kube_load_balancer: "{{ stage_kube }}-ingress" -alertmanager_channel_smardigo: "#monitoring-{{ stage }}" - hetzner_server_type: cx11 hetzner_server_image: ubuntu-20.04 hetzner_location: nbg1 @@ -99,11 +96,11 @@ sudo_groups: { id: "Ubuntu", sudo_group: "sudo" }, ] sudo_group: "{{ sudo_groups - | selectattr('id', 'match', '' + ansible_distribution + '' ) + | selectattr('id', 'match', '' + ansible_distribution + '') | map(attribute='sudo_group') | list | first - | replace('.','-') }}" + | replace('.', '-') }}" # whitelist for outdated user detection - they wont't be deleted at all default_users: @@ -191,8 +188,8 @@ logstash_certificate: "{{ stage }}-elastic-stack-logstash-01" backup_directory: "/backups" -get_current_date: "{{ lookup('pipe','date +%Y-%m-%d') }}" -get_current_date_time: "{{ lookup('pipe','date +%Y-%m-%d_%H:%M') }}" +get_current_date: "{{ lookup('pipe', 'date +%Y-%m-%d') }}" +get_current_date_time: "{{ lookup('pipe', 'date +%Y-%m-%d_%H:%M') }}" hetzner_authentication_ansible: "{{ hetzner_authentication_ansible_vault }}" hetzner_authentication_ccm: "{{ hetzner_authentication_ccm_vault }}" diff --git a/group_vars/all/prometheus.yml b/group_vars/all/prometheus.yml index 5058cab..466fdf9 100644 --- a/group_vars/all/prometheus.yml +++ b/group_vars/all/prometheus.yml @@ -2,19 +2,37 @@ # node exporter exposes data only into the private network node_exporter_listen_address: "{{ stage_private_server_ip }}" -# TODO the blackbox exporter shouldn't be DEV tagged at all +# TODO the blackbox exporter shouldn't be DEV tagged at all blackbox_exporter_fqdn: "dev-blackbox-01.{{ domain }}" blackbox_http_2xx_targets: -- 'https://{{ stage }}-keycloak-01.smardigo.digital/auth/' -- 'https://{{ stage_kube }}-awx.smardigo.digital' -#- 'https://{{ stage }}-management-01-connect.smardigo.digital/' + - "https://{{ stage }}-keycloak-01.smardigo.digital/auth/" + - "https://{{ stage_kube }}-awx.smardigo.digital" +# - 'https://{{ stage }}-management-01-connect.smardigo.digital/' + blackbox_http_2xx_additional_targets: [] +prometheus_tsdb_rentention_time: "4w" + prometheus_federation_enabled: true -kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}" +prometheus_remote_write_enabled: true prometheus_alert_diskspaceusage_warning: 85 prometheus_alert_pg_replication_lag: 120 -elastic_cluster_settings_max_shards: 1000 +alertmanager_channel_smardigo: "#monitoring-{{ stage }}" + +prometheus_enabled: true +prometheus_alertmanager_enabled: true +prometheus_grafana_enabled: true +prometheus_prom2teams_enabled: true + +prometheus_service_names: + - "{{ (prometheus_enabled | default(true)) | ternary(prometheus_id, '') }}" + - "{{ (prometheus_alertmanager_enabled | default(true)) | ternary(alertmanager_id, '') }}" + - "{{ (prometheus_grafana_enabled | default(true)) | ternary(grafana_id, '') }}" + - "{{ (prometheus_prom2teams_enabled | default(true)) | ternary(prom2teams_id, '') }}" + +prometheus_docker_volume_names: + - "{{ (prometheus_alertmanager_enabled | default(true)) | ternary(alertmanager_id + '-data', '') }}" + - "{{ (prometheus_grafana_enabled | default(true)) | ternary(grafana_id + '-data', '') }}" diff --git a/group_vars/stage_demompmx/prometheus.yml b/group_vars/stage_demompmx/prometheus.yml index 20b37c0..ee2a503 100644 --- a/group_vars/stage_demompmx/prometheus.yml +++ b/group_vars/stage_demompmx/prometheus.yml @@ -7,6 +7,9 @@ alertmanager_admin_username: "alertmanager-admin" alertmanager_admin_password: "{{ alertmanager_admin_password_vault }}" alertmanager_admin_password_htpasswd: "{{ alertmanager_admin_password_htpasswd_vault }}" -prometheus_tsdb_rentention_time: '2w' # federation for k8s prometheus -> stage prometheus -prometheus_federation_enabled: false \ No newline at end of file +prometheus_federation_enabled: false + +prometheus_alertmanager_enabled: false +prometheus_prom2teams_enabled: false +prometheus_grafana_enabled: false diff --git a/group_vars/stage_dev/prometheus.yml b/group_vars/stage_dev/prometheus.yml index 52a48fe..e80eb11 100644 --- a/group_vars/stage_dev/prometheus.yml +++ b/group_vars/stage_dev/prometheus.yml @@ -1,3 +1,2 @@ --- -prometheus_tsdb_rentention_time: '2w' -kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}" +prometheus_remote_write_enabled: false diff --git a/group_vars/stage_prodnso/elastic.yml b/group_vars/stage_prodnso/elastic.yml new file mode 100644 index 0000000..36ded5e --- /dev/null +++ b/group_vars/stage_prodnso/elastic.yml @@ -0,0 +1,2 @@ +--- +elastic_cluster_settings_max_shards: 1500 diff --git a/group_vars/stage_prodnso/prometheus.yml b/group_vars/stage_prodnso/prometheus.yml index 8e0483f..a6fd95f 100644 --- a/group_vars/stage_prodnso/prometheus.yml +++ b/group_vars/stage_prodnso/prometheus.yml @@ -2,7 +2,7 @@ prometheus_lvm_hcloudvol_size: 30 prometheus_lvm_hcloudvol_count: 10 -prometheus_tsdb_rentention_time: '90d' +prometheus_tsdb_rentention_time: "90d" # check firewall settings smardigo_connect_extra_servers: @@ -12,6 +12,4 @@ smardigo_connect_extra_servers: node_exporter_extra_servers: - ext-bdev-mpmexec-02.smardigo.digital - -kubernetes_prometheus_endpoint: "prodnso-prometheus.{{ domain }}" -elastic_cluster_settings_max_shards: 1500 +prometheus_remote_write_enabled: false diff --git a/group_vars/stage_qa/prometheus.yml b/group_vars/stage_qa/prometheus.yml index 52a48fe..e80eb11 100644 --- a/group_vars/stage_qa/prometheus.yml +++ b/group_vars/stage_qa/prometheus.yml @@ -1,3 +1,2 @@ --- -prometheus_tsdb_rentention_time: '2w' -kubernetes_prometheus_endpoint: "{{ stage_kube }}-prometheus.{{ domain }}" +prometheus_remote_write_enabled: false diff --git a/roles/prometheus/defaults/main.yml b/roles/prometheus/defaults/main.yml index 18c00ae..672779e 100644 --- a/roles/prometheus/defaults/main.yml +++ b/roles/prometheus/defaults/main.yml @@ -1,7 +1,7 @@ --- prometheus_lvm_hcloudvol_size: 30 prometheus_lvm_hcloudvol_count: 1 -prometheus_lvm_hcloudvol_mountpath: '/prometheus_datadir' +prometheus_lvm_hcloudvol_mountpath: "/prometheus_datadir" prometheus_datadir: "{{ prometheus_lvm_hcloudvol_mountpath }}" diff --git a/roles/prometheus/tasks/_reload_config.yml b/roles/prometheus/tasks/_reload_config.yml new file mode 100644 index 0000000..f2f609a --- /dev/null +++ b/roles/prometheus/tasks/_reload_config.yml @@ -0,0 +1,29 @@ +--- +### tags: +### update_config + +- name: "Wait for <{{ http_s }}://{{ prometheus_id }}.{{ domain }}>" + uri: + url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}" + url_username: "{{ prometheus_admin_username }}" + url_password: "{{ prometheus_admin_password }}" + method: GET + status_code: 200 + return_content: yes + register: prometheus_stats + until: prometheus_stats.status == 200 + retries: 10 + delay: 60 + tags: + - update_config + +- name: "Reload prometheus configuration" + uri: + url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}/-/reload" + url_username: "{{ prometheus_admin_username }}" + url_password: "{{ prometheus_admin_password }}" + method: POST + timeout: 300 # blocks until reload configuration is complete + status_code: 200 + tags: + - update_config diff --git a/roles/prometheus/tasks/_update_config.yml b/roles/prometheus/tasks/_update_config.yml index 4525603..25c86ad 100644 --- a/roles/prometheus/tasks/_update_config.yml +++ b/roles/prometheus/tasks/_update_config.yml @@ -1,43 +1,32 @@ --- - ### tags: ### update_config -- name: "Deploy service templates for {{ inventory_hostname }}" +- name: "Get enabled Prometheus services" + set_fact: + prometheus_services_enabled: "{{ prometheus_services_enabled | default([]) + [item] | flatten }}" + loop_control: + extended: yes + loop: + - "{{ ['alertmanager'] if prometheus_alertmanager_enabled|bool else omit }}" + - "{{ ['grafana'] if prometheus_grafana_enabled|bool else omit }}" + - "{{ ['prom2teams'] if prometheus_prom2teams_enabled|bool else omit }}" + - "{{ ['prometheus'] if prometheus_enabled|bool else omit }}" + tags: + - update_config + +- name: "Deploy Prometheus service templates for {{ inventory_hostname }}" include_role: name: hetzner-ansible-sma-deploy tasks_from: templates vars: - current_config: "prometheus" + current_config: "prometheus/{{ service }}_config" current_base_path: "{{ service_base_path }}" current_destination: "{{ inventory_hostname }}" current_owner: "{{ docker_owner }}" current_group: "{{ docker_group }}" + loop: "{{ prometheus_services_enabled }}" + loop_control: + loop_var: service tags: - update_config - -- name: "Wait for <{{ http_s }}://{{ prometheus_id }}.{{ domain }}>" - uri: - url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}" - url_username: "{{ prometheus_admin_username }}" - url_password: "{{ prometheus_admin_password }}" - method: GET - status_code: 200 - return_content: yes - register: prometheus_stats - until: prometheus_stats.status == 200 - retries: 10 - delay: 60 - tags: - - update_config - -- name: "Reload prometheus configuration" - uri: - url: "{{ http_s }}://{{ prometheus_id }}.{{ domain }}/-/reload" - url_username: "{{ prometheus_admin_username }}" - url_password: "{{ prometheus_admin_password }}" - method: POST - timeout: 300 # blocks until reload configuration is complete - status_code: 200 - tags: - - update_config \ No newline at end of file diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml index 43ef57c..61adea8 100644 --- a/roles/prometheus/tasks/main.yml +++ b/roles/prometheus/tasks/main.yml @@ -21,10 +21,7 @@ vars: record_data: "{{ stage_server_ip }}" record_name: "{{ item }}" - loop: - - "{{ prometheus_id }}" - - "{{ grafana_id }}" - - "{{ alertmanager_id }}" + loop: "{{ prometheus_service_names | reject('match', '^$') | list }}" - name: "Check if {{ inventory_hostname }}/docker-compose.yml exists" stat: @@ -56,6 +53,11 @@ tags: - update_deployment +- name: "Updating prometheus configuration" + include_tasks: _update_config.yml + tags: + - update_config + - name: "Update {{ inventory_hostname }}" community.docker.docker_compose: project_src: "{{ service_base_path }}/{{ inventory_hostname }}" @@ -64,8 +66,8 @@ tags: - update_deployment -- name: "Updating prometheus configuration" - include_tasks: _update_config.yml +- name: "Reload prometheus configuration" + include_tasks: _reload_config.yml tags: - update_config @@ -83,6 +85,8 @@ loop: "{{ grafana_users }}" tags: - grafana-user-update + when: + - prometheus_grafana_enabled - name: "Get all Dashboard uids from {{ http_s }}://{{ inventory_hostname }}-grafana.{{ domain }}" uri: @@ -99,10 +103,12 @@ delay: 60 tags: - grafana-user-update + when: + - prometheus_grafana_enabled - name: "Get all existing Dashboard uids" set_fact: - grafana_dashboards: "{{ grafana_dashboards_plain.json | json_query('[].{uid: uid, type: type, title: title}') }}" + grafana_dashboards: "{{ grafana_dashboards_plain.json | json_query('[].{uid: uid, type: type, title: title}') if prometheus_grafana_enabled else [] }}" tags: - grafana-user-update @@ -113,6 +119,7 @@ - grafana-user-update when: - debug + - prometheus_grafana_enabled - name: Restrict admin dashboard permissions uri: @@ -133,6 +140,7 @@ when: - item.type == 'dash-db' - item.uid not in grafana_dashboard_whitelist + - prometheus_grafana_enabled tags: - grafana-user-update @@ -155,6 +163,7 @@ when: - item.type == 'dash-db' - item.uid in grafana_dashboard_whitelist + - prometheus_grafana_enabled tags: - grafana-user-update diff --git a/roles/prometheus/vars/main.yml b/roles/prometheus/vars/main.yml index 0708c08..d2a97ba 100644 --- a/roles/prometheus/vars/main.yml +++ b/roles/prometheus/vars/main.yml @@ -1,5 +1,4 @@ --- - service_port_grafana: 3000 service_port_prometheus: 9090 service_port_alertmanager: 9093 @@ -9,140 +8,112 @@ alertmanager_id: "{{ inventory_hostname }}-alertmanager" grafana_id: "{{ inventory_hostname }}-grafana" prom2teams_id: "{{ inventory_hostname }}-prom2teams" -prometheus_docker: { - networks: [ - { - name: back-tier, - external: true, - }, - { - name: front-tier, - external: true, - }, - ], - volumes: [ - { - name: "{{ alertmanager_id }}-data" - }, - { - name: "{{ grafana_id }}-data" - }, - ], - services: [ - { - name: "{{ prometheus_id }}", - image_name: "{{ prometheus_image_name }}", - image_version: "{{ prom_prometheus_version }}", - user: "root", - labels: [ - '"traefik.enable=true"', - '"traefik.http.routers.{{ prometheus_id }}.service={{ prometheus_id }}"', - '"traefik.http.routers.{{ prometheus_id }}.rule=Host(`{{ inventory_hostname }}-prometheus.{{ domain }}`)"', - '"traefik.http.routers.{{ prometheus_id }}.entrypoints=websecure"', - '"traefik.http.routers.{{ prometheus_id }}.tls=true"', - '"traefik.http.routers.{{ prometheus_id }}.tls.certresolver=letsencrypt"', - '"traefik.http.services.{{ prometheus_id }}.loadbalancer.server.port={{ service_port_prometheus }}"', - '"traefik.http.routers.{{ prometheus_id }}.middlewares={{ prometheus_id }}-basicauth"', - '"traefik.http.middlewares.{{ prometheus_id }}-basicauth.basicauth.users={{ prometheus_admin_username }}:{{ prometheus_admin_password_htpasswd }}"', - ], - command: [ - '"--config.file=/etc/prometheus/prometheus.yml"', - '"--storage.tsdb.path=/prometheus"', - '"--web.console.libraries=/usr/share/prometheus/console_libraries"', - '"--web.console.templates=/usr/share/prometheus/consoles"', - '"--web.external-url={{ http_s}}://{{ inventory_hostname }}-prometheus.{{ domain }}"', - '"--web.enable-lifecycle"', - '"--storage.tsdb.retention.time={{ prometheus_tsdb_rentention_time | default("4w")}}"', - ], - volumes: [ - '"./config/prometheus/:/etc/prometheus/:ro"', - '"{{ prometheus_datadir + "/" +"_data" | default(prometheus_id + "-data") }}:/prometheus"', - ], - networks: [ - '"back-tier"', - '"front-tier"', - ], - extra_hosts: "{{ prometheus_extra_hosts | default([]) }}", - }, - { - name: "{{ alertmanager_id }}", - image_name: "{{ alertmanager_image_name }}", - image_version: "{{ prom_alertmanager_version }}", - labels: [ - '"traefik.enable=true"', - '"traefik.http.routers.{{ alertmanager_id }}.service={{ alertmanager_id }}"', - '"traefik.http.routers.{{ alertmanager_id }}.rule=Host(`{{ inventory_hostname }}-alertmanager.{{ domain }}`)"', - '"traefik.http.routers.{{ alertmanager_id }}.entrypoints=websecure"', - '"traefik.http.routers.{{ alertmanager_id }}.tls=true"', - '"traefik.http.routers.{{ alertmanager_id }}.tls.certresolver=letsencrypt"', - '"traefik.http.services.{{ alertmanager_id }}.loadbalancer.server.port={{ service_port_alertmanager }}"', - '"traefik.http.routers.{{ alertmanager_id }}.middlewares={{ alertmanager_id }}-basicauth"', - '"traefik.http.middlewares.{{ alertmanager_id }}-basicauth.basicauth.users={{ alertmanager_admin_username }}:{{ alertmanager_admin_password_htpasswd }}"', - ], - command: [ - '"--config.file=/etc/alertmanager/config.yml"', - '"--storage.path=/alertmanager"', - '"--web.external-url={{ http_s}}://{{ inventory_hostname }}-alertmanager.{{ domain }}"', - ], - environment: [ - 'LS_JAVA_OPTS: "-Xmx1G -Xms1G"', - ], - volumes: [ - '"./config/alertmanager/:/etc/alertmanager/:ro"', - '"{{ alertmanager_id }}-data:/alertmanager"', - ], - networks: [ - '"back-tier"', - '"front-tier"', - ], - }, - { - name: "{{ grafana_id }}", - image_name: "{{ grafana_image_name }}", - image_version: "{{ prom_grafana_version }}", - user: '"472"', - labels: [ - '"traefik.enable=true"', - '"traefik.http.routers.{{ grafana_id }}.service={{ grafana_id }}"', - '"traefik.http.routers.{{ grafana_id }}.rule=Host(`{{ inventory_hostname }}-grafana.{{ domain }}`)"', - '"traefik.http.routers.{{ grafana_id }}.entrypoints=websecure"', - '"traefik.http.routers.{{ grafana_id }}.tls=true"', - '"traefik.http.routers.{{ grafana_id }}.tls.certresolver=letsencrypt"', - '"traefik.http.services.{{ grafana_id }}.loadbalancer.server.port={{ service_port_grafana }}"', - ], - volumes: [ - '"./config/grafana/provisioning/:/etc/grafana/provisioning/"', - '"./config/grafana/conf/defaults.ini:/usr/share/grafana/conf/defaults.ini"', - '"{{ grafana_id }}-data:/var/lib/grafana"', - ], - networks: [ - '"back-tier"', - '"front-tier"', - ], - env_file: [ - '"./config/grafana/config.monitoring"', - ], - }, - { - name: "{{ prom2teams_id }}", - image_name: "{{ prom2teams_image_name }}", - image_version: "{{ prom_prom2teams_version }}", - environment: [ - "PROM2TEAMS_PROMETHEUS_METRICS: \"true\"", - "DEBUG_METRICS: \"true\"", - ], - ports: [ - { - "external": "8089", - "internal": "8089", - }, - ], - volumes: [ - '"./config/prom2teams/config.ini:/opt/prom2teams/config.ini"', - ], - networks: [ - '"back-tier"', - ], - }, - ], -} +prometheus_docker: + networks: + - name: back-tier + external: true + + - name: front-tier + external: true + + volumes: "{{ prometheus_docker_volumes | selectattr('name', 'in', prometheus_docker_volume_names | join(',')) }}" + + services: "{{ prometheus_services | selectattr('name', 'in', prometheus_service_names | join(',')) }}" + +prometheus_docker_volumes: + - name: "{{ alertmanager_id }}-data" + - name: "{{ grafana_id }}-data" + +prometheus_services: + - name: "{{ prometheus_id }}" + image_name: "{{ prometheus_image_name }}" + image_version: "{{ prom_prometheus_version }}" + user: "root" + labels: + - "traefik.enable=true" + - "traefik.http.routers.{{ prometheus_id }}.service={{ prometheus_id }}" + - "traefik.http.routers.{{ prometheus_id }}.rule=Host(`{{ inventory_hostname }}-prometheus.{{ domain }}`)" + - "traefik.http.routers.{{ prometheus_id }}.entrypoints=websecure" + - "traefik.http.routers.{{ prometheus_id }}.tls=true" + - "traefik.http.routers.{{ prometheus_id }}.tls.certresolver=letsencrypt" + - "traefik.http.services.{{ prometheus_id }}.loadbalancer.server.port={{ service_port_prometheus }}" + - "traefik.http.routers.{{ prometheus_id }}.middlewares={{ prometheus_id }}-basicauth" + - "traefik.http.middlewares.{{ prometheus_id }}-basicauth.basicauth.users={{ prometheus_admin_username }}:{{ prometheus_admin_password_htpasswd }}" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--web.external-url={{ http_s }}://{{ inventory_hostname }}-prometheus.{{ domain }}" + - "--web.enable-lifecycle" + - "--storage.tsdb.retention.time={{ prometheus_tsdb_rentention_time }}" + volumes: + - "./config/prometheus/:/etc/prometheus/:ro" + - "{{ prometheus_datadir + '/' + 'data' | default(prometheus_id + '-data') }}:/prometheus" + networks: + - back-tier + - front-tier + extra_hosts: "{{ prometheus_extra_hosts | default([]) }}" + + - name: "{{ grafana_id }}" + image_name: "{{ grafana_image_name }}" + image_version: "{{ prom_grafana_version }}" + user: "472" + labels: + - "traefik.enable=true" + - "traefik.http.routers.{{ grafana_id }}.service={{ grafana_id }}" + - "traefik.http.routers.{{ grafana_id }}.rule=Host(`{{ inventory_hostname }}-grafana.{{ domain }}`)" + - "traefik.http.routers.{{ grafana_id }}.entrypoints=websecure" + - "traefik.http.routers.{{ grafana_id }}.tls=true" + - "traefik.http.routers.{{ grafana_id }}.tls.certresolver=letsencrypt" + - "traefik.http.services.{{ grafana_id }}.loadbalancer.server.port={{ service_port_grafana }}" + volumes: + - "./config/grafana/provisioning/:/etc/grafana/provisioning/" + - "./config/grafana/conf/defaults.ini:/usr/share/grafana/conf/defaults.ini" + - "{{ grafana_id }}-data:/var/lib/grafana" + networks: + - back-tier + - front-tier + env_file: + - "./config/grafana/config.monitoring" + + - name: "{{ alertmanager_id }}" + image_name: "{{ alertmanager_image_name }}" + image_version: "{{ prom_alertmanager_version }}" + labels: + - "traefik.enable=true" + - "traefik.http.routers.{{ alertmanager_id }}.service={{ alertmanager_id }}" + - "traefik.http.routers.{{ alertmanager_id }}.rule=Host(`{{ inventory_hostname }}-alertmanager.{{ domain }}`)" + - "traefik.http.routers.{{ alertmanager_id }}.entrypoints=websecure" + - "traefik.http.routers.{{ alertmanager_id }}.tls=true" + - "traefik.http.routers.{{ alertmanager_id }}.tls.certresolver=letsencrypt" + - "traefik.http.services.{{ alertmanager_id }}.loadbalancer.server.port={{ service_port_alertmanager }}" + - "traefik.http.routers.{{ alertmanager_id }}.middlewares={{ alertmanager_id }}-basicauth" + - "traefik.http.middlewares.{{ alertmanager_id }}-basicauth.basicauth.users={{ alertmanager_admin_username }}:\ + {{ alertmanager_admin_password_htpasswd }}" + command: + - "--config.file=/etc/alertmanager/config.yml" + - "--storage.path=/alertmanager" + - "--web.external-url={{ http_s }}://{{ inventory_hostname }}-alertmanager.{{ domain }}" + environment: + - 'LS_JAVA_OPTS: "-Xmx1G -Xms1G"' + volumes: + - "./config/alertmanager/:/etc/alertmanager/:ro" + - "{{ alertmanager_id }}-data:/alertmanager" + networks: + - back-tier + - front-tier + + - name: "{{ prom2teams_id }}" + image_name: "{{ prom2teams_image_name }}" + image_version: "{{ prom_prom2teams_version }}" + environment: + - 'PROM2TEAMS_PROMETHEUS_METRICS: "true"' + - 'DEBUG_METRICS: "true"' + ports: + - external: 8089 + internal: 8089 + volumes: + - "./config/prom2teams/config.ini:/opt/prom2teams/config.ini" + networks: + - back-tier diff --git a/templates/_docker/docker-compose.yml.j2 b/templates/_docker/docker-compose.yml.j2 index 6edd9d5..a8194b8 100644 --- a/templates/_docker/docker-compose.yml.j2 +++ b/templates/_docker/docker-compose.yml.j2 @@ -20,19 +20,21 @@ networks: {% endfor %} {% endif %} {# ################################################## networks #} - {# ################################################### volumes #} {% if current_docker.volumes is defined and (current_docker.volumes|length>0) %} + volumes: {% for volume in current_docker.volumes %} {{ volume.name }}: {} {% endfor %} + +{% else %} + {% endif %} {# ################################################### volumes #} - {# ################################################## services #} services: {% for service in current_docker.services %} @@ -43,7 +45,7 @@ services: {% if service.user is defined %} - user: {{ service.user }} + user: "{{ service.user }}" {% endif %} {# ########################################## command #} {% if diff --git a/templates/prometheus/config/alertmanager/config.yml.j2 b/templates/prometheus/alertmanager_config/config/alertmanager/config.yml.j2 similarity index 100% rename from templates/prometheus/config/alertmanager/config.yml.j2 rename to templates/prometheus/alertmanager_config/config/alertmanager/config.yml.j2 diff --git a/templates/prometheus/config/alertmanager/templates/notifications.tmpl b/templates/prometheus/alertmanager_config/config/alertmanager/templates/notifications.tmpl similarity index 100% rename from templates/prometheus/config/alertmanager/templates/notifications.tmpl rename to templates/prometheus/alertmanager_config/config/alertmanager/templates/notifications.tmpl diff --git a/templates/prometheus/config/prometheus/alert.rules.j2 b/templates/prometheus/alertmanager_config/config/prometheus/alert.rules.j2 similarity index 100% rename from templates/prometheus/config/prometheus/alert.rules.j2 rename to templates/prometheus/alertmanager_config/config/prometheus/alert.rules.j2 diff --git a/templates/prometheus/config/grafana/conf/defaults.ini.j2 b/templates/prometheus/grafana_config/config/grafana/conf/defaults.ini.j2 similarity index 100% rename from templates/prometheus/config/grafana/conf/defaults.ini.j2 rename to templates/prometheus/grafana_config/config/grafana/conf/defaults.ini.j2 diff --git a/templates/prometheus/config/grafana/config.monitoring.j2 b/templates/prometheus/grafana_config/config/grafana/config.monitoring.j2 similarity index 100% rename from templates/prometheus/config/grafana/config.monitoring.j2 rename to templates/prometheus/grafana_config/config/grafana/config.monitoring.j2 diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Alertmanager.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Alertmanager.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Alertmanager.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Alertmanager.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Ansible_AWX.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Ansible_AWX.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Ansible_AWX.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Ansible_AWX.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Elasticsearch.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Elasticsearch.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Elasticsearch.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Elasticsearch.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Gitea.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Gitea.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Gitea.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Gitea.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Harbor.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Harbor.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Harbor.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Harbor.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Hetzner_Metrics.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Hetzner_Metrics.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Hetzner_Metrics.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Hetzner_Metrics.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Keycloak_Metrics.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Keycloak_Metrics.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Keycloak_Metrics.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Keycloak_Metrics.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/MySQL.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/MySQL.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/MySQL.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/MySQL.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/NGINX_Ingress_controller.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/NGINX_Ingress_controller.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Node_Exporter_Full.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Node_Exporter_Full.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Node_Exporter_Full.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Node_Exporter_Full.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/PostgreSQL_Database.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/PostgreSQL_Database.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/PostgreSQL_Database.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/PostgreSQL_Database.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Prometheus_Blackbox_Exporter.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Prometheus_Blackbox_Exporter.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Prometheus_Blackbox_Exporter.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Prometheus_Blackbox_Exporter.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Servers.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Servers.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Servers.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Servers.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Spring_Boot_Statistics.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Spring_Boot_Statistics.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Spring_Boot_Statistics.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Spring_Boot_Statistics.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Stage_Overview.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Stage_Overview.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Stage_Overview.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Stage_Overview.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/Traefik_2_2.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Traefik_2_2.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/Traefik_2_2.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/Traefik_2_2.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/dashboard.yml b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/dashboard.yml similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/dashboard.yml rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/dashboard.yml diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-cluster.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-cluster.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-cluster.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-cluster.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-namespace.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-namespace.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-namespace.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-namespace.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-node.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-node.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-node.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-node.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-pod.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-pod.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-pod.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-pod.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-workload.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-workload.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-workload.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-workload.json diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-workloads-namespace.json b/templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-workloads-namespace.json similarity index 100% rename from templates/prometheus/config/grafana/provisioning/dashboards/k8s-resources-workloads-namespace.json rename to templates/prometheus/grafana_config/config/grafana/provisioning/dashboards/k8s-resources-workloads-namespace.json diff --git a/templates/prometheus/config/grafana/provisioning/datasources/datasource.yml.j2 b/templates/prometheus/grafana_config/config/grafana/provisioning/datasources/datasource.yml.j2 similarity index 100% rename from templates/prometheus/config/grafana/provisioning/datasources/datasource.yml.j2 rename to templates/prometheus/grafana_config/config/grafana/provisioning/datasources/datasource.yml.j2 diff --git a/templates/prometheus/config/prom2teams/config.ini.j2 b/templates/prometheus/prom2teams_config/config/prom2teams/config.ini.j2 similarity index 100% rename from templates/prometheus/config/prom2teams/config.ini.j2 rename to templates/prometheus/prom2teams_config/config/prom2teams/config.ini.j2 diff --git a/templates/prometheus/config/blackbox.yml.j2 b/templates/prometheus/prometheus_config/config/blackbox.yml.j2 similarity index 100% rename from templates/prometheus/config/blackbox.yml.j2 rename to templates/prometheus/prometheus_config/config/blackbox.yml.j2 diff --git a/templates/prometheus/config/prometheus/prometheus.yml.j2 b/templates/prometheus/prometheus_config/config/prometheus/prometheus.yml.j2 similarity index 96% rename from templates/prometheus/config/prometheus/prometheus.yml.j2 rename to templates/prometheus/prometheus_config/config/prometheus/prometheus.yml.j2 index 9cdf0cb..b39c9d8 100644 --- a/templates/prometheus/config/prometheus/prometheus.yml.j2 +++ b/templates/prometheus/prometheus_config/config/prometheus/prometheus.yml.j2 @@ -9,6 +9,15 @@ global: external_labels: monitor: '{{ inventory_hostname }}' +{% if prometheus_remote_write_enabled | default(true) %} +remote_write: + - url: "https://{{ shared_service_kube_hostname_prometheus }}/api/v1/write" + basic_auth: + username: "{{ prometheus_admin_username }}" + password: "{{ prometheus_admin_password }}" +{% endif %} + +{% if prometheus_alertmanager_enabled | default(true) %} # Load and evaluate rules in this file every 'evaluation_interval' seconds. rule_files: - 'alert.rules' @@ -20,6 +29,7 @@ alerting: static_configs: - targets: - "{{ inventory_hostname }}-alertmanager:9093" +{% endif %} # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. @@ -43,6 +53,7 @@ scrape_configs: target_label: instance replacement: '{{ inventory_hostname }}-prometheus.{{ domain }}' +{% if prometheus_alertmanager_enabled | default(true) %} - job_name: 'alertmanager' static_configs: - targets: [ @@ -56,7 +67,9 @@ scrape_configs: - source_labels: [__address__] target_label: instance replacement: '{{ inventory_hostname }}-alertmanager.{{ domain }}' +{% endif %} +{% if prometheus_prom2teams_enabled | default(true) %} - job_name: 'prom2teams' scheme: http metrics_path: '/metrics' @@ -68,6 +81,7 @@ scrape_configs: env: {{ stage }} project: monitoring application: prom2teams +{% endif %} - job_name: 'blackbox' metrics_path: /probe @@ -534,6 +548,6 @@ scrape_configs: - '{job=~".*"}' static_configs: - - targets: ['{{ kubernetes_prometheus_endpoint }}'] + - targets: ['{{ shared_service_kube_hostname_prometheus }}'] {% endif %}