From 9f9a192432c97ecc1ce0953e97e1472f837fa1a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=B6rz=2C=20Friedrich?= Date: Fri, 17 Dec 2021 12:51:37 +0000 Subject: [PATCH] DEV-269: added stuff to federate k8s-internal prometheus metrics --- group_vars/all/plain.yml | 13 + group_vars/prometheus/plain.yml | 2 +- group_vars/stage_dev/plain.yml | 7 + group_vars/stage_qa/plain.yml | 6 + roles/common/tasks/main.yml | 22 +- ...ler__dashboardID_9614__-1638872710623.json | 1464 +++++++++++++++++ .../config/prometheus/prometheus.yml.j2 | 15 + 7 files changed, 1518 insertions(+), 11 deletions(-) create mode 100644 templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller__dashboardID_9614__-1638872710623.json diff --git a/group_vars/all/plain.yml b/group_vars/all/plain.yml index d3229b0..830f369 100644 --- a/group_vars/all/plain.yml +++ b/group_vars/all/plain.yml @@ -31,6 +31,17 @@ traefik_enabled: true filebeat_enabled: true node_exporter_enabled: true +common_apt_dependencies: + - mc + - vim + - zip + - curl + - htop + - net-tools + - bash-completion + - python3-pip + +common_pip_dependencies: [] use_ssl: true http_s: "http{{ use_ssl | ternary('s', '', omit) }}" @@ -177,3 +188,5 @@ blackbox_http_2xx_targets: - 'https://{{ stage }}-keycloak-01.smardigo.digital/auth/' - 'https://{{ stage }}-management-01-connect.smardigo.digital/auth/' blackbox_http_2xx_additional_targets: [] + +kubernetes_prometheus_endpoint: "{{ stage }}-kube-prometheus.{{ domain }}" diff --git a/group_vars/prometheus/plain.yml b/group_vars/prometheus/plain.yml index 788e6c4..e29c724 100644 --- a/group_vars/prometheus/plain.yml +++ b/group_vars/prometheus/plain.yml @@ -1,4 +1,4 @@ --- -hetzner_server_type: cpx11 +hetzner_server_type: cx21 hetzner_server_labels: "stage={{ stage }} service=prometheus" diff --git a/group_vars/stage_dev/plain.yml b/group_vars/stage_dev/plain.yml index f839464..fd3cc2e 100644 --- a/group_vars/stage_dev/plain.yml +++ b/group_vars/stage_dev/plain.yml @@ -131,6 +131,8 @@ shared_service_kube_node_03: "{{ stage_server_infos | first | default('-') }}" +shared_service_kube_prometheus_ip: 10.0.0.20 + shared_service_maria_hostname: "{{ stage }}-maria-01" shared_service_postgres_01_hostname: "{{ stage }}-postgres-01" shared_service_elastic_stack_01_hostname: "{{ stage }}-elastic-stack-elastic-01" @@ -149,6 +151,7 @@ shared_service_iam_hostname: "{{ stage }}-iam-01.{{ domain }}" shared_service_mail_hostname: "{{ stage }}-mail-01.{{ domain }}" shared_service_gitea_hostname: "{{ stage }}-gitea-01.{{ domain }}" shared_service_redis_hostname: "{{ stage }}-redis-01.{{ domain }}" +shared_service_kube_prometheus_hostname: "{{ stage }}-kube-prometheus.{{ domain }}" shared_service_pdns_hostname: "{{ stage }}-pdns-01.{{ domain }}" shared_service_webdav_hostname: "{{ stage }}-webdav-01.{{ domain }}" shared_service_keycloak_hostname: "{{ stage }}-keycloak-01.{{ domain }}" @@ -242,6 +245,10 @@ shared_service_hosts: [ { ip: "{{ shared_service_pdns_ip }}", name: "{{ shared_service_pdns_hostname }}" + }, + { + ip: "{{ shared_service_kube_prometheus_ip }}", + name: "{{ shared_service_kube_prometheus_hostname }}" } ] diff --git a/group_vars/stage_qa/plain.yml b/group_vars/stage_qa/plain.yml index 0d89207..6f8bd01 100644 --- a/group_vars/stage_qa/plain.yml +++ b/group_vars/stage_qa/plain.yml @@ -130,6 +130,7 @@ shared_service_kube_node_03: "{{ stage_server_infos | list | first | default('-') }}" +shared_service_kube_prometheus_ip: 10.1.0.26 shared_service_maria_hostname: "{{ stage }}-maria-01" shared_service_postgres_01_hostname: "{{ stage }}-postgres-01" @@ -149,6 +150,7 @@ shared_service_iam_hostname: "{{ stage }}-iam-01.{{ domain }}" shared_service_mail_hostname: "{{ stage }}-mail-01.{{ domain }}" shared_service_gitea_hostname: "{{ stage }}-gitea-01.{{ domain }}" shared_service_redis_hostname: "{{ stage }}-redis-01.{{ domain }}" +shared_service_kube_prometheus_hostname: "{{ stage }}-kube-prometheus.{{ domain }}" shared_service_pdns_hostname: "{{ stage }}-pdns-01.{{ domain }}" shared_service_webdav_hostname: "{{ stage }}-webdav-01.{{ domain }}" shared_service_keycloak_hostname: "{{ stage }}-keycloak-01.{{ domain }}" @@ -242,6 +244,10 @@ shared_service_hosts: [ { ip: "{{ shared_service_pdns_ip }}", name: "{{ shared_service_pdns_hostname }}" + }, + { + ip: "{{ shared_service_kube_prometheus_ip }}", + name: "{{ shared_service_kube_prometheus_hostname }}" } ] diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 02f4fc4..9b6fb46 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -110,22 +110,24 @@ - users - config -- name: "Install common dependencies" +- name: "Install common apt-dependencies" apt: - name: [ - 'mc', - 'vim', - 'zip', - 'curl', - 'htop', - 'net-tools', - 'bash-completion', - ] + name: "{{ item }}" state: 'present' + loop: "{{ common_apt_dependencies }}" when: ansible_distribution == "Ubuntu" tags: - install +- name: "Install python3-pip dependencies for {{ inventory_hostname }}" + pip: + name: "{{ item }}" + state: present + become: True + loop: "{{ common_pip_dependencies }}" + tags: + - install + - name: 'Ensures directory exists' file: state: directory diff --git a/templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller__dashboardID_9614__-1638872710623.json b/templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller__dashboardID_9614__-1638872710623.json new file mode 100644 index 0000000..b3fafc2 --- /dev/null +++ b/templates/prometheus/config/grafana/provisioning/dashboards/NGINX_Ingress_controller__dashboardID_9614__-1638872710623.json @@ -0,0 +1,1464 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": "Prometheus", + "enable": true, + "expr": "sum(changes(nginx_ingress_controller_config_last_reload_successful_timestamp_seconds{instance!=\"unknown\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[30s])) by (controller_class)", + "hide": false, + "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "Config Reloads", + "showIn": 0, + "step": "30s", + "tagKeys": "controller_class", + "tags": [], + "titleFormat": "Config Reloaded", + "type": "tags" + } + ] + }, + "description": "Ingress-nginx supports a rich collection of prometheus metrics. If you have prometheus and grafana installed on your cluster then prometheus will already be scraping this data due to the scrape annotation on the deployment.", + "editable": true, + "gnetId": 9614, + "graphTooltip": 0, + "id": 18, + "iteration": 1638872651516, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "expr": "round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m])), 0.001)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "title": "Controller Request Volume", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 82, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "expr": "sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "title": "Controller Connections", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 95 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",status!~\"[4-5].*\"}[2m])) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "title": "Controller Success Rate (non-4|5xx responses)", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 81, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "expr": "avg(nginx_ingress_controller_success{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "title": "Config Reloads", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 83, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "expr": "count(nginx_ingress_controller_config_last_reload_successful{controller_pod=~\"$controller\",controller_namespace=~\"$namespace\"} == 0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "title": "Last Config Failed", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 3 + }, + "height": "200px", + "hiddenSeries": false, + "id": 86, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress), 0.001)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "metric": "network", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingress Request Volume", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00", + "max - prometheus": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": false, + "error": false, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 87, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\",status!~\"[4-5].*\"}[2m])) by (ingress) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingress Success Rate (non-4|5xx responses)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 10 + }, + "height": "200px", + "hiddenSeries": false, + "id": 32, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (nginx_ingress_controller_request_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "- sum (irate (nginx_ingress_controller_response_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "hide": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00", + "max - prometheus": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": false, + "error": false, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 10 + }, + "hiddenSeries": false, + "id": 77, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(nginx_ingress_controller_nginx_process_resident_memory_bytes{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}) ", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "nginx", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "editable": false, + "error": false, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 10 + }, + "height": "", + "hiddenSeries": false, + "id": 79, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate (nginx_ingress_controller_nginx_process_cpu_seconds_total{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m])) ", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "nginx", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "hideTimeOverride": false, + "id": 75, + "links": [], + "pageSize": 7, + "repeat": null, + "repeatDirection": "h", + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "styles": [ + { + "alias": "Ingress", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ingress", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Requests", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #A", + "thresholds": [ + "" + ], + "type": "number", + "unit": "ops" + }, + { + "alias": "Errors", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #B", + "thresholds": [], + "type": "number", + "unit": "ops" + }, + { + "alias": "P50 Latency", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "P90 Latency", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value #D", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "P99 Latency", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value #E", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "IN", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #F", + "thresholds": [ + "" + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "OUT", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [], + "type": "number", + "unit": "Bps" + } + ], + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ destination_service }}", + "refId": "E" + }, + { + "expr": "sum(irate(nginx_ingress_controller_request_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "F" + }, + { + "expr": "sum(irate(nginx_ingress_controller_response_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "G" + } + ], + "timeFrom": null, + "title": "Ingress Percentile Response Times and Transfer Rates", + "transform": "table", + "type": "table-old" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "Prometheus", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "height": "1024", + "id": 85, + "links": [], + "pageSize": 7, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "TTL", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [ + "0", + "691200" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "avg(nginx_ingress_controller_ssl_expire_time_seconds{kubernetes_pod_name=~\"$controller\",namespace=~\"$namespace\",ingress=~\"$ingress\"}) by (host) - time()", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ host }}", + "metric": "gke_letsencrypt_cert_expiration", + "refId": "A", + "step": 1 + } + ], + "title": "Ingress Certificate Expiry", + "transform": "timeseries_aggregations", + "type": "table-old" + } + ], + "refresh": "5s", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "nginx" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(nginx_ingress_controller_config_hash, controller_namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Controller Class", + "multi": false, + "name": "controller_class", + "options": [], + "query": { + "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\"}, controller_class) ", + "refId": "Prometheus-controller_class-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Controller", + "multi": false, + "name": "controller", + "options": [], + "query": { + "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\",controller_class=~\"$controller_class\"}, controller_pod) ", + "refId": "Prometheus-controller-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Ingress", + "multi": false, + "name": "ingress", + "options": [], + "query": { + "query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller=~\"$controller\"}, ingress) ", + "refId": "Prometheus-ingress-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "2m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "NGINX Ingress controller", + "uid": "Dh7lqchnz", + "version": 1 +} \ No newline at end of file diff --git a/templates/prometheus/config/prometheus/prometheus.yml.j2 b/templates/prometheus/config/prometheus/prometheus.yml.j2 index 05cb46b..9e27da8 100644 --- a/templates/prometheus/config/prometheus/prometheus.yml.j2 +++ b/templates/prometheus/config/prometheus/prometheus.yml.j2 @@ -457,3 +457,18 @@ scrape_configs: target_label: instance - target_label: __address__ replacement: "{{ lookup('community.general.dig', blackbox_exporter_fqdn ) }}:9115" +############################################## +### federation #### +############################################## + - job_name: 'federate - kube' + scheme: https + + honor_labels: true + metrics_path: '/federate' + + params: + 'match[]': + - '{job=~".*"}' + + static_configs: + - targets: ['{{ kubernetes_prometheus_endpoint }}']