From 14719bd4c824e31dda7bb5f2a6e31e560cd7fcc3 Mon Sep 17 00:00:00 2001 From: Edward McFarlane Date: Thu, 8 Nov 2018 17:12:15 +0000 Subject: [PATCH 1/4] Fix Node/Cluster dashboards for node-exporter v0.16.0 --- src/dashboards/k8s-cluster.json | 57 ++++++++++++---- src/dashboards/k8s-node.json | 115 +++++++++++++++++++++----------- 2 files changed, 120 insertions(+), 52 deletions(-) diff --git a/src/dashboards/k8s-cluster.json b/src/dashboards/k8s-cluster.json index 8a02e64..fc019f0 100644 --- a/src/dashboards/k8s-cluster.json +++ b/src/dashboards/k8s-cluster.json @@ -20,7 +20,19 @@ "version": "" } ], - "annotations": {}, + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, "description": "Summary metrics about containers running on Kubernetes nodes.", "editable": true, "gnetId": null, @@ -120,7 +132,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster Pod Usage", "type": "singlestat", "valueFontSize": "80%", @@ -200,7 +212,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster CPU Usage", "type": "singlestat", "valueFontSize": "80%", @@ -280,7 +292,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster Memory Usage", "type": "singlestat", "valueFontSize": "80%", @@ -354,13 +366,13 @@ "tableColumn": "", "targets": [ { - "expr": "(sum (node_filesystem_size{nodename=~\"$node\"}) - sum (node_filesystem_free{nodename=~\"$node\"})) / sum (node_filesystem_size{nodename=~\"$node\"})", + "expr": "(sum (node_filesystem_size_bytes{nodename=~\"$node\"}) - sum (node_filesystem_free_bytes{nodename=~\"$node\"})) / sum (node_filesystem_size_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": 
"Cluster Disk Usage", "type": "singlestat", "valueFontSize": "80%", @@ -465,7 +477,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -560,7 +576,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -654,7 +674,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -693,14 +717,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_filesystem_size{nodename=~\"$node\"}) - sum(node_filesystem_free{nodename=~\"$node\"})", + "expr": "sum(node_filesystem_size_bytes{nodename=~\"$node\"}) - sum(node_filesystem_free_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "usage", "refId": "A" }, { - "expr": "sum(node_filesystem_size{nodename=~\"$node\"})", + "expr": "sum(node_filesystem_size_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "limit", @@ -741,7 +765,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -2515,6 +2543,7 @@ "query": "grafana-kubernetes-datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -2529,6 +2558,7 @@ "query": "datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -2544,6 +2574,7 @@ "query": "prometheus", "refresh": 1, "regex": "/$ds/", + "skipUrlSync": false, "type": "datasource" }, { @@ -2558,6 +2589,7 @@ "query": "node", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], @@ -2577,6 +2609,7 @@ "query": "namespace", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], diff --git a/src/dashboards/k8s-node.json b/src/dashboards/k8s-node.json index 29c829f..af9a728 100644 --- 
a/src/dashboards/k8s-node.json +++ b/src/dashboards/k8s-node.json @@ -43,7 +43,6 @@ "gnetId": 470, "graphTooltip": 1, "id": null, - "iteration": 1516060966755, "links": [ { "asDropdown": true, @@ -90,7 +89,7 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 11 }, "id": 27, "panels": [], @@ -115,7 +114,7 @@ "h": 7, "w": 10, "x": 0, - "y": 9 + "y": 12 }, "id": 1, "legend": { @@ -143,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"system\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"system\"}[5m])) * 100))", "format": "time_series", "instant": false, "interval": "", @@ -152,21 +151,21 @@ "refId": "A" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"user\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"user\"}[5m])) * 100))", "format": "time_series", "intervalFactor": 1, "legendFormat": "user", "refId": "B" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"iowait\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"iowait\"}[5m])) * 100))", "format": "time_series", "intervalFactor": 1, "legendFormat": "iowait", "refId": "C" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"steal\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"steal\"}[5m])) * 100))", "format": "time_series", "intervalFactor": 1, "legendFormat": "steal", @@ -206,7 +205,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -223,7 +226,7 @@ "h": 7, "w": 10, "x": 10, - "y": 9 + "y": 12 }, "id": 10, "legend": { @@ -256,7 +259,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_memory_MemAvailable{nodename=~\"$node\"})", + "expr": "sum(node_memory_MemAvailable_bytes{nodename=~\"$node\"})", "format": "time_series", "interval": "", 
"intervalFactor": 1, @@ -264,14 +267,14 @@ "refId": "A" }, { - "expr": "sum(node_memory_MemFree{nodename=~\"$node\"})", + "expr": "sum(node_memory_MemFree_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "free", "refId": "B" }, { - "expr": "sum(node_memory_Active{nodename=~\"$node\"})", + "expr": "sum(node_memory_Active_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "active", @@ -298,7 +301,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "bytes", "logBase": 1, "max": null, "min": 0, @@ -311,7 +314,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "cacheTimeout": null, @@ -337,7 +344,7 @@ "h": 7, "w": 4, "x": 20, - "y": 9 + "y": 12 }, "id": 23, "interval": null, @@ -409,7 +416,7 @@ "h": 6, "w": 24, "x": 0, - "y": 16 + "y": 19 }, "id": 24, "legend": { @@ -437,14 +444,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_filesystem_free{nodename=~\"$node\"}) by (nodename)", + "expr": "sum(node_filesystem_free_bytes{nodename=~\"$node\"}) by (nodename)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ nodename }} free", "refId": "B" }, { - "expr": "(sum(node_filesystem_size{nodename=~\"$node\"}) by (nodename) - sum(node_filesystem_free) by (nodename))", + "expr": "(sum(node_filesystem_size_bytes{nodename=~\"$node\"}) by (nodename) - sum(node_filesystem_free_bytes{nodename=~\"$node\"}) by (nodename))", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -487,7 +494,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -504,7 +515,7 @@ "h": 6, "w": 12, "x": 0, - "y": 22 + "y": 25 }, "id": 8, "legend": { @@ -530,14 +541,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_bytes_read{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_read_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series",
"intervalFactor": 1, "legendFormat": "Read", "refId": "A" }, { - "expr": "sum(rate(node_disk_bytes_written{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_written_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", @@ -577,7 +588,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -594,7 +609,7 @@ "h": 6, "w": 12, "x": 12, - "y": 22 + "y": 25 }, "id": 7, "legend": { @@ -620,14 +635,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_reads_completed{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_reads_completed_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "sum(rate(node_disk_writes_completed{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_writes_completed_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -667,7 +682,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -684,7 +703,7 @@ "h": 5, "w": 12, "x": 0, - "y": 28 + "y": 31 }, "id": 9, "legend": { @@ -715,14 +734,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_bytes{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_receive_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "receive", "refId": "A" }, { - "expr": "sum(rate(node_network_transmit_bytes{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_transmit_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "transmit", @@ -762,7 +781,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -779,7 +802,7 @@ "h": 5, "w": 12, "x": 12, - "y": 
28 + "y": 31 }, "id": 18, "legend": { @@ -810,14 +833,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_network_receive_errs{nodename=~\"$node\"})", + "expr": "sum(node_network_receive_errs_total{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "in", "refId": "A" }, { - "expr": "sum(node_network_transmit_errs{nodename=~\"$node\"})", + "expr": "sum(node_network_transmit_errs_total{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "in", @@ -857,7 +880,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -874,7 +901,7 @@ "h": 5, "w": 24, "x": 0, - "y": 33 + "y": 36 }, "id": 17, "legend": { @@ -905,14 +932,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_packets{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_receive_packets_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "receive", "refId": "A" }, { - "expr": "sum(rate(node_network_transmit_packets{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_transmit_packets_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "transmit", @@ -952,7 +979,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": false, @@ -972,6 +1003,7 @@ "query": "grafana-kubernetes-datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -986,6 +1018,7 @@ "query": "datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -1001,6 +1034,7 @@ "query": "prometheus", "refresh": 1, "regex": "/$ds/", + "skipUrlSync": false, "type": "datasource" }, { @@ -1015,6 +1049,7 @@ "query": "node", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], @@ -1056,4 
+1091,4 @@ }, "timezone": "browser", "title": "K8s Node" -} \ No newline at end of file +} From 608e52cf6b2763865a1726c80172e4dea85f9992 Mon Sep 17 00:00:00 2001 From: Edward McFarlane Date: Thu, 31 Jan 2019 17:44:41 +0000 Subject: [PATCH 2/4] Fix k8s-container.json namespace/node selector queries --- dist/dashboards/k8s-cluster.json | 57 +++++++++++--- dist/dashboards/k8s-container.json | 14 ++-- dist/dashboards/k8s-node.json | 115 +++++++++++++++++++---------- src/dashboards/k8s-container.json | 14 ++-- 4 files changed, 134 insertions(+), 66 deletions(-) diff --git a/dist/dashboards/k8s-cluster.json b/dist/dashboards/k8s-cluster.json index 8a02e64..fc019f0 100644 --- a/dist/dashboards/k8s-cluster.json +++ b/dist/dashboards/k8s-cluster.json @@ -20,7 +20,19 @@ "version": "" } ], - "annotations": {}, + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, "description": "Summary metrics about containers running on Kubernetes nodes.", "editable": true, "gnetId": null, @@ -120,7 +132,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster Pod Usage", "type": "singlestat", "valueFontSize": "80%", @@ -200,7 +212,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster CPU Usage", "type": "singlestat", "valueFontSize": "80%", @@ -280,7 +292,7 @@ "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster Memory Usage", "type": "singlestat", "valueFontSize": "80%", @@ -354,13 +366,13 @@ "tableColumn": "", "targets": [ { - "expr": "(sum (node_filesystem_size{nodename=~\"$node\"}) - sum (node_filesystem_free{nodename=~\"$node\"})) / sum (node_filesystem_size{nodename=~\"$node\"})", + "expr": "(sum (node_filesystem_size_bytes{nodename=~\"$node\"}) - sum (node_filesystem_free_bytes{nodename=~\"$node\"})) / sum 
(node_filesystem_size_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": ".8,.9", + "thresholds": "0.8,0.9", "title": "Cluster Disk Usage", "type": "singlestat", "valueFontSize": "80%", @@ -465,7 +477,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -560,7 +576,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -654,7 +674,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -693,14 +717,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_filesystem_size{nodename=~\"$node\"}) - sum(node_filesystem_free{nodename=~\"$node\"})", + "expr": "sum(node_filesystem_size_bytes{nodename=~\"$node\"}) - sum(node_filesystem_free_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "usage", "refId": "A" }, { - "expr": "sum(node_filesystem_size{nodename=~\"$node\"})", + "expr": "sum(node_filesystem_size_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "limit", @@ -741,7 +765,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -2515,6 +2543,7 @@ "query": "grafana-kubernetes-datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -2529,6 +2558,7 @@ "query": "datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -2544,6 +2574,7 @@ "query": "prometheus", "refresh": 1, "regex": "/$ds/", + "skipUrlSync": false, "type": "datasource" }, { @@ -2558,6 +2589,7 @@ "query": "node", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], @@ -2577,6 +2609,7 @@ "query": "namespace", "refresh": 1, "regex": "", + 
"skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], diff --git a/dist/dashboards/k8s-container.json b/dist/dashboards/k8s-container.json index 782640c..4b974c7 100644 --- a/dist/dashboards/k8s-container.json +++ b/dist/dashboards/k8s-container.json @@ -126,7 +126,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\"}) by (pod_name)", + "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -217,7 +217,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\"}[2m])) by (pod_name)", + "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -307,7 +307,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_network_transmit_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\"}[2m])", + "expr": "rate(container_network_transmit_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -398,7 +398,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_network_receive_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\"}[2m])", + "expr": "rate(container_network_receive_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -489,7 +489,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_fs_io_time_seconds_total{pod_name=~\"$pod\"}[2m])", + "expr": 
"rate(container_fs_io_time_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod_name}}", @@ -577,7 +577,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_fs_write_seconds_total{pod_name=~\"$pod\"}[2m])", + "expr": "rate(container_fs_write_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -760,4 +760,4 @@ }, "timezone": "browser", "title": "K8s Container" -} \ No newline at end of file +} diff --git a/dist/dashboards/k8s-node.json b/dist/dashboards/k8s-node.json index 29c829f..af9a728 100644 --- a/dist/dashboards/k8s-node.json +++ b/dist/dashboards/k8s-node.json @@ -43,7 +43,6 @@ "gnetId": 470, "graphTooltip": 1, "id": null, - "iteration": 1516060966755, "links": [ { "asDropdown": true, @@ -90,7 +89,7 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 11 }, "id": 27, "panels": [], @@ -115,7 +114,7 @@ "h": 7, "w": 10, "x": 0, - "y": 9 + "y": 12 }, "id": 1, "legend": { @@ -143,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"system\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"system\"}[5m])) * 100))", "format": "time_series", "instant": false, "interval": "", @@ -152,21 +151,21 @@ "refId": "A" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"user\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"user\"}[5m])) * 100))", "format": "time_series", "intervalFactor": 1, "legendFormat": "user", "refId": "B" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"iowait\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"iowait\"}[5m])) * 100))", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "iowait", "refId": "C" }, { - "expr": "sum((avg(irate(node_cpu{nodename=~\"$node\", mode=\"steal\"}[5m])) * 100))", + "expr": "sum((avg(irate(node_cpu_seconds_total{nodename=~\"$node\", mode=\"steal\"}[5m])) * 100))", "format": "time_series", "intervalFactor": 1, "legendFormat": "steal", @@ -206,7 +205,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -223,7 +226,7 @@ "h": 7, "w": 10, "x": 10, - "y": 9 + "y": 12 }, "id": 10, "legend": { @@ -256,7 +259,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_memory_MemAvailable{nodename=~\"$node\"})", + "expr": "sum(node_memory_MemAvailable_bytes{nodename=~\"$node\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -264,14 +267,14 @@ "refId": "A" }, { - "expr": "sum(node_memory_MemFree{nodename=~\"$node\"})", + "expr": "sum(node_memory_MemFree_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "free", "refId": "B" }, { - "expr": "sum(node_memory_Active{nodename=~\"$node\"})", + "expr": "sum(node_memory_Active_bytes{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "active", @@ -298,7 +301,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "bytes", "logBase": 1, "max": null, "min": 0, @@ -311,7 +314,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "cacheTimeout": null, @@ -337,7 +344,7 @@ "h": 7, "w": 4, "x": 20, - "y": 9 + "y": 12 }, "id": 23, "interval": null, @@ -409,7 +416,7 @@ "h": 6, "w": 24, "x": 0, - "y": 16 + "y": 19 }, "id": 24, "legend": { @@ -437,14 +444,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_filesystem_free{nodename=~\"$node\"}) by (nodename)", + "expr": "sum(node_filesystem_free_bytes{nodename=~\"$node\"}) by (nodename)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ nodename }} free", "refId":
"B" }, { - "expr": "(sum(node_filesystem_size{nodename=~\"$node\"}) by (nodename) - sum(node_filesystem_free) by (nodename))", + "expr": "(sum(node_filesystem_size_bytes{nodename=~\"$node\"}) by (nodename) - sum(node_filesystem_free_bytes{nodename=~\"$node\"}) by (nodename))", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -487,7 +494,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -504,7 +515,7 @@ "h": 6, "w": 12, "x": 0, - "y": 22 + "y": 25 }, "id": 8, "legend": { @@ -530,14 +541,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_bytes_read{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_read_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", "refId": "A" }, { - "expr": "sum(rate(node_disk_bytes_written{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_written_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", @@ -577,7 +588,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -594,7 +609,7 @@ "h": 6, "w": 12, "x": 12, - "y": 22 + "y": 25 }, "id": 7, "legend": { @@ -620,14 +635,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_reads_completed{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_reads_completed_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "sum(rate(node_disk_writes_completed{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_disk_writes_completed_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -667,7 +682,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -684,7 +703,7 @@ "h": 5, "w": 12, "x": 0, -
"y": 28 + "y": 31 }, "id": 9, "legend": { @@ -715,14 +734,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_bytes{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_receive_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "receive", "refId": "A" }, { - "expr": "sum(rate(node_network_transmit_bytes{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_transmit_bytes_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "transmit", @@ -762,7 +781,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -779,7 +802,7 @@ "h": 5, "w": 12, "x": 12, - "y": 28 + "y": 31 }, "id": 18, "legend": { @@ -810,14 +833,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(node_network_receive_errs{nodename=~\"$node\"})", + "expr": "sum(node_network_receive_errs_total{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "in", "refId": "A" }, { - "expr": "sum(node_network_transmit_errs{nodename=~\"$node\"})", + "expr": "sum(node_network_transmit_errs_total{nodename=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "in", @@ -857,7 +880,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "alerting": {}, @@ -874,7 +901,7 @@ "h": 5, "w": 24, "x": 0, - "y": 33 + "y": 36 }, "id": 17, "legend": { @@ -905,14 +932,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_packets{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_receive_packets_total{nodename=~\"$node\"}[5m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "receive", "refId": "A" }, { - "expr": "sum(rate(node_network_transmit_packets{nodename=~\"$node\"}[5m]))", + "expr": "sum(rate(node_network_transmit_packets_total{nodename=~\"$node\"}[5m]))", 
"format": "time_series", "intervalFactor": 1, "legendFormat": "transmit", @@ -952,7 +979,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": false, @@ -972,6 +1003,7 @@ "query": "grafana-kubernetes-datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -986,6 +1018,7 @@ "query": "datasource", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -1001,6 +1034,7 @@ "query": "prometheus", "refresh": 1, "regex": "/$ds/", + "skipUrlSync": false, "type": "datasource" }, { @@ -1015,6 +1049,7 @@ "query": "node", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": null, "tags": [], @@ -1056,4 +1091,4 @@ }, "timezone": "browser", "title": "K8s Node" -} \ No newline at end of file +} diff --git a/src/dashboards/k8s-container.json b/src/dashboards/k8s-container.json index 782640c..4b974c7 100644 --- a/src/dashboards/k8s-container.json +++ b/src/dashboards/k8s-container.json @@ -126,7 +126,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\"}) by (pod_name)", + "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -217,7 +217,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\"}[2m])) by (pod_name)", + "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -307,7 +307,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_network_transmit_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\"}[2m])", + "expr": 
"rate(container_network_transmit_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -398,7 +398,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_network_receive_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\"}[2m])", + "expr": "rate(container_network_receive_bytes_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -489,7 +489,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_fs_io_time_seconds_total{pod_name=~\"$pod\"}[2m])", + "expr": "rate(container_fs_io_time_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod_name}}", @@ -577,7 +577,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(container_fs_write_seconds_total{pod_name=~\"$pod\"}[2m])", + "expr": "rate(container_fs_write_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -760,4 +760,4 @@ }, "timezone": "browser", "title": "K8s Container" -} \ No newline at end of file +} From 4069b1185ba011c233923156ac6808678c38344c Mon Sep 17 00:00:00 2001 From: Edward McFarlane Date: Fri, 1 Feb 2019 11:50:51 +0000 Subject: [PATCH 3/4] Fix cluster cpu allocatable/capacity labels --- dist/dashboards/k8s-cluster.json | 4 ++-- src/dashboards/k8s-cluster.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dist/dashboards/k8s-cluster.json b/dist/dashboards/k8s-cluster.json index fc019f0..d9d1f66 100644 --- a/dist/dashboards/k8s-cluster.json +++ b/dist/dashboards/k8s-cluster.json @@ -520,14 +520,14 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(kube_node_status_capacity_cpu_cores{node=~\"$node\"})", + "expr": "sum(kube_node_status_allocatable_cpu_cores{node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "allocatable", "refId": "A" }, { - "expr": "sum(kube_node_status_allocatable_cpu_cores{node=~\"$node\"})", + "expr": "sum(kube_node_status_capacity_cpu_cores{node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "capacity", diff --git a/src/dashboards/k8s-cluster.json b/src/dashboards/k8s-cluster.json index fc019f0..d9d1f66 100644 --- a/src/dashboards/k8s-cluster.json +++ b/src/dashboards/k8s-cluster.json @@ -520,14 +520,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kube_node_status_capacity_cpu_cores{node=~\"$node\"})", + "expr": "sum(kube_node_status_allocatable_cpu_cores{node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "allocatable", "refId": "A" }, { - "expr": "sum(kube_node_status_allocatable_cpu_cores{node=~\"$node\"})", + "expr": "sum(kube_node_status_capacity_cpu_cores{node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "capacity", From cedbf7491fb95819517b1b0f5f47c48ba11c8825 Mon Sep 17 00:00:00 2001 From: Edward McFarlane Date: Wed, 6 Feb 2019 11:44:07 +0000 Subject: [PATCH 4/4] Fix duplicate memory/cpu container reporting --- dist/dashboards/k8s-container.json | 4 ++-- src/dashboards/k8s-container.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dist/dashboards/k8s-container.json b/dist/dashboards/k8s-container.json index 4b974c7..ed542e8 100644 --- a/dist/dashboards/k8s-container.json +++ b/dist/dashboards/k8s-container.json @@ -126,7 +126,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}) by (pod_name)", + "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", 
namespace=~\"$namespace\", image!=\"\"}) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -217,7 +217,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])) by (pod_name)", + "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\", image!=\"\"}[2m])) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", diff --git a/src/dashboards/k8s-container.json b/src/dashboards/k8s-container.json index 4b974c7..ed542e8 100644 --- a/src/dashboards/k8s-container.json +++ b/src/dashboards/k8s-container.json @@ -126,7 +126,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}) by (pod_name)", + "expr": "sum(container_memory_usage_bytes{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\", image!=\"\"}) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", @@ -217,7 +217,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\"}[2m])) by (pod_name)", + "expr": "sum(irate(container_cpu_usage_seconds_total{pod_name=~\"$pod\", kubernetes_io_hostname=~\"$node\", namespace=~\"$namespace\", image!=\"\"}[2m])) by (pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod_name }}",