diff --git a/.idea/alpina.iml b/.idea/alpina.iml
index d07cdd8..bf75220 100644
--- a/.idea/alpina.iml
+++ b/.idea/alpina.iml
@@ -4,7 +4,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index aa18f0d..4fdd347 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/roles/alpina/templates/services/monitoring/Dockerfile b/roles/alpina/templates/services/monitoring/Dockerfile
index 4604147..eebe9b2 100644
--- a/roles/alpina/templates/services/monitoring/Dockerfile
+++ b/roles/alpina/templates/services/monitoring/Dockerfile
@@ -4,6 +4,10 @@ RUN pip install grafanalib
COPY ./grafana_config/dashboards /dashboards
+# Required for grafanalib to find the shared python files like common.py
+# https://github.com/weaveworks/grafanalib/issues/58
+ENV PYTHONPATH=/dashboards
+
RUN generate-dashboards /dashboards/*.dashboard.py
FROM grafana/grafana:latest
diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py
new file mode 100644
index 0000000..571b3da
--- /dev/null
+++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py
@@ -0,0 +1,45 @@
+"""Building blocks shared by the grafanalib dashboard definitions."""
+
+from grafanalib.core import Template
+
+# TODO: consider default params for common params like line width, show points, tooltip
+
+# Dashboard variable that lets the viewer choose the Prometheus datasource;
+# dashboards refer to the selected value as '${datasource}'.
+PrometheusTemplate = Template(
+    name='datasource',
+    type='datasource',
+    label='Prometheus',
+    query='prometheus',
+)
+
+
+# TODO: this is slightly less (clown emoji); the normal Target gave me errors in grafana
+class LokiTarget:
+    """Minimal query target for a Loki datasource.
+
+    Args:
+        loki_datasource: Value emitted under the 'datasource' key.
+        expr: Query expression emitted under the 'expr' key.
+        legendFormat: Value emitted under the 'legendFormat' key.
+        refId: Value emitted under the 'refId' key.
+        queryType: Value emitted under the 'queryType' key. Defaults to
+            'range', which used to be hard-coded.
+    """
+
+    def __init__(self, loki_datasource, expr, legendFormat, refId, queryType='range'):
+        self.loki_datasource = loki_datasource
+        self.expr = expr
+        self.legendFormat = legendFormat
+        self.refId = refId
+        self.queryType = queryType
+
+    def to_json_data(self):
+        """Return the target's fields as a plain dict."""
+        return {
+            'datasource': self.loki_datasource,
+            'expr': self.expr,
+            'legendFormat': self.legendFormat,
+            'refId': self.refId,
+            'queryType': self.queryType,
+        }
diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
index 484e21c..6de2cc3 100644
--- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
+++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
@@ -5,28 +5,21 @@ from grafanalib.core import (
)
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
-prom_datasource='prometheus'
-loki_datasource='loki'
+from common import LokiTarget, PrometheusTemplate
-# TODO: this is (clown emoji), normal Target gave me errors in grafana
-class LokiTarget(object):
- def to_json_data(self):
- return {
- 'datasource': loki_datasource,
- 'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
- 'legendFormat': '{{ container_name }}',
- 'refId': 'A',
- 'queryType': 'range',
- }
+prom_datasource='${datasource}'
+loki_datasource='loki'
dashboard = Dashboard(
title='Containers',
uid='containers',
description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
tags=[
- 'example'
+ 'linux',
+ 'docker',
],
templating=Templating(list=[
+ PrometheusTemplate,
Template(
name='compose_project',
label='Compose Project',
@@ -44,7 +37,6 @@ dashboard = Dashboard(
includeAll=True,
multi=True,
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
-
),
Template(
name='logs_query',
@@ -56,7 +48,6 @@ dashboard = Dashboard(
timezone='browser',
panels=[
TimeSeries(
- id=1,
title='Container Memory Usage',
unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=0),
@@ -76,13 +67,14 @@ dashboard = Dashboard(
],
),
TimeSeries(
- id=2,
title='Container CPU Usage',
unit=SECONDS,
gridPos=GridPos(h=8, w=12, x=12, y=0),
lineWidth=2,
fillOpacity=10,
showPoints='never',
+ tooltipMode='all',
+ tooltipSort='desc',
targets=[
Target(
datasource=prom_datasource,
@@ -93,7 +85,6 @@ dashboard = Dashboard(
],
),
TimeSeries(
- id=3,
title='Container Network Traffic',
unit=BYTES_SEC_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=8),
@@ -118,7 +109,6 @@ dashboard = Dashboard(
],
),
Logs(
- id=4,
title='',
gridPos=GridPos(h=8, w=12, x=12, y=8),
showLabels=True,
@@ -127,13 +117,12 @@ dashboard = Dashboard(
prettifyLogMessage=True,
dedupStrategy='numbers',
targets=[
- LokiTarget(),
- # Target(
- # datasource=loki_datasource,
- # expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
- # legendFormat='{{ container_name }}',
- # refId='A',
- # ),
+ LokiTarget(
+ loki_datasource=loki_datasource,
+ expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
+ legendFormat='{{ container_name }}',
+ refId='A',
+ ),
],
),
],
diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py
new file mode 100644
index 0000000..d5c108b
--- /dev/null
+++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py
@@ -0,0 +1,130 @@
+"""Node Exporter dashboard with basic CPU and memory panels."""
+
+from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target
+from grafanalib.formatunits import BYTES_IEC
+
+from common import PrometheusTemplate
+from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS
+
+# Refers to the datasource selected through the PrometheusTemplate variable.
+DATASOURCE = '${datasource}'
+
+# Label matchers limiting every query to the selected instance and job.
+NODE_SELECTOR = 'instance="$instance",job="$job"'
+
+# Divisor shared by all 'CPU Basic' queries.
+CPU_COUNT_EXPR = 'scalar(count(count(node_cpu_seconds_total{' + NODE_SELECTOR + '}) by (cpu)))'
+
+# (mode matcher, legend, refId) for each series of the 'CPU Basic' panel.
+CPU_SERIES = [
+    ('mode="system"', 'Busy System', 'A'),
+    ('mode="user"', 'Busy User', 'B'),
+    ('mode="iowait"', 'Busy Iowait', 'C'),
+    ('mode=~".*irq"', 'Busy IRQs', 'D'),
+    (
+        'mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"',
+        'Busy Other',
+        'E',
+    ),
+    ('mode="idle"', 'Idle', 'F'),
+]
+
+
+def _prom_target(expr, legend, ref_id, **extra):
+    """Build a Target that queries the templated datasource."""
+    return Target(datasource=DATASOURCE, expr=expr, legendFormat=legend, refId=ref_id, **extra)
+
+
+def _cpu_expr(mode_matcher):
+    """Return the 'CPU Basic' query for the CPU modes selected by mode_matcher."""
+    return (
+        'sum(irate(node_cpu_seconds_total{' + NODE_SELECTOR + ', ' + mode_matcher
+        + '}[$__rate_interval])) / ' + CPU_COUNT_EXPR
+    )
+
+
+def _mem(metric):
+    """Return the node_memory_<metric>_bytes selector for the chosen node."""
+    return 'node_memory_' + metric + '_bytes{' + NODE_SELECTOR + '}'
+
+
+# Shown as 'RAM Cache + Buffer' and subtracted when computing 'RAM Used'.
+CACHE_AND_BUFFER_EXPR = '{} + {} + {}'.format(_mem('Cached'), _mem('Buffers'), _mem('SReclaimable'))
+
+dashboard = Dashboard(
+    title='Node Exporter',
+    uid='node',
+    description='Node Exporter (not quite full)',
+    tags=[
+        'linux',
+    ],
+    timezone='browser',
+    templating=Templating(list=[
+        # Datasource
+        PrometheusTemplate,
+        # Job
+        Template(
+            name='job',
+            label='Job',
+            dataSource=DATASOURCE,
+            query='label_values(node_uname_info, job)',
+        ),
+        # Instance
+        Template(
+            name='instance',
+            label='Instance',
+            dataSource=DATASOURCE,
+            query='label_values(node_uname_info{job="$job"}, instance)',
+        ),
+    ]),
+    panels=[
+        # CPU Basic
+        TimeSeries(
+            title='CPU Basic',
+            description='Basic CPU usage info',
+            unit=PERCENT_UNIT_FORMAT,
+            gridPos=GridPos(h=8, w=12, x=0, y=0),
+            lineWidth=1,
+            fillOpacity=30,
+            showPoints='never',
+            stacking={'mode': 'percent', 'group': 'A'},
+            tooltipMode='all',
+            tooltipSort='desc',
+            targets=[
+                _prom_target(_cpu_expr(matcher), legend, ref_id)
+                for matcher, legend, ref_id in CPU_SERIES
+            ],
+            # Extra JSON for the colors
+            extraJson=CPU_BASIC_COLORS,
+        ),
+        # Memory Basic
+        TimeSeries(
+            title='Memory Basic',
+            description='Basic memory usage',
+            unit=BYTES_IEC,
+            gridPos=GridPos(h=8, w=12, x=12, y=0),
+            lineWidth=1,
+            fillOpacity=30,
+            showPoints='never',
+            stacking={'mode': 'normal', 'group': 'A'},
+            tooltipMode='all',
+            tooltipSort='desc',
+            targets=[
+                _prom_target(_mem('MemTotal'), 'RAM Total', 'A', format='time_series'),
+                _prom_target(
+                    '{} - {} - ({})'.format(_mem('MemTotal'), _mem('MemFree'), CACHE_AND_BUFFER_EXPR),
+                    'RAM Used',
+                    'B',
+                    format='time_series',
+                ),
+                _prom_target(CACHE_AND_BUFFER_EXPR, 'RAM Cache + Buffer', 'C'),
+                _prom_target(_mem('MemFree'), 'RAM Free', 'D'),
+                _prom_target('({} - {})'.format(_mem('SwapTotal'), _mem('SwapFree')), 'SWAP Used', 'E'),
+            ],
+            # Extra JSON for the colors
+            extraJson=MEMORY_BASIC_COLORS,
+        ),
+        # TODO: Network Basic
+        # TODO: Disk Basic
+    ],
+).auto_panel_ids()
diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py
new file mode 100644
index 0000000..a527781
--- /dev/null
+++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py
@@ -0,0 +1,97 @@
+"""Grafana field-override constants (per-series colors) for the node dashboard."""
+
+# TODO: Question life decisions (I'm not sure if this is good)
+
+
+def _color(hex_color):
+    """Return the override property that gives a series one fixed color."""
+    return {
+        "id": "color",
+        "value": {
+            "fixedColor": hex_color,
+            "mode": "fixed",
+        },
+    }
+
+
+def _unfilled_properties():
+    """Return new fillOpacity/stacking properties used by 'RAM Total' and 'Available'."""
+    return [
+        {
+            "id": "custom.fillOpacity",
+            "value": 0,
+        },
+        {
+            "id": "custom.stacking",
+            "value": {
+                "group": False,
+                "mode": "normal",
+            },
+        },
+    ]
+
+
+def _override(series_name, hex_color, extra_properties=()):
+    """Return a byName override that colors series_name with hex_color.
+
+    Any extra_properties are appended after the color property.
+    """
+    return {
+        "matcher": {
+            "id": "byName",
+            "options": series_name,
+        },
+        "properties": [_color(hex_color)] + list(extra_properties),
+    }
+
+
+def _field_config(overrides):
+    """Wrap a list of overrides in the fieldConfig structure passed as extraJson."""
+    return {
+        "fieldConfig": {
+            "overrides": overrides,
+        },
+    }
+
+
+# 'Busy Iowait' and 'Idle' used to be listed twice ('Idle' with two different
+# colors). Grafana applies overrides in list order, so only the later entries
+# took effect; those are the ones kept here.
+CPU_BASIC_COLORS = _field_config([
+    _override("Busy Iowait", "#890F02"),
+    _override("Idle", "#7EB26D"),
+    _override("Busy System", "#EAB839"),
+    _override("Busy User", "#0A437C"),
+    _override("Busy Other", "#6D1F62"),
+])
+
+# Several names below do not match a legend used by the current 'Memory Basic'
+# panel targets; they are kept unchanged.
+MEMORY_BASIC_COLORS = _field_config([
+    _override("Apps", "#629E51"),
+    _override("Buffers", "#614D93"),
+    _override("Cache", "#6D1F62"),
+    _override("Cached", "#511749"),
+    _override("Committed", "#508642"),
+    _override("Free", "#0A437C"),
+    _override(
+        "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working",
+        "#CFFAFF",
+    ),
+    _override("Inactive", "#584477"),
+    _override("PageTables", "#0A50A1"),
+    _override("Page_Tables", "#0A50A1"),
+    _override("RAM_Free", "#E0F9D7"),
+    _override("SWAP Used", "#BF1B00"),
+    _override("Slab", "#806EB7"),
+    _override("Slab_Cache", "#E0752D"),
+    _override("Swap", "#BF1B00"),
+    _override("Swap Used", "#BF1B00"),
+    _override("Swap_Cache", "#C15C17"),
+    _override("Swap_Free", "#2F575E"),
+    _override("Unused", "#EAB839"),
+    _override("RAM Total", "#E0F9D7", _unfilled_properties()),
+    _override("RAM Cache + Buffer", "#052B51"),
+    _override("RAM Free", "#7EB26D"),
+    _override("Available", "#DEDAF7", _unfilled_properties()),
+])