diff --git a/.idea/alpina.iml b/.idea/alpina.iml index bf75220..e519e85 100644 --- a/.idea/alpina.iml +++ b/.idea/alpina.iml @@ -4,7 +4,7 @@ - + diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py index 571b3da..5950a05 100644 --- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py +++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/common.py @@ -1,27 +1,81 @@ -from grafanalib.core import Template +from attrs import define +from grafanalib.core import Template, TimeSeries, Dashboard, HIDE_VARIABLE, Target -# TODO: consider default params for common params like line width, show points, tooltip +CONF_SUPPORT_LOKI = True +CONF_SUPPORT_ZFS = True -PrometheusTemplate = Template( - name='datasource', +CONF_DATASOURCE_VAR_PROM = 'prom_datasource' +CONF_DATASOURCE_VAR_LOKI = 'loki_datasource' + +prom_datasource = f'${{{CONF_DATASOURCE_VAR_PROM}}}' +loki_datasource = f'${{{CONF_DATASOURCE_VAR_LOKI}}}' + +prom_template = Template( + name=CONF_DATASOURCE_VAR_PROM, type='datasource', label='Prometheus', query='prometheus', + hide=HIDE_VARIABLE, ) -# TODO: this slightly less (clown emoji), normal Target gave me errors in grafana +loki_template = Template( + name=CONF_DATASOURCE_VAR_LOKI, + type='datasource', + label='Loki', + query='loki', + hide=HIDE_VARIABLE, +) + + +@define +class MyDashboard(Dashboard): + """Wrapper class for Dashboard with some default values""" + timezone: str = 'browser' + sharedCrosshair: bool = True + + +@define +class MyTimeSeries(TimeSeries): + """Wrapper class for TimeSeries with some default values and custom fields""" + fillOpacity: int = 10 + lineWidth: int = 1 + showPoints: str = 'never' + tooltipMode: str = 'multi' + maxDataPoints: int = None + + # new fields + axisCenteredZero: bool = False + + def to_json_data(self): + data = super().to_json_data() + data['fieldConfig']['defaults']['custom']['axisCenteredZero'] = self.axisCenteredZero + return data + + +@define +class PromTarget(Target): + """Wrapper class for Target with default prometheus datasource""" + datasource: str = prom_datasource + + +@define class LokiTarget(object): - def __init__(self, loki_datasource, expr, legendFormat, refId): - self.loki_datasource = loki_datasource - self.expr = expr - self.legendFormat = legendFormat - self.refId = refId + """Custom class for Loki Target, because normal Target gave errors in grafana""" + expr: str + legendFormat: str + datasource: str = loki_datasource + refId: str = None + queryType: str = 'range' def to_json_data(self): return { - 'datasource': self.loki_datasource, + 'datasource': self.datasource, 'expr': self.expr, 'legendFormat': self.legendFormat, 'refId': self.refId, - 'queryType': 'range', + 'queryType': self.queryType, } + + +def filter_none(l: list): + return [i for i in l if i is not None] diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py index 6de2cc3..cbe4b3f 100644 --- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py +++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py @@ -1,16 +1,10 @@ -from grafanalib.core import ( - Dashboard, TimeSeries, - Target, GridPos, - Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE, Logs -) +from grafanalib.core import GridPos, Templating, Template, Logs from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC -from common import LokiTarget, PrometheusTemplate +from common import LokiTarget, prom_template, loki_template, MyTimeSeries, MyDashboard, CONF_SUPPORT_LOKI, filter_none, \ + prom_datasource, PromTarget -prom_datasource='${datasource}' -loki_datasource='loki' - -dashboard = Dashboard( +dashboard = MyDashboard( title='Containers', uid='containers', description='Data for compose projects from default Prometheus datasource collected by Cadvisor', @@ -18,8 +12,9 @@ dashboard = Dashboard( 'linux', 'docker', ], - templating=Templating(list=[ - PrometheusTemplate, + templating=Templating(list=filter_none([ + prom_template, + loki_template if CONF_SUPPORT_LOKI else None, Template( name='compose_project', label='Compose Project', @@ -27,7 +22,6 @@ dashboard = Dashboard( query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)', includeAll=True, multi=True, - refresh=REFRESH_ON_TIME_RANGE_CHANGE, ), Template( name='container_name', @@ -36,7 +30,6 @@ dashboard = Dashboard( query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)', includeAll=True, multi=True, - refresh=REFRESH_ON_TIME_RANGE_CHANGE, ), Template( name='logs_query', @@ -44,67 +37,48 @@ dashboard = Dashboard( query='', type='textbox', ), - ]), - timezone='browser', - panels=[ - TimeSeries( + ])), + panels=filter_none([ + MyTimeSeries( title='Container Memory Usage', unit=BYTES_IEC, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=2, - fillOpacity=10, - showPoints='never', - stacking={'mode': 'normal'}, - tooltipMode='all', tooltipSort='desc', + stacking={'mode': 'normal'}, targets=[ - Target( - datasource=prom_datasource, + PromTarget( expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})', legendFormat='{{ name }}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container CPU Usage', unit=SECONDS, gridPos=GridPos(h=8, w=12, x=12, y=0), - lineWidth=2, - fillOpacity=10, - showPoints='never', - tooltipMode='all', tooltipSort='desc', + stacking={'mode': 'normal'}, targets=[ - Target( - datasource=prom_datasource, - expr='max by (name) (rate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='max by (name) (irate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat='{{ name }}', - refId='A', ), ], ), - TimeSeries( + MyTimeSeries( title='Container Network Traffic', unit=BYTES_SEC_IEC, gridPos=GridPos(h=8, w=12, x=0, y=8), - lineWidth=2, - fillOpacity=10, - showPoints='never', - tooltipMode='all', tooltipSort='desc', + axisCenteredZero=True, targets=[ - Target( - datasource=prom_datasource, - expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='max by (name) (irate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat="rx {{ name }}", - refId='A', ), - Target( - datasource=prom_datasource, - expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', + PromTarget( + expr='-max by (name) (irate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', legendFormat="tx {{ name }}", - refId='B', ), ], ), @@ -118,12 +92,10 @@ dashboard = Dashboard( dedupStrategy='numbers', targets=[ LokiTarget( - loki_datasource=loki_datasource, expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`', legendFormat='{{ container_name }}', - refId='A', ), ], - ), - ], + ) if CONF_SUPPORT_LOKI else None, + ]), ).auto_panel_ids() diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py index d5c108b..bdf0d54 100644 --- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py +++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node.dashboard.py @@ -1,139 +1,159 @@ -from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target -from grafanalib.formatunits import BYTES_IEC +from grafanalib.core import Templating, Template, GridPos +from grafanalib.formatunits import BYTES_IEC, BITS_SEC, PERCENT_UNIT -from common import PrometheusTemplate -from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS +from common import prom_template, MyTimeSeries, MyDashboard, CONF_SUPPORT_ZFS, PromTarget, prom_datasource -dashboard = Dashboard( +dashboard = MyDashboard( title='Node Exporter', uid='node', description='Node Exporter (not quite full)', tags=[ 'linux', ], - timezone='browser', templating=Templating(list=[ # Datasource - PrometheusTemplate, + prom_template, # Job Template( name='job', label='Job', - dataSource='${datasource}', + dataSource=prom_datasource, query='label_values(node_uname_info, job)', ), # Instance Template( name='instance', label='Instance', - dataSource='${datasource}', + dataSource=prom_datasource, query='label_values(node_uname_info{job="$job"}, instance)', ), ]), panels=[ # CPU Basic - TimeSeries( + MyTimeSeries( title='CPU Basic', description='Basic CPU usage info', - unit=PERCENT_UNIT_FORMAT, + unit=PERCENT_UNIT, gridPos=GridPos(h=8, w=12, x=0, y=0), - lineWidth=1, - fillOpacity=30, - showPoints='never', - stacking={'mode': 'percent', 'group': 'A'}, - tooltipMode='all', - tooltipSort='desc', + stacking={'mode': 'percent'}, targets=[ - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy System', - refId='A', ), - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy User', - refId='B', ), - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy Iowait', - refId='C', ), - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy IRQs', - refId='D', ), - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Busy Other', - refId='E', ), - Target( - datasource='${datasource}', + PromTarget( expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))', legendFormat='Idle', - refId='F', ), ], - # Extra JSON for the colors - extraJson=CPU_BASIC_COLORS, ), # Memory Basic - TimeSeries( + MyTimeSeries( title='Memory Basic', description='Basic memory usage', unit=BYTES_IEC, gridPos=GridPos(h=8, w=12, x=12, y=0), - lineWidth=1, - fillOpacity=30, - showPoints='never', - stacking={'mode': 'normal', 'group': 'A'}, - tooltipMode='all', - tooltipSort='desc', + stacking={'mode': 'normal'}, + valueMin=0, targets=[ - Target( - datasource='${datasource}', + PromTarget( expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"}', format='time_series', legendFormat='RAM Total', - refId='A', ), - Target( - datasource='${datasource}', + PromTarget( expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})', format='time_series', legendFormat='RAM Used', - refId='B', + hide=CONF_SUPPORT_ZFS, ), - Target( - datasource='${datasource}', + PromTarget( + expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}) - node_zfs_arc_size{instance="$instance",job="$job"}', + format='time_series', + legendFormat='RAM Used', + hide=not CONF_SUPPORT_ZFS, + ), + PromTarget( expr='node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}', legendFormat='RAM Cache + Buffer', - refId='C', ), - Target( - datasource='${datasource}', + PromTarget( + expr='node_zfs_arc_size{instance="$instance",job="$job"}', + legendFormat='ZFS Arc', + hide=not CONF_SUPPORT_ZFS, + ), + PromTarget( expr='node_memory_MemFree_bytes{instance="$instance",job="$job"}', legendFormat='RAM Free', - refId='D', ), - Target( - datasource='${datasource}', + PromTarget( expr='(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})', legendFormat='SWAP Used', - refId='E', ), ], - # Extra JSON for the colors - extraJson=MEMORY_BASIC_COLORS, + overrides=[ + # Prevent total memory from being stacked + { + 'matcher': { + 'id': 'byName', + 'options': 'RAM Total' + }, + 'properties': [ + { + 'id': 'custom.stacking', + 'value': {'mode': 'none'} + } + ] + }, + ], + ), + # Network Traffic Basic + MyTimeSeries( + title='Network Traffic Basic', + description='Basic network usage info per interface', + unit=BITS_SEC, + gridPos=GridPos(h=8, w=12, x=0, y=8), + tooltipSort='desc', + axisCenteredZero=True, + targets=[ + PromTarget( + expr='irate(node_network_receive_bytes_total{instance="$instance",job="$job"}[$__rate_interval]) * 8', + legendFormat='rx {{ device }}', + ), + PromTarget( + expr='-irate(node_network_transmit_bytes_total{instance="$instance",job="$job"}[$__rate_interval]) * 8', + legendFormat='tx {{ device }}', + ), + ], + ), + # Disk Space Basic + MyTimeSeries( + title='Disk Space Used Basic', + description='Disk space used of all filesystems mounted', + unit=PERCENT_UNIT, + gridPos=GridPos(h=8, w=12, x=12, y=8), + targets=[ + PromTarget( + expr='1 - (node_filesystem_avail_bytes{instance="$instance",job="$job",device!~"rootfs"} / node_filesystem_size_bytes{instance="$instance",job="$job",device!~"rootfs"})', + legendFormat='{{ mountpoint }}', + ), + ], ), - # TODO: Network Basic - # TODO: Disk Basic ], ).auto_panel_ids() diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py deleted file mode 100644 index a527781..0000000 --- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/node_consts.py +++ /dev/null @@ -1,487 +0,0 @@ -# TODO: Question life decisions (I'm not sure if this is good) - -CPU_BASIC_COLORS = { - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - } - ] - }, -} - -MEMORY_BASIC_COLORS = { - "fieldConfig": { - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "SWAP Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap Used" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": False, - "mode": "normal" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Cache + Buffer" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Available" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#DEDAF7", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": False, - "mode": "normal" - } - } - ] - } - ] - } -}