From c38f94f4ceb86f18911100eae4bb68639703a284 Mon Sep 17 00:00:00 2001
From: Yuri Tatishchev
Date: Sat, 12 Oct 2024 23:30:55 -0700
Subject: [PATCH] WIP: monitoring improvements - containers dashboard

---
Review notes (this section sits after the "---" separator and is not part
of the commit message):

  * Adds containers.dashboard.py, a grafanalib Dashboard titled 'Containers'
    (uid 'containers'). It declares two multi-value template variables,
    compose_project and container_name, both using the 'prometheus'
    datasource; the container_name query is filtered by $compose_project.
  * The new dashboard has three TimeSeries panels: memory
    (max by name of container_memory_usage_bytes), CPU (max by name of the
    rate of container_cpu_usage_seconds_total) and network (sum by name of
    the receive rate as refId A, and the negated transmit rate as refId B).
  * Deletes other.dashboard.py, which held a dashboard with the same title,
    the same two template variables and only the memory panel.
  * NOTE(review): the CPU panel sets unit=SECONDS on a rate() over a
    seconds counter -- confirm this is the intended display unit.
  * NOTE(review): tags=['example'] is present in both the deleted and the
    added file -- confirm it is still wanted on the new dashboard.
  * NOTE(review): the transmit series is negated, presumably so it plots
    below the receive series -- confirm.

 .../dashboards/containers.dashboard.py        | 95 +++++++++++++++++++
 .../dashboards/other.dashboard.py             | 59 ------------
 2 files changed, 95 insertions(+), 59 deletions(-)
 create mode 100644 roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
 delete mode 100644 roles/alpina/templates/services/monitoring/grafana_config/dashboards/other.dashboard.py

diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
new file mode 100644
index 0000000..ec574d1
--- /dev/null
+++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/containers.dashboard.py
@@ -0,0 +1,95 @@
+from grafanalib.core import (
+    Dashboard, TimeSeries,
+    Target, GridPos,
+    Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE
+)
+from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
+
+prom_datasource='prometheus'
+
+dashboard = Dashboard(
+    title='Containers',
+    uid='containers',
+    description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
+    tags=[
+        'example'
+    ],
+    templating=Templating(list=[
+        Template(
+            name='compose_project',
+            label='Compose Project',
+            dataSource=prom_datasource,
+            query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)',
+            includeAll=True,
+            multi=True,
+            refresh=REFRESH_ON_TIME_RANGE_CHANGE,
+        ),
+        Template(
+            name='container_name',
+            label='Container',
+            dataSource=prom_datasource,
+            query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)',
+            includeAll=True,
+            multi=True,
+            refresh=REFRESH_ON_TIME_RANGE_CHANGE,
+
+        ),
+    ]),
+    timezone='browser',
+    panels=[
+        TimeSeries(
+            title='Container Memory Usage',
+            unit=BYTES_IEC,
+            gridPos=GridPos(h=8, w=12, x=0, y=0),
+            lineWidth=2,
+            fillOpacity=10,
+            showPoints='never',
+            targets=[
+                Target(
+                    datasource=prom_datasource,
+                    expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})',
+                    legendFormat='{{ name }}',
+                    refId='A',
+                ),
+            ],
+        ),
+        TimeSeries(
+            title='Container CPU Usage',
+            unit=SECONDS,
+            gridPos=GridPos(h=8, w=12, x=12, y=0),
+            lineWidth=2,
+            fillOpacity=10,
+            showPoints='never',
+            targets=[
+                Target(
+                    datasource=prom_datasource,
+                    expr='max by (name) (rate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
+                    legendFormat='{{ name }}',
+                    refId='A',
+                ),
+            ],
+        ),
+        TimeSeries(
+            title='Container Network Traffic',
+            unit=BYTES_SEC_IEC,
+            gridPos=GridPos(h=8, w=12, x=0, y=8),
+            lineWidth=2,
+            fillOpacity=10,
+            showPoints='never',
+            targets=[
+                Target(
+                    datasource=prom_datasource,
+                    expr='sum by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
+                    legendFormat="recv {{ name }}",
+                    refId='A',
+                ),
+                Target(
+                    datasource=prom_datasource,
+                    expr='-sum by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
+                    legendFormat="trans {{ name }}",
+                    refId='B',
+                ),
+            ],
+        ),
+    ],
+).auto_panel_ids()
diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/other.dashboard.py b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/other.dashboard.py
deleted file mode 100644
index e207de9..0000000
--- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/other.dashboard.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from grafanalib.core import (
-    Dashboard, TimeSeries, GaugePanel,
-    Target, GridPos,
-    OPS_FORMAT, Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE
-)
-from grafanalib.formatunits import BYTES_IEC
-
-dashboard = Dashboard(
-    title="Containers",
-    description="Data for compose projects from default Prometheus datasource collected by Cadvisor",
-    tags=[
-        'example'
-    ],
-    templating=Templating(list=[
-        # TODO: test how much of this is actually necessary
-        Template(
-            name="compose_project",
-            label="compose_project",
-            dataSource="prometheus",
-            query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)',
-            includeAll=True,
-            multi=True,
-            hide=0,
-            sort=1,
-            type="query",
-            refresh=REFRESH_ON_TIME_RANGE_CHANGE,
-        ),
-        Template(
-            name="container_name",
-            label="container_name",
-            dataSource="prometheus",
-            query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)',
-            includeAll=True,
-            multi=True,
-            hide=0,
-            sort=1,
-            type="query",
-            refresh=REFRESH_ON_TIME_RANGE_CHANGE,
-
-        ),
-    ]),
-    timezone="browser",
-    panels=[
-        TimeSeries(
-            title="Container Memory Usage",
-            # dataSource='prometheus',
-            targets=[
-                Target(
-                    datasource='prometheus',
-                    expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})',
-                    legendFormat="{{ name }}",
-                    refId='A',
-                ),
-            ],
-            unit=BYTES_IEC,
-            gridPos=GridPos(h=8, w=16, x=0, y=0),
-        ),
-    ],
-).auto_panel_ids()