monitoring: add grafanalib, containers dashboard

This commit is contained in:
2024-10-30 18:16:17 -07:00
parent fc6e485a61
commit 0e43a68754
13 changed files with 472 additions and 228 deletions

View File

@@ -0,0 +1,13 @@
# Grafana image tag used by the final stage. The default keeps the tag that was
# hard-coded before; pass --build-arg GRAFANA_VERSION=<tag> to pin a specific
# release and get reproducible builds.
ARG GRAFANA_VERSION=latest

# Stage 1: run grafanalib over the dashboard definitions (*.dashboard.py) so the
# JSON they produce can be copied into the runtime image below.
FROM python:3-alpine AS builder
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir grafanalib
COPY ./grafana_config/dashboards /dashboards
RUN generate-dashboards /dashboards/*.dashboard.py

# Stage 2: Grafana with the dashboard provider config and the generated JSON
# placed in its dashboard provisioning directory.
FROM grafana/grafana:${GRAFANA_VERSION}
#COPY ./grafana_config /etc/grafana
# A wildcard source can expand to several files, so the destination is written
# with a trailing slash to mark it unambiguously as a directory.
COPY ./grafana_config/dashboards/*.yaml /etc/grafana/provisioning/dashboards/
COPY --from=builder /dashboards/*.json /etc/grafana/provisioning/dashboards/

View File

@@ -7,25 +7,30 @@ networks:
services:
grafana:
image: grafana/grafana:latest
{# image: grafana/grafana:latest#}
build:
context: .
dockerfile: Dockerfile
container_name: grafana
labels:
- {{ helpers.traefik_labels('grafana', port='3000') | indent(6) }}
restart: unless-stopped
# Needed to make config files readable
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
networks:
- default
- traefik_traefik
volumes:
- {{ base_volume_path }}/monitoring/grafana:/var/lib/grafana
- ./grafana_config:/etc/grafana:ro
- ./grafana_config/grafana.ini:/etc/grafana/grafana.ini:ro
- ./grafana_config/datasources:/etc/grafana/provisioning/datasources:ro
{# - ./grafana_config:/etc/grafana:ro#}
loki:
image: grafana/loki:latest
container_name: loki
restart: unless-stopped
# Needed to make config files readable
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
command:
- -config.file=/etc/loki/loki-config.yaml
@@ -56,7 +61,7 @@ services:
image: prom/prometheus:latest
container_name: prometheus
restart: unless-stopped
# Needed to make config files readable
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
command:
- --config.file=/etc/prometheus/prometheus.yml

View File

@@ -0,0 +1,9 @@
# Grafana dashboard provisioning: a single file-based provider that loads the
# dashboards found under options.path into the "Alpina" folder.
apiVersion: 1
providers:
  - name: "Grafana"
    # apiVersion 1 spells this key "orgId"; "org_id" belongs to the legacy format.
    orgId: 1
    folder: "Alpina"
    type: "file"
    options:
      path: "/etc/grafana/provisioning/dashboards"

View File

@@ -0,0 +1,140 @@
from grafanalib.core import (
Dashboard, TimeSeries,
Target, GridPos,
Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE, Logs
)
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
# Datasource identifiers passed as the `datasource` of every query and template
# variable in this dashboard.
prom_datasource = 'prometheus'
loki_datasource = 'loki'


# TODO: this is a workaround; the regular grafanalib Target produced errors in
# Grafana when used for the Loki query, so the target JSON is built by hand.
class LokiTarget(object):
    """Minimal query target for a Loki logs panel.

    Only implements ``to_json_data()``, which returns the target as a plain
    dict. The keyword arguments are named like the ones that would be passed
    to grafanalib's ``Target``; called with no arguments, the output is the
    container-logs query filtered by the dashboard's ``compose_project``,
    ``container_name`` and ``logs_query`` variables.

    Args:
        expr: LogQL expression to run.
        legendFormat: Legend template for the returned streams.
        refId: Query reference id within the panel.
        datasource: Datasource identifier; ``None`` means the module-level
            ``loki_datasource``, looked up when ``to_json_data()`` is called.
        queryType: Value emitted as the target's ``queryType`` field.
    """

    DEFAULT_EXPR = (
        '{compose_project=~"$compose_project", container_name=~"$container_name"}'
        ' |= `$logs_query`'
    )

    def __init__(self, expr=DEFAULT_EXPR, legendFormat='{{ container_name }}',
                 refId='A', datasource=None, queryType='range'):
        self.expr = expr
        self.legendFormat = legendFormat
        self.refId = refId
        self.datasource = datasource
        self.queryType = queryType

    def to_json_data(self):
        """Return the target as a dict with the same keys, in the same order, as before."""
        datasource = loki_datasource if self.datasource is None else self.datasource
        return {
            'datasource': datasource,
            'expr': self.expr,
            'legendFormat': self.legendFormat,
            'refId': self.refId,
            'queryType': self.queryType,
        }
# Template variables shown at the top of the dashboard: two Prometheus-backed
# multi-select pickers (compose project, container) and a free-text log filter.
_variables = [
    Template(
        name='compose_project',
        label='Compose Project',
        dataSource=prom_datasource,
        query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)',
        includeAll=True,
        multi=True,
        refresh=REFRESH_ON_TIME_RANGE_CHANGE,
    ),
    Template(
        name='container_name',
        label='Container',
        dataSource=prom_datasource,
        query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)',
        includeAll=True,
        multi=True,
        refresh=REFRESH_ON_TIME_RANGE_CHANGE,
    ),
    Template(
        name='logs_query',
        label='Log Search',
        query='',
        type='textbox',
    ),
]

# Top-left: memory per container, stacked.
_memory_panel = TimeSeries(
    id=1,
    title='Container Memory Usage',
    unit=BYTES_IEC,
    gridPos=GridPos(h=8, w=12, x=0, y=0),
    lineWidth=2,
    fillOpacity=10,
    showPoints='never',
    stacking={'mode': 'normal'},
    tooltipMode='all',
    tooltipSort='desc',
    targets=[
        Target(
            datasource=prom_datasource,
            expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})',
            legendFormat='{{ name }}',
            refId='A',
        ),
    ],
)

# Top-right: rate of CPU seconds consumed per container.
_cpu_panel = TimeSeries(
    id=2,
    title='Container CPU Usage',
    unit=SECONDS,
    gridPos=GridPos(h=8, w=12, x=12, y=0),
    lineWidth=2,
    fillOpacity=10,
    showPoints='never',
    targets=[
        Target(
            datasource=prom_datasource,
            expr='max by (name) (rate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
            legendFormat='{{ name }}',
            refId='A',
        ),
    ],
)

# Bottom-left: network throughput; the transmit query is negated so it plots
# below the axis, mirroring receive.
_network_panel = TimeSeries(
    id=3,
    title='Container Network Traffic',
    unit=BYTES_SEC_IEC,
    gridPos=GridPos(h=8, w=12, x=0, y=8),
    lineWidth=2,
    fillOpacity=10,
    showPoints='never',
    tooltipMode='all',
    tooltipSort='desc',
    targets=[
        Target(
            datasource=prom_datasource,
            expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
            legendFormat='rx {{ name }}',
            refId='A',
        ),
        Target(
            datasource=prom_datasource,
            expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
            legendFormat='tx {{ name }}',
            refId='B',
        ),
    ],
)

# Bottom-right: container logs. LokiTarget (defined earlier in this file) is
# used in place of grafanalib's Target; a plain Target with the Loki
# datasource and the same expr/legendFormat/refId was tried and left disabled.
_logs_panel = Logs(
    id=4,
    title='',
    gridPos=GridPos(h=8, w=12, x=12, y=8),
    showLabels=True,
    showCommonLabels=True,
    wrapLogMessages=True,
    prettifyLogMessage=True,
    dedupStrategy='numbers',
    targets=[
        LokiTarget(),
    ],
)

# The dashboard object itself, assembled from the pieces above.
dashboard = Dashboard(
    title='Containers',
    uid='containers',
    description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
    tags=[
        'example',
    ],
    templating=Templating(list=_variables),
    timezone='browser',
    panels=[
        _memory_panel,
        _cpu_panel,
        _network_panel,
        _logs_panel,
    ],
).auto_panel_ids()

View File

@@ -26,5 +26,5 @@ schema_config:
store: tsdb
# TODO: Figure this out
ruler:
alertmanager_url: http://localhost:9093
# ruler:
# alertmanager_url: http://localhost:9093