Compare commits
4 Commits
002eb40b68
...
feature/mo
| Author | SHA1 | Date | |
|---|---|---|---|
|
eb264b73fa
|
|||
|
8c6b862495
|
|||
|
30510c6690
|
|||
|
c38f94f4ce
|
22
.idea/jsonSchemas.xml
generated
22
.idea/jsonSchemas.xml
generated
@@ -116,6 +116,28 @@
|
|||||||
<Item>
|
<Item>
|
||||||
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
||||||
</Item>
|
</Item>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
|
||||||
|
</Item>
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</SchemaInfo>
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
<entry key="prometheus.rules.json">
|
||||||
|
<value>
|
||||||
|
<SchemaInfo>
|
||||||
|
<option name="name" value="prometheus.rules.json" />
|
||||||
|
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
|
||||||
|
<option name="applicationDefined" value="true" />
|
||||||
|
<option name="patterns">
|
||||||
|
<list>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
|
||||||
|
</Item>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
|
||||||
|
</Item>
|
||||||
</list>
|
</list>
|
||||||
</option>
|
</option>
|
||||||
</SchemaInfo>
|
</SchemaInfo>
|
||||||
|
|||||||
@@ -60,12 +60,15 @@ services:
|
|||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus:latest
|
image: prom/prometheus:latest
|
||||||
container_name: prometheus
|
container_name: prometheus
|
||||||
|
labels:
|
||||||
|
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# Needed to make config files readable (not anymore, TODO: remove)
|
# Needed to make config files readable (not anymore, TODO: remove)
|
||||||
user: "{{ remote_uid }}"
|
user: "{{ remote_uid }}"
|
||||||
command:
|
command:
|
||||||
- --config.file=/etc/prometheus/prometheus.yml
|
- --config.file=/etc/prometheus/prometheus.yml
|
||||||
- --storage.tsdb.retention.time=30d
|
- --storage.tsdb.retention.time=30d
|
||||||
|
- --web.external-url=https://prom.{{ domain }}/
|
||||||
volumes:
|
volumes:
|
||||||
- ./prometheus_config:/etc/prometheus:ro
|
- ./prometheus_config:/etc/prometheus:ro
|
||||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||||
@@ -74,12 +77,15 @@ services:
|
|||||||
alertmanager:
|
alertmanager:
|
||||||
image: prom/alertmanager:latest
|
image: prom/alertmanager:latest
|
||||||
container_name: alertmanager
|
container_name: alertmanager
|
||||||
|
labels:
|
||||||
|
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command:
|
command:
|
||||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||||
|
- --web.external-url=https://alert.{{ domain }}/
|
||||||
volumes:
|
volumes:
|
||||||
- ./alertmanager_config:/etc/alertmanager:ro
|
- ./alertmanager_config:/etc/alertmanager:ro
|
||||||
# TODO: add volume for alertmanager data
|
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||||
|
|
||||||
node-exporter:
|
node-exporter:
|
||||||
image: prom/node-exporter:latest
|
image: prom/node-exporter:latest
|
||||||
|
|||||||
@@ -0,0 +1,140 @@
|
|||||||
|
from grafanalib.core import (
|
||||||
|
Dashboard, TimeSeries,
|
||||||
|
Target, GridPos,
|
||||||
|
Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE, Logs
|
||||||
|
)
|
||||||
|
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
|
||||||
|
|
||||||
|
prom_datasource='prometheus'
|
||||||
|
loki_datasource='loki'
|
||||||
|
|
||||||
|
# TODO: this is (clown emoji), normal Target gave me errors in grafana
|
||||||
|
class LokiTarget(object):
|
||||||
|
def to_json_data(self):
|
||||||
|
return {
|
||||||
|
'datasource': loki_datasource,
|
||||||
|
'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
|
||||||
|
'legendFormat': '{{ container_name }}',
|
||||||
|
'refId': 'A',
|
||||||
|
'queryType': 'range',
|
||||||
|
}
|
||||||
|
|
||||||
|
dashboard = Dashboard(
|
||||||
|
title='Containers',
|
||||||
|
uid='containers',
|
||||||
|
description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
|
||||||
|
tags=[
|
||||||
|
'example'
|
||||||
|
],
|
||||||
|
templating=Templating(list=[
|
||||||
|
Template(
|
||||||
|
name='compose_project',
|
||||||
|
label='Compose Project',
|
||||||
|
dataSource=prom_datasource,
|
||||||
|
query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)',
|
||||||
|
includeAll=True,
|
||||||
|
multi=True,
|
||||||
|
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
|
||||||
|
),
|
||||||
|
Template(
|
||||||
|
name='container_name',
|
||||||
|
label='Container',
|
||||||
|
dataSource=prom_datasource,
|
||||||
|
query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)',
|
||||||
|
includeAll=True,
|
||||||
|
multi=True,
|
||||||
|
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
|
||||||
|
|
||||||
|
),
|
||||||
|
Template(
|
||||||
|
name='logs_query',
|
||||||
|
label='Log Search',
|
||||||
|
query='',
|
||||||
|
type='textbox',
|
||||||
|
),
|
||||||
|
]),
|
||||||
|
timezone='browser',
|
||||||
|
panels=[
|
||||||
|
TimeSeries(
|
||||||
|
id=1,
|
||||||
|
title='Container Memory Usage',
|
||||||
|
unit=BYTES_IEC,
|
||||||
|
gridPos=GridPos(h=8, w=12, x=0, y=0),
|
||||||
|
lineWidth=2,
|
||||||
|
fillOpacity=10,
|
||||||
|
showPoints='never',
|
||||||
|
stacking={'mode': 'normal'},
|
||||||
|
tooltipMode='all',
|
||||||
|
tooltipSort='desc',
|
||||||
|
targets=[
|
||||||
|
Target(
|
||||||
|
datasource=prom_datasource,
|
||||||
|
expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})',
|
||||||
|
legendFormat='{{ name }}',
|
||||||
|
refId='A',
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
TimeSeries(
|
||||||
|
id=2,
|
||||||
|
title='Container CPU Usage',
|
||||||
|
unit=SECONDS,
|
||||||
|
gridPos=GridPos(h=8, w=12, x=12, y=0),
|
||||||
|
lineWidth=2,
|
||||||
|
fillOpacity=10,
|
||||||
|
showPoints='never',
|
||||||
|
targets=[
|
||||||
|
Target(
|
||||||
|
datasource=prom_datasource,
|
||||||
|
expr='max by (name) (rate(container_cpu_usage_seconds_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||||
|
legendFormat='{{ name }}',
|
||||||
|
refId='A',
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
TimeSeries(
|
||||||
|
id=3,
|
||||||
|
title='Container Network Traffic',
|
||||||
|
unit=BYTES_SEC_IEC,
|
||||||
|
gridPos=GridPos(h=8, w=12, x=0, y=8),
|
||||||
|
lineWidth=2,
|
||||||
|
fillOpacity=10,
|
||||||
|
showPoints='never',
|
||||||
|
tooltipMode='all',
|
||||||
|
tooltipSort='desc',
|
||||||
|
targets=[
|
||||||
|
Target(
|
||||||
|
datasource=prom_datasource,
|
||||||
|
expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||||
|
legendFormat="rx {{ name }}",
|
||||||
|
refId='A',
|
||||||
|
),
|
||||||
|
Target(
|
||||||
|
datasource=prom_datasource,
|
||||||
|
expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||||
|
legendFormat="tx {{ name }}",
|
||||||
|
refId='B',
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
Logs(
|
||||||
|
id=4,
|
||||||
|
title='',
|
||||||
|
gridPos=GridPos(h=8, w=12, x=12, y=8),
|
||||||
|
showLabels=True,
|
||||||
|
showCommonLabels=True,
|
||||||
|
wrapLogMessages=True,
|
||||||
|
prettifyLogMessage=True,
|
||||||
|
dedupStrategy='numbers',
|
||||||
|
targets=[
|
||||||
|
LokiTarget(),
|
||||||
|
# Target(
|
||||||
|
# datasource=loki_datasource,
|
||||||
|
# expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
|
||||||
|
# legendFormat='{{ container_name }}',
|
||||||
|
# refId='A',
|
||||||
|
# ),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
],
|
||||||
|
).auto_panel_ids()
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
from grafanalib.core import (
|
|
||||||
Dashboard, TimeSeries, GaugePanel,
|
|
||||||
Target, GridPos,
|
|
||||||
OPS_FORMAT, Templating, Template, REFRESH_ON_TIME_RANGE_CHANGE
|
|
||||||
)
|
|
||||||
from grafanalib.formatunits import BYTES_IEC
|
|
||||||
|
|
||||||
dashboard = Dashboard(
|
|
||||||
title="Containers",
|
|
||||||
description="Data for compose projects from default Prometheus datasource collected by Cadvisor",
|
|
||||||
tags=[
|
|
||||||
'example'
|
|
||||||
],
|
|
||||||
templating=Templating(list=[
|
|
||||||
# TODO: test how much of this is actually necessary
|
|
||||||
Template(
|
|
||||||
name="compose_project",
|
|
||||||
label="compose_project",
|
|
||||||
dataSource="prometheus",
|
|
||||||
query='label_values({__name__=~"container.*"}, container_label_com_docker_compose_project)',
|
|
||||||
includeAll=True,
|
|
||||||
multi=True,
|
|
||||||
hide=0,
|
|
||||||
sort=1,
|
|
||||||
type="query",
|
|
||||||
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
|
|
||||||
),
|
|
||||||
Template(
|
|
||||||
name="container_name",
|
|
||||||
label="container_name",
|
|
||||||
dataSource="prometheus",
|
|
||||||
query='label_values({__name__=~"container.*", container_label_com_docker_compose_project=~"$compose_project"}, name)',
|
|
||||||
includeAll=True,
|
|
||||||
multi=True,
|
|
||||||
hide=0,
|
|
||||||
sort=1,
|
|
||||||
type="query",
|
|
||||||
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
|
|
||||||
|
|
||||||
),
|
|
||||||
]),
|
|
||||||
timezone="browser",
|
|
||||||
panels=[
|
|
||||||
TimeSeries(
|
|
||||||
title="Container Memory Usage",
|
|
||||||
# dataSource='prometheus',
|
|
||||||
targets=[
|
|
||||||
Target(
|
|
||||||
datasource='prometheus',
|
|
||||||
expr='max by (name) (container_memory_usage_bytes{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"})',
|
|
||||||
legendFormat="{{ name }}",
|
|
||||||
refId='A',
|
|
||||||
),
|
|
||||||
],
|
|
||||||
unit=BYTES_IEC,
|
|
||||||
gridPos=GridPos(h=8, w=16, x=0, y=0),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
).auto_panel_ids()
|
|
||||||
@@ -20,10 +20,17 @@ schema_config:
|
|||||||
- from: 2020-10-24
|
- from: 2020-10-24
|
||||||
store: boltdb-shipper
|
store: boltdb-shipper
|
||||||
object_store: filesystem
|
object_store: filesystem
|
||||||
schema: v11
|
schema: v12
|
||||||
index:
|
index:
|
||||||
prefix: index_
|
prefix: index_
|
||||||
period: 24h
|
period: 24h
|
||||||
|
- from: 2024-10-18
|
||||||
|
index:
|
||||||
|
period: 24h
|
||||||
|
prefix: index_
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v13
|
||||||
|
store: tsdb
|
||||||
|
|
||||||
# TODO: Figure this out
|
# TODO: Figure this out
|
||||||
ruler:
|
ruler:
|
||||||
|
|||||||
@@ -0,0 +1,23 @@
|
|||||||
|
groups:
|
||||||
|
- name: qbit-low-traffic
|
||||||
|
interval: 1m
|
||||||
|
rules:
|
||||||
|
- alert: QbitLowTraffic
|
||||||
|
expr: |
|
||||||
|
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
title: 'Low traffic on qBit'
|
||||||
|
description: |
|
||||||
|
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
||||||
|
|
||||||
|
Last value was x bytes/s.
|
||||||
|
|
||||||
|
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
||||||
|
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
||||||
|
|
||||||
|
__dashboard__uid: 'containers'
|
||||||
|
__orgId__: 1
|
||||||
|
__panelId__: 3
|
||||||
@@ -43,7 +43,7 @@ scrape_configs:
|
|||||||
- 'demo.promlabs.com:10002'
|
- 'demo.promlabs.com:10002'
|
||||||
|
|
||||||
rule_files:
|
rule_files:
|
||||||
- "/etc/prometheus/demo-alerts.yml"
|
- "/etc/prometheus/container.alerts.yml"
|
||||||
- "/etc/prometheus/extra/rules/*.yml"
|
- "/etc/prometheus/extra/rules/*.yml"
|
||||||
- "/etc/prometheus/extra/rules/*.json"
|
- "/etc/prometheus/extra/rules/*.json"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user