Compare commits

...

2 Commits

7 changed files with 74 additions and 7 deletions

22
.idea/jsonSchemas.xml generated
View File

@ -116,6 +116,28 @@
<Item> <Item>
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" /> <option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
</Item> </Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
<entry key="prometheus.rules.json">
<value>
<SchemaInfo>
<option name="name" value="prometheus.rules.json" />
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
</Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
</Item>
</list> </list>
</option> </option>
</SchemaInfo> </SchemaInfo>

View File

@ -60,12 +60,15 @@ services:
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
container_name: prometheus container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove) # Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}" user: "{{ remote_uid }}"
command: command:
- --config.file=/etc/prometheus/prometheus.yml - --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=30d - --storage.tsdb.retention.time=30d
- --web.external-url=https://prom.{{ domain }}/
volumes: volumes:
- ./prometheus_config:/etc/prometheus:ro - ./prometheus_config:/etc/prometheus:ro
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro - {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
@ -74,12 +77,15 @@ services:
alertmanager: alertmanager:
image: prom/alertmanager:latest image: prom/alertmanager:latest
container_name: alertmanager container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped restart: unless-stopped
command: command:
- --config.file=/etc/alertmanager/alertmanager.yml - --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes: volumes:
- ./alertmanager_config:/etc/alertmanager:ro - ./alertmanager_config:/etc/alertmanager:ro
# TODO: add volume for alertmanager data - {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter: node-exporter:
image: prom/node-exporter:latest image: prom/node-exporter:latest

View File

@ -56,12 +56,16 @@ dashboard = Dashboard(
timezone='browser', timezone='browser',
panels=[ panels=[
TimeSeries( TimeSeries(
id=1,
title='Container Memory Usage', title='Container Memory Usage',
unit=BYTES_IEC, unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=0), gridPos=GridPos(h=8, w=12, x=0, y=0),
lineWidth=2, lineWidth=2,
fillOpacity=10, fillOpacity=10,
showPoints='never', showPoints='never',
stacking={'mode': 'normal'},
tooltipMode='all',
tooltipSort='desc',
targets=[ targets=[
Target( Target(
datasource=prom_datasource, datasource=prom_datasource,
@ -72,6 +76,7 @@ dashboard = Dashboard(
], ],
), ),
TimeSeries( TimeSeries(
id=2,
title='Container CPU Usage', title='Container CPU Usage',
unit=SECONDS, unit=SECONDS,
gridPos=GridPos(h=8, w=12, x=12, y=0), gridPos=GridPos(h=8, w=12, x=12, y=0),
@ -88,28 +93,32 @@ dashboard = Dashboard(
], ],
), ),
TimeSeries( TimeSeries(
id=3,
title='Container Network Traffic', title='Container Network Traffic',
unit=BYTES_SEC_IEC, unit=BYTES_SEC_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=8), gridPos=GridPos(h=8, w=12, x=0, y=8),
lineWidth=2, lineWidth=2,
fillOpacity=10, fillOpacity=10,
showPoints='never', showPoints='never',
tooltipMode='all',
tooltipSort='desc',
targets=[ targets=[
Target( Target(
datasource=prom_datasource, datasource=prom_datasource,
expr='sum by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
legendFormat="recv {{ name }}", legendFormat="rx {{ name }}",
refId='A', refId='A',
), ),
Target( Target(
datasource=prom_datasource, datasource=prom_datasource,
expr='-sum by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))', expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
legendFormat="trans {{ name }}", legendFormat="tx {{ name }}",
refId='B', refId='B',
), ),
], ],
), ),
Logs( Logs(
id=4,
title='', title='',
gridPos=GridPos(h=8, w=12, x=12, y=8), gridPos=GridPos(h=8, w=12, x=12, y=8),
showLabels=True, showLabels=True,

View File

@ -20,10 +20,17 @@ schema_config:
- from: 2020-10-24 - from: 2020-10-24
store: boltdb-shipper store: boltdb-shipper
object_store: filesystem object_store: filesystem
schema: v11 schema: v12
index: index:
prefix: index_ prefix: index_
period: 24h period: 24h
- from: 2024-10-18
index:
period: 24h
prefix: index_
object_store: filesystem
schema: v13
store: tsdb
# TODO: Figure this out # TODO: Figure this out
ruler: ruler:

View File

@ -0,0 +1,23 @@
# Prometheus alerting rules for container-level checks.
# NOTE(review): the double-brace `domain` placeholder used in the links below
# is not a Prometheus template expression, so this file is presumably rendered
# by Jinja2 before Prometheus loads it - confirm. For that reason the
# Prometheus-side value expression in the description is wrapped in a Jinja2
# raw block so it survives rendering untouched.
groups:
  - name: qbit-low-traffic
    # Evaluate this group's rules once per minute.
    interval: 1m
    rules:
      - alert: QbitLowTraffic
        # Per-second transmit rate of the `gluetun` container over the last
        # minute; the alert condition is a rate below 1024 bytes/s.
        # An equality matcher is used because the name is a fixed string
        # (Prometheus regex matchers are fully anchored, so this selects the
        # same series as the former regex matcher).
        expr: |
          rate(container_network_transmit_bytes_total{name="gluetun"}[1m]) < 1024
        # The condition must hold for 2 minutes before the alert fires.
        for: 2m
        labels:
          severity: warning
        annotations:
          title: 'Low traffic on qBit'
          # The measured rate is injected by Prometheus at alert time instead
          # of the former literal "x" placeholder.
          description: |
            The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
            Last value was {% raw %}{{ $value | humanize1024 }}{% endraw %}B/s.
            [Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
            [View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
          # NOTE(review): Grafana's own annotation key is spelled
          # `__dashboardUid__`; confirm whether the spelling below is what the
          # consumer of these annotations expects before renaming it.
          __dashboard__uid: 'containers'
          # Annotation values are strings; quoted so they are not typed as
          # integers by YAML tooling or schema validation.
          __orgId__: '1'
          __panelId__: '3'

View File

@ -43,7 +43,7 @@ scrape_configs:
- 'demo.promlabs.com:10002' - 'demo.promlabs.com:10002'
rule_files: rule_files:
- "/etc/prometheus/demo-alerts.yml" - "/etc/prometheus/container.alerts.yml"
- "/etc/prometheus/extra/rules/*.yml" - "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json" - "/etc/prometheus/extra/rules/*.json"