(WIP) monitoring: attempt at container alerts

This commit is contained in:
Yuri Tatishchev 2024-10-22 23:07:41 -07:00
parent 8c6b862495
commit eb264b73fa
Signed by: CaZzzer
GPG Key ID: E0EBF441EA424369
5 changed files with 53 additions and 2 deletions

22
.idea/jsonSchemas.xml generated
View File

@ -116,6 +116,28 @@
<Item>
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
</Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
<entry key="prometheus.rules.json">
<value>
<SchemaInfo>
<option name="name" value="prometheus.rules.json" />
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
</Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
</Item>
</list>
</option>
</SchemaInfo>

View File

@ -60,12 +60,15 @@ services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=30d
- --web.external-url=https://prom.{{ domain }}/
volumes:
- ./prometheus_config:/etc/prometheus:ro
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
@ -74,12 +77,15 @@ services:
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes:
- ./alertmanager_config:/etc/alertmanager:ro
# TODO: add volume for alertmanager data
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter:
image: prom/node-exporter:latest

View File

@ -0,0 +1,23 @@
groups:
- name: qbit-low-traffic
interval: 1m
rules:
- alert: QbitLowTraffic
expr: |
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
for: 2m
labels:
severity: warning
annotations:
title: 'Low traffic on qBit'
description: |
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
Last value was x bytes/s.
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
__dashboard__uid: 'containers'
__orgId__: 1
__panelId__: 3

View File

@ -43,7 +43,7 @@ scrape_configs:
- 'demo.promlabs.com:10002'
rule_files:
- "/etc/prometheus/demo-alerts.yml"
- "/etc/prometheus/container.alerts.yml"
- "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json"