(WIP) monitoring: attempt at container alerts
This commit is contained in:
parent
8c6b862495
commit
eb264b73fa
22
.idea/jsonSchemas.xml
generated
22
.idea/jsonSchemas.xml
generated
@ -116,6 +116,28 @@
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
||||
</Item>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
|
||||
</Item>
|
||||
</list>
|
||||
</option>
|
||||
</SchemaInfo>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="prometheus.rules.json">
|
||||
<value>
|
||||
<SchemaInfo>
|
||||
<option name="name" value="prometheus.rules.json" />
|
||||
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
|
||||
<option name="applicationDefined" value="true" />
|
||||
<option name="patterns">
|
||||
<list>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
|
||||
</Item>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
|
||||
</Item>
|
||||
</list>
|
||||
</option>
|
||||
</SchemaInfo>
|
||||
|
@ -60,12 +60,15 @@ services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
# Needed to make config files readable (not anymore, TODO: remove)
|
||||
user: "{{ remote_uid }}"
|
||||
command:
|
||||
- --config.file=/etc/prometheus/prometheus.yml
|
||||
- --storage.tsdb.retention.time=30d
|
||||
- --web.external-url=https://prom.{{ domain }}/
|
||||
volumes:
|
||||
- ./prometheus_config:/etc/prometheus:ro
|
||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||
@ -74,12 +77,15 @@ services:
|
||||
alertmanager:
|
||||
image: prom/alertmanager:latest
|
||||
container_name: alertmanager
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||
- --web.external-url=https://alert.{{ domain }}/
|
||||
volumes:
|
||||
- ./alertmanager_config:/etc/alertmanager:ro
|
||||
# Persistent volume for alertmanager data
|
||||
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
|
@ -0,0 +1,23 @@
|
||||
# Prometheus alerting rules for containers.
#
# NOTE(review): this file already uses a Jinja-style `domain` placeholder, so
# it is presumably rendered by Ansible/Jinja before Prometheus loads it -
# confirm. For that reason the Prometheus-side template expression in the
# description is wrapped in Jinja raw/endraw tags, so that it reaches
# Prometheus untouched.
groups:
  - name: qbit-low-traffic
    # Evaluate the rules of this group once per minute.
    interval: 1m
    rules:
      # Fires when the container named "gluetun" transmits less than
      # 1024 bytes/s (per-second rate over the last minute) for 2 minutes.
      # NOTE(review): no series means no alert - if the container is not
      # running at all this rule stays silent; confirm that is acceptable.
      - alert: QbitLowTraffic
        expr: |
          rate(container_network_transmit_bytes_total{name="gluetun"}[1m]) < 1024
        for: 2m
        labels:
          severity: warning
        annotations:
          title: 'Low traffic on qBit'
          description: |
            The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.

            Last value was {% raw %}{{ $value | humanize1024 }}{% endraw %}B/s.

            [Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
            [View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
          # Dashboard/panel link annotations. Values are quoted because
          # annotation values are strings.
          # NOTE(review): key names follow Grafana's `__dashboardUid__` /
          # `__panelId__` convention - verify against whatever consumes them.
          __dashboardUid__: 'containers'
          __orgId__: '1'
          __panelId__: '3'
|
@ -43,7 +43,7 @@ scrape_configs:
|
||||
- 'demo.promlabs.com:10002'
|
||||
|
||||
rule_files:
|
||||
- "/etc/prometheus/demo-alerts.yml"
|
||||
- "/etc/prometheus/container.alerts.yml"
|
||||
- "/etc/prometheus/extra/rules/*.yml"
|
||||
- "/etc/prometheus/extra/rules/*.json"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user