(WIP) monitoring: attempt at container alerts
This commit is contained in:
parent
8c6b862495
commit
eb264b73fa
22
.idea/jsonSchemas.xml
generated
22
.idea/jsonSchemas.xml
generated
@ -116,6 +116,28 @@
|
|||||||
<Item>
|
<Item>
|
||||||
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
||||||
</Item>
|
</Item>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
|
||||||
|
</Item>
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</SchemaInfo>
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
<entry key="prometheus.rules.json">
|
||||||
|
<value>
|
||||||
|
<SchemaInfo>
|
||||||
|
<option name="name" value="prometheus.rules.json" />
|
||||||
|
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
|
||||||
|
<option name="applicationDefined" value="true" />
|
||||||
|
<option name="patterns">
|
||||||
|
<list>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
|
||||||
|
</Item>
|
||||||
|
<Item>
|
||||||
|
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
|
||||||
|
</Item>
|
||||||
</list>
|
</list>
|
||||||
</option>
|
</option>
|
||||||
</SchemaInfo>
|
</SchemaInfo>
|
||||||
|
@ -60,12 +60,15 @@ services:
|
|||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus:latest
|
image: prom/prometheus:latest
|
||||||
container_name: prometheus
|
container_name: prometheus
|
||||||
|
labels:
|
||||||
|
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# Needed to make config files readable (not anymore, TODO: remove)
|
# Needed to make config files readable (not anymore, TODO: remove)
|
||||||
user: "{{ remote_uid }}"
|
user: "{{ remote_uid }}"
|
||||||
command:
|
command:
|
||||||
- --config.file=/etc/prometheus/prometheus.yml
|
- --config.file=/etc/prometheus/prometheus.yml
|
||||||
- --storage.tsdb.retention.time=30d
|
- --storage.tsdb.retention.time=30d
|
||||||
|
- --web.external-url=https://prom.{{ domain }}/
|
||||||
volumes:
|
volumes:
|
||||||
- ./prometheus_config:/etc/prometheus:ro
|
- ./prometheus_config:/etc/prometheus:ro
|
||||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||||
@ -74,12 +77,15 @@ services:
|
|||||||
alertmanager:
|
alertmanager:
|
||||||
image: prom/alertmanager:latest
|
image: prom/alertmanager:latest
|
||||||
container_name: alertmanager
|
container_name: alertmanager
|
||||||
|
labels:
|
||||||
|
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command:
|
command:
|
||||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||||
|
- --web.external-url=https://alert.{{ domain }}/
|
||||||
volumes:
|
volumes:
|
||||||
- ./alertmanager_config:/etc/alertmanager:ro
|
- ./alertmanager_config:/etc/alertmanager:ro
|
||||||
# TODO: add volume for alertmanager data
|
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||||
|
|
||||||
node-exporter:
|
node-exporter:
|
||||||
image: prom/node-exporter:latest
|
image: prom/node-exporter:latest
|
||||||
|
@ -0,0 +1,23 @@
|
|||||||
|
groups:
|
||||||
|
- name: qbit-low-traffic
|
||||||
|
interval: 1m
|
||||||
|
rules:
|
||||||
|
- alert: QbitLowTraffic
|
||||||
|
expr: |
|
||||||
|
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
title: 'Low traffic on qBit'
|
||||||
|
description: |
|
||||||
|
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
||||||
|
|
||||||
|
Last value was x bytes/s.
|
||||||
|
|
||||||
|
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
||||||
|
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
||||||
|
|
||||||
|
__dashboard__uid: 'containers'
|
||||||
|
__orgId__: 1
|
||||||
|
__panelId__: 3
|
@ -43,7 +43,7 @@ scrape_configs:
|
|||||||
- 'demo.promlabs.com:10002'
|
- 'demo.promlabs.com:10002'
|
||||||
|
|
||||||
rule_files:
|
rule_files:
|
||||||
- "/etc/prometheus/demo-alerts.yml"
|
- "/etc/prometheus/container.alerts.yml"
|
||||||
- "/etc/prometheus/extra/rules/*.yml"
|
- "/etc/prometheus/extra/rules/*.yml"
|
||||||
- "/etc/prometheus/extra/rules/*.json"
|
- "/etc/prometheus/extra/rules/*.json"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user