Compare commits
2 Commits
30510c6690
...
eb264b73fa
Author | SHA1 | Date | |
---|---|---|---|
eb264b73fa | |||
8c6b862495 |
22
.idea/jsonSchemas.xml
generated
22
.idea/jsonSchemas.xml
generated
@ -116,6 +116,28 @@
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/collections/services/monitoring/templates/prometheus_config/prometheus.yml.j2" />
|
||||
</Item>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2" />
|
||||
</Item>
|
||||
</list>
|
||||
</option>
|
||||
</SchemaInfo>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="prometheus.rules.json">
|
||||
<value>
|
||||
<SchemaInfo>
|
||||
<option name="name" value="prometheus.rules.json" />
|
||||
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
|
||||
<option name="applicationDefined" value="true" />
|
||||
<option name="patterns">
|
||||
<list>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
|
||||
</Item>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
|
||||
</Item>
|
||||
</list>
|
||||
</option>
|
||||
</SchemaInfo>
|
||||
|
@ -60,12 +60,15 @@ services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
# Needed to make config files readable (not anymore, TODO: remove)
|
||||
user: "{{ remote_uid }}"
|
||||
command:
|
||||
- --config.file=/etc/prometheus/prometheus.yml
|
||||
- --storage.tsdb.retention.time=30d
|
||||
- --web.external-url=https://prom.{{ domain }}/
|
||||
volumes:
|
||||
- ./prometheus_config:/etc/prometheus:ro
|
||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||
@ -74,12 +77,15 @@ services:
|
||||
alertmanager:
|
||||
image: prom/alertmanager:latest
|
||||
container_name: alertmanager
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||
- --web.external-url=https://alert.{{ domain }}/
|
||||
volumes:
|
||||
- ./alertmanager_config:/etc/alertmanager:ro
|
||||
# TODO: add volume for alertmanager data
|
||||
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
|
@ -56,12 +56,16 @@ dashboard = Dashboard(
|
||||
timezone='browser',
|
||||
panels=[
|
||||
TimeSeries(
|
||||
id=1,
|
||||
title='Container Memory Usage',
|
||||
unit=BYTES_IEC,
|
||||
gridPos=GridPos(h=8, w=12, x=0, y=0),
|
||||
lineWidth=2,
|
||||
fillOpacity=10,
|
||||
showPoints='never',
|
||||
stacking={'mode': 'normal'},
|
||||
tooltipMode='all',
|
||||
tooltipSort='desc',
|
||||
targets=[
|
||||
Target(
|
||||
datasource=prom_datasource,
|
||||
@ -72,6 +76,7 @@ dashboard = Dashboard(
|
||||
],
|
||||
),
|
||||
TimeSeries(
|
||||
id=2,
|
||||
title='Container CPU Usage',
|
||||
unit=SECONDS,
|
||||
gridPos=GridPos(h=8, w=12, x=12, y=0),
|
||||
@ -88,28 +93,32 @@ dashboard = Dashboard(
|
||||
],
|
||||
),
|
||||
TimeSeries(
|
||||
id=3,
|
||||
title='Container Network Traffic',
|
||||
unit=BYTES_SEC_IEC,
|
||||
gridPos=GridPos(h=8, w=12, x=0, y=8),
|
||||
lineWidth=2,
|
||||
fillOpacity=10,
|
||||
showPoints='never',
|
||||
tooltipMode='all',
|
||||
tooltipSort='desc',
|
||||
targets=[
|
||||
Target(
|
||||
datasource=prom_datasource,
|
||||
expr='sum by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||
legendFormat="recv {{ name }}",
|
||||
expr='max by (name) (rate(container_network_receive_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||
legendFormat="rx {{ name }}",
|
||||
refId='A',
|
||||
),
|
||||
Target(
|
||||
datasource=prom_datasource,
|
||||
expr='-sum by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||
legendFormat="trans {{ name }}",
|
||||
expr='-max by (name) (rate(container_network_transmit_bytes_total{name=~"$container_name", container_label_com_docker_compose_project=~"$compose_project"}[$__rate_interval]))',
|
||||
legendFormat="tx {{ name }}",
|
||||
refId='B',
|
||||
),
|
||||
],
|
||||
),
|
||||
Logs(
|
||||
id=4,
|
||||
title='',
|
||||
gridPos=GridPos(h=8, w=12, x=12, y=8),
|
||||
showLabels=True,
|
||||
|
@ -20,10 +20,17 @@ schema_config:
|
||||
- from: 2020-10-24
|
||||
store: boltdb-shipper
|
||||
object_store: filesystem
|
||||
schema: v11
|
||||
schema: v12
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
- from: 2024-10-18
|
||||
index:
|
||||
period: 24h
|
||||
prefix: index_
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
store: tsdb
|
||||
|
||||
# TODO: Figure this out
|
||||
ruler:
|
||||
|
@ -0,0 +1,23 @@
|
||||
groups:
|
||||
- name: qbit-low-traffic
|
||||
interval: 1m
|
||||
rules:
|
||||
- alert: QbitLowTraffic
|
||||
expr: |
|
||||
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
title: 'Low traffic on qBit'
|
||||
description: |
|
||||
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
||||
|
||||
Last value was x bytes/s.
|
||||
|
||||
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
||||
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
||||
|
||||
__dashboard__uid: 'containers'
|
||||
__orgId__: 1
|
||||
__panelId__: 3
|
@ -43,7 +43,7 @@ scrape_configs:
|
||||
- 'demo.promlabs.com:10002'
|
||||
|
||||
rule_files:
|
||||
- "/etc/prometheus/demo-alerts.yml"
|
||||
- "/etc/prometheus/container.alerts.yml"
|
||||
- "/etc/prometheus/extra/rules/*.yml"
|
||||
- "/etc/prometheus/extra/rules/*.json"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user