# NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
# Indentation of YAML context lines is inferred from the docker-compose and
# Prometheus schemas, and the .idea/jsonSchemas.xml hunk lost its XML content
# (only the "+" markers survive), so that hunk no longer matches its header.
diff --git a/.idea/jsonSchemas.xml b/.idea/jsonSchemas.xml
index f779688..d7304b3 100644
--- a/.idea/jsonSchemas.xml
+++ b/.idea/jsonSchemas.xml
@@ -116,6 +116,28 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/roles/alpina/templates/services/monitoring/docker-compose.yml.j2 b/roles/alpina/templates/services/monitoring/docker-compose.yml.j2
index dfa5398..95850f1 100644
--- a/roles/alpina/templates/services/monitoring/docker-compose.yml.j2
+++ b/roles/alpina/templates/services/monitoring/docker-compose.yml.j2
@@ -60,12 +60,15 @@ services:
   prometheus:
     image: prom/prometheus:latest
     container_name: prometheus
+    labels:
+      - {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
     restart: unless-stopped
     # Needed to make config files readable (not anymore, TODO: remove)
     user: "{{ remote_uid }}"
     command:
       - --config.file=/etc/prometheus/prometheus.yml
       - --storage.tsdb.retention.time=30d
+      - --web.external-url=https://prom.{{ domain }}/
     volumes:
       - ./prometheus_config:/etc/prometheus:ro
       - {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
@@ -74,12 +77,15 @@ services:
   alertmanager:
     image: prom/alertmanager:latest
     container_name: alertmanager
+    labels:
+      - {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
     restart: unless-stopped
     command:
       - --config.file=/etc/alertmanager/alertmanager.yml
+      - --web.external-url=https://alert.{{ domain }}/
     volumes:
       - ./alertmanager_config:/etc/alertmanager:ro
-      # TODO: add volume for alertmanager data
+      - {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
 
   node-exporter:
     image: prom/node-exporter:latest
diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml b/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml
new file mode 100644
index 0000000..0a52f6b
--- /dev/null
+++ b/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml
@@ -0,0 +1,28 @@
+# Prometheus alerting rules based on per-container metrics.
+groups:
+  - name: qbit-low-traffic
+    interval: 1m
+    rules:
+      # Fires once the transmit rate of the container named "gluetun" has
+      # stayed below 1024 bytes/s (1 KiB/s) for two minutes.
+      - alert: QbitLowTraffic
+        expr: |
+          rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          title: 'Low traffic on qBit'
+          # NOTE(review): the "x" in the text below is a literal placeholder, not the measured value - confirm whether the alert value should be templated in.
+          description: |
+            The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
+
+            Last value was x bytes/s.
+
+            [Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
+            [View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
+
+          # Grafana's reserved annotation names for linking an alert to a dashboard panel; annotation values are strings, so the ids are quoted.
+          __dashboardUid__: 'containers'
+          __orgId__: '1'
+          __panelId__: '3'
diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/demo-alerts.yml.j2 b/roles/alpina/templates/services/monitoring/prometheus_config/demo.alerts.yml.j2
similarity index 100%
rename from roles/alpina/templates/services/monitoring/prometheus_config/demo-alerts.yml.j2
rename to roles/alpina/templates/services/monitoring/prometheus_config/demo.alerts.yml.j2
diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2 b/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2
index 8870efe..588b09b 100644
--- a/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2
+++ b/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2
@@ -43,7 +43,7 @@ scrape_configs:
           - 'demo.promlabs.com:10002'
 
 rule_files:
-  - "/etc/prometheus/demo-alerts.yml"
+  - "/etc/prometheus/container.alerts.yml"
   - "/etc/prometheus/extra/rules/*.yml"
   - "/etc/prometheus/extra/rules/*.json"
 