monitoring: prepare for merge to master
This commit is contained in:
parent
929a6619c8
commit
531febffe2
@ -1 +0,0 @@
|
||||
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}
|
@ -1,68 +0,0 @@
|
||||
# The root route on which each incoming alert enters.
|
||||
route:
|
||||
group_by: ["alertname", "job"]
|
||||
group_wait: 20s
|
||||
group_interval: 5m
|
||||
repeat_interval: 3h
|
||||
receiver: discord_webhook
|
||||
|
||||
receivers:
|
||||
- name: "discord_webhook"
|
||||
discord_configs:
|
||||
- webhook_url: "{{ alertmanager_discord_webhook }}"
|
||||
{# - send_resolved: true#}
|
||||
{# username: 'Alertmanager'#}
|
||||
{# webhook_configs:#}
|
||||
{# - send_resolved: true#}
|
||||
{# url: '{{ alertmanager_discord_webhook }}'#}
|
||||
{# username: 'Alertmanager'#}
|
||||
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
|
||||
{# icon_emoji: ':alert:'#}
|
||||
{# send_resolved: true#}
|
||||
{# text: "{{ .CommonAnnotations.summary }}"#}
|
||||
{# title: "{{ .CommonLabels.alertname }}"#}
|
||||
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
|
||||
{# footer: '{{ .CommonLabels.monitor }}'#}
|
||||
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
|
||||
{# actions:#}
|
||||
{# - type: 'button'#}
|
||||
{# text: 'Open in Grafana'#}
|
||||
{# url: '{{ .ExternalURL }}'#}
|
||||
{# style: 'primary'#}
|
||||
{# send_resolved: true#}
|
||||
{# confirm:#}
|
||||
{# title: 'Are you sure?'#}
|
||||
{# text: 'This will open Grafana in a new tab.'#}
|
||||
{# ok_text: 'Yes'#}
|
||||
{# dismiss_text: 'No'#}
|
||||
{# fields:#}
|
||||
{# - title: 'Description'#}
|
||||
{# value: "{{ .CommonAnnotations.description }}"#}
|
||||
{# short: false#}
|
||||
{# - title: 'Details'#}
|
||||
{# value: "{{ .CommonAnnotations.details }}"#}
|
||||
{# short: false#}
|
||||
{# - title: 'Severity'#}
|
||||
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Host'#}
|
||||
{# value: '{{ .CommonLabels.monitor }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Starts At'#}
|
||||
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Ends At'#}
|
||||
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Runbook'#}
|
||||
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Dashboard'#}
|
||||
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Alerting Rule'#}
|
||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Alerting Rule Description'#}
|
||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
||||
{# short: true#}
|
@ -60,8 +60,6 @@ services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
# Needed to make config files readable (not anymore, TODO: remove)
|
||||
user: "{{ remote_uid }}"
|
||||
@ -74,19 +72,6 @@ services:
|
||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:latest
|
||||
container_name: alertmanager
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||
- --web.external-url=https://alert.{{ domain }}/
|
||||
volumes:
|
||||
- ./alertmanager_config:/etc/alertmanager:ro
|
||||
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
container_name: node-exporter
|
||||
|
@ -3,7 +3,7 @@ apiVersion: 1
|
||||
providers:
|
||||
- name: "Grafana"
|
||||
org_id: 1
|
||||
folder: "Services"
|
||||
folder: "Alpina"
|
||||
type: "file"
|
||||
options:
|
||||
path: "/etc/grafana/provisioning/dashboards"
|
||||
|
@ -15,18 +15,6 @@ datasources:
|
||||
url: http://prometheus:9090
|
||||
editable: false
|
||||
|
||||
- name: Alertmanager
|
||||
type: alertmanager
|
||||
access: proxy
|
||||
uid: alertmanager
|
||||
url: http://alertmanager:9093
|
||||
jsonData:
|
||||
# Valid options for implementation include mimir, cortex and prometheus
|
||||
implementation: prometheus
|
||||
# Whether Grafana should send alert instances to this Alertmanager
|
||||
handleGrafanaManagedAlerts: true
|
||||
editable: false
|
||||
|
||||
- name: InfluxDB
|
||||
type: influxdb
|
||||
access: proxy
|
||||
|
@ -26,5 +26,5 @@ schema_config:
|
||||
store: tsdb
|
||||
|
||||
# TODO: Figure this out
|
||||
ruler:
|
||||
alertmanager_url: http://localhost:9093
|
||||
# ruler:
|
||||
# alertmanager_url: http://localhost:9093
|
||||
|
@ -1,23 +0,0 @@
|
||||
groups:
|
||||
- name: qbit-low-traffic
|
||||
interval: 1m
|
||||
rules:
|
||||
- alert: QbitLowTraffic
|
||||
expr: |
|
||||
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
title: 'Low traffic on qBit'
|
||||
description: |
|
||||
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
||||
|
||||
Last value was x bytes/s.
|
||||
|
||||
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
||||
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
||||
|
||||
__dashboard__uid: 'containers'
|
||||
__orgId__: 1
|
||||
__panelId__: 3
|
@ -1,20 +0,0 @@
|
||||
groups:
|
||||
- name: demo-service-alerts
|
||||
rules:
|
||||
- alert: DemoServiceHighErrorRate
|
||||
expr: |
|
||||
(
|
||||
sum without(status, instance) (
|
||||
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
|
||||
)
|
||||
/
|
||||
sum without(status, instance) (
|
||||
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
|
||||
) * 100 > 0.5
|
||||
)
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
|
||||
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'
|
@ -5,11 +5,6 @@ global:
|
||||
external_labels:
|
||||
monitor: "{{ ansible_host }}"
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: ["alertmanager:9093"]
|
||||
|
||||
scrape_configs:
|
||||
- job_name: "prometheus"
|
||||
static_configs:
|
||||
@ -43,7 +38,6 @@ scrape_configs:
|
||||
- 'demo.promlabs.com:10002'
|
||||
|
||||
rule_files:
|
||||
{# - "/etc/prometheus/container.alerts.yml"#}
|
||||
- "/etc/prometheus/extra/rules/*.yml"
|
||||
- "/etc/prometheus/extra/rules/*.json"
|
||||
|
||||
|
10
services.yml
10
services.yml
@ -5,11 +5,11 @@
|
||||
post_tasks:
|
||||
- name: Docker prune objects
|
||||
docker_prune:
|
||||
containers: yes
|
||||
images: yes
|
||||
containers: true
|
||||
# Keep images for building grafana
|
||||
images: true
|
||||
images_filters:
|
||||
dangling: false
|
||||
until: "720h"
|
||||
networks: true
|
||||
volumes: true
|
||||
builder_cache: true
|
||||
when: false
|
||||
builder_cache: false
|
||||
|
Loading…
x
Reference in New Issue
Block a user