monitoring: prepare for merge to master
This commit is contained in:
parent
929a6619c8
commit
531febffe2
@ -1 +0,0 @@
|
|||||||
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}
|
|
@ -1,68 +0,0 @@
|
|||||||
# The root route on which each incoming alert enters.
|
|
||||||
route:
|
|
||||||
group_by: ["alertname", "job"]
|
|
||||||
group_wait: 20s
|
|
||||||
group_interval: 5m
|
|
||||||
repeat_interval: 3h
|
|
||||||
receiver: discord_webhook
|
|
||||||
|
|
||||||
receivers:
|
|
||||||
- name: "discord_webhook"
|
|
||||||
discord_configs:
|
|
||||||
- webhook_url: "{{ alertmanager_discord_webhook }}"
|
|
||||||
{# - send_resolved: true#}
|
|
||||||
{# username: 'Alertmanager'#}
|
|
||||||
{# webhook_configs:#}
|
|
||||||
{# - send_resolved: true#}
|
|
||||||
{# url: '{{ alertmanager_discord_webhook }}'#}
|
|
||||||
{# username: 'Alertmanager'#}
|
|
||||||
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
|
|
||||||
{# icon_emoji: ':alert:'#}
|
|
||||||
{# send_resolved: true#}
|
|
||||||
{# text: "{{ .CommonAnnotations.summary }}"#}
|
|
||||||
{# title: "{{ .CommonLabels.alertname }}"#}
|
|
||||||
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
|
|
||||||
{# footer: '{{ .CommonLabels.monitor }}'#}
|
|
||||||
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
|
|
||||||
{# actions:#}
|
|
||||||
{# - type: 'button'#}
|
|
||||||
{# text: 'Open in Grafana'#}
|
|
||||||
{# url: '{{ .ExternalURL }}'#}
|
|
||||||
{# style: 'primary'#}
|
|
||||||
{# send_resolved: true#}
|
|
||||||
{# confirm:#}
|
|
||||||
{# title: 'Are you sure?'#}
|
|
||||||
{# text: 'This will open Grafana in a new tab.'#}
|
|
||||||
{# ok_text: 'Yes'#}
|
|
||||||
{# dismiss_text: 'No'#}
|
|
||||||
{# fields:#}
|
|
||||||
{# - title: 'Description'#}
|
|
||||||
{# value: "{{ .CommonAnnotations.description }}"#}
|
|
||||||
{# short: false#}
|
|
||||||
{# - title: 'Details'#}
|
|
||||||
{# value: "{{ .CommonAnnotations.details }}"#}
|
|
||||||
{# short: false#}
|
|
||||||
{# - title: 'Severity'#}
|
|
||||||
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Host'#}
|
|
||||||
{# value: '{{ .CommonLabels.monitor }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Starts At'#}
|
|
||||||
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Ends At'#}
|
|
||||||
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Runbook'#}
|
|
||||||
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Dashboard'#}
|
|
||||||
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Alerting Rule'#}
|
|
||||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
|
||||||
{# short: true#}
|
|
||||||
{# - title: 'Alerting Rule Description'#}
|
|
||||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
|
||||||
{# short: true#}
|
|
@ -60,8 +60,6 @@ services:
|
|||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus:latest
|
image: prom/prometheus:latest
|
||||||
container_name: prometheus
|
container_name: prometheus
|
||||||
labels:
|
|
||||||
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# Needed to make config files readable (not anymore, TODO: remove)
|
# Needed to make config files readable (not anymore, TODO: remove)
|
||||||
user: "{{ remote_uid }}"
|
user: "{{ remote_uid }}"
|
||||||
@ -74,19 +72,6 @@ services:
|
|||||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||||
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
|
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
|
||||||
|
|
||||||
alertmanager:
|
|
||||||
image: prom/alertmanager:latest
|
|
||||||
container_name: alertmanager
|
|
||||||
labels:
|
|
||||||
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
|
||||||
restart: unless-stopped
|
|
||||||
command:
|
|
||||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
|
||||||
- --web.external-url=https://alert.{{ domain }}/
|
|
||||||
volumes:
|
|
||||||
- ./alertmanager_config:/etc/alertmanager:ro
|
|
||||||
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
|
||||||
|
|
||||||
node-exporter:
|
node-exporter:
|
||||||
image: prom/node-exporter:latest
|
image: prom/node-exporter:latest
|
||||||
container_name: node-exporter
|
container_name: node-exporter
|
||||||
|
@ -3,7 +3,7 @@ apiVersion: 1
|
|||||||
providers:
|
providers:
|
||||||
- name: "Grafana"
|
- name: "Grafana"
|
||||||
org_id: 1
|
org_id: 1
|
||||||
folder: "Services"
|
folder: "Alpina"
|
||||||
type: "file"
|
type: "file"
|
||||||
options:
|
options:
|
||||||
path: "/etc/grafana/provisioning/dashboards"
|
path: "/etc/grafana/provisioning/dashboards"
|
||||||
|
@ -15,18 +15,6 @@ datasources:
|
|||||||
url: http://prometheus:9090
|
url: http://prometheus:9090
|
||||||
editable: false
|
editable: false
|
||||||
|
|
||||||
- name: Alertmanager
|
|
||||||
type: alertmanager
|
|
||||||
access: proxy
|
|
||||||
uid: alertmanager
|
|
||||||
url: http://alertmanager:9093
|
|
||||||
jsonData:
|
|
||||||
# Valid options for implementation include mimir, cortex and prometheus
|
|
||||||
implementation: prometheus
|
|
||||||
# Whether Grafana should send alert instances to this Alertmanager
|
|
||||||
handleGrafanaManagedAlerts: true
|
|
||||||
editable: false
|
|
||||||
|
|
||||||
- name: InfluxDB
|
- name: InfluxDB
|
||||||
type: influxdb
|
type: influxdb
|
||||||
access: proxy
|
access: proxy
|
||||||
|
@ -26,5 +26,5 @@ schema_config:
|
|||||||
store: tsdb
|
store: tsdb
|
||||||
|
|
||||||
# TODO: Figure this out
|
# TODO: Figure this out
|
||||||
ruler:
|
# ruler:
|
||||||
alertmanager_url: http://localhost:9093
|
# alertmanager_url: http://localhost:9093
|
||||||
|
@ -1,23 +0,0 @@
|
|||||||
groups:
|
|
||||||
- name: qbit-low-traffic
|
|
||||||
interval: 1m
|
|
||||||
rules:
|
|
||||||
- alert: QbitLowTraffic
|
|
||||||
expr: |
|
|
||||||
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
title: 'Low traffic on qBit'
|
|
||||||
description: |
|
|
||||||
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
|
||||||
|
|
||||||
Last value was x bytes/s.
|
|
||||||
|
|
||||||
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
|
||||||
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
|
||||||
|
|
||||||
__dashboard__uid: 'containers'
|
|
||||||
__orgId__: 1
|
|
||||||
__panelId__: 3
|
|
@ -1,20 +0,0 @@
|
|||||||
groups:
|
|
||||||
- name: demo-service-alerts
|
|
||||||
rules:
|
|
||||||
- alert: DemoServiceHighErrorRate
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
sum without(status, instance) (
|
|
||||||
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum without(status, instance) (
|
|
||||||
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
|
|
||||||
) * 100 > 0.5
|
|
||||||
)
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
|
|
||||||
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'
|
|
@ -5,11 +5,6 @@ global:
|
|||||||
external_labels:
|
external_labels:
|
||||||
monitor: "{{ ansible_host }}"
|
monitor: "{{ ansible_host }}"
|
||||||
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- static_configs:
|
|
||||||
- targets: ["alertmanager:9093"]
|
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
- job_name: "prometheus"
|
- job_name: "prometheus"
|
||||||
static_configs:
|
static_configs:
|
||||||
@ -43,7 +38,6 @@ scrape_configs:
|
|||||||
- 'demo.promlabs.com:10002'
|
- 'demo.promlabs.com:10002'
|
||||||
|
|
||||||
rule_files:
|
rule_files:
|
||||||
{# - "/etc/prometheus/container.alerts.yml"#}
|
|
||||||
- "/etc/prometheus/extra/rules/*.yml"
|
- "/etc/prometheus/extra/rules/*.yml"
|
||||||
- "/etc/prometheus/extra/rules/*.json"
|
- "/etc/prometheus/extra/rules/*.json"
|
||||||
|
|
||||||
|
10
services.yml
10
services.yml
@ -5,11 +5,11 @@
|
|||||||
post_tasks:
|
post_tasks:
|
||||||
- name: Docker prune objects
|
- name: Docker prune objects
|
||||||
docker_prune:
|
docker_prune:
|
||||||
containers: yes
|
containers: true
|
||||||
images: yes
|
# Keep images for building grafana
|
||||||
|
images: true
|
||||||
images_filters:
|
images_filters:
|
||||||
dangling: false
|
until: "720h"
|
||||||
networks: true
|
networks: true
|
||||||
volumes: true
|
volumes: true
|
||||||
builder_cache: true
|
builder_cache: false
|
||||||
when: false
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user