monitoring: prepare for merge to master

This commit is contained in:
Yuri Tatishchev 2024-10-30 15:27:03 -07:00
parent 929a6619c8
commit 531febffe2
Signed by: CaZzzer
GPG Key ID: 28BE602058C08557
10 changed files with 8 additions and 153 deletions

View File

@ -1 +0,0 @@
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}

View File

@ -1,68 +0,0 @@
# The root route on which each incoming alert enters.
route:
group_by: ["alertname", "job"]
group_wait: 20s
group_interval: 5m
repeat_interval: 3h
receiver: discord_webhook
receivers:
- name: "discord_webhook"
discord_configs:
- webhook_url: "{{ alertmanager_discord_webhook }}"
{# - send_resolved: true#}
{# username: 'Alertmanager'#}
{# webhook_configs:#}
{# - send_resolved: true#}
{# url: '{{ alertmanager_discord_webhook }}'#}
{# username: 'Alertmanager'#}
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
{# icon_emoji: ':alert:'#}
{# send_resolved: true#}
{# text: "{{ .CommonAnnotations.summary }}"#}
{# title: "{{ .CommonLabels.alertname }}"#}
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
{# footer: '{{ .CommonLabels.monitor }}'#}
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
{# actions:#}
{# - type: 'button'#}
{# text: 'Open in Grafana'#}
{# url: '{{ .ExternalURL }}'#}
{# style: 'primary'#}
{# send_resolved: true#}
{# confirm:#}
{# title: 'Are you sure?'#}
{# text: 'This will open Grafana in a new tab.'#}
{# ok_text: 'Yes'#}
{# dismiss_text: 'No'#}
{# fields:#}
{# - title: 'Description'#}
{# value: "{{ .CommonAnnotations.description }}"#}
{# short: false#}
{# - title: 'Details'#}
{# value: "{{ .CommonAnnotations.details }}"#}
{# short: false#}
{# - title: 'Severity'#}
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
{# short: true#}
{# - title: 'Host'#}
{# value: '{{ .CommonLabels.monitor }}'#}
{# short: true#}
{# - title: 'Starts At'#}
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Ends At'#}
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Runbook'#}
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
{# short: true#}
{# - title: 'Dashboard'#}
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
{# short: true#}
{# - title: 'Alerting Rule'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}
{# - title: 'Alerting Rule Description'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}

View File

@ -60,8 +60,6 @@ services:
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
container_name: prometheus container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove) # Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}" user: "{{ remote_uid }}"
@ -74,19 +72,6 @@ services:
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro - {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
- {{ base_volume_path }}/monitoring/prometheus:/prometheus - {{ base_volume_path }}/monitoring/prometheus:/prometheus
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes:
- ./alertmanager_config:/etc/alertmanager:ro
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter: node-exporter:
image: prom/node-exporter:latest image: prom/node-exporter:latest
container_name: node-exporter container_name: node-exporter

View File

@ -3,7 +3,7 @@ apiVersion: 1
providers: providers:
- name: "Grafana" - name: "Grafana"
org_id: 1 org_id: 1
folder: "Services" folder: "Alpina"
type: "file" type: "file"
options: options:
path: "/etc/grafana/provisioning/dashboards" path: "/etc/grafana/provisioning/dashboards"

View File

@ -15,18 +15,6 @@ datasources:
url: http://prometheus:9090 url: http://prometheus:9090
editable: false editable: false
- name: Alertmanager
type: alertmanager
access: proxy
uid: alertmanager
url: http://alertmanager:9093
jsonData:
# Valid options for implementation include mimir, cortex and prometheus
implementation: prometheus
# Whether Grafana should send alert instances to this Alertmanager
handleGrafanaManagedAlerts: true
editable: false
- name: InfluxDB - name: InfluxDB
type: influxdb type: influxdb
access: proxy access: proxy

View File

@ -26,5 +26,5 @@ schema_config:
store: tsdb store: tsdb
# TODO: Figure this out # TODO: Figure this out
ruler: # ruler:
alertmanager_url: http://localhost:9093 # alertmanager_url: http://localhost:9093

View File

@ -1,23 +0,0 @@
groups:
- name: qbit-low-traffic
interval: 1m
rules:
- alert: QbitLowTraffic
expr: |
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
for: 2m
labels:
severity: warning
annotations:
title: 'Low traffic on qBit'
description: |
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
Last value was x bytes/s.
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
__dashboard__uid: 'containers'
__orgId__: 1
__panelId__: 3

View File

@ -1,20 +0,0 @@
groups:
- name: demo-service-alerts
rules:
- alert: DemoServiceHighErrorRate
expr: |
(
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
)
/
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
) * 100 > 0.5
)
for: 1m
labels:
severity: critical
annotations:
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'

View File

@ -5,11 +5,6 @@ global:
external_labels: external_labels:
monitor: "{{ ansible_host }}" monitor: "{{ ansible_host }}"
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:9093"]
scrape_configs: scrape_configs:
- job_name: "prometheus" - job_name: "prometheus"
static_configs: static_configs:
@ -43,7 +38,6 @@ scrape_configs:
- 'demo.promlabs.com:10002' - 'demo.promlabs.com:10002'
rule_files: rule_files:
{# - "/etc/prometheus/container.alerts.yml"#}
- "/etc/prometheus/extra/rules/*.yml" - "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json" - "/etc/prometheus/extra/rules/*.json"

View File

@ -5,11 +5,11 @@
post_tasks: post_tasks:
- name: Docker prune objects - name: Docker prune objects
docker_prune: docker_prune:
containers: yes containers: true
images: yes # Keep images for building grafana
images: true
images_filters: images_filters:
dangling: false until: "720h"
networks: true networks: true
volumes: true volumes: true
builder_cache: true builder_cache: false
when: false