diff --git a/roles/alpina/templates/services/monitoring/.env.alertmanager.j2 b/roles/alpina/templates/services/monitoring/.env.alertmanager.j2 deleted file mode 100644 index 006fa49..0000000 --- a/roles/alpina/templates/services/monitoring/.env.alertmanager.j2 +++ /dev/null @@ -1 +0,0 @@ -DISCORD_WEBHOOK={{ alertmanager_discord_webhook }} diff --git a/roles/alpina/templates/services/monitoring/alertmanager_config/alertmanager.yml.j2 b/roles/alpina/templates/services/monitoring/alertmanager_config/alertmanager.yml.j2 deleted file mode 100644 index f9de64a..0000000 --- a/roles/alpina/templates/services/monitoring/alertmanager_config/alertmanager.yml.j2 +++ /dev/null @@ -1,68 +0,0 @@ -# The root route on which each incoming alert enters. -route: - group_by: ["alertname", "job"] - group_wait: 20s - group_interval: 5m - repeat_interval: 3h - receiver: discord_webhook - -receivers: - - name: "discord_webhook" - discord_configs: - - webhook_url: "{{ alertmanager_discord_webhook }}" -{# - send_resolved: true#} -{# username: 'Alertmanager'#} -{# webhook_configs:#} -{# - send_resolved: true#} -{# url: '{{ alertmanager_discord_webhook }}'#} -{# username: 'Alertmanager'#} -{# icon_url: 'https://prometheus.io/assets/icon.png'#} -{# icon_emoji: ':alert:'#} -{# send_resolved: true#} -{# text: "{{ .CommonAnnotations.summary }}"#} -{# title: "{{ .CommonLabels.alertname }}"#} -{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#} -{# footer: '{{ .CommonLabels.monitor }}'#} -{# footer_icon: 'https://prometheus.io/assets/icon.png'#} -{# actions:#} -{# - type: 'button'#} -{# text: 'Open in Grafana'#} -{# url: '{{ .ExternalURL }}'#} -{# style: 'primary'#} -{# send_resolved: true#} -{# confirm:#} -{# title: 'Are you sure?'#} -{# text: 'This will open Grafana in a new tab.'#} -{# ok_text: 'Yes'#} -{# dismiss_text: 'No'#} -{# fields:#} -{# - title: 'Description'#} -{# value: "{{ .CommonAnnotations.description }}"#} -{# short: false#} -{# - title: 'Details'#} -{# value: "{{ .CommonAnnotations.details }}"#} -{# short: false#} -{# - title: 'Severity'#} -{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#} -{# short: true#} -{# - title: 'Host'#} -{# value: '{{ .CommonLabels.monitor }}'#} -{# short: true#} -{# - title: 'Starts At'#} -{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#} -{# short: true#} -{# - title: 'Ends At'#} -{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#} -{# short: true#} -{# - title: 'Runbook'#} -{# value: '{{ .CommonAnnotations.runbook_url }}'#} -{# short: true#} -{# - title: 'Dashboard'#} -{# value: '{{ .CommonAnnotations.dashboard_url }}'#} -{# short: true#} -{# - title: 'Alerting Rule'#} -{# value: '{{ .CommonLabels.alertname }}'#} -{# short: true#} -{# - title: 'Alerting Rule Description'#} -{# value: '{{ .CommonLabels.alertname }}'#} -{# short: true#} diff --git a/roles/alpina/templates/services/monitoring/docker-compose.yml.j2 b/roles/alpina/templates/services/monitoring/docker-compose.yml.j2 index 44751ae..81f6e19 100644 --- a/roles/alpina/templates/services/monitoring/docker-compose.yml.j2 +++ b/roles/alpina/templates/services/monitoring/docker-compose.yml.j2 @@ -60,8 +60,6 @@ services: prometheus: image: prom/prometheus:latest container_name: prometheus - labels: - - {{ helpers.traefik_labels('prom', port='9090') | indent(6) }} restart: unless-stopped # Needed to make config files readable (not anymore, TODO: remove) user: "{{ remote_uid }}" @@ -74,19 +72,6 @@ services: - {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro - {{ base_volume_path }}/monitoring/prometheus:/prometheus - alertmanager: - image: prom/alertmanager:latest - container_name: alertmanager - labels: - - {{ helpers.traefik_labels('alert', port='9093') | indent(6) }} - restart: unless-stopped - command: - - --config.file=/etc/alertmanager/alertmanager.yml - - --web.external-url=https://alert.{{ domain }}/ - volumes: - - ./alertmanager_config:/etc/alertmanager:ro - - {{ base_volume_path }}/monitoring/alertmanager:/alertmanager - node-exporter: image: prom/node-exporter:latest container_name: node-exporter diff --git a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/alpina.yaml b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/alpina.yaml index 57265b2..b6f74c9 100644 --- a/roles/alpina/templates/services/monitoring/grafana_config/dashboards/alpina.yaml +++ b/roles/alpina/templates/services/monitoring/grafana_config/dashboards/alpina.yaml @@ -3,7 +3,7 @@ apiVersion: 1 providers: - name: "Grafana" org_id: 1 - folder: "Services" + folder: "Alpina" type: "file" options: path: "/etc/grafana/provisioning/dashboards" diff --git a/roles/alpina/templates/services/monitoring/grafana_config/datasources/alpina.yaml.j2 b/roles/alpina/templates/services/monitoring/grafana_config/datasources/alpina.yaml.j2 index 3285752..2ce42bf 100644 --- a/roles/alpina/templates/services/monitoring/grafana_config/datasources/alpina.yaml.j2 +++ b/roles/alpina/templates/services/monitoring/grafana_config/datasources/alpina.yaml.j2 @@ -15,18 +15,6 @@ datasources: url: http://prometheus:9090 editable: false - - name: Alertmanager - type: alertmanager - access: proxy - uid: alertmanager - url: http://alertmanager:9093 - jsonData: - # Valid options for implementation include mimir, cortex and prometheus - implementation: prometheus - # Whether Grafana should send alert instances to this Alertmanager - handleGrafanaManagedAlerts: true - editable: false - - name: InfluxDB type: influxdb access: proxy diff --git a/roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2 b/roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2 index 8b50e93..ae75987 100644 --- a/roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2 +++ b/roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2 @@ -26,5 +26,5 @@ schema_config: store: tsdb # TODO: Figure this out -ruler: - alertmanager_url: http://localhost:9093 +# ruler: +# alertmanager_url: http://localhost:9093 diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml b/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml deleted file mode 100644 index 0a52f6b..0000000 --- a/roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml +++ /dev/null @@ -1,23 +0,0 @@ -groups: - - name: qbit-low-traffic - interval: 1m - rules: - - alert: QbitLowTraffic - expr: | - rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024 - for: 2m - labels: - severity: warning - annotations: - title: 'Low traffic on qBit' - description: | - The traffic on qBittorrent is lower than 1KiB/s for 2 minutes. - - Last value was x bytes/s. - - [Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1) - [View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3) - - __dashboard__uid: 'containers' - __orgId__: 1 - __panelId__: 3 diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/demo.alerts.yml.j2 b/roles/alpina/templates/services/monitoring/prometheus_config/demo.alerts.yml.j2 deleted file mode 100644 index 60ea5ee..0000000 --- a/roles/alpina/templates/services/monitoring/prometheus_config/demo.alerts.yml.j2 +++ /dev/null @@ -1,20 +0,0 @@ -groups: - - name: demo-service-alerts - rules: - - alert: DemoServiceHighErrorRate - expr: | - ( - sum without(status, instance) ( - rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m]) - ) - / - sum without(status, instance) ( - rate(demo_api_request_duration_seconds_count{job="demo"}[1m]) - ) * 100 > 0.5 - ) - for: 1m - labels: - severity: critical - annotations: - title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}' - description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.' diff --git a/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2 b/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2 index f25d93d..981c13e 100644 --- a/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2 +++ b/roles/alpina/templates/services/monitoring/prometheus_config/prometheus.yml.j2 @@ -5,11 +5,6 @@ global: external_labels: monitor: "{{ ansible_host }}" -alerting: - alertmanagers: - - static_configs: - - targets: ["alertmanager:9093"] - scrape_configs: - job_name: "prometheus" static_configs: @@ -43,7 +38,6 @@ scrape_configs: - 'demo.promlabs.com:10002' rule_files: -{# - "/etc/prometheus/container.alerts.yml"#} - "/etc/prometheus/extra/rules/*.yml" - "/etc/prometheus/extra/rules/*.json" diff --git a/services.yml b/services.yml index 772319b..4e9ff71 100644 --- a/services.yml +++ b/services.yml @@ -5,11 +5,11 @@ post_tasks: - name: Docker prune objects docker_prune: - containers: yes - images: yes + containers: true + # Keep images for building grafana + images: true images_filters: - dangling: false + until: "720h" networks: true volumes: true - builder_cache: true - when: false + builder_cache: false