Compare commits
11 Commits
feature/mo
...
531febffe2
| Author | SHA1 | Date | |
|---|---|---|---|
|
531febffe2
|
|||
|
929a6619c8
|
|||
|
e8fe6adca3
|
|||
|
e6cdae8e6b
|
|||
|
6a22c3f815
|
|||
|
f9b0288038
|
|||
|
d8bef9a23d
|
|||
|
581346f019
|
|||
|
6504100cd5
|
|||
|
dd0330c85a
|
|||
|
40fbdc414e
|
2
.idea/alpina.iml
generated
2
.idea/alpina.iml
generated
@@ -4,7 +4,7 @@
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Poetry (alpina) (4)" jdkType="Python SDK" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyDocumentationSettings">
|
||||
|
||||
16
.idea/jsonSchemas.xml
generated
16
.idea/jsonSchemas.xml
generated
@@ -39,6 +39,22 @@
|
||||
</SchemaInfo>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="Loki">
|
||||
<value>
|
||||
<SchemaInfo>
|
||||
<option name="name" value="Loki" />
|
||||
<option name="relativePathToSchema" value="https://json.schemastore.org/loki.json" />
|
||||
<option name="applicationDefined" value="true" />
|
||||
<option name="patterns">
|
||||
<list>
|
||||
<Item>
|
||||
<option name="path" value="roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2" />
|
||||
</Item>
|
||||
</list>
|
||||
</option>
|
||||
</SchemaInfo>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="Traefik v2">
|
||||
<value>
|
||||
<SchemaInfo>
|
||||
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
@@ -3,5 +3,5 @@
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Poetry (alpina) (2)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina) (4)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
16
README.md
16
README.md
@@ -8,6 +8,22 @@ running on top of TrueNAS SCALE, separating all the docker stuff from the applia
|
||||
|
||||
# Notes
|
||||
|
||||
## Monitoring
|
||||
The monitoring stack is set up to monitor all the containers and the host.
|
||||
|
||||
This is a work in progress, Grafana is set up with grafanalib, a Python library that generates Grafana dashboards.
|
||||
The dashboards are generated from Python scripts in
|
||||
[grafana_config/dashboards](roles/alpina/templates/services/monitoring/grafana_config/dashboards).
|
||||
|
||||
This requires a custom grafana image, which is built from the
|
||||
[Dockerfile](roles/alpina/templates/services/monitoring/Dockerfile).
|
||||
|
||||
This also means it has to be manually rebuilt whenever the dashboards are updated.
|
||||
From the services/monitoring directory, run:
|
||||
```bash
|
||||
docker compose up -d --build --force-recreate grafana
|
||||
```
|
||||
|
||||
## IPv6
|
||||
The current configuration is designed to work with IPv6.
|
||||
However, because of how (not properly) I'm doing the subnetting
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}
|
||||
@@ -4,6 +4,10 @@ RUN pip install grafanalib
|
||||
|
||||
COPY ./grafana_config/dashboards /dashboards
|
||||
|
||||
# Required for grafanalib to find the shared python files like common.py
|
||||
# https://github.com/weaveworks/grafanalib/issues/58
|
||||
ENV PYTHONPATH=/dashboards
|
||||
|
||||
RUN generate-dashboards /dashboards/*.dashboard.py
|
||||
|
||||
FROM grafana/grafana:latest
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
# The root route on which each incoming alert enters.
|
||||
route:
|
||||
group_by: ["alertname", "job"]
|
||||
group_wait: 20s
|
||||
group_interval: 5m
|
||||
repeat_interval: 3h
|
||||
receiver: discord_webhook
|
||||
|
||||
receivers:
|
||||
- name: "discord_webhook"
|
||||
discord_configs:
|
||||
- webhook_url: "{{ alertmanager_discord_webhook }}"
|
||||
{# - send_resolved: true#}
|
||||
{# username: 'Alertmanager'#}
|
||||
{# webhook_configs:#}
|
||||
{# - send_resolved: true#}
|
||||
{# url: '{{ alertmanager_discord_webhook }}'#}
|
||||
{# username: 'Alertmanager'#}
|
||||
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
|
||||
{# icon_emoji: ':alert:'#}
|
||||
{# send_resolved: true#}
|
||||
{# text: "{{ .CommonAnnotations.summary }}"#}
|
||||
{# title: "{{ .CommonLabels.alertname }}"#}
|
||||
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
|
||||
{# footer: '{{ .CommonLabels.monitor }}'#}
|
||||
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
|
||||
{# actions:#}
|
||||
{# - type: 'button'#}
|
||||
{# text: 'Open in Grafana'#}
|
||||
{# url: '{{ .ExternalURL }}'#}
|
||||
{# style: 'primary'#}
|
||||
{# send_resolved: true#}
|
||||
{# confirm:#}
|
||||
{# title: 'Are you sure?'#}
|
||||
{# text: 'This will open Grafana in a new tab.'#}
|
||||
{# ok_text: 'Yes'#}
|
||||
{# dismiss_text: 'No'#}
|
||||
{# fields:#}
|
||||
{# - title: 'Description'#}
|
||||
{# value: "{{ .CommonAnnotations.description }}"#}
|
||||
{# short: false#}
|
||||
{# - title: 'Details'#}
|
||||
{# value: "{{ .CommonAnnotations.details }}"#}
|
||||
{# short: false#}
|
||||
{# - title: 'Severity'#}
|
||||
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Host'#}
|
||||
{# value: '{{ .CommonLabels.monitor }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Starts At'#}
|
||||
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Ends At'#}
|
||||
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Runbook'#}
|
||||
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Dashboard'#}
|
||||
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Alerting Rule'#}
|
||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
||||
{# short: true#}
|
||||
{# - title: 'Alerting Rule Description'#}
|
||||
{# value: '{{ .CommonLabels.alertname }}'#}
|
||||
{# short: true#}
|
||||
@@ -60,8 +60,6 @@ services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
# Needed to make config files readable (not anymore, TODO: remove)
|
||||
user: "{{ remote_uid }}"
|
||||
@@ -74,19 +72,6 @@ services:
|
||||
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
|
||||
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:latest
|
||||
container_name: alertmanager
|
||||
labels:
|
||||
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||
- --web.external-url=https://alert.{{ domain }}/
|
||||
volumes:
|
||||
- ./alertmanager_config:/etc/alertmanager:ro
|
||||
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
container_name: node-exporter
|
||||
@@ -100,6 +85,11 @@ services:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
container_name: cadvisor
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --docker_only=true
|
||||
- --store_container_labels=false
|
||||
- --whitelisted_container_labels=com.docker.compose.project,com.docker.compose.service
|
||||
- --enable_metrics=cpu,cpuLoad,diskIO,memory,network,oom_event,process
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:rw
|
||||
|
||||
@@ -3,7 +3,7 @@ apiVersion: 1
|
||||
providers:
|
||||
- name: "Grafana"
|
||||
org_id: 1
|
||||
folder: "Services"
|
||||
folder: "Alpina"
|
||||
type: "file"
|
||||
options:
|
||||
path: "/etc/grafana/provisioning/dashboards"
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
from grafanalib.core import Template
|
||||
|
||||
# TODO: consider default params for common params like line width, show points, tooltip
|
||||
|
||||
PrometheusTemplate = Template(
|
||||
name='datasource',
|
||||
type='datasource',
|
||||
label='Prometheus',
|
||||
query='prometheus',
|
||||
)
|
||||
|
||||
# TODO: this slightly less (clown emoji), normal Target gave me errors in grafana
|
||||
class LokiTarget(object):
|
||||
def __init__(self, loki_datasource, expr, legendFormat, refId):
|
||||
self.loki_datasource = loki_datasource
|
||||
self.expr = expr
|
||||
self.legendFormat = legendFormat
|
||||
self.refId = refId
|
||||
|
||||
def to_json_data(self):
|
||||
return {
|
||||
'datasource': self.loki_datasource,
|
||||
'expr': self.expr,
|
||||
'legendFormat': self.legendFormat,
|
||||
'refId': self.refId,
|
||||
'queryType': 'range',
|
||||
}
|
||||
@@ -5,28 +5,21 @@ from grafanalib.core import (
|
||||
)
|
||||
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
|
||||
|
||||
prom_datasource='prometheus'
|
||||
loki_datasource='loki'
|
||||
from common import LokiTarget, PrometheusTemplate
|
||||
|
||||
# TODO: this is (clown emoji), normal Target gave me errors in grafana
|
||||
class LokiTarget(object):
|
||||
def to_json_data(self):
|
||||
return {
|
||||
'datasource': loki_datasource,
|
||||
'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
|
||||
'legendFormat': '{{ container_name }}',
|
||||
'refId': 'A',
|
||||
'queryType': 'range',
|
||||
}
|
||||
prom_datasource='${datasource}'
|
||||
loki_datasource='loki'
|
||||
|
||||
dashboard = Dashboard(
|
||||
title='Containers',
|
||||
uid='containers',
|
||||
description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
|
||||
tags=[
|
||||
'example'
|
||||
'linux',
|
||||
'docker',
|
||||
],
|
||||
templating=Templating(list=[
|
||||
PrometheusTemplate,
|
||||
Template(
|
||||
name='compose_project',
|
||||
label='Compose Project',
|
||||
@@ -44,7 +37,6 @@ dashboard = Dashboard(
|
||||
includeAll=True,
|
||||
multi=True,
|
||||
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
|
||||
|
||||
),
|
||||
Template(
|
||||
name='logs_query',
|
||||
@@ -56,7 +48,6 @@ dashboard = Dashboard(
|
||||
timezone='browser',
|
||||
panels=[
|
||||
TimeSeries(
|
||||
id=1,
|
||||
title='Container Memory Usage',
|
||||
unit=BYTES_IEC,
|
||||
gridPos=GridPos(h=8, w=12, x=0, y=0),
|
||||
@@ -76,13 +67,14 @@ dashboard = Dashboard(
|
||||
],
|
||||
),
|
||||
TimeSeries(
|
||||
id=2,
|
||||
title='Container CPU Usage',
|
||||
unit=SECONDS,
|
||||
gridPos=GridPos(h=8, w=12, x=12, y=0),
|
||||
lineWidth=2,
|
||||
fillOpacity=10,
|
||||
showPoints='never',
|
||||
tooltipMode='all',
|
||||
tooltipSort='desc',
|
||||
targets=[
|
||||
Target(
|
||||
datasource=prom_datasource,
|
||||
@@ -93,7 +85,6 @@ dashboard = Dashboard(
|
||||
],
|
||||
),
|
||||
TimeSeries(
|
||||
id=3,
|
||||
title='Container Network Traffic',
|
||||
unit=BYTES_SEC_IEC,
|
||||
gridPos=GridPos(h=8, w=12, x=0, y=8),
|
||||
@@ -118,7 +109,6 @@ dashboard = Dashboard(
|
||||
],
|
||||
),
|
||||
Logs(
|
||||
id=4,
|
||||
title='',
|
||||
gridPos=GridPos(h=8, w=12, x=12, y=8),
|
||||
showLabels=True,
|
||||
@@ -127,13 +117,12 @@ dashboard = Dashboard(
|
||||
prettifyLogMessage=True,
|
||||
dedupStrategy='numbers',
|
||||
targets=[
|
||||
LokiTarget(),
|
||||
# Target(
|
||||
# datasource=loki_datasource,
|
||||
# expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
|
||||
# legendFormat='{{ container_name }}',
|
||||
# refId='A',
|
||||
# ),
|
||||
LokiTarget(
|
||||
loki_datasource=loki_datasource,
|
||||
expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
|
||||
legendFormat='{{ container_name }}',
|
||||
refId='A',
|
||||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target
|
||||
from grafanalib.formatunits import BYTES_IEC
|
||||
|
||||
from common import PrometheusTemplate
|
||||
from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS
|
||||
|
||||
dashboard = Dashboard(
|
||||
title='Node Exporter',
|
||||
uid='node',
|
||||
description='Node Exporter (not quite full)',
|
||||
tags=[
|
||||
'linux',
|
||||
],
|
||||
timezone='browser',
|
||||
templating=Templating(list=[
|
||||
# Datasource
|
||||
PrometheusTemplate,
|
||||
# Job
|
||||
Template(
|
||||
name='job',
|
||||
label='Job',
|
||||
dataSource='${datasource}',
|
||||
query='label_values(node_uname_info, job)',
|
||||
),
|
||||
# Instance
|
||||
Template(
|
||||
name='instance',
|
||||
label='Instance',
|
||||
dataSource='${datasource}',
|
||||
query='label_values(node_uname_info{job="$job"}, instance)',
|
||||
),
|
||||
]),
|
||||
panels=[
|
||||
# CPU Basic
|
||||
TimeSeries(
|
||||
title='CPU Basic',
|
||||
description='Basic CPU usage info',
|
||||
unit=PERCENT_UNIT_FORMAT,
|
||||
gridPos=GridPos(h=8, w=12, x=0, y=0),
|
||||
lineWidth=1,
|
||||
fillOpacity=30,
|
||||
showPoints='never',
|
||||
stacking={'mode': 'percent', 'group': 'A'},
|
||||
tooltipMode='all',
|
||||
tooltipSort='desc',
|
||||
targets=[
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Busy System',
|
||||
refId='A',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Busy User',
|
||||
refId='B',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Busy Iowait',
|
||||
refId='C',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Busy IRQs',
|
||||
refId='D',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Busy Other',
|
||||
refId='E',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
|
||||
legendFormat='Idle',
|
||||
refId='F',
|
||||
),
|
||||
],
|
||||
# Extra JSON for the colors
|
||||
extraJson=CPU_BASIC_COLORS,
|
||||
),
|
||||
# Memory Basic
|
||||
TimeSeries(
|
||||
title='Memory Basic',
|
||||
description='Basic memory usage',
|
||||
unit=BYTES_IEC,
|
||||
gridPos=GridPos(h=8, w=12, x=12, y=0),
|
||||
lineWidth=1,
|
||||
fillOpacity=30,
|
||||
showPoints='never',
|
||||
stacking={'mode': 'normal', 'group': 'A'},
|
||||
tooltipMode='all',
|
||||
tooltipSort='desc',
|
||||
targets=[
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"}',
|
||||
format='time_series',
|
||||
legendFormat='RAM Total',
|
||||
refId='A',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})',
|
||||
format='time_series',
|
||||
legendFormat='RAM Used',
|
||||
refId='B',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}',
|
||||
legendFormat='RAM Cache + Buffer',
|
||||
refId='C',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='node_memory_MemFree_bytes{instance="$instance",job="$job"}',
|
||||
legendFormat='RAM Free',
|
||||
refId='D',
|
||||
),
|
||||
Target(
|
||||
datasource='${datasource}',
|
||||
expr='(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})',
|
||||
legendFormat='SWAP Used',
|
||||
refId='E',
|
||||
),
|
||||
],
|
||||
# Extra JSON for the colors
|
||||
extraJson=MEMORY_BASIC_COLORS,
|
||||
),
|
||||
# TODO: Network Basic
|
||||
# TODO: Disk Basic
|
||||
],
|
||||
).auto_panel_ids()
|
||||
@@ -0,0 +1,487 @@
|
||||
# TODO: Question life decisions (I'm not sure if this is good)
|
||||
|
||||
CPU_BASIC_COLORS = {
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Busy Iowait"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#890F02",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Idle"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#052B51",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Busy Iowait"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#890F02",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Idle"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#7EB26D",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Busy System"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#EAB839",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Busy User"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#0A437C",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Busy Other"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#6D1F62",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
MEMORY_BASIC_COLORS = {
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Apps"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#629E51",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Buffers"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#614D93",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Cache"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#6D1F62",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Cached"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#511749",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Committed"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#508642",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#0A437C",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#CFFAFF",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Inactive"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#584477",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "PageTables"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#0A50A1",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Page_Tables"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#0A50A1",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "RAM_Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#E0F9D7",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "SWAP Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#BF1B00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Slab"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#806EB7",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Slab_Cache"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#E0752D",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Swap"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#BF1B00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Swap Used"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#BF1B00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Swap_Cache"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#C15C17",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Swap_Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#2F575E",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Unused"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#EAB839",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "RAM Total"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#E0F9D7",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.fillOpacity",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"id": "custom.stacking",
|
||||
"value": {
|
||||
"group": False,
|
||||
"mode": "normal"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "RAM Cache + Buffer"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#052B51",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "RAM Free"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#7EB26D",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Available"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#DEDAF7",
|
||||
"mode": "fixed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.fillOpacity",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"id": "custom.stacking",
|
||||
"value": {
|
||||
"group": False,
|
||||
"mode": "normal"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -15,18 +15,6 @@ datasources:
|
||||
url: http://prometheus:9090
|
||||
editable: false
|
||||
|
||||
- name: Alertmanager
|
||||
type: alertmanager
|
||||
access: proxy
|
||||
uid: alertmanager
|
||||
url: http://alertmanager:9093
|
||||
jsonData:
|
||||
# Valid options for implementation include mimir, cortex and prometheus
|
||||
implementation: prometheus
|
||||
# Whether Grafana should send alert instances to this Alertmanager
|
||||
handleGrafanaManagedAlerts: true
|
||||
editable: false
|
||||
|
||||
- name: InfluxDB
|
||||
type: influxdb
|
||||
access: proxy
|
||||
|
||||
@@ -17,13 +17,6 @@ common:
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-10-24
|
||||
store: boltdb-shipper
|
||||
object_store: filesystem
|
||||
schema: v12
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
- from: 2024-10-18
|
||||
index:
|
||||
period: 24h
|
||||
@@ -33,5 +26,5 @@ schema_config:
|
||||
store: tsdb
|
||||
|
||||
# TODO: Figure this out
|
||||
ruler:
|
||||
alertmanager_url: http://localhost:9093
|
||||
# ruler:
|
||||
# alertmanager_url: http://localhost:9093
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
groups:
|
||||
- name: qbit-low-traffic
|
||||
interval: 1m
|
||||
rules:
|
||||
- alert: QbitLowTraffic
|
||||
expr: |
|
||||
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
title: 'Low traffic on qBit'
|
||||
description: |
|
||||
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
|
||||
|
||||
Last value was x bytes/s.
|
||||
|
||||
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
|
||||
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
|
||||
|
||||
__dashboard__uid: 'containers'
|
||||
__orgId__: 1
|
||||
__panelId__: 3
|
||||
@@ -1,20 +0,0 @@
|
||||
groups:
|
||||
- name: demo-service-alerts
|
||||
rules:
|
||||
- alert: DemoServiceHighErrorRate
|
||||
expr: |
|
||||
(
|
||||
sum without(status, instance) (
|
||||
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
|
||||
)
|
||||
/
|
||||
sum without(status, instance) (
|
||||
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
|
||||
) * 100 > 0.5
|
||||
)
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
|
||||
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'
|
||||
@@ -5,11 +5,6 @@ global:
|
||||
external_labels:
|
||||
monitor: "{{ ansible_host }}"
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: ["alertmanager:9093"]
|
||||
|
||||
scrape_configs:
|
||||
- job_name: "prometheus"
|
||||
static_configs:
|
||||
@@ -43,7 +38,6 @@ scrape_configs:
|
||||
- 'demo.promlabs.com:10002'
|
||||
|
||||
rule_files:
|
||||
- "/etc/prometheus/container.alerts.yml"
|
||||
- "/etc/prometheus/extra/rules/*.yml"
|
||||
- "/etc/prometheus/extra/rules/*.json"
|
||||
|
||||
|
||||
@@ -5,10 +5,11 @@
|
||||
post_tasks:
|
||||
- name: Docker prune objects
|
||||
docker_prune:
|
||||
containers: yes
|
||||
images: yes
|
||||
containers: true
|
||||
# Keep images for building grafana
|
||||
images: true
|
||||
images_filters:
|
||||
dangling: false
|
||||
until: "720h"
|
||||
networks: true
|
||||
volumes: true
|
||||
builder_cache: true
|
||||
builder_cache: false
|
||||
|
||||
Reference in New Issue
Block a user