12 Commits

21 changed files with 805 additions and 283 deletions

2
.idea/alpina.iml generated
View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Poetry (alpina) (4)" jdkType="Python SDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">

16
.idea/jsonSchemas.xml generated
View File

@@ -39,6 +39,22 @@
</SchemaInfo>
</value>
</entry>
<entry key="Loki">
<value>
<SchemaInfo>
<option name="name" value="Loki" />
<option name="relativePathToSchema" value="https://json.schemastore.org/loki.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
<entry key="Traefik v2">
<value>
<SchemaInfo>

2
.idea/misc.xml generated
View File

@@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Poetry (alpina) (2)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina) (4)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina)" project-jdk-type="Python SDK" />
</project>

View File

@@ -8,6 +8,22 @@ running on top of TrueNAS SCALE, separating all the docker stuff from the applia
# Notes
## Monitoring
The monitoring stack is set up to monitor all the containers and the host.
This is a work in progress. Grafana is set up with grafanalib, a Python library that generates Grafana dashboards.
The dashboards are generated from Python scripts in
[grafana_config/dashboards](roles/alpina/templates/services/monitoring/grafana_config/dashboards).
This requires a custom Grafana image, which is built from the
[Dockerfile](roles/alpina/templates/services/monitoring/Dockerfile).
This also means it has to be manually rebuilt whenever the dashboards are updated.
From the services/monitoring directory, run:
```bash
docker compose up -d --build --force-recreate grafana
```
## IPv6
The current configuration is designed to work with IPv6.
However, because of how (not properly) I'm doing the subnetting

View File

@@ -24,8 +24,6 @@ minio_password: "{{ vault_minio_password }}"
influxdb_admin_password: "{{ vault_influxdb_admin_password }}"
influxdb_admin_token: "{{ vault_influxdb_admin_token }}"
alertmanager_discord_webhook: "{{ vault_alertmanager_discord_webhook }}"
# Traefik
acme_email: "{{ vault_acme_email }}"
cloudflare_api_token: "{{ vault_cloudflare_api_token }}"

View File

@@ -1,96 +1,88 @@
$ANSIBLE_VAULT;1.1;AES256
32653863663065353431636364373163613536643238613961666561653663633530646165643766
3833323937353331313136633965393061616135366534660a333037383066303431623830313464
65346431633238666534373033663138353438313762326361666233353866663534363536643034
3636323439316261630a623262336331663431633266336235653034323234383566323963623365
32626363626164373536663464643632393761346137623866633237643038306265636362626561
61313634353634373530383061393364613461303132326335316566326436633635633131643433
31376539396639326464333233643933373737313064363262323639363964643862633035396161
35643037636535623966626131393538643432396536643365383736636262356135373434376433
32316361343330303431376234323632323932376635343964383733633761326639393966383039
35646131343034663962363335373661323065663764396631343461383661663738386163323633
36303464646532633235663662666663343238633465663334326463383133643239666634653739
35396130393961303230396236303766336666643930626161333338326137663235323066663032
33376564373563323635356233616264313663373534333636643236393866613062656338353864
66386132663362363832366661646462316139353132626662663934336530386534376538633235
62653131653835323261373435373631396466353738306362616266616532313435323633613933
61646132346536323632643865326234356535346566346532383162393265613931343962303463
31636334343736666434353835633734396465653862613234386431306463326134613931646232
32353535663133623434643866336165616232613662336533383432633338373763643337616637
38323237646461376433316164646366383438316639633162303739383263656265633364303565
36643339356136653332666230633939636264306431636562323864373037623138363739616561
37613364653737353638646564323439646138646536636564303866636233616264383466656439
33646232653061616437656162353036313834616162313936353533393833313432656534343363
35636638326236646163323463356634326534623165306461316530353936646162323435633862
64396464303363323837316162353734626663643962303534336637336632333463393734383532
66616534666466393333386337363238383432643764373864613461363766333932333862363332
61313364613031376334326635636432346532613462613265643462636436663963323862353733
38396261613332396633666130653262313234633132353264363266336231373535306532383661
65323530653531646339626537653433303332656535346639393466353133363833326236656231
33336265373463396135653730616266346331376461346433343464326238323034653330393732
36643432316662333633333036633761653031393433333338663633386264656535623534653463
36363565303333356361616539376532353066336137336134656465383364636361656664356439
65326334643631663665376530646433323439653864623964323363396561313663636538356536
63626336303862333364363166353437353163656238303765636662636137383337623563666264
66326633343230386638616438393436633431343264343231386563613935626430306337343533
66656366333332326131343661356236396430303832303834653530623639353036663436373862
61336437386338343965653563646664643438353232306231316564616462643236646239333062
38643461346639623964626438396631396139383332666130316635656530653136333662353566
36313261646330373963663032316662383137366436636534383366636362366435393036373264
34646537666462363531343335336638343038333633663862666163306662643634326533316561
61613235366233636530663462353066646530386265623534663336376364323237343936646134
31616563653864383565306439613932396562613835613562326264326535636630646666366335
36653631353961353933386236636534393636356334633336313333383238353838336335646630
63633365666530623562323634303935326362643762616532303531303139333565643835396163
36353130656365326435343130613234336637346461313639653133623933376163393935366266
66653337353732363038663164363663623266356366663637343466393836353965343730666362
38663636336265383331666666616535366334616431306164303738306436333364653765356662
37316433323563323431623164386337343563663538333435616333343433396236356363333262
61396664326234343136666331356465333233663135613839616334623033316362336162613731
38646530326538643337323838326563303130643934623939346635343331356531373235663937
62396530383365666439373632613633633233376139616138323033613135383330333132643839
65363833616337656662653462323436303531653635663739633366616532333761323238353764
39373836303735393165393435323139346661346135636138613731373165386533386333393364
32336265386334386338653734353565343733393931373436336233333031356531313739636666
61376234393631343236643137616631373564376132623534333939346162353662306661393438
32326566373934653463653737383131386431363664333535626361646637613632383132623533
32343465366562363765353366333330633631353936613930376631336538306230626632303966
31343936386535663165663066663862656439306363326337313561396132316338363930323632
33313061623534373338623931663934396339633564353533626639373837323832366132343538
63373862663137306665383732303863343564343830636233613139666631626532373938386663
35646331646462356639383964373732393866653963643832633661323430323430613330633364
35343262366362646165383032333236623863656264353964623136643631326135623538306261
37393839343331653665356131343063316232303963636462653238333466636334616435666463
65636662383930353238623130363834616137643830633261646338363435343839633565303562
37623231396163346464303464333962336261353634396236613132306464643764356265656137
32373263613964396430646332666235303634373431643939623963633334326135626565656662
30646166303732643562653166633232666635343665616665653566316632303861613861313333
38393636663137333231613239353661656338333536656563616237343234623031363535666637
61343662663965663161666436366630366432363733663537613064386130326466343366383232
32363662343561666665323565356163383932336361656132373263363239636666613461366339
31323264393866386239353333386161643330343262366666323533303737373163313262313766
61303638366263346232353134333431613730386431623235323537323962666133613939353762
63326361633630323937353163383930626336663365626532613031623532393932316138353335
32363262393764663135393466616639373965313238323935383531633434633038663437646662
31633265373937316533373332316132363061386133356231623230393739326464333761336338
38626234646164616265633061346239363164376532383834356435346232653065326362343363
39613532356166633133626563643238373661323937353635343464666339323561326136623366
62633637656462376136633963653263346565366563646533373431613761616231653739613537
32343332356435393635363837396463613165626337346235303363613764306132343539333836
63386633626332396339383165303166653334663239313066666632356165643161356262346230
32636365636364663466343939663538386439343336303537636230306263643534653339313538
31373165363962373337636138336561336638633762373363646139366339323031313664306534
30623130663037323839666166323162393065643535663866383062356330633137343239316436
32303132393739653363376138633430313832383165663366626436653033663637616664346632
63633439663734393236343265323533633639316133323336373064633138363266316135363335
31336637666331333139306537333565333064666433653730633430336261656665613263663937
64313230656333373838346439623061393164393239393934306336373063303934663334353532
31313637623466313835313566616161376230343532653561343364383133653736646338303631
36356164303630303433356332343630616465383831623036383833393330663566616333653161
63393361643266323336393962663263323338633634633033393762656139393665353630633637
39386462303731396261613961613238616237373332656361303139633763303837653765623464
64333565666532653864383861333433353731343161613231383836353966353636373762306132
35333536373939656638356333383135313231306433656536383933623634653263353434393238
32323037666135316337633465666335376332326633346665643333656139386465353134356636
36333434303538326135346539313734393939353163316666366438613133333464623732666438
663934323030303937623038343662646163
66313038633762313266633234323232303734353935383962356166316262303532666530653432
6639323962333630623362663535306136633937316666610a393739383862626234636235626563
39333239663065303536633839306530626132633136383236643430653037353032653938386565
6164623333306630620a663539643737393637653466643162383930376636653366333062346432
35633666303436313139626337316337636335393636343137616661363030373264386534303762
32646336313635386439623932636537613365633561306165396535393862363764326436666337
65393363343230656635666666613139336432613563383730363030643630643861393035393033
30383866366563663337333330333132646631613764303261616336326439386133386431666233
61663866653839623634313836306362383066653135333535643630343939323235613963393837
38623439353038643130646664373063656130333533633936643066363030313534373038646166
37633739353562303631663565626636393136636332313932396237393866343762386565626164
63393932656533366137353864343238646234346461323162653465326334333136303333313362
33646362613436643733643763623337386661346531666136623635303166633838353132386264
66386136616531633138316631663437306130626665626333386137356431326334323361383737
37393465626264386336373936323564333031356562393239623636616338626563323065316634
31313461343030323637363432666130306338353934633164316332613065303237326234666264
35383666636236393030353732663866363738613463633832313336356637613838336263343330
34343061386539633635323565613061373930326665636361316636363232626239343233376238
36653834646132333938373637346633613365626636303836646435306166316138666139626134
36356663333237646234393437396334663366396630393562303536323866376432643539646264
36343433633563396363623435323466386337313762643136316665663936366232366233666165
31656531643230363231313166343461373462663536333165633432326634623435623762616665
63343534396232303266303062666563636432323739656434653138366437303030393535343930
38326562336338663565343762313132303138383461353034326638376230623232313365363038
38663830336131306336336162383335616132666239343931643838376165643063383034363761
62633034396163306539383039316164333664316566376436653432383837363437653766643933
61333830653263306366306234653166636634333162386362333734626338333766363939643335
66613466626130363233393738663763636665656632336634333963636436643430613430316439
66333964636233613233393434633938333964376334346637303135346334653536393635393563
34316230353838626566326436323836373630356138323632383635663432336563316464633637
62316361323138653234616634633633326231626462313964326339353839663738656132393731
36353030633637373633376134373739646639623264346362363030383064323336346538356264
32373739616166633462623134353538363036323833393837393335396131316233373335356531
36613262303639633031306238376165366432653664613233663562326336303433646664616337
66356334633863383631376365383634326430623165383337336166326662623738383966663436
34343136326332626536653963656534336366376366326636353364333437383435633335313535
65343831333562616362373832666136633764303731323632333032376634636663626364373765
61306339306439666362656361653837313038616538646637383734386435646539303565366332
63366630303139623464323539303365393666333230646138393131336131626635333466333633
62393261313539363836643666343735653466616433396333326439663931663666333164643465
31393930343733313062643366643661313532636432616338666361623964303961363730643531
38613934646238666663336233356631323738663962336634623436613564616535623161303664
65366464636235356435333666643036316639646437376463366562346231386436663736633364
30316138393062363162353962313366323936323433396332383337326530326538653764336536
61646232313633626632643530636565376435343562663338613336353533666165613665323564
36396339383637643532633630613135653262306662383737353939636533646531656639643733
39353464336161623266646533633837373334336535663532646439316533393436333430653133
32353066386562653563313733343233303534396663656233656462636661356331346134306332
30633862643232333362353238633632336135623861383931653334333161623764333865613135
30633830636136306335646338613261616265653166393939306365306261313933363639363333
39303063633033336637306233326232373665623430386332333765326539653035336565313330
30343836336165313932626633326565356664393162363561326466623133313663616161383166
66646230383033336630363536623734653764333665383261663362633339356462626161373061
30316138653563616563303762663166366230313062626631623964323434663561303939333934
63313037386264653866373535643233666339663433616438363237613733633633363236396438
64353664333634396531346465623064626338613136666161666663323762333135316265326662
66376237316563333834383431323033366135383937383465666666613835303938393936303764
38636239303535326166363261386339356330366533323938333066386236396665356362383134
37323066666233633035666262366133396134633165633633386233633166313465623335626536
37333931666135333638306665633539613362646633623831386538646262383565396633323037
39306561666238643438616238356632633165343732663261663836623333356165343663613239
35303436633666376637366233323662613933313234646265633738663561386664333462656238
39393662633037663764663639396132636337323636633631353565616666663463393663373465
35373731643164373065623138346432396661613065303230386634393864336333356134366464
30653438653933323839326539613038326461623735393361346230333835326631356134376366
32626230643163393932356231623365653832333237353237303438616439323463376539333236
64663166306536353262613731373136633432376564636331396435313735616638306363643762
61623038633564356165353562336462396138313534393636653233373732343437383632313266
66343434633431383162633135313639656666386139306165343536333265623633373062363038
36326236366130303034633339626235353661316237613232333432336264373131376364323334
34343039623165663861363463323466333863333764663439366233636132656238363961623463
66363336633061363237623238383338323430616261303430313535396666636165356166363166
38376363396636643239636238666532396537623737623538383130623239666630376661303536
36613334663164303361366165653964323132393135376666646663323538653066326461333932
65663730626164636334626264393539623637313661383963663733383636363663386665386332
65363735646633613762343230653731646261653937633032383332653264643532386263333865
31373435313230346336616230306336643763613439666365303363613865313331366537316431
35613439643036663136303164626134646332333465383264353036353564333035633262303166
34393138343463646532323136623562386237376333636531626561393633376238393138303239
66386365303166383736323435336432383634616239353565623962333939373266376632333734
62356230323531316564316439376137346431636462303062333933303965616232313739643665
33653962333037306333363534313933666163393465306534653837303164346333333665353032
66336333656335353239356232383561663831323763376663666365383834353166373461383631
38666665343036353437323961636534303537386266303133356465633262393132333134663034
66303939393562633363373131303730663634303162396565656266613163646331333230306234
37366137323230613331623239383765383230633134306466633839363765633961626265356365
623166373834383030373932623664303765

View File

@@ -1 +0,0 @@
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}

View File

@@ -4,6 +4,10 @@ RUN pip install grafanalib
COPY ./grafana_config/dashboards /dashboards
# Required for grafanalib to find the shared python files like common.py
# https://github.com/weaveworks/grafanalib/issues/58
ENV PYTHONPATH=/dashboards
RUN generate-dashboards /dashboards/*.dashboard.py
FROM grafana/grafana:latest

View File

@@ -1,68 +0,0 @@
# The root route on which each incoming alert enters.
route:
group_by: ["alertname", "job"]
group_wait: 20s
group_interval: 5m
repeat_interval: 3h
receiver: discord_webhook
receivers:
- name: "discord_webhook"
discord_configs:
- webhook_url: "{{ alertmanager_discord_webhook }}"
{# - send_resolved: true#}
{# username: 'Alertmanager'#}
{# webhook_configs:#}
{# - send_resolved: true#}
{# url: '{{ alertmanager_discord_webhook }}'#}
{# username: 'Alertmanager'#}
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
{# icon_emoji: ':alert:'#}
{# send_resolved: true#}
{# text: "{{ .CommonAnnotations.summary }}"#}
{# title: "{{ .CommonLabels.alertname }}"#}
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
{# footer: '{{ .CommonLabels.monitor }}'#}
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
{# actions:#}
{# - type: 'button'#}
{# text: 'Open in Grafana'#}
{# url: '{{ .ExternalURL }}'#}
{# style: 'primary'#}
{# send_resolved: true#}
{# confirm:#}
{# title: 'Are you sure?'#}
{# text: 'This will open Grafana in a new tab.'#}
{# ok_text: 'Yes'#}
{# dismiss_text: 'No'#}
{# fields:#}
{# - title: 'Description'#}
{# value: "{{ .CommonAnnotations.description }}"#}
{# short: false#}
{# - title: 'Details'#}
{# value: "{{ .CommonAnnotations.details }}"#}
{# short: false#}
{# - title: 'Severity'#}
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
{# short: true#}
{# - title: 'Host'#}
{# value: '{{ .CommonLabels.monitor }}'#}
{# short: true#}
{# - title: 'Starts At'#}
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Ends At'#}
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Runbook'#}
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
{# short: true#}
{# - title: 'Dashboard'#}
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
{# short: true#}
{# - title: 'Alerting Rule'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}
{# - title: 'Alerting Rule Description'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}

View File

@@ -60,8 +60,6 @@ services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
@@ -74,19 +72,6 @@ services:
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes:
- ./alertmanager_config:/etc/alertmanager:ro
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter:
image: prom/node-exporter:latest
container_name: node-exporter
@@ -100,6 +85,11 @@ services:
image: gcr.io/cadvisor/cadvisor:latest
container_name: cadvisor
restart: unless-stopped
command:
- --docker_only=true
- --store_container_labels=false
- --whitelisted_container_labels=com.docker.compose.project,com.docker.compose.service
- --enable_metrics=cpu,cpuLoad,diskIO,memory,network,oom_event,process
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw

View File

@@ -3,7 +3,7 @@ apiVersion: 1
providers:
- name: "Grafana"
org_id: 1
folder: "Services"
folder: "Alpina"
type: "file"
options:
path: "/etc/grafana/provisioning/dashboards"

View File

@@ -0,0 +1,27 @@
from grafanalib.core import Template
# TODO: consider default params for common params like line width, show points, tooltip
# Shared dashboard variable named "datasource": a datasource-type template
# whose query is 'prometheus' and whose label is "Prometheus".
# Dashboards import this object and put it in their Templating list.
PrometheusTemplate = Template(
    type='datasource',
    query='prometheus',
    name='datasource',
    label='Prometheus',
)
# TODO: this is slightly less (clown emoji); the normal Target gave me errors in Grafana
class LokiTarget(object):
    """Hand-rolled query target for Loki panels.

    Holds the four values given to the constructor and serialises them,
    together with a fixed ``queryType`` of ``'range'``, through
    ``to_json_data``.

    Args:
        loki_datasource: value emitted under the ``datasource`` key.
        expr: query expression emitted under the ``expr`` key.
        legendFormat: value emitted under the ``legendFormat`` key.
        refId: value emitted under the ``refId`` key.
    """

    def __init__(self, loki_datasource, expr, legendFormat, refId):
        self.loki_datasource, self.expr = loki_datasource, expr
        self.legendFormat, self.refId = legendFormat, refId

    def to_json_data(self):
        """Return the target as a plain dict; ``queryType`` is always 'range'."""
        payload = dict(
            datasource=self.loki_datasource,
            expr=self.expr,
            legendFormat=self.legendFormat,
            refId=self.refId,
        )
        payload['queryType'] = 'range'
        return payload

View File

@@ -5,28 +5,21 @@ from grafanalib.core import (
)
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
prom_datasource='prometheus'
loki_datasource='loki'
from common import LokiTarget, PrometheusTemplate
# TODO: this is (clown emoji), normal Target gave me errors in grafana
class LokiTarget(object):
def to_json_data(self):
return {
'datasource': loki_datasource,
'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
'legendFormat': '{{ container_name }}',
'refId': 'A',
'queryType': 'range',
}
prom_datasource='${datasource}'
loki_datasource='loki'
dashboard = Dashboard(
title='Containers',
uid='containers',
description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
tags=[
'example'
'linux',
'docker',
],
templating=Templating(list=[
PrometheusTemplate,
Template(
name='compose_project',
label='Compose Project',
@@ -44,7 +37,6 @@ dashboard = Dashboard(
includeAll=True,
multi=True,
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
),
Template(
name='logs_query',
@@ -56,7 +48,6 @@ dashboard = Dashboard(
timezone='browser',
panels=[
TimeSeries(
id=1,
title='Container Memory Usage',
unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=0),
@@ -76,13 +67,14 @@ dashboard = Dashboard(
],
),
TimeSeries(
id=2,
title='Container CPU Usage',
unit=SECONDS,
gridPos=GridPos(h=8, w=12, x=12, y=0),
lineWidth=2,
fillOpacity=10,
showPoints='never',
tooltipMode='all',
tooltipSort='desc',
targets=[
Target(
datasource=prom_datasource,
@@ -93,7 +85,6 @@ dashboard = Dashboard(
],
),
TimeSeries(
id=3,
title='Container Network Traffic',
unit=BYTES_SEC_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=8),
@@ -118,7 +109,6 @@ dashboard = Dashboard(
],
),
Logs(
id=4,
title='',
gridPos=GridPos(h=8, w=12, x=12, y=8),
showLabels=True,
@@ -127,13 +117,12 @@ dashboard = Dashboard(
prettifyLogMessage=True,
dedupStrategy='numbers',
targets=[
LokiTarget(),
# Target(
# datasource=loki_datasource,
# expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
# legendFormat='{{ container_name }}',
# refId='A',
# ),
LokiTarget(
loki_datasource=loki_datasource,
expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
legendFormat='{{ container_name }}',
refId='A',
),
],
),
],

View File

@@ -0,0 +1,139 @@
from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target
from grafanalib.formatunits import BYTES_IEC
from common import PrometheusTemplate
from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS
# Dashboard variables, in order: the shared Prometheus datasource picker,
# then the job, then the instance (whose query is filtered by "$job").
node_templating = Templating(list=[
    PrometheusTemplate,
    Template(
        name='job',
        label='Job',
        dataSource='${datasource}',
        query='label_values(node_uname_info, job)',
    ),
    Template(
        name='instance',
        label='Instance',
        dataSource='${datasource}',
        query='label_values(node_uname_info{job="$job"}, instance)',
    ),
])

# CPU Basic: one series per CPU mode bucket, each divided by the CPU count,
# stacked in percent mode. Colors come from CPU_BASIC_COLORS.
cpu_basic_panel = TimeSeries(
    title='CPU Basic',
    description='Basic CPU usage info',
    unit=PERCENT_UNIT_FORMAT,
    gridPos=GridPos(h=8, w=12, x=0, y=0),
    lineWidth=1,
    fillOpacity=30,
    showPoints='never',
    stacking={'mode': 'percent', 'group': 'A'},
    tooltipMode='all',
    tooltipSort='desc',
    targets=[
        Target(
            refId='A',
            legendFormat='Busy System',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
        Target(
            refId='B',
            legendFormat='Busy User',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
        Target(
            refId='C',
            legendFormat='Busy Iowait',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
        Target(
            refId='D',
            legendFormat='Busy IRQs',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
        Target(
            refId='E',
            legendFormat='Busy Other',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
        Target(
            refId='F',
            legendFormat='Idle',
            datasource='${datasource}',
            expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        ),
    ],
    # Extra JSON for the colors
    extraJson=CPU_BASIC_COLORS,
)

# Memory Basic: total, used, cache + buffer, free and swap-used series with
# normal stacking. Colors and per-series tweaks come from MEMORY_BASIC_COLORS.
memory_basic_panel = TimeSeries(
    title='Memory Basic',
    description='Basic memory usage',
    unit=BYTES_IEC,
    gridPos=GridPos(h=8, w=12, x=12, y=0),
    lineWidth=1,
    fillOpacity=30,
    showPoints='never',
    stacking={'mode': 'normal', 'group': 'A'},
    tooltipMode='all',
    tooltipSort='desc',
    targets=[
        Target(
            refId='A',
            legendFormat='RAM Total',
            format='time_series',
            datasource='${datasource}',
            expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"}',
        ),
        Target(
            refId='B',
            legendFormat='RAM Used',
            format='time_series',
            datasource='${datasource}',
            expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})',
        ),
        Target(
            refId='C',
            legendFormat='RAM Cache + Buffer',
            datasource='${datasource}',
            expr='node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}',
        ),
        Target(
            refId='D',
            legendFormat='RAM Free',
            datasource='${datasource}',
            expr='node_memory_MemFree_bytes{instance="$instance",job="$job"}',
        ),
        Target(
            refId='E',
            legendFormat='SWAP Used',
            datasource='${datasource}',
            expr='(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})',
        ),
    ],
    # Extra JSON for the colors
    extraJson=MEMORY_BASIC_COLORS,
)

# The module-level name `dashboard` is the Node Exporter dashboard itself.
# No panel sets an explicit id; auto_panel_ids() is called on the result.
dashboard = Dashboard(
    title='Node Exporter',
    uid='node',
    description='Node Exporter (not quite full)',
    tags=[
        'linux',
    ],
    timezone='browser',
    templating=node_templating,
    panels=[
        cpu_basic_panel,
        memory_basic_panel,
        # TODO: Network Basic
        # TODO: Disk Basic
    ],
).auto_panel_ids()

View File

@@ -0,0 +1,487 @@
# TODO: Question life decisions (I'm not sure if this is good)
# Extra panel JSON for the "CPU Basic" panel: one fixed-color field override
# per series name (matcher id "byName"), in the order listed below.
# NOTE(review): "Busy Iowait" and "Idle" are each listed twice, and the two
# "Idle" rows carry different colors. Order and duplicates are kept exactly
# as they were - confirm which "Idle" color is intended.
CPU_BASIC_COLORS = {
    "fieldConfig": {
        "overrides": [
            {
                "matcher": {"id": "byName", "options": series},
                "properties": [
                    {
                        "id": "color",
                        "value": {"fixedColor": colour, "mode": "fixed"},
                    },
                ],
            }
            for series, colour in (
                ("Busy Iowait", "#890F02"),
                ("Idle", "#052B51"),
                ("Busy Iowait", "#890F02"),
                ("Idle", "#7EB26D"),
                ("Busy System", "#EAB839"),
                ("Busy User", "#0A437C"),
                ("Busy Other", "#6D1F62"),
            )
        ],
    },
}
# Extra panel JSON for the "Memory Basic" panel: one fixed-color field
# override per series name (matcher id "byName"), in the order listed below.
# Rows flagged True ("RAM Total" and "Available") additionally set
# custom.fillOpacity to 0 and custom.stacking to group False / mode "normal".
MEMORY_BASIC_COLORS = {
    "fieldConfig": {
        "overrides": [
            {
                "matcher": {"id": "byName", "options": series},
                "properties": [
                    {
                        "id": "color",
                        "value": {"fixedColor": colour, "mode": "fixed"},
                    },
                ] + (
                    [
                        {"id": "custom.fillOpacity", "value": 0},
                        {
                            "id": "custom.stacking",
                            "value": {"group": False, "mode": "normal"},
                        },
                    ]
                    if extra_properties
                    else []
                ),
            }
            for series, colour, extra_properties in (
                ("Apps", "#629E51", False),
                ("Buffers", "#614D93", False),
                ("Cache", "#6D1F62", False),
                ("Cached", "#511749", False),
                ("Committed", "#508642", False),
                ("Free", "#0A437C", False),
                ("Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", "#CFFAFF", False),
                ("Inactive", "#584477", False),
                ("PageTables", "#0A50A1", False),
                ("Page_Tables", "#0A50A1", False),
                ("RAM_Free", "#E0F9D7", False),
                ("SWAP Used", "#BF1B00", False),
                ("Slab", "#806EB7", False),
                ("Slab_Cache", "#E0752D", False),
                ("Swap", "#BF1B00", False),
                ("Swap Used", "#BF1B00", False),
                ("Swap_Cache", "#C15C17", False),
                ("Swap_Free", "#2F575E", False),
                ("Unused", "#EAB839", False),
                ("RAM Total", "#E0F9D7", True),
                ("RAM Cache + Buffer", "#052B51", False),
                ("RAM Free", "#7EB26D", False),
                ("Available", "#DEDAF7", True),
            )
        ]
    }
}

View File

@@ -15,18 +15,6 @@ datasources:
url: http://prometheus:9090
editable: false
- name: Alertmanager
type: alertmanager
access: proxy
uid: alertmanager
url: http://alertmanager:9093
jsonData:
# Valid options for implementation include mimir, cortex and prometheus
implementation: prometheus
# Whether Grafana should send alert instances to this Alertmanager
handleGrafanaManagedAlerts: true
editable: false
- name: InfluxDB
type: influxdb
access: proxy

View File

@@ -17,13 +17,6 @@ common:
schema_config:
configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v12
index:
prefix: index_
period: 24h
- from: 2024-10-18
index:
period: 24h
@@ -33,5 +26,5 @@ schema_config:
store: tsdb
# TODO: Figure out the ruler configuration (which Alertmanager URL to use)
ruler:
alertmanager_url: http://localhost:9093
# ruler:
# alertmanager_url: http://localhost:9093

View File

@@ -1,23 +0,0 @@
groups:
- name: qbit-low-traffic
interval: 1m
rules:
- alert: QbitLowTraffic
expr: |
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
for: 2m
labels:
severity: warning
annotations:
title: 'Low traffic on qBit'
description: |
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
Last value was x bytes/s.
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
__dashboard__uid: 'containers'
__orgId__: 1
__panelId__: 3

View File

@@ -1,20 +0,0 @@
groups:
- name: demo-service-alerts
rules:
- alert: DemoServiceHighErrorRate
expr: |
(
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
)
/
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
) * 100 > 0.5
)
for: 1m
labels:
severity: critical
annotations:
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'

View File

@@ -5,11 +5,6 @@ global:
external_labels:
monitor: "{{ ansible_host }}"
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:9093"]
scrape_configs:
- job_name: "prometheus"
static_configs:
@@ -43,7 +38,6 @@ scrape_configs:
- 'demo.promlabs.com:10002'
rule_files:
- "/etc/prometheus/container.alerts.yml"
- "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json"

View File

@@ -5,10 +5,11 @@
post_tasks:
- name: Docker prune objects
docker_prune:
containers: yes
images: yes
containers: true
# Keep images for building grafana
images: true
images_filters:
dangling: false
until: "720h"
networks: true
volumes: true
builder_cache: true
builder_cache: false