5 Commits

23 changed files with 805 additions and 361 deletions

2
.idea/alpina.iml generated
View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Poetry (alpina) (4)" jdkType="Python SDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">

35
.idea/jsonSchemas.xml generated
View File

@@ -39,6 +39,22 @@
</SchemaInfo>
</value>
</entry>
<entry key="Loki">
<value>
<SchemaInfo>
<option name="name" value="Loki" />
<option name="relativePathToSchema" value="https://json.schemastore.org/loki.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
<entry key="Traefik v2">
<value>
<SchemaInfo>
@@ -124,25 +140,6 @@
</SchemaInfo>
</value>
</entry>
<entry key="prometheus.rules.json">
<value>
<SchemaInfo>
<option name="name" value="prometheus.rules.json" />
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
</Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
</map>
</state>
</component>

2
.idea/misc.xml generated
View File

@@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="Poetry (alpina) (2)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina) (4)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina)" project-jdk-type="Python SDK" />
</project>

View File

@@ -8,6 +8,22 @@ running on top of TrueNAS SCALE, separating all the docker stuff from the applia
# Notes
## Monitoring
The monitoring stack is set up to monitor all the containers and the host.
This is a work in progress. Grafana is set up with grafanalib, a Python library that generates Grafana dashboards.
The dashboards are generated from Python scripts in
[grafana_config/dashboards](roles/alpina/templates/services/monitoring/grafana_config/dashboards).
This requires a custom Grafana image, which is built from the
[Dockerfile](roles/alpina/templates/services/monitoring/Dockerfile).
Because the dashboards are generated while the image is built, the image has to be rebuilt manually whenever the dashboards are updated.
From the services/monitoring directory, run:
```bash
docker compose up -d --build --force-recreate grafana
```
## IPv6
The current configuration is designed to work with IPv6.
However, because of how (not properly) I'm doing the subnetting

View File

@@ -24,8 +24,6 @@ minio_password: "{{ vault_minio_password }}"
influxdb_admin_password: "{{ vault_influxdb_admin_password }}"
influxdb_admin_token: "{{ vault_influxdb_admin_token }}"
alertmanager_discord_webhook: "{{ vault_alertmanager_discord_webhook }}"
# Traefik
acme_email: "{{ vault_acme_email }}"
cloudflare_api_token: "{{ vault_cloudflare_api_token }}"

View File

@@ -1,96 +1,88 @@
$ANSIBLE_VAULT;1.1;AES256
32653863663065353431636364373163613536643238613961666561653663633530646165643766
3833323937353331313136633965393061616135366534660a333037383066303431623830313464
65346431633238666534373033663138353438313762326361666233353866663534363536643034
3636323439316261630a623262336331663431633266336235653034323234383566323963623365
32626363626164373536663464643632393761346137623866633237643038306265636362626561
61313634353634373530383061393364613461303132326335316566326436633635633131643433
31376539396639326464333233643933373737313064363262323639363964643862633035396161
35643037636535623966626131393538643432396536643365383736636262356135373434376433
32316361343330303431376234323632323932376635343964383733633761326639393966383039
35646131343034663962363335373661323065663764396631343461383661663738386163323633
36303464646532633235663662666663343238633465663334326463383133643239666634653739
35396130393961303230396236303766336666643930626161333338326137663235323066663032
33376564373563323635356233616264313663373534333636643236393866613062656338353864
66386132663362363832366661646462316139353132626662663934336530386534376538633235
62653131653835323261373435373631396466353738306362616266616532313435323633613933
61646132346536323632643865326234356535346566346532383162393265613931343962303463
31636334343736666434353835633734396465653862613234386431306463326134613931646232
32353535663133623434643866336165616232613662336533383432633338373763643337616637
38323237646461376433316164646366383438316639633162303739383263656265633364303565
36643339356136653332666230633939636264306431636562323864373037623138363739616561
37613364653737353638646564323439646138646536636564303866636233616264383466656439
33646232653061616437656162353036313834616162313936353533393833313432656534343363
35636638326236646163323463356634326534623165306461316530353936646162323435633862
64396464303363323837316162353734626663643962303534336637336632333463393734383532
66616534666466393333386337363238383432643764373864613461363766333932333862363332
61313364613031376334326635636432346532613462613265643462636436663963323862353733
38396261613332396633666130653262313234633132353264363266336231373535306532383661
65323530653531646339626537653433303332656535346639393466353133363833326236656231
33336265373463396135653730616266346331376461346433343464326238323034653330393732
36643432316662333633333036633761653031393433333338663633386264656535623534653463
36363565303333356361616539376532353066336137336134656465383364636361656664356439
65326334643631663665376530646433323439653864623964323363396561313663636538356536
63626336303862333364363166353437353163656238303765636662636137383337623563666264
66326633343230386638616438393436633431343264343231386563613935626430306337343533
66656366333332326131343661356236396430303832303834653530623639353036663436373862
61336437386338343965653563646664643438353232306231316564616462643236646239333062
38643461346639623964626438396631396139383332666130316635656530653136333662353566
36313261646330373963663032316662383137366436636534383366636362366435393036373264
34646537666462363531343335336638343038333633663862666163306662643634326533316561
61613235366233636530663462353066646530386265623534663336376364323237343936646134
31616563653864383565306439613932396562613835613562326264326535636630646666366335
36653631353961353933386236636534393636356334633336313333383238353838336335646630
63633365666530623562323634303935326362643762616532303531303139333565643835396163
36353130656365326435343130613234336637346461313639653133623933376163393935366266
66653337353732363038663164363663623266356366663637343466393836353965343730666362
38663636336265383331666666616535366334616431306164303738306436333364653765356662
37316433323563323431623164386337343563663538333435616333343433396236356363333262
61396664326234343136666331356465333233663135613839616334623033316362336162613731
38646530326538643337323838326563303130643934623939346635343331356531373235663937
62396530383365666439373632613633633233376139616138323033613135383330333132643839
65363833616337656662653462323436303531653635663739633366616532333761323238353764
39373836303735393165393435323139346661346135636138613731373165386533386333393364
32336265386334386338653734353565343733393931373436336233333031356531313739636666
61376234393631343236643137616631373564376132623534333939346162353662306661393438
32326566373934653463653737383131386431363664333535626361646637613632383132623533
32343465366562363765353366333330633631353936613930376631336538306230626632303966
31343936386535663165663066663862656439306363326337313561396132316338363930323632
33313061623534373338623931663934396339633564353533626639373837323832366132343538
63373862663137306665383732303863343564343830636233613139666631626532373938386663
35646331646462356639383964373732393866653963643832633661323430323430613330633364
35343262366362646165383032333236623863656264353964623136643631326135623538306261
37393839343331653665356131343063316232303963636462653238333466636334616435666463
65636662383930353238623130363834616137643830633261646338363435343839633565303562
37623231396163346464303464333962336261353634396236613132306464643764356265656137
32373263613964396430646332666235303634373431643939623963633334326135626565656662
30646166303732643562653166633232666635343665616665653566316632303861613861313333
38393636663137333231613239353661656338333536656563616237343234623031363535666637
61343662663965663161666436366630366432363733663537613064386130326466343366383232
32363662343561666665323565356163383932336361656132373263363239636666613461366339
31323264393866386239353333386161643330343262366666323533303737373163313262313766
61303638366263346232353134333431613730386431623235323537323962666133613939353762
63326361633630323937353163383930626336663365626532613031623532393932316138353335
32363262393764663135393466616639373965313238323935383531633434633038663437646662
31633265373937316533373332316132363061386133356231623230393739326464333761336338
38626234646164616265633061346239363164376532383834356435346232653065326362343363
39613532356166633133626563643238373661323937353635343464666339323561326136623366
62633637656462376136633963653263346565366563646533373431613761616231653739613537
32343332356435393635363837396463613165626337346235303363613764306132343539333836
63386633626332396339383165303166653334663239313066666632356165643161356262346230
32636365636364663466343939663538386439343336303537636230306263643534653339313538
31373165363962373337636138336561336638633762373363646139366339323031313664306534
30623130663037323839666166323162393065643535663866383062356330633137343239316436
32303132393739653363376138633430313832383165663366626436653033663637616664346632
63633439663734393236343265323533633639316133323336373064633138363266316135363335
31336637666331333139306537333565333064666433653730633430336261656665613263663937
64313230656333373838346439623061393164393239393934306336373063303934663334353532
31313637623466313835313566616161376230343532653561343364383133653736646338303631
36356164303630303433356332343630616465383831623036383833393330663566616333653161
63393361643266323336393962663263323338633634633033393762656139393665353630633637
39386462303731396261613961613238616237373332656361303139633763303837653765623464
64333565666532653864383861333433353731343161613231383836353966353636373762306132
35333536373939656638356333383135313231306433656536383933623634653263353434393238
32323037666135316337633465666335376332326633346665643333656139386465353134356636
36333434303538326135346539313734393939353163316666366438613133333464623732666438
663934323030303937623038343662646163
36636236366435333738633465323539336231393239656538643863643233346563333836623335
3136393936656261396434316232356338313838373666660a653464613833306133343232623864
61666561336462376664363463313533353238623031613664353063396236343663643936303730
6235646336306636360a653238633038306532613436633132363231613862383636313838623461
32633366326136346435613232396632396365656138643361643139353430663637353565383664
36623961663030653639316131376535363138343965636437653139646233613765323439393030
31666137346339663162393836636638636431326232323461353661613062623032306130393965
38313931313935666633343835303232333961633232623538383138366262663335323764323939
32373333663834626633363265373632356439633862316562323565646530383534653338353165
38396434353332623164346137383238343536303130616666643065306431656137303263323135
34316662353031653932396239623733313037383935383762623136346636323434363231623161
30393864353466643637316566663366363231373335663331323932663837626239663633663965
66333531323861663130353531323339386566303630366236636135393439356634393732623033
31336231363935633436363962316666666336303338313636386163313666636336343464336133
33313730303961663632323435323963663530623265663664343735643061323332343265343431
61363039333730623562363233373537633138663239313132336666313237373137353663326538
32366130326635366433393434653735616132366264386461363063393265623765666461626366
38636239376534653230663932393930343162333262643130633835343363613061623932363761
64643164323335376565646137643763316562343565366462376162333633313737303465373362
63343734633536353661353165346632666230616138396461336332623365366432313734343837
30613736313961663334326335333834336634373338326631313739363765303036303132346166
37313030373264383564383936396339623061616134356663333733653838393537306336313135
32336261356437653863653839373130323035346538343938646265653239376236373932646433
35373932326535643763396563373138626239393661373231393066323335336264373835336635
38393732643630336364363834303534663334396363623261383339313939663461303236646237
36393330373534383836373065373239353836653137306338336638396662363434303839363466
37303332343464663733653632363239366337656364333532313237633935616637333361383763
62363063323362323565363837333264346161353032643039323839336666656333336433376231
36363335626137366135373230613436653232663138343862623562306331336330356630316166
30613264353165343634663461373630653632366333313837373237613339336638396338376465
64633638373263376330343561303664666139663237326637663964386133623164626339346635
66636365366562343636653362656133306164353761346661343430356633613063656466316262
31633932313532663930303837353863333664393563646566396164666236633832633235653362
63663931353436623034653733313766393465363466363831643130643939356335643166356436
38386530333264313263636438376134666235646636316233653330613735323234313036356639
61316164376434616239646235326661323363333835393430646462323234356138653163616530
65623233636435396462343437626130353735643530376538633762346332653162353563386366
32656633633935626238323431643631633434633032303435383037353834653964326336616530
30363765663133313239373664383830393238303439653531316664636532363135636563356666
34376636373033353665373261363536393562653638306661663832326139383565613862333831
38616238616332326532656430393331383161376237393365666639363732363164306332343336
37366638326464373261386431623731306663616262633837313965633530616265326536323136
62366365666461383535663637633332626464643062653139623333663038316536353930653266
37343830613062346533613762663738343138383537396435643765323237623130363564396462
61663063643135303539313062396338353061346336303938626361343238366366393533363638
31313437623631626437393761366537636664393863306164373431653133316639623630353336
65313037636533393362363266366231393334613264343331623531393666336336626265366163
34663161396633666162326564313735373137303337386538633866653331646635633532336465
34386166373436386566656135313438363733353139663630613430363332656239356139393532
35626337666639376664346631323938316538333066353363646562323266353165366632656137
66366162376165626564363230353062666364646363366637666433636333316536623435623836
62346566363362363939353038396566653238666138666531396338323262323965383031336362
34613332363334653531383231363539343133333531666564386133346562323338366139663438
31613466366438643566333632326239653662636464373337326537313234393038306132343730
36633136366162643966396362643165313336383862653435343630646431306366656636353230
64326633346561613662383863356531306563623439363566643733336535303335303164633535
36356463616162313039386434323637383937613133623131373033373462363365643730666166
65383166346638313533326366346433656461346439343838306564393336383536633732343965
39306231386130303433616361366363366163646534316138623362393063663438313165643762
39393332653564333762663762366633386135353865366338396138666265653662373535666366
35613937613366323064316561643435353830316239396464393737613835373964626437316464
39643664656565633966393832643033323130636562383233323636363361353430353062323439
39396464633336623963633963326461316562333162333766613064336462613235336531623437
30383063653666633839646533386239366637346230363033306161386537303039376465303535
34643162323065326264343662303138313063303834353832393663616239383739313133393532
62393766343037666564326132386139346661383564366366646530346434373366326531356138
31323531653338653130303733363764636430336563336439666132626434363463306631363334
39623332376334383338633132653262653735346563626365613336623435396539383630366332
31316638393562376131363166633163333332633332393062393962613132366538653865663264
38313237393436353333323431336361653938343034346164353335366535396265633961333138
65386137356161643732636531613166633464326163303336303439383435376331373935333563
64633961623761393131333234656530653737346563643963643833383262383434653266343362
35623832643032346133346363646136646438663761363330666231316434306232623339656535
34393337666237656262313439386336336466373466663663616139353463316265396135626366
62313562306334343831616364633933343463386233323637313832316635346235623830333461
33663530343966383739643261653736363865323438363430653661653964643339633833386438
36333331366334366461346636636462343335313234663864613864366134356161396662383632
36663538373761353937313666363262626435623537646665646364353934373638366261333234
36353439303663656531666637376364313838386130343966316138356338643135316139363630
30386635376565363931333331336431303562346431323534643238333337386264616161356163
35663766306635626235373663643064393233346364666663393236353561653362373361666164
65653566666234626464356338613834323332383939643935323337376162316163333034643062
63646237646234636561313038383636373936656164333735323461626233633337623764383830
66383161346336633962643032376662656566396666343662656337306333313836613335643961
64323961663032373239636430306430383639306333363938303837386139643230353061623937
36373733636337616264313432643230303935626666633533666135666538626266626266643864
376430653461346366626432636336653437

View File

@@ -1 +0,0 @@
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}

View File

@@ -4,6 +4,10 @@ RUN pip install grafanalib
COPY ./grafana_config/dashboards /dashboards
# Required for grafanalib to find the shared python files like common.py
# https://github.com/weaveworks/grafanalib/issues/58
ENV PYTHONPATH=/dashboards
RUN generate-dashboards /dashboards/*.dashboard.py
FROM grafana/grafana:latest

View File

@@ -1,68 +0,0 @@
# The root route on which each incoming alert enters.
route:
group_by: ["alertname", "job"]
group_wait: 20s
group_interval: 5m
repeat_interval: 3h
receiver: discord_webhook
receivers:
- name: "discord_webhook"
discord_configs:
- webhook_url: "{{ alertmanager_discord_webhook }}"
{# - send_resolved: true#}
{# username: 'Alertmanager'#}
{# webhook_configs:#}
{# - send_resolved: true#}
{# url: '{{ alertmanager_discord_webhook }}'#}
{# username: 'Alertmanager'#}
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
{# icon_emoji: ':alert:'#}
{# send_resolved: true#}
{# text: "{{ .CommonAnnotations.summary }}"#}
{# title: "{{ .CommonLabels.alertname }}"#}
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
{# footer: '{{ .CommonLabels.monitor }}'#}
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
{# actions:#}
{# - type: 'button'#}
{# text: 'Open in Grafana'#}
{# url: '{{ .ExternalURL }}'#}
{# style: 'primary'#}
{# send_resolved: true#}
{# confirm:#}
{# title: 'Are you sure?'#}
{# text: 'This will open Grafana in a new tab.'#}
{# ok_text: 'Yes'#}
{# dismiss_text: 'No'#}
{# fields:#}
{# - title: 'Description'#}
{# value: "{{ .CommonAnnotations.description }}"#}
{# short: false#}
{# - title: 'Details'#}
{# value: "{{ .CommonAnnotations.details }}"#}
{# short: false#}
{# - title: 'Severity'#}
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
{# short: true#}
{# - title: 'Host'#}
{# value: '{{ .CommonLabels.monitor }}'#}
{# short: true#}
{# - title: 'Starts At'#}
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Ends At'#}
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Runbook'#}
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
{# short: true#}
{# - title: 'Dashboard'#}
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
{# short: true#}
{# - title: 'Alerting Rule'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}
{# - title: 'Alerting Rule Description'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}

View File

@@ -60,33 +60,17 @@ services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}"
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=30d
- --web.external-url=https://prom.{{ domain }}/
volumes:
- ./prometheus_config:/etc/prometheus:ro
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
- {{ base_volume_path }}/monitoring/prometheus:/prometheus
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes:
- ./alertmanager_config:/etc/alertmanager:ro
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter:
image: prom/node-exporter:latest
container_name: node-exporter
@@ -100,6 +84,11 @@ services:
image: gcr.io/cadvisor/cadvisor:latest
container_name: cadvisor
restart: unless-stopped
command:
- --docker_only=true
- --store_container_labels=false
- --whitelisted_container_labels=com.docker.compose.project,com.docker.compose.service
- --enable_metrics=cpu,cpuLoad,diskIO,memory,network,oom_event,process
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw

View File

@@ -3,7 +3,7 @@ apiVersion: 1
providers:
- name: "Grafana"
org_id: 1
folder: "Services"
folder: "Alpina"
type: "file"
options:
path: "/etc/grafana/provisioning/dashboards"

View File

@@ -0,0 +1,27 @@
from grafanalib.core import Template
# TODO: consider default params for common params like line width, show points, tooltip
# Shared dashboard variable: a Template named 'datasource' of type 'datasource'
# whose query is 'prometheus'. Dashboards that list it in their Templating can
# refer to the selected value as '${datasource}'.
# NOTE(review): presumably Grafana renders this as a picker over the configured
# Prometheus datasources -- confirm against the Grafana variable docs.
PrometheusTemplate = Template(
name='datasource',
type='datasource',
label='Prometheus',
query='prometheus',
)
# TODO: switch back to grafanalib's own Target once the JSON it produces for
# Loki queries no longer triggers errors in Grafana.
class LokiTarget:
    """Hand-rolled query target for Loki-backed panels.

    grafanalib serialises panel targets by calling ``to_json_data()`` on each
    entry, so this class only needs to expose that method.

    Args:
        loki_datasource: Value emitted as the target's ``datasource``.
        expr: LogQL expression to run.
        legendFormat: Legend template emitted as ``legendFormat``.
        refId: Query reference id within the panel (e.g. ``'A'``).
        queryType: Value emitted as ``queryType``. Defaults to ``'range'``,
            the value that used to be hard-coded, so existing callers are
            unaffected.
            NOTE(review): other values such as ``'instant'`` are assumed to be
            accepted by Grafana's Loki datasource -- confirm before relying on
            them.
    """

    def __init__(self, loki_datasource, expr, legendFormat, refId, queryType='range'):
        self.loki_datasource = loki_datasource
        self.expr = expr
        self.legendFormat = legendFormat
        self.refId = refId
        self.queryType = queryType

    def to_json_data(self):
        """Return the target as the plain dict that ends up in the dashboard JSON."""
        return {
            'datasource': self.loki_datasource,
            'expr': self.expr,
            'legendFormat': self.legendFormat,
            'refId': self.refId,
            'queryType': self.queryType,
        }

View File

@@ -5,28 +5,21 @@ from grafanalib.core import (
)
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
prom_datasource='prometheus'
loki_datasource='loki'
from common import LokiTarget, PrometheusTemplate
# TODO: this is (clown emoji), normal Target gave me errors in grafana
class LokiTarget(object):
def to_json_data(self):
return {
'datasource': loki_datasource,
'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
'legendFormat': '{{ container_name }}',
'refId': 'A',
'queryType': 'range',
}
prom_datasource='${datasource}'
loki_datasource='loki'
dashboard = Dashboard(
title='Containers',
uid='containers',
description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
tags=[
'example'
'linux',
'docker',
],
templating=Templating(list=[
PrometheusTemplate,
Template(
name='compose_project',
label='Compose Project',
@@ -44,7 +37,6 @@ dashboard = Dashboard(
includeAll=True,
multi=True,
refresh=REFRESH_ON_TIME_RANGE_CHANGE,
),
Template(
name='logs_query',
@@ -56,7 +48,6 @@ dashboard = Dashboard(
timezone='browser',
panels=[
TimeSeries(
id=1,
title='Container Memory Usage',
unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=0),
@@ -76,13 +67,14 @@ dashboard = Dashboard(
],
),
TimeSeries(
id=2,
title='Container CPU Usage',
unit=SECONDS,
gridPos=GridPos(h=8, w=12, x=12, y=0),
lineWidth=2,
fillOpacity=10,
showPoints='never',
tooltipMode='all',
tooltipSort='desc',
targets=[
Target(
datasource=prom_datasource,
@@ -93,7 +85,6 @@ dashboard = Dashboard(
],
),
TimeSeries(
id=3,
title='Container Network Traffic',
unit=BYTES_SEC_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=8),
@@ -118,7 +109,6 @@ dashboard = Dashboard(
],
),
Logs(
id=4,
title='',
gridPos=GridPos(h=8, w=12, x=12, y=8),
showLabels=True,
@@ -127,13 +117,12 @@ dashboard = Dashboard(
prettifyLogMessage=True,
dedupStrategy='numbers',
targets=[
LokiTarget(),
# Target(
# datasource=loki_datasource,
# expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
# legendFormat='{{ container_name }}',
# refId='A',
# ),
LokiTarget(
loki_datasource=loki_datasource,
expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
legendFormat='{{ container_name }}',
refId='A',
),
],
),
],

View File

@@ -1,51 +0,0 @@
from grafanalib.core import (
Dashboard, TimeSeries, GaugePanel,
Target, GridPos,
OPS_FORMAT
)
dashboard = Dashboard(
title="Python generated example dashboard",
description="Example dashboard using the Random Walk and default Prometheus datasource",
tags=[
'example'
],
timezone="browser",
panels=[
TimeSeries(
title="Random Walk",
dataSource='default',
targets=[
Target(
datasource='grafana',
expr='example',
),
],
gridPos=GridPos(h=8, w=16, x=0, y=0),
),
GaugePanel(
title="Random Walk",
dataSource='default',
targets=[
Target(
datasource='grafana',
expr='example',
),
],
gridPos=GridPos(h=4, w=4, x=17, y=0),
),
TimeSeries(
title="Prometheus http requests",
dataSource='prometheus',
targets=[
Target(
expr='rate(prometheus_http_requests_total[5m])',
legendFormat="{{ handler }}",
refId='A',
),
],
unit=OPS_FORMAT,
gridPos=GridPos(h=8, w=16, x=0, y=10),
),
],
).auto_panel_ids()

View File

@@ -0,0 +1,139 @@
from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target
from grafanalib.formatunits import BYTES_IEC
from common import PrometheusTemplate
from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS
# Reference to the dashboard variable defined by PrometheusTemplate.
_DATASOURCE = '${datasource}'

# Look-and-feel options shared by every "Basic" time-series panel below.
_BASIC_PANEL_LOOK = {
    'lineWidth': 1,
    'fillOpacity': 30,
    'showPoints': 'never',
    'tooltipMode': 'all',
    'tooltipSort': 'desc',
}

# Rows are (refId, legend, PromQL, extra Target kwargs).
_CPU_BASIC_SERIES = (
    (
        'A', 'Busy System',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
    (
        'B', 'Busy User',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
    (
        'C', 'Busy Iowait',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
    (
        'D', 'Busy IRQs',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
    (
        'E', 'Busy Other',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
    (
        'F', 'Idle',
        'sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
        {},
    ),
)

# Rows are (refId, legend, PromQL, extra Target kwargs).
_MEMORY_BASIC_SERIES = (
    (
        'A', 'RAM Total',
        'node_memory_MemTotal_bytes{instance="$instance",job="$job"}',
        {'format': 'time_series'},
    ),
    (
        'B', 'RAM Used',
        'node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})',
        {'format': 'time_series'},
    ),
    (
        'C', 'RAM Cache + Buffer',
        'node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}',
        {},
    ),
    (
        'D', 'RAM Free',
        'node_memory_MemFree_bytes{instance="$instance",job="$job"}',
        {},
    ),
    (
        'E', 'SWAP Used',
        '(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})',
        {},
    ),
)


def _prom_targets(series):
    """Build one Target per (refId, legend, PromQL, extra kwargs) row."""
    return [
        Target(
            datasource=_DATASOURCE,
            expr=promql,
            legendFormat=legend,
            refId=ref_id,
            **extra
        )
        for ref_id, legend, promql, extra in series
    ]


def _node_variable(name, label, query):
    """Build a dashboard variable that is queried from the selected datasource."""
    return Template(name=name, label=label, dataSource=_DATASOURCE, query=query)


# Node Exporter overview: variables for datasource/job/instance, followed by a
# CPU panel and a memory panel placed side by side on the first grid row.
dashboard = Dashboard(
    title='Node Exporter',
    uid='node',
    description='Node Exporter (not quite full)',
    tags=[
        'linux',
    ],
    timezone='browser',
    templating=Templating(list=[
        # Datasource
        PrometheusTemplate,
        # Job
        _node_variable('job', 'Job', 'label_values(node_uname_info, job)'),
        # Instance
        _node_variable('instance', 'Instance', 'label_values(node_uname_info{job="$job"}, instance)'),
    ]),
    panels=[
        # CPU Basic
        TimeSeries(
            title='CPU Basic',
            description='Basic CPU usage info',
            unit=PERCENT_UNIT_FORMAT,
            gridPos=GridPos(h=8, w=12, x=0, y=0),
            stacking={'mode': 'percent', 'group': 'A'},
            targets=_prom_targets(_CPU_BASIC_SERIES),
            # Extra JSON for the colors
            extraJson=CPU_BASIC_COLORS,
            **_BASIC_PANEL_LOOK
        ),
        # Memory Basic
        TimeSeries(
            title='Memory Basic',
            description='Basic memory usage',
            unit=BYTES_IEC,
            gridPos=GridPos(h=8, w=12, x=12, y=0),
            stacking={'mode': 'normal', 'group': 'A'},
            targets=_prom_targets(_MEMORY_BASIC_SERIES),
            # Extra JSON for the colors
            extraJson=MEMORY_BASIC_COLORS,
            **_BASIC_PANEL_LOOK
        ),
        # TODO: Network Basic
        # TODO: Disk Basic
    ],
).auto_panel_ids()

View File

@@ -0,0 +1,487 @@
# TODO: Reconsider this approach - it is unclear whether hand-written override JSON passed to panels via extraJson is a good way to set series colors.
# Grafana field-config overrides that pin a fixed color to individual series,
# matched by their legend name ("byName"). The dict is handed to a panel
# through its ``extraJson`` argument.
#
# Every series name appears exactly once. An earlier version listed
# "Busy Iowait" twice (same color) and "Idle" twice with two different colors
# (#052B51, then #7EB26D). Grafana applies overrides in list order, so only
# the later "Idle" entry took effect; that effective color is the one kept
# here and the shadowed entries were dropped.
CPU_BASIC_COLORS = {
    "fieldConfig": {
        "overrides": [
            {
                "matcher": {"id": "byName", "options": series_name},
                "properties": [
                    {
                        "id": "color",
                        "value": {"fixedColor": hex_color, "mode": "fixed"},
                    },
                ],
            }
            # (legend name, fixed color) - one row per series.
            for series_name, hex_color in (
                ("Busy Iowait", "#890F02"),
                ("Idle", "#7EB26D"),
                ("Busy System", "#EAB839"),
                ("Busy User", "#0A437C"),
                ("Busy Other", "#6D1F62"),
            )
        ],
    },
}
# Grafana field-config overrides for memory series, matched by legend name
# ("byName"). Each row below yields one override that sets a fixed color.
# Rows whose third field is True additionally get a ``custom.fillOpacity`` of
# 0 and a ``custom.stacking`` override with ``group`` set to False.
#
# NOTE(review): of the names listed here, only "RAM Total",
# "RAM Cache + Buffer", "RAM Free" and "SWAP Used" match legend names of the
# panel that is given this dict via ``extraJson``; "RAM Used" has no entry.
# The remaining rows look like leftovers for other panels - confirm before
# pruning.
MEMORY_BASIC_COLORS = {
    "fieldConfig": {
        "overrides": [
            {
                "matcher": {"id": "byName", "options": legend},
                "properties": [
                    {
                        "id": "color",
                        "value": {"fixedColor": shade, "mode": "fixed"},
                    },
                ] + (
                    # Fresh objects per row, so no two overrides share state.
                    [
                        {"id": "custom.fillOpacity", "value": 0},
                        {
                            "id": "custom.stacking",
                            "value": {"group": False, "mode": "normal"},
                        },
                    ]
                    if with_fill_and_stacking
                    else []
                ),
            }
            # (legend name, fixed color, add fillOpacity/stacking overrides?)
            for legend, shade, with_fill_and_stacking in (
                ("Apps", "#629E51", False),
                ("Buffers", "#614D93", False),
                ("Cache", "#6D1F62", False),
                ("Cached", "#511749", False),
                ("Committed", "#508642", False),
                ("Free", "#0A437C", False),
                (
                    "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working",
                    "#CFFAFF",
                    False,
                ),
                ("Inactive", "#584477", False),
                ("PageTables", "#0A50A1", False),
                ("Page_Tables", "#0A50A1", False),
                ("RAM_Free", "#E0F9D7", False),
                ("SWAP Used", "#BF1B00", False),
                ("Slab", "#806EB7", False),
                ("Slab_Cache", "#E0752D", False),
                ("Swap", "#BF1B00", False),
                ("Swap Used", "#BF1B00", False),
                ("Swap_Cache", "#C15C17", False),
                ("Swap_Free", "#2F575E", False),
                ("Unused", "#EAB839", False),
                ("RAM Total", "#E0F9D7", True),
                ("RAM Cache + Buffer", "#052B51", False),
                ("RAM Free", "#7EB26D", False),
                ("Available", "#DEDAF7", True),
            )
        ],
    }
}

View File

@@ -15,18 +15,6 @@ datasources:
url: http://prometheus:9090
editable: false
- name: Alertmanager
type: alertmanager
access: proxy
uid: alertmanager
url: http://alertmanager:9093
jsonData:
# Valid options for implementation include mimir, cortex and prometheus
implementation: prometheus
# Whether Grafana should send alert instances to this Alertmanager
handleGrafanaManagedAlerts: true
editable: false
- name: InfluxDB
type: influxdb
access: proxy

View File

@@ -17,13 +17,6 @@ common:
schema_config:
configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v12
index:
prefix: index_
period: 24h
- from: 2024-10-18
index:
period: 24h
@@ -33,5 +26,5 @@ schema_config:
store: tsdb
# TODO: Figure this out
ruler:
alertmanager_url: http://localhost:9093
# ruler:
# alertmanager_url: http://localhost:9093

View File

@@ -1,23 +0,0 @@
groups:
- name: qbit-low-traffic
interval: 1m
rules:
- alert: QbitLowTraffic
expr: |
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
for: 2m
labels:
severity: warning
annotations:
title: 'Low traffic on qBit'
description: |
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
Last value was x bytes/s.
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
__dashboard__uid: 'containers'
__orgId__: 1
__panelId__: 3

View File

@@ -1,20 +0,0 @@
groups:
- name: demo-service-alerts
rules:
- alert: DemoServiceHighErrorRate
expr: |
(
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
)
/
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
) * 100 > 0.5
)
for: 1m
labels:
severity: critical
annotations:
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'

View File

@@ -5,11 +5,6 @@ global:
external_labels:
monitor: "{{ ansible_host }}"
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:9093"]
scrape_configs:
- job_name: "prometheus"
static_configs:
@@ -35,15 +30,7 @@ scrape_configs:
static_configs:
- targets: ["promtail:9080"]
- job_name: 'demo'
static_configs:
- targets:
- 'demo.promlabs.com:10000'
- 'demo.promlabs.com:10001'
- 'demo.promlabs.com:10002'
rule_files:
- "/etc/prometheus/container.alerts.yml"
- "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json"

View File

@@ -5,10 +5,11 @@
post_tasks:
- name: Docker prune objects
docker_prune:
containers: yes
images: yes
containers: true
# Only prune images older than the "until" filter below, so recent images needed for building grafana are kept
images: true
images_filters:
dangling: false
until: "720h"
networks: true
volumes: true
builder_cache: true
builder_cache: false