5 Commits

23 changed files with 361 additions and 805 deletions

2
.idea/alpina.iml generated
View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" /> <excludeFolder url="file://$MODULE_DIR$/venv" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="jdk" jdkName="Poetry (alpina) (4)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="PyDocumentationSettings"> <component name="PyDocumentationSettings">

35
.idea/jsonSchemas.xml generated
View File

@@ -39,22 +39,6 @@
</SchemaInfo> </SchemaInfo>
</value> </value>
</entry> </entry>
<entry key="Loki">
<value>
<SchemaInfo>
<option name="name" value="Loki" />
<option name="relativePathToSchema" value="https://json.schemastore.org/loki.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/loki_config/loki-config.yaml.j2" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
<entry key="Traefik v2"> <entry key="Traefik v2">
<value> <value>
<SchemaInfo> <SchemaInfo>
@@ -140,6 +124,25 @@
</SchemaInfo> </SchemaInfo>
</value> </value>
</entry> </entry>
<entry key="prometheus.rules.json">
<value>
<SchemaInfo>
<option name="name" value="prometheus.rules.json" />
<option name="relativePathToSchema" value="https://json.schemastore.org/prometheus.rules.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container-alerts.yml" />
</Item>
<Item>
<option name="path" value="roles/alpina/templates/services/monitoring/prometheus_config/container.alerts.yml" />
</Item>
</list>
</option>
</SchemaInfo>
</value>
</entry>
</map> </map>
</state> </state>
</component> </component>

2
.idea/misc.xml generated
View File

@@ -3,5 +3,5 @@
<component name="Black"> <component name="Black">
<option name="sdkName" value="Poetry (alpina) (2)" /> <option name="sdkName" value="Poetry (alpina) (2)" />
</component> </component>
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Poetry (alpina) (4)" project-jdk-type="Python SDK" />
</project> </project>

View File

@@ -8,22 +8,6 @@ running on top of TrueNAS SCALE, separating all the docker stuff from the applia
# Notes # Notes
## Monitoring
The monitoring stack is set up to monitor all the containers and the host.
This is a work in progress, Grafana is set up with grafanalib, a Python library that generates Grafana dashboards.
The dashboards are generated from Python scripts in
[grafana_config/dashboards](roles/alpina/templates/services/monitoring/grafana_config/dashboards).
This requires a custom grafana image, which is built from the
[Dockerfile](roles/alpina/templates/services/monitoring/Dockerfile).
This also means it has to be manually rebuilt whenever the dashboards are updated.
From the services/monitoring directory, run:
```bash
docker compose up -d --build --force-recreate grafana
```
## IPv6 ## IPv6
The current configuration is designed to work with IPv6. The current configuration is designed to work with IPv6.
However, because of how (not properly) I'm doing the subnetting However, because of how (not properly) I'm doing the subnetting

View File

@@ -24,6 +24,8 @@ minio_password: "{{ vault_minio_password }}"
influxdb_admin_password: "{{ vault_influxdb_admin_password }}" influxdb_admin_password: "{{ vault_influxdb_admin_password }}"
influxdb_admin_token: "{{ vault_influxdb_admin_token }}" influxdb_admin_token: "{{ vault_influxdb_admin_token }}"
alertmanager_discord_webhook: "{{ vault_alertmanager_discord_webhook }}"
# Traefik # Traefik
acme_email: "{{ vault_acme_email }}" acme_email: "{{ vault_acme_email }}"
cloudflare_api_token: "{{ vault_cloudflare_api_token }}" cloudflare_api_token: "{{ vault_cloudflare_api_token }}"

View File

@@ -1,88 +1,96 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
36636236366435333738633465323539336231393239656538643863643233346563333836623335 32653863663065353431636364373163613536643238613961666561653663633530646165643766
3136393936656261396434316232356338313838373666660a653464613833306133343232623864 3833323937353331313136633965393061616135366534660a333037383066303431623830313464
61666561336462376664363463313533353238623031613664353063396236343663643936303730 65346431633238666534373033663138353438313762326361666233353866663534363536643034
6235646336306636360a653238633038306532613436633132363231613862383636313838623461 3636323439316261630a623262336331663431633266336235653034323234383566323963623365
32633366326136346435613232396632396365656138643361643139353430663637353565383664 32626363626164373536663464643632393761346137623866633237643038306265636362626561
36623961663030653639316131376535363138343965636437653139646233613765323439393030 61313634353634373530383061393364613461303132326335316566326436633635633131643433
31666137346339663162393836636638636431326232323461353661613062623032306130393965 31376539396639326464333233643933373737313064363262323639363964643862633035396161
38313931313935666633343835303232333961633232623538383138366262663335323764323939 35643037636535623966626131393538643432396536643365383736636262356135373434376433
32373333663834626633363265373632356439633862316562323565646530383534653338353165 32316361343330303431376234323632323932376635343964383733633761326639393966383039
38396434353332623164346137383238343536303130616666643065306431656137303263323135 35646131343034663962363335373661323065663764396631343461383661663738386163323633
34316662353031653932396239623733313037383935383762623136346636323434363231623161 36303464646532633235663662666663343238633465663334326463383133643239666634653739
30393864353466643637316566663366363231373335663331323932663837626239663633663965 35396130393961303230396236303766336666643930626161333338326137663235323066663032
66333531323861663130353531323339386566303630366236636135393439356634393732623033 33376564373563323635356233616264313663373534333636643236393866613062656338353864
31336231363935633436363962316666666336303338313636386163313666636336343464336133 66386132663362363832366661646462316139353132626662663934336530386534376538633235
33313730303961663632323435323963663530623265663664343735643061323332343265343431 62653131653835323261373435373631396466353738306362616266616532313435323633613933
61363039333730623562363233373537633138663239313132336666313237373137353663326538 61646132346536323632643865326234356535346566346532383162393265613931343962303463
32366130326635366433393434653735616132366264386461363063393265623765666461626366 31636334343736666434353835633734396465653862613234386431306463326134613931646232
38636239376534653230663932393930343162333262643130633835343363613061623932363761 32353535663133623434643866336165616232613662336533383432633338373763643337616637
64643164323335376565646137643763316562343565366462376162333633313737303465373362 38323237646461376433316164646366383438316639633162303739383263656265633364303565
63343734633536353661353165346632666230616138396461336332623365366432313734343837 36643339356136653332666230633939636264306431636562323864373037623138363739616561
30613736313961663334326335333834336634373338326631313739363765303036303132346166 37613364653737353638646564323439646138646536636564303866636233616264383466656439
37313030373264383564383936396339623061616134356663333733653838393537306336313135 33646232653061616437656162353036313834616162313936353533393833313432656534343363
32336261356437653863653839373130323035346538343938646265653239376236373932646433 35636638326236646163323463356634326534623165306461316530353936646162323435633862
35373932326535643763396563373138626239393661373231393066323335336264373835336635 64396464303363323837316162353734626663643962303534336637336632333463393734383532
38393732643630336364363834303534663334396363623261383339313939663461303236646237 66616534666466393333386337363238383432643764373864613461363766333932333862363332
36393330373534383836373065373239353836653137306338336638396662363434303839363466 61313364613031376334326635636432346532613462613265643462636436663963323862353733
37303332343464663733653632363239366337656364333532313237633935616637333361383763 38396261613332396633666130653262313234633132353264363266336231373535306532383661
62363063323362323565363837333264346161353032643039323839336666656333336433376231 65323530653531646339626537653433303332656535346639393466353133363833326236656231
36363335626137366135373230613436653232663138343862623562306331336330356630316166 33336265373463396135653730616266346331376461346433343464326238323034653330393732
30613264353165343634663461373630653632366333313837373237613339336638396338376465 36643432316662333633333036633761653031393433333338663633386264656535623534653463
64633638373263376330343561303664666139663237326637663964386133623164626339346635 36363565303333356361616539376532353066336137336134656465383364636361656664356439
66636365366562343636653362656133306164353761346661343430356633613063656466316262 65326334643631663665376530646433323439653864623964323363396561313663636538356536
31633932313532663930303837353863333664393563646566396164666236633832633235653362 63626336303862333364363166353437353163656238303765636662636137383337623563666264
63663931353436623034653733313766393465363466363831643130643939356335643166356436 66326633343230386638616438393436633431343264343231386563613935626430306337343533
38386530333264313263636438376134666235646636316233653330613735323234313036356639 66656366333332326131343661356236396430303832303834653530623639353036663436373862
61316164376434616239646235326661323363333835393430646462323234356138653163616530 61336437386338343965653563646664643438353232306231316564616462643236646239333062
65623233636435396462343437626130353735643530376538633762346332653162353563386366 38643461346639623964626438396631396139383332666130316635656530653136333662353566
32656633633935626238323431643631633434633032303435383037353834653964326336616530 36313261646330373963663032316662383137366436636534383366636362366435393036373264
30363765663133313239373664383830393238303439653531316664636532363135636563356666 34646537666462363531343335336638343038333633663862666163306662643634326533316561
34376636373033353665373261363536393562653638306661663832326139383565613862333831 61613235366233636530663462353066646530386265623534663336376364323237343936646134
38616238616332326532656430393331383161376237393365666639363732363164306332343336 31616563653864383565306439613932396562613835613562326264326535636630646666366335
37366638326464373261386431623731306663616262633837313965633530616265326536323136 36653631353961353933386236636534393636356334633336313333383238353838336335646630
62366365666461383535663637633332626464643062653139623333663038316536353930653266 63633365666530623562323634303935326362643762616532303531303139333565643835396163
37343830613062346533613762663738343138383537396435643765323237623130363564396462 36353130656365326435343130613234336637346461313639653133623933376163393935366266
61663063643135303539313062396338353061346336303938626361343238366366393533363638 66653337353732363038663164363663623266356366663637343466393836353965343730666362
31313437623631626437393761366537636664393863306164373431653133316639623630353336 38663636336265383331666666616535366334616431306164303738306436333364653765356662
65313037636533393362363266366231393334613264343331623531393666336336626265366163 37316433323563323431623164386337343563663538333435616333343433396236356363333262
34663161396633666162326564313735373137303337386538633866653331646635633532336465 61396664326234343136666331356465333233663135613839616334623033316362336162613731
34386166373436386566656135313438363733353139663630613430363332656239356139393532 38646530326538643337323838326563303130643934623939346635343331356531373235663937
35626337666639376664346631323938316538333066353363646562323266353165366632656137 62396530383365666439373632613633633233376139616138323033613135383330333132643839
66366162376165626564363230353062666364646363366637666433636333316536623435623836 65363833616337656662653462323436303531653635663739633366616532333761323238353764
62346566363362363939353038396566653238666138666531396338323262323965383031336362 39373836303735393165393435323139346661346135636138613731373165386533386333393364
34613332363334653531383231363539343133333531666564386133346562323338366139663438 32336265386334386338653734353565343733393931373436336233333031356531313739636666
31613466366438643566333632326239653662636464373337326537313234393038306132343730 61376234393631343236643137616631373564376132623534333939346162353662306661393438
36633136366162643966396362643165313336383862653435343630646431306366656636353230 32326566373934653463653737383131386431363664333535626361646637613632383132623533
64326633346561613662383863356531306563623439363566643733336535303335303164633535 32343465366562363765353366333330633631353936613930376631336538306230626632303966
36356463616162313039386434323637383937613133623131373033373462363365643730666166 31343936386535663165663066663862656439306363326337313561396132316338363930323632
65383166346638313533326366346433656461346439343838306564393336383536633732343965 33313061623534373338623931663934396339633564353533626639373837323832366132343538
39306231386130303433616361366363366163646534316138623362393063663438313165643762 63373862663137306665383732303863343564343830636233613139666631626532373938386663
39393332653564333762663762366633386135353865366338396138666265653662373535666366 35646331646462356639383964373732393866653963643832633661323430323430613330633364
35613937613366323064316561643435353830316239396464393737613835373964626437316464 35343262366362646165383032333236623863656264353964623136643631326135623538306261
39643664656565633966393832643033323130636562383233323636363361353430353062323439 37393839343331653665356131343063316232303963636462653238333466636334616435666463
39396464633336623963633963326461316562333162333766613064336462613235336531623437 65636662383930353238623130363834616137643830633261646338363435343839633565303562
30383063653666633839646533386239366637346230363033306161386537303039376465303535 37623231396163346464303464333962336261353634396236613132306464643764356265656137
34643162323065326264343662303138313063303834353832393663616239383739313133393532 32373263613964396430646332666235303634373431643939623963633334326135626565656662
62393766343037666564326132386139346661383564366366646530346434373366326531356138 30646166303732643562653166633232666635343665616665653566316632303861613861313333
31323531653338653130303733363764636430336563336439666132626434363463306631363334 38393636663137333231613239353661656338333536656563616237343234623031363535666637
39623332376334383338633132653262653735346563626365613336623435396539383630366332 61343662663965663161666436366630366432363733663537613064386130326466343366383232
31316638393562376131363166633163333332633332393062393962613132366538653865663264 32363662343561666665323565356163383932336361656132373263363239636666613461366339
38313237393436353333323431336361653938343034346164353335366535396265633961333138 31323264393866386239353333386161643330343262366666323533303737373163313262313766
65386137356161643732636531613166633464326163303336303439383435376331373935333563 61303638366263346232353134333431613730386431623235323537323962666133613939353762
64633961623761393131333234656530653737346563643963643833383262383434653266343362 63326361633630323937353163383930626336663365626532613031623532393932316138353335
35623832643032346133346363646136646438663761363330666231316434306232623339656535 32363262393764663135393466616639373965313238323935383531633434633038663437646662
34393337666237656262313439386336336466373466663663616139353463316265396135626366 31633265373937316533373332316132363061386133356231623230393739326464333761336338
62313562306334343831616364633933343463386233323637313832316635346235623830333461 38626234646164616265633061346239363164376532383834356435346232653065326362343363
33663530343966383739643261653736363865323438363430653661653964643339633833386438 39613532356166633133626563643238373661323937353635343464666339323561326136623366
36333331366334366461346636636462343335313234663864613864366134356161396662383632 62633637656462376136633963653263346565366563646533373431613761616231653739613537
36663538373761353937313666363262626435623537646665646364353934373638366261333234 32343332356435393635363837396463613165626337346235303363613764306132343539333836
36353439303663656531666637376364313838386130343966316138356338643135316139363630 63386633626332396339383165303166653334663239313066666632356165643161356262346230
30386635376565363931333331336431303562346431323534643238333337386264616161356163 32636365636364663466343939663538386439343336303537636230306263643534653339313538
35663766306635626235373663643064393233346364666663393236353561653362373361666164 31373165363962373337636138336561336638633762373363646139366339323031313664306534
65653566666234626464356338613834323332383939643935323337376162316163333034643062 30623130663037323839666166323162393065643535663866383062356330633137343239316436
63646237646234636561313038383636373936656164333735323461626233633337623764383830 32303132393739653363376138633430313832383165663366626436653033663637616664346632
66383161346336633962643032376662656566396666343662656337306333313836613335643961 63633439663734393236343265323533633639316133323336373064633138363266316135363335
64323961663032373239636430306430383639306333363938303837386139643230353061623937 31336637666331333139306537333565333064666433653730633430336261656665613263663937
36373733636337616264313432643230303935626666633533666135666538626266626266643864 64313230656333373838346439623061393164393239393934306336373063303934663334353532
376430653461346366626432636336653437 31313637623466313835313566616161376230343532653561343364383133653736646338303631
36356164303630303433356332343630616465383831623036383833393330663566616333653161
63393361643266323336393962663263323338633634633033393762656139393665353630633637
39386462303731396261613961613238616237373332656361303139633763303837653765623464
64333565666532653864383861333433353731343161613231383836353966353636373762306132
35333536373939656638356333383135313231306433656536383933623634653263353434393238
32323037666135316337633465666335376332326633346665643333656139386465353134356636
36333434303538326135346539313734393939353163316666366438613133333464623732666438
663934323030303937623038343662646163

View File

@@ -0,0 +1 @@
DISCORD_WEBHOOK={{ alertmanager_discord_webhook }}

View File

@@ -4,10 +4,6 @@ RUN pip install grafanalib
COPY ./grafana_config/dashboards /dashboards COPY ./grafana_config/dashboards /dashboards
# Required for grafanalib to find the shared python files like common.py
# https://github.com/weaveworks/grafanalib/issues/58
ENV PYTHONPATH=/dashboards
RUN generate-dashboards /dashboards/*.dashboard.py RUN generate-dashboards /dashboards/*.dashboard.py
FROM grafana/grafana:latest FROM grafana/grafana:latest

View File

@@ -0,0 +1,68 @@
# The root route on which each incoming alert enters.
route:
group_by: ["alertname", "job"]
group_wait: 20s
group_interval: 5m
repeat_interval: 3h
receiver: discord_webhook
receivers:
- name: "discord_webhook"
discord_configs:
- webhook_url: "{{ alertmanager_discord_webhook }}"
{# - send_resolved: true#}
{# username: 'Alertmanager'#}
{# webhook_configs:#}
{# - send_resolved: true#}
{# url: '{{ alertmanager_discord_webhook }}'#}
{# username: 'Alertmanager'#}
{# icon_url: 'https://prometheus.io/assets/icon.png'#}
{# icon_emoji: ':alert:'#}
{# send_resolved: true#}
{# text: "{{ .CommonAnnotations.summary }}"#}
{# title: "{{ .CommonLabels.alertname }}"#}
{# color: '{{ if eq .Status "firing" }}#FF0000{{ else }}#00FF00{{ end }}'#}
{# footer: '{{ .CommonLabels.monitor }}'#}
{# footer_icon: 'https://prometheus.io/assets/icon.png'#}
{# actions:#}
{# - type: 'button'#}
{# text: 'Open in Grafana'#}
{# url: '{{ .ExternalURL }}'#}
{# style: 'primary'#}
{# send_resolved: true#}
{# confirm:#}
{# title: 'Are you sure?'#}
{# text: 'This will open Grafana in a new tab.'#}
{# ok_text: 'Yes'#}
{# dismiss_text: 'No'#}
{# fields:#}
{# - title: 'Description'#}
{# value: "{{ .CommonAnnotations.description }}"#}
{# short: false#}
{# - title: 'Details'#}
{# value: "{{ .CommonAnnotations.details }}"#}
{# short: false#}
{# - title: 'Severity'#}
{# value: '{{ if eq .Labels.severity "critical" }}Critical{{ else if eq .Labels.severity "warning" }}Warning{{ else }}Info{{ end }}'#}
{# short: true#}
{# - title: 'Host'#}
{# value: '{{ .CommonLabels.monitor }}'#}
{# short: true#}
{# - title: 'Starts At'#}
{# value: '{{ .StartsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Ends At'#}
{# value: '{{ .EndsAt.Format "2006-01-02 15:04:05" }}'#}
{# short: true#}
{# - title: 'Runbook'#}
{# value: '{{ .CommonAnnotations.runbook_url }}'#}
{# short: true#}
{# - title: 'Dashboard'#}
{# value: '{{ .CommonAnnotations.dashboard_url }}'#}
{# short: true#}
{# - title: 'Alerting Rule'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}
{# - title: 'Alerting Rule Description'#}
{# value: '{{ .CommonLabels.alertname }}'#}
{# short: true#}

View File

@@ -60,17 +60,33 @@ services:
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
container_name: prometheus container_name: prometheus
labels:
- {{ helpers.traefik_labels('prom', port='9090') | indent(6) }}
restart: unless-stopped restart: unless-stopped
# Needed to make config files readable (not anymore, TODO: remove) # Needed to make config files readable (not anymore, TODO: remove)
user: "{{ remote_uid }}" user: "{{ remote_uid }}"
command: command:
- --config.file=/etc/prometheus/prometheus.yml - --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=30d - --storage.tsdb.retention.time=30d
- --web.external-url=https://prom.{{ domain }}/
volumes: volumes:
- ./prometheus_config:/etc/prometheus:ro - ./prometheus_config:/etc/prometheus:ro
- {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro - {{ base_volume_path }}/monitoring/prometheus_configs:/etc/prometheus/extra:ro
- {{ base_volume_path }}/monitoring/prometheus:/prometheus - {{ base_volume_path }}/monitoring/prometheus:/prometheus
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
labels:
- {{ helpers.traefik_labels('alert', port='9093') | indent(6) }}
restart: unless-stopped
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --web.external-url=https://alert.{{ domain }}/
volumes:
- ./alertmanager_config:/etc/alertmanager:ro
- {{ base_volume_path }}/monitoring/alertmanager:/alertmanager
node-exporter: node-exporter:
image: prom/node-exporter:latest image: prom/node-exporter:latest
container_name: node-exporter container_name: node-exporter
@@ -84,11 +100,6 @@ services:
image: gcr.io/cadvisor/cadvisor:latest image: gcr.io/cadvisor/cadvisor:latest
container_name: cadvisor container_name: cadvisor
restart: unless-stopped restart: unless-stopped
command:
- --docker_only=true
- --store_container_labels=false
- --whitelisted_container_labels=com.docker.compose.project,com.docker.compose.service
- --enable_metrics=cpu,cpuLoad,diskIO,memory,network,oom_event,process
volumes: volumes:
- /:/rootfs:ro - /:/rootfs:ro
- /var/run:/var/run:rw - /var/run:/var/run:rw

View File

@@ -3,7 +3,7 @@ apiVersion: 1
providers: providers:
- name: "Grafana" - name: "Grafana"
org_id: 1 org_id: 1
folder: "Alpina" folder: "Services"
type: "file" type: "file"
options: options:
path: "/etc/grafana/provisioning/dashboards" path: "/etc/grafana/provisioning/dashboards"

View File

@@ -1,27 +0,0 @@
from grafanalib.core import Template
# TODO: consider default params for common params like line width, show points, tooltip
PrometheusTemplate = Template(
name='datasource',
type='datasource',
label='Prometheus',
query='prometheus',
)
# TODO: this slightly less (clown emoji), normal Target gave me errors in grafana
class LokiTarget(object):
def __init__(self, loki_datasource, expr, legendFormat, refId):
self.loki_datasource = loki_datasource
self.expr = expr
self.legendFormat = legendFormat
self.refId = refId
def to_json_data(self):
return {
'datasource': self.loki_datasource,
'expr': self.expr,
'legendFormat': self.legendFormat,
'refId': self.refId,
'queryType': 'range',
}

View File

@@ -5,21 +5,28 @@ from grafanalib.core import (
) )
from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC from grafanalib.formatunits import BYTES_IEC, SECONDS, BYTES_SEC_IEC
from common import LokiTarget, PrometheusTemplate prom_datasource='prometheus'
prom_datasource='${datasource}'
loki_datasource='loki' loki_datasource='loki'
# TODO: this is (clown emoji), normal Target gave me errors in grafana
class LokiTarget(object):
def to_json_data(self):
return {
'datasource': loki_datasource,
'expr': '{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
'legendFormat': '{{ container_name }}',
'refId': 'A',
'queryType': 'range',
}
dashboard = Dashboard( dashboard = Dashboard(
title='Containers', title='Containers',
uid='containers', uid='containers',
description='Data for compose projects from default Prometheus datasource collected by Cadvisor', description='Data for compose projects from default Prometheus datasource collected by Cadvisor',
tags=[ tags=[
'linux', 'example'
'docker',
], ],
templating=Templating(list=[ templating=Templating(list=[
PrometheusTemplate,
Template( Template(
name='compose_project', name='compose_project',
label='Compose Project', label='Compose Project',
@@ -37,6 +44,7 @@ dashboard = Dashboard(
includeAll=True, includeAll=True,
multi=True, multi=True,
refresh=REFRESH_ON_TIME_RANGE_CHANGE, refresh=REFRESH_ON_TIME_RANGE_CHANGE,
), ),
Template( Template(
name='logs_query', name='logs_query',
@@ -48,6 +56,7 @@ dashboard = Dashboard(
timezone='browser', timezone='browser',
panels=[ panels=[
TimeSeries( TimeSeries(
id=1,
title='Container Memory Usage', title='Container Memory Usage',
unit=BYTES_IEC, unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=0), gridPos=GridPos(h=8, w=12, x=0, y=0),
@@ -67,14 +76,13 @@ dashboard = Dashboard(
], ],
), ),
TimeSeries( TimeSeries(
id=2,
title='Container CPU Usage', title='Container CPU Usage',
unit=SECONDS, unit=SECONDS,
gridPos=GridPos(h=8, w=12, x=12, y=0), gridPos=GridPos(h=8, w=12, x=12, y=0),
lineWidth=2, lineWidth=2,
fillOpacity=10, fillOpacity=10,
showPoints='never', showPoints='never',
tooltipMode='all',
tooltipSort='desc',
targets=[ targets=[
Target( Target(
datasource=prom_datasource, datasource=prom_datasource,
@@ -85,6 +93,7 @@ dashboard = Dashboard(
], ],
), ),
TimeSeries( TimeSeries(
id=3,
title='Container Network Traffic', title='Container Network Traffic',
unit=BYTES_SEC_IEC, unit=BYTES_SEC_IEC,
gridPos=GridPos(h=8, w=12, x=0, y=8), gridPos=GridPos(h=8, w=12, x=0, y=8),
@@ -109,6 +118,7 @@ dashboard = Dashboard(
], ],
), ),
Logs( Logs(
id=4,
title='', title='',
gridPos=GridPos(h=8, w=12, x=12, y=8), gridPos=GridPos(h=8, w=12, x=12, y=8),
showLabels=True, showLabels=True,
@@ -117,12 +127,13 @@ dashboard = Dashboard(
prettifyLogMessage=True, prettifyLogMessage=True,
dedupStrategy='numbers', dedupStrategy='numbers',
targets=[ targets=[
LokiTarget( LokiTarget(),
loki_datasource=loki_datasource, # Target(
expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`', # datasource=loki_datasource,
legendFormat='{{ container_name }}', # expr='{compose_project=~"$compose_project", container_name=~"$container_name"} |= `$logs_query`',
refId='A', # legendFormat='{{ container_name }}',
), # refId='A',
# ),
], ],
), ),
], ],

View File

@@ -0,0 +1,51 @@
from grafanalib.core import (
Dashboard, TimeSeries, GaugePanel,
Target, GridPos,
OPS_FORMAT
)
dashboard = Dashboard(
title="Python generated example dashboard",
description="Example dashboard using the Random Walk and default Prometheus datasource",
tags=[
'example'
],
timezone="browser",
panels=[
TimeSeries(
title="Random Walk",
dataSource='default',
targets=[
Target(
datasource='grafana',
expr='example',
),
],
gridPos=GridPos(h=8, w=16, x=0, y=0),
),
GaugePanel(
title="Random Walk",
dataSource='default',
targets=[
Target(
datasource='grafana',
expr='example',
),
],
gridPos=GridPos(h=4, w=4, x=17, y=0),
),
TimeSeries(
title="Prometheus http requests",
dataSource='prometheus',
targets=[
Target(
expr='rate(prometheus_http_requests_total[5m])',
legendFormat="{{ handler }}",
refId='A',
),
],
unit=OPS_FORMAT,
gridPos=GridPos(h=8, w=16, x=0, y=10),
),
],
).auto_panel_ids()

View File

@@ -1,139 +0,0 @@
from grafanalib.core import Dashboard, Templating, Template, TimeSeries, PERCENT_UNIT_FORMAT, GridPos, Target
from grafanalib.formatunits import BYTES_IEC
from common import PrometheusTemplate
from node_consts import CPU_BASIC_COLORS, MEMORY_BASIC_COLORS
dashboard = Dashboard(
title='Node Exporter',
uid='node',
description='Node Exporter (not quite full)',
tags=[
'linux',
],
timezone='browser',
templating=Templating(list=[
# Datasource
PrometheusTemplate,
# Job
Template(
name='job',
label='Job',
dataSource='${datasource}',
query='label_values(node_uname_info, job)',
),
# Instance
Template(
name='instance',
label='Instance',
dataSource='${datasource}',
query='label_values(node_uname_info{job="$job"}, instance)',
),
]),
panels=[
# CPU Basic
TimeSeries(
title='CPU Basic',
description='Basic CPU usage info',
unit=PERCENT_UNIT_FORMAT,
gridPos=GridPos(h=8, w=12, x=0, y=0),
lineWidth=1,
fillOpacity=30,
showPoints='never',
stacking={'mode': 'percent', 'group': 'A'},
tooltipMode='all',
tooltipSort='desc',
targets=[
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="system"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Busy System',
refId='A',
),
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="user"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Busy User',
refId='B',
),
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="iowait"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Busy Iowait',
refId='C',
),
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode=~".*irq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Busy IRQs',
refId='D',
),
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode!="idle",mode!="user",mode!="system",mode!="iowait",mode!="irq",mode!="softirq"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Busy Other',
refId='E',
),
Target(
datasource='${datasource}',
expr='sum(irate(node_cpu_seconds_total{instance="$instance",job="$job", mode="idle"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance="$instance",job="$job"}) by (cpu)))',
legendFormat='Idle',
refId='F',
),
],
# Extra JSON for the colors
extraJson=CPU_BASIC_COLORS,
),
# Memory Basic
TimeSeries(
title='Memory Basic',
description='Basic memory usage',
unit=BYTES_IEC,
gridPos=GridPos(h=8, w=12, x=12, y=0),
lineWidth=1,
fillOpacity=30,
showPoints='never',
stacking={'mode': 'normal', 'group': 'A'},
tooltipMode='all',
tooltipSort='desc',
targets=[
Target(
datasource='${datasource}',
expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"}',
format='time_series',
legendFormat='RAM Total',
refId='A',
),
Target(
datasource='${datasource}',
expr='node_memory_MemTotal_bytes{instance="$instance",job="$job"} - node_memory_MemFree_bytes{instance="$instance",job="$job"} - (node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"})',
format='time_series',
legendFormat='RAM Used',
refId='B',
),
Target(
datasource='${datasource}',
expr='node_memory_Cached_bytes{instance="$instance",job="$job"} + node_memory_Buffers_bytes{instance="$instance",job="$job"} + node_memory_SReclaimable_bytes{instance="$instance",job="$job"}',
legendFormat='RAM Cache + Buffer',
refId='C',
),
Target(
datasource='${datasource}',
expr='node_memory_MemFree_bytes{instance="$instance",job="$job"}',
legendFormat='RAM Free',
refId='D',
),
Target(
datasource='${datasource}',
expr='(node_memory_SwapTotal_bytes{instance="$instance",job="$job"} - node_memory_SwapFree_bytes{instance="$instance",job="$job"})',
legendFormat='SWAP Used',
refId='E',
),
],
# Extra JSON for the colors
extraJson=MEMORY_BASIC_COLORS,
),
# TODO: Network Basic
# TODO: Disk Basic
],
).auto_panel_ids()

View File

@@ -1,487 +0,0 @@
# TODO: Question life decisions (I'm not sure if this is good)
CPU_BASIC_COLORS = {
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Busy Iowait"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#890F02",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Idle"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#052B51",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Busy Iowait"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#890F02",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Idle"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Busy System"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EAB839",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Busy User"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#0A437C",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Busy Other"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6D1F62",
"mode": "fixed"
}
}
]
}
]
},
}
MEMORY_BASIC_COLORS = {
"fieldConfig": {
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Apps"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#629E51",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Buffers"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#614D93",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Cache"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6D1F62",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Cached"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#511749",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Committed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#508642",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Free"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#0A437C",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#CFFAFF",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Inactive"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#584477",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "PageTables"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#0A50A1",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Page_Tables"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#0A50A1",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "RAM_Free"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E0F9D7",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "SWAP Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#BF1B00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Slab"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#806EB7",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Slab_Cache"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E0752D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Swap"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#BF1B00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Swap Used"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#BF1B00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Swap_Cache"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#C15C17",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Swap_Free"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#2F575E",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Unused"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#EAB839",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "RAM Total"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E0F9D7",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
},
{
"id": "custom.stacking",
"value": {
"group": False,
"mode": "normal"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "RAM Cache + Buffer"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#052B51",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "RAM Free"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#7EB26D",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Available"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#DEDAF7",
"mode": "fixed"
}
},
{
"id": "custom.fillOpacity",
"value": 0
},
{
"id": "custom.stacking",
"value": {
"group": False,
"mode": "normal"
}
}
]
}
]
}
}

View File

@@ -15,6 +15,18 @@ datasources:
url: http://prometheus:9090 url: http://prometheus:9090
editable: false editable: false
- name: Alertmanager
type: alertmanager
access: proxy
uid: alertmanager
url: http://alertmanager:9093
jsonData:
# Valid options for implementation include mimir, cortex and prometheus
implementation: prometheus
# Whether Grafana should send alert instances to this Alertmanager
handleGrafanaManagedAlerts: true
editable: false
- name: InfluxDB - name: InfluxDB
type: influxdb type: influxdb
access: proxy access: proxy

View File

@@ -17,6 +17,13 @@ common:
schema_config: schema_config:
configs: configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v12
index:
prefix: index_
period: 24h
- from: 2024-10-18 - from: 2024-10-18
index: index:
period: 24h period: 24h
@@ -26,5 +33,5 @@ schema_config:
store: tsdb store: tsdb
# TODO: Figure this out # TODO: Figure this out
# ruler: ruler:
# alertmanager_url: http://localhost:9093 alertmanager_url: http://localhost:9093

View File

@@ -0,0 +1,23 @@
groups:
- name: qbit-low-traffic
interval: 1m
rules:
- alert: QbitLowTraffic
expr: |
rate(container_network_transmit_bytes_total{name=~"gluetun"}[1m]) < 1024
for: 2m
labels:
severity: warning
annotations:
title: 'Low traffic on qBit'
description: |
The traffic on qBittorrent is lower than 1KiB/s for 2 minutes.
Last value was x bytes/s.
[Grafana Dashboard](https://grafana.{{ domain }}/d/containers?orgId=1)
[View in Grafana](https://grafana.{{ domain }}/d/containers?orgId=1&viewPanel=3)
__dashboard__uid: 'containers'
__orgId__: 1
__panelId__: 3

View File

@@ -0,0 +1,20 @@
groups:
- name: demo-service-alerts
rules:
- alert: DemoServiceHighErrorRate
expr: |
(
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{status=~"5..",job="demo"}[1m])
)
/
sum without(status, instance) (
rate(demo_api_request_duration_seconds_count{job="demo"}[1m])
) * 100 > 0.5
)
for: 1m
labels:
severity: critical
annotations:
title: 'High 5xx rate for {{'{{ $labels.method }}'}} on {{'{{ $labels.path }}'}}'
description: 'The 5xx error rate for path {{'{{ $labels.path }}'}} with method {{'{{ $labels.method }}'}} in {{'{{ $labels.job }}'}} is {{'{{ printf "%.2f" $value }}'}}%.'

View File

@@ -5,6 +5,11 @@ global:
external_labels: external_labels:
monitor: "{{ ansible_host }}" monitor: "{{ ansible_host }}"
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:9093"]
scrape_configs: scrape_configs:
- job_name: "prometheus" - job_name: "prometheus"
static_configs: static_configs:
@@ -30,7 +35,15 @@ scrape_configs:
static_configs: static_configs:
- targets: ["promtail:9080"] - targets: ["promtail:9080"]
- job_name: 'demo'
static_configs:
- targets:
- 'demo.promlabs.com:10000'
- 'demo.promlabs.com:10001'
- 'demo.promlabs.com:10002'
rule_files: rule_files:
- "/etc/prometheus/container.alerts.yml"
- "/etc/prometheus/extra/rules/*.yml" - "/etc/prometheus/extra/rules/*.yml"
- "/etc/prometheus/extra/rules/*.json" - "/etc/prometheus/extra/rules/*.json"

View File

@@ -5,11 +5,10 @@
post_tasks: post_tasks:
- name: Docker prune objects - name: Docker prune objects
docker_prune: docker_prune:
containers: true containers: yes
# Keep images for building grafana images: yes
images: true
images_filters: images_filters:
until: "720h" dangling: false
networks: true networks: true
volumes: true volumes: true
builder_cache: false builder_cache: true