Merge pull request 'monitoring' (#8) from monitoring into master

Reviewed-on: https://codeberg.org/stura-htw-dresden/stura-infra/pulls/8
This commit is contained in:
goeranh 2026-04-20 14:29:19 +02:00
commit 3c186a9e56
4 changed files with 417 additions and 37 deletions

View file

@ -193,6 +193,7 @@
[
./hosts/${input}
./default.nix
./modules/monitoring.nix
disko.nixosModules.disko
authentik.nixosModules.default
mailserver.nixosModules.mailserver

View file

@ -0,0 +1,246 @@
{
config,
lib,
pkgs,
modulesPath,
...
}:
{
imports = [
"${modulesPath}/virtualisation/proxmox-lxc.nix"
];
# Static IPv4 addressing on eth0 plus the inbound TCP ports this host opens.
networking = {
  hostName = "monitoring";
  fqdn = "monitoring.adm.htw.stura-dresden.de";

  defaultGateway.address = "141.56.51.254";
  defaultGateway.interface = "eth0";

  interfaces.eth0.ipv4.addresses = [
    {
      prefixLength = 24;
      address = "141.56.51.20";
    }
  ];

  firewall.enable = true;
  firewall.allowedTCPPorts = [
    # Web traffic for the nginx virtual hosts
    80
    443
    # OTLP receivers (gRPC, then HTTP) used by Proxmox
    4317
    4318
  ];
};
# Loki - log aggregation; one instance storing everything under /var/lib/loki
services.loki.enable = true;
services.loki.configuration = {
  auth_enabled = false;

  server.http_listen_port = 3100;
  server.grpc_listen_port = 9096;

  common = {
    path_prefix = "/var/lib/loki";
    replication_factor = 1;
    storage.filesystem.chunks_directory = "/var/lib/loki/chunks";
    storage.filesystem.rules_directory = "/var/lib/loki/rules";
    ring.instance_addr = "127.0.0.1";
    ring.kvstore.store = "inmemory";
  };

  # Ingestion/query ceilings and how long log data is kept.
  limits_config = {
    ingestion_rate_mb = 32;
    ingestion_burst_size_mb = 64;
    per_stream_rate_limit = "32MB";
    per_stream_rate_limit_burst = "64MB";
    max_query_series = 100000;
    retention_period = "672h"; # 28 days
  };

  # retention_period above is paired with retention being switched on here.
  compactor.retention_enabled = true;
  compactor.working_directory = "/var/lib/loki/compactor";
  compactor.delete_request_store = "filesystem";

  schema_config.configs = [
    {
      from = "2024-01-01";
      schema = "v13";
      store = "tsdb";
      object_store = "filesystem";
      index.prefix = "index_";
      index.period = "24h";
    }
  ];
};
# Mimir - metrics storage; every ring uses replication factor 1 and all data
# lives on the local filesystem under /var/lib/mimir.
services.mimir.enable = true;
services.mimir.configuration = {
  multitenancy_enabled = false;

  server.http_listen_port = 9009;
  server.grpc_listen_port = 9095;

  # The only join member is this host's own address.
  memberlist = {
    bind_addr = [ "0.0.0.0" ];
    bind_port = 7946;
    advertise_addr = "141.56.51.20";
    join_members = [ "141.56.51.20:7946" ];
  };

  distributor.ring.kvstore.store = "memberlist";

  ingester.ring = {
    replication_factor = 1;
    kvstore.store = "memberlist";
  };

  store_gateway.sharding_ring = {
    replication_factor = 1;
    kvstore.store = "memberlist";
  };

  blocks_storage.backend = "filesystem";
  blocks_storage.filesystem.dir = "/var/lib/mimir/data";

  ruler_storage.backend = "filesystem";
  ruler_storage.filesystem.dir = "/var/lib/mimir/rules";

  compactor.data_dir = "/var/lib/mimir/compactor";

  limits = {
    ingestion_rate = 100000;
    ingestion_burst_size = 200000;
    max_global_series_per_user = 0;
    compactor_blocks_retention_period = "672h"; # 28 days
  };
};
# Grafana - dashboards; listens on loopback only and is published through the
# nginx virtual host for mon.adm.htw.stura-dresden.de.
services.grafana = {
  enable = true;

  settings.server = {
    http_addr = "127.0.0.1";
    http_port = 3000;
    domain = "mon.adm.htw.stura-dresden.de";
    root_url = "https://mon.adm.htw.stura-dresden.de";
  };

  # NOTE(review): nothing in this file creates
  # /var/lib/grafana/admin_password - confirm it is provisioned out of band.
  settings.security = {
    admin_user = "admin";
    admin_password = "$__file{/var/lib/grafana/admin_password}";
  };

  # Pre-register the two local backends as data sources.
  provision.enable = true;
  provision.datasources.settings.datasources = [
    {
      name = "Mimir";
      type = "prometheus";
      url = "http://localhost:9009/prometheus";
      isDefault = true;
    }
    {
      name = "Loki";
      type = "loki";
      url = "http://localhost:3100";
    }
  ];
};
# Nginx reverse proxy with ACME certificates
#
# NOTE(review): the Loki (log.) and Mimir (met.) virtual hosts are proxied
# without any access control at the nginx level, while both backends run with
# authentication/multitenancy switched off - confirm this exposure is intended.
services.nginx =
  let
    # TLS-only virtual host forwarding "/" to a backend on loopback.
    # `locationExtras` is merged into the "/" location for per-host additions.
    proxyTo = port: locationExtras: {
      enableACME = true;
      forceSSL = true;
      locations."/" = {
        proxyPass = "http://127.0.0.1:${toString port}";
        recommendedProxySettings = true;
      } // locationExtras;
    };
    withWebsockets = {
      proxyWebsockets = true;
    };
  in
  {
    enable = true;
    virtualHosts = {
      "log.adm.htw.stura-dresden.de" = proxyTo 3100 withWebsockets;
      "met.adm.htw.stura-dresden.de" = proxyTo 9009 { };
      "mon.adm.htw.stura-dresden.de" = proxyTo 3000 withWebsockets;
    };
  };
# Vector - receive OpenTelemetry data from Proxmox and forward to Loki/Mimir
#
# Only the OTLP source and the two normalising transforms are declared here;
# their IDs are attached to the sinks via stura.monitoring.extra*Inputs.
services.vector.settings = {
  # OTLP receiver; both listeners bind on all interfaces (ports opened in the
  # firewall section of this file).
  sources.proxmox_otlp = {
    type = "opentelemetry";
    grpc.address = "0.0.0.0:4317";
    http.address = "0.0.0.0:4318";
  };

  # Give log events the `host` and `unit` fields, falling back to "proxmox"
  # when the corresponding OTLP attribute is missing or not a string.
  transforms.proxmox_normalize_logs = {
    type = "remap";
    inputs = [ "proxmox_otlp.logs" ];
    source = ''
      .host = string(.resources."host.name") ?? "proxmox"
      .unit = string(.attributes."service.name") ?? "proxmox"
    '';
  };

  # Give metric events a `host` tag.
  #
  # Metric events carry no `.resources` field: the OpenTelemetry source
  # flattens resource attributes into tags prefixed with "resource.", so the
  # host name is looked up there first. The previous `.resources` lookup is
  # kept as a second choice so behaviour cannot regress, and "proxmox" stays
  # the last-resort default.
  # NOTE(review): verify the "resource.host.name" tag name against the
  # deployed Vector version.
  transforms.proxmox_normalize_metrics = {
    type = "remap";
    inputs = [ "proxmox_otlp.metrics" ];
    source = ''
      .tags.host = string(.tags."resource.host.name") ?? string(.resources."host.name") ?? "proxmox"
    '';
  };
};
# Route the Proxmox transforms declared in this file into the Mimir and Loki
# sinks through the stura.monitoring options.
stura.monitoring.extraLogInputs = [ "proxmox_normalize_logs" ];
stura.monitoring.extraMetricInputs = [ "proxmox_normalize_metrics" ];

services.openssh = {
  enable = true;
};

system = {
  stateVersion = "25.11";
};
}

View file

@ -295,43 +295,43 @@
"127.0.0.1"
];
listenOnIpv6 = [ ];
# Master zone for htw.stura-dresden.de. The zone file is generated at build
# time: a fixed SOA/NS/proxy header followed by one CNAME per matching entry
# in `forwards`.
zones = {
"htw.stura-dresden.de" = {
master = true;
# pkgs.writeText turns the rendered text below into the zone file.
# NOTE(review): the serial in the SOA record is a literal, so it does not
# change when the generated CNAME list changes - confirm that is acceptable.
file = pkgs.writeText "htw.stura-dresden.de.zone" ''
$TTL 3600
@ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
2026031301 ; Serial (YYYYMMDDNN)
3600 ; Refresh (1 hour)
1800 ; Retry (30 minutes)
604800 ; Expire (1 week)
86400 ) ; Minimum TTL (1 day)
; Name servers
@ IN NS proxy.htw.stura-dresden.de.
; Proxy host - main IPv4 gateway
proxy IN A 141.56.51.1
proxy IN AAAA 2a01:4f8:1c19:96f8::1
; Auto-generated CNAME records for all subdomains pointing to proxy
${lib.foldlAttrs (
# Fold over `forwards`, appending one CNAME line to the accumulated text
# `prev` for every entry whose `domain` lies inside this zone.
prev: name: value:
let
zoneSuffix = ".htw.stura-dresden.de";
# Check if this domain belongs to our zone
isInZone = lib.hasSuffix zoneSuffix value.domain;
# Extract subdomain by removing the zone suffix
subdomain = lib.removeSuffix zoneSuffix value.domain;
in
# Entries outside the zone, or with an empty subdomain, add nothing.
if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
else
prev
) "" forwards}
'';
};
};
# zones = {
# "htw.stura-dresden.de" = {
# master = true;
# file = pkgs.writeText "htw.stura-dresden.de.zone" ''
# $TTL 3600
# @ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
# 2026031301 ; Serial (YYYYMMDDNN)
# 3600 ; Refresh (1 hour)
# 1800 ; Retry (30 minutes)
# 604800 ; Expire (1 week)
# 86400 ) ; Minimum TTL (1 day)
#
# ; Name servers
# @ IN NS proxy.htw.stura-dresden.de.
#
# ; Proxy host - main IPv4 gateway
# proxy IN A 141.56.51.1
# proxy IN AAAA 2a01:4f8:1c19:96f8::1
#
# ; Auto-generated CNAME records for all subdomains pointing to proxy
# ${lib.foldlAttrs (
# prev: name: value:
# let
# zoneSuffix = ".htw.stura-dresden.de";
# # Check if this domain belongs to our zone
# isInZone = lib.hasSuffix zoneSuffix value.domain;
# # Extract subdomain by removing the zone suffix
# subdomain = lib.removeSuffix zoneSuffix value.domain;
# in
# if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
# prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
# else
# prev
# ) "" forwards}
# '';
# };
# };
};
# Chrony NTP server for the internal network
@ -448,6 +448,7 @@
stats show-node
stats show-modules
stats admin if TRUE # Enable admin operations
http-request use-service prometheus-exporter if { path /metrics }
frontend http-in
bind *:80
@ -547,8 +548,27 @@
) "" forwards}
'';
};
# Scrape HAProxy's Prometheus endpoint and ship the result to Mimir together
# with the host metrics configured in modules/monitoring.nix.
vector.settings = {
  # Polls the local HAProxy /metrics endpoint every 15 seconds.
  sources.haproxy_metrics = {
    type = "prometheus_scrape";
    endpoints = [ "http://127.0.0.1:8404/metrics" ];
    scrape_interval_secs = 15;
  };

  # Tag every scraped metric with this machine's hostname.
  transforms.add_host_label_haproxy = {
    type = "remap";
    inputs = [ "haproxy_metrics" ];
    source = ''
      .tags.host = get_hostname!()
    '';
  };

  # Contribute only the HAProxy transform. List-valued settings defined in
  # several modules are concatenated, so this is merged with the inputs that
  # modules/monitoring.nix already declares ("add_host_label_metrics" plus
  # stura.monitoring.extraMetricInputs). The previous lib.mkForce replaced that
  # list wholesale, silently dropping extraMetricInputs and duplicating the
  # module's internal transform name here.
  sinks.mimir.inputs = [ "add_host_label_haproxy" ];
};
};
environment.systemPackages = with pkgs; [
];

113
modules/monitoring.nix Normal file
View file

@ -0,0 +1,113 @@
{ pkgs, lib, config, ... }:
let
cfg = config.stura.monitoring;
in {
# Per-host extension points for the shared vector configuration below.
options.stura.monitoring =
  let
    # Every option in this namespace is a list of strings defaulting to empty;
    # only the description and example differ.
    mkStrListOption =
      description: example:
      lib.mkOption {
        inherit description example;
        type = with lib.types; listOf str;
        default = [ ];
      };
  in
  {
    extraLogFiles =
      mkStrListOption "Additional log file paths for vector to scrape and forward to Loki."
        [
          "/var/log/nginx/access.log"
          "/var/log/nginx/error.log"
        ];

    extraGroups =
      mkStrListOption
        "Supplementary groups added to the vector systemd service to allow reading protected log files."
        [
          "nginx"
          "postfix"
        ];

    extraMetricInputs =
      mkStrListOption
        "Additional vector component IDs to feed into the mimir sink alongside host_metrics."
        [ "proxmox_normalize_metrics" ];

    extraLogInputs =
      mkStrListOption
        "Additional vector component IDs to feed into the loki sink alongside journald_logs."
        [ "proxmox_normalize_logs" ];
  };
config =
  let
    nginxEnabled = config.services.nginx.enable;
    # The file source (and its wiring into the log transform) exists only when
    # there is at least one file to tail.
    wantFileSource = nginxEnabled || cfg.extraLogFiles != [ ];
  in
  {
    # rewrite these host entries on each system, this does not go through proxy
    networking.hosts."141.56.51.20" = [
      "mon.adm.htw.stura-dresden.de"
      "log.adm.htw.stura-dresden.de"
      "met.adm.htw.stura-dresden.de"
    ];

    services.vector.enable = true;
    services.vector.settings = {
      sources =
        {
          host_metrics = {
            type = "host_metrics";
            collectors = [ "cpu" "disk" "filesystem" "load" "memory" "network" ];
          };
          journald_logs = {
            type = "journald";
            include_units = [ ]; # empty = collect all units
          };
        }
        // (
          if wantFileSource then
            {
              # nginx's access log is added automatically on hosts running nginx.
              extra_log_files = {
                type = "file";
                include = lib.optionals nginxEnabled [ "/var/log/nginx/access.log" ] ++ cfg.extraLogFiles;
              };
            }
          else
            { }
        );

      transforms = {
        # Stamp each host metric with the local hostname.
        add_host_label_metrics = {
          type = "remap";
          inputs = [ "host_metrics" ];
          source = ''
            .tags.host = get_hostname!()
          '';
        };
        # Set `host` and `unit` on every log event; events without a usable
        # _SYSTEMD_UNIT field get the unit "file".
        add_host_label_logs = {
          type = "remap";
          inputs = [ "journald_logs" ] ++ lib.optionals wantFileSource [ "extra_log_files" ];
          source = ''
            .host = get_hostname!()
            .unit = string(."_SYSTEMD_UNIT") ?? "file"
          '';
        };
      };

      # NOTE(review): both sinks use https but set
      # tls.verify_certificate = false - confirm this is intentional.
      sinks.mimir = {
        type = "prometheus_remote_write";
        endpoint = "https://met.adm.htw.stura-dresden.de/api/v1/push";
        inputs = [ "add_host_label_metrics" ] ++ cfg.extraMetricInputs;
        healthcheck.enabled = false;
        tls.verify_certificate = false;
      };
      sinks.loki = {
        type = "loki";
        endpoint = "https://log.adm.htw.stura-dresden.de";
        inputs = [ "add_host_label_logs" ] ++ cfg.extraLogInputs;
        encoding.codec = "json";
        labels.host = "{{ host }}";
        labels.unit = "{{ unit }}";
        tls.verify_certificate = false;
      };
    };

    # Group memberships for the vector service: systemd-journal always, nginx
    # when nginx is enabled, then whatever the host adds via extraGroups.
    systemd.services.vector.serviceConfig.SupplementaryGroups =
      [ "systemd-journal" ] ++ lib.optionals nginxEnabled [ "nginx" ] ++ cfg.extraGroups;
  };
}