diff --git a/flake.nix b/flake.nix
index 2d1426d..f0f177e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -193,6 +193,7 @@
           [
             ./hosts/${input}
             ./default.nix
+            ./modules/monitoring.nix
             disko.nixosModules.disko
             authentik.nixosModules.default
             mailserver.nixosModules.mailserver
diff --git a/hosts/monitoring/default.nix b/hosts/monitoring/default.nix
new file mode 100644
index 0000000..e30daba
--- /dev/null
+++ b/hosts/monitoring/default.nix
@@ -0,0 +1,254 @@
+# Monitoring host (Proxmox LXC container): Loki, Mimir and Grafana behind an
+# nginx reverse proxy, plus a Vector OTLP receiver for Proxmox telemetry.
+{
+  config,
+  lib,
+  pkgs,
+  modulesPath,
+  ...
+}:
+{
+  imports = [
+    "${modulesPath}/virtualisation/proxmox-lxc.nix"
+  ];
+
+  networking = {
+    hostName = "monitoring";
+    fqdn = "monitoring.adm.htw.stura-dresden.de";
+    interfaces.eth0.ipv4.addresses = [
+      {
+        address = "141.56.51.20";
+        prefixLength = 24;
+      }
+    ];
+    defaultGateway = {
+      address = "141.56.51.254";
+      interface = "eth0";
+    };
+    firewall = {
+      enable = true;
+      allowedTCPPorts = [
+        80
+        443
+        4317 # OTLP gRPC (Proxmox)
+        4318 # OTLP HTTP (Proxmox)
+      ];
+    };
+  };
+
+  # Loki - Log aggregation system
+  services.loki = {
+    enable = true;
+    configuration = {
+      auth_enabled = false;
+      server = {
+        http_listen_port = 3100;
+        grpc_listen_port = 9096;
+      };
+      common = {
+        path_prefix = "/var/lib/loki";
+        storage.filesystem = {
+          chunks_directory = "/var/lib/loki/chunks";
+          rules_directory = "/var/lib/loki/rules";
+        };
+        replication_factor = 1;
+        ring = {
+          instance_addr = "127.0.0.1";
+          kvstore.store = "inmemory";
+        };
+      };
+      limits_config = {
+        ingestion_rate_mb = 32;
+        ingestion_burst_size_mb = 64;
+        per_stream_rate_limit = "32MB";
+        per_stream_rate_limit_burst = "64MB";
+        max_query_series = 100000;
+        retention_period = "672h"; # 28 days
+      };
+      compactor = {
+        retention_enabled = true;
+        working_directory = "/var/lib/loki/compactor";
+        delete_request_store = "filesystem";
+      };
+      schema_config = {
+        configs = [
+          {
+            from = "2024-01-01";
+            store = "tsdb";
+            object_store = "filesystem";
+            schema = "v13";
+            index = {
+              prefix = "index_";
+              period = "24h";
+            };
+          }
+        ];
+      };
+    };
+  };
+
+  # Mimir - Scalable metrics storage
+  services.mimir = {
+    enable = true;
+    configuration = {
+      multitenancy_enabled = false;
+      memberlist = {
+        bind_addr = [ "0.0.0.0" ];
+        bind_port = 7946;
+        advertise_addr = "141.56.51.20";
+        join_members = [ "141.56.51.20:7946" ];
+      };
+      blocks_storage = {
+        backend = "filesystem";
+        filesystem = {
+          dir = "/var/lib/mimir/data";
+        };
+      };
+      compactor = {
+        data_dir = "/var/lib/mimir/compactor";
+      };
+      distributor = {
+        ring = {
+          kvstore.store = "memberlist";
+        };
+      };
+      ingester = {
+        ring = {
+          kvstore.store = "memberlist";
+          replication_factor = 1;
+        };
+      };
+      ruler_storage = {
+        backend = "filesystem";
+        filesystem = {
+          dir = "/var/lib/mimir/rules";
+        };
+      };
+      server = {
+        http_listen_port = 9009;
+        grpc_listen_port = 9095;
+      };
+      limits = {
+        ingestion_rate = 100000;
+        ingestion_burst_size = 200000;
+        max_global_series_per_user = 0;
+        compactor_blocks_retention_period = "672h"; # 28 days
+      };
+      store_gateway = {
+        sharding_ring = {
+          replication_factor = 1;
+          kvstore.store = "memberlist";
+        };
+      };
+    };
+  };
+
+  # Grafana - Visualization and dashboarding
+  services.grafana = {
+    enable = true;
+    settings = {
+      server = {
+        http_addr = "127.0.0.1";
+        http_port = 3000;
+        domain = "mon.adm.htw.stura-dresden.de";
+        root_url = "https://mon.adm.htw.stura-dresden.de";
+      };
+      security = {
+        admin_user = "admin";
+        admin_password = "$__file{/var/lib/grafana/admin_password}";
+      };
+    };
+    provision = {
+      enable = true;
+      datasources.settings.datasources = [
+        {
+          name = "Mimir";
+          type = "prometheus";
+          url = "http://localhost:9009/prometheus";
+          isDefault = true;
+        }
+        {
+          name = "Loki";
+          type = "loki";
+          url = "http://localhost:3100";
+        }
+      ];
+    };
+  };
+
+  # Nginx reverse proxy with ACME certificates
+  services.nginx = {
+    enable = true;
+
+    # NOTE(review): Loki runs with auth_enabled = false and this vhost adds no
+    # access control, so anyone who can reach port 443 can push and query logs.
+    # The same applies to the Mimir vhost below — confirm this is intended.
+    virtualHosts."log.adm.htw.stura-dresden.de" = {
+      forceSSL = true;
+      enableACME = true;
+      locations."/" = {
+        proxyPass = "http://127.0.0.1:3100";
+        proxyWebsockets = true;
+        recommendedProxySettings = true;
+      };
+    };
+
+    virtualHosts."met.adm.htw.stura-dresden.de" = {
+      forceSSL = true;
+      enableACME = true;
+      locations."/" = {
+        proxyPass = "http://127.0.0.1:9009";
+        recommendedProxySettings = true;
+      };
+    };
+
+    virtualHosts."mon.adm.htw.stura-dresden.de" = {
+      forceSSL = true;
+      enableACME = true;
+      locations."/" = {
+        proxyPass = "http://127.0.0.1:3000";
+        proxyWebsockets = true;
+        recommendedProxySettings = true;
+      };
+    };
+  };
+
+  # Vector - receive OpenTelemetry data from Proxmox and forward to Loki/Mimir
+  services.vector.settings = {
+    sources.proxmox_otlp = {
+      type = "opentelemetry";
+      grpc.address = "0.0.0.0:4317";
+      http.address = "0.0.0.0:4318";
+    };
+
+    # Both remaps fall back to the literal "proxmox" when the OTLP field they
+    # read is missing or not a string.
+    transforms.proxmox_normalize_logs = {
+      type = "remap";
+      inputs = [ "proxmox_otlp.logs" ];
+      source = ''
+        .host = string(.resources."host.name") ?? "proxmox"
+        .unit = string(.attributes."service.name") ?? "proxmox"
+      '';
+    };
+
+    transforms.proxmox_normalize_metrics = {
+      type = "remap";
+      inputs = [ "proxmox_otlp.metrics" ];
+      source = ''
+        .tags.host = string(.resources."host.name") ?? "proxmox"
+      '';
+    };
+  };
+
+  # Feed the normalized Proxmox streams into the shared mimir/loki sinks that
+  # modules/monitoring.nix defines.
+  stura.monitoring = {
+    extraMetricInputs = [ "proxmox_normalize_metrics" ];
+    extraLogInputs = [ "proxmox_normalize_logs" ];
+  };
+
+  services.openssh.enable = true;
+
+  system.stateVersion = "25.11";
+}
diff --git a/hosts/proxy/default.nix b/hosts/proxy/default.nix
index a806790..83306c5 100644
--- a/hosts/proxy/default.nix
+++ b/hosts/proxy/default.nix
@@ -295,43 +295,43 @@
         "127.0.0.1"
       ];
       listenOnIpv6 = [ ];
-      zones = {
-        "htw.stura-dresden.de" = {
-          master = true;
-          file = pkgs.writeText "htw.stura-dresden.de.zone" ''
-            $TTL 3600
-            @ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
-              2026031301 ; Serial (YYYYMMDDNN)
-              3600 ; Refresh (1 hour)
-              1800 ; Retry (30 minutes)
-              604800 ; Expire (1 week)
-              86400 ) ; Minimum TTL (1 day)
-
-            ; Name servers
-            @ IN NS proxy.htw.stura-dresden.de.
-
-            ; Proxy host - main IPv4 gateway
-            proxy IN A 141.56.51.1
-            proxy IN AAAA 2a01:4f8:1c19:96f8::1
-
-            ; Auto-generated CNAME records for all subdomains pointing to proxy
-            ${lib.foldlAttrs (
-              prev: name: value:
-              let
-                zoneSuffix = ".htw.stura-dresden.de";
-                # Check if this domain belongs to our zone
-                isInZone = lib.hasSuffix zoneSuffix value.domain;
-                # Extract subdomain by removing the zone suffix
-                subdomain = lib.removeSuffix zoneSuffix value.domain;
-              in
-              if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
-                prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
-              else
-                prev
-            ) "" forwards}
-          '';
-        };
-      };
+      # zones = {
+      #   "htw.stura-dresden.de" = {
+      #     master = true;
+      #     file = pkgs.writeText "htw.stura-dresden.de.zone" ''
+      #       $TTL 3600
+      #       @ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
+      #         2026031301 ; Serial (YYYYMMDDNN)
+      #         3600 ; Refresh (1 hour)
+      #         1800 ; Retry (30 minutes)
+      #         604800 ; Expire (1 week)
+      #         86400 ) ; Minimum TTL (1 day)
+      #
+      #       ; Name servers
+      #       @ IN NS proxy.htw.stura-dresden.de.
+      #
+      #       ; Proxy host - main IPv4 gateway
+      #       proxy IN A 141.56.51.1
+      #       proxy IN AAAA 2a01:4f8:1c19:96f8::1
+      #
+      #       ; Auto-generated CNAME records for all subdomains pointing to proxy
+      #       ${lib.foldlAttrs (
+      #         prev: name: value:
+      #         let
+      #           zoneSuffix = ".htw.stura-dresden.de";
+      #           # Check if this domain belongs to our zone
+      #           isInZone = lib.hasSuffix zoneSuffix value.domain;
+      #           # Extract subdomain by removing the zone suffix
+      #           subdomain = lib.removeSuffix zoneSuffix value.domain;
+      #         in
+      #         if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
+      #           prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
+      #         else
+      #           prev
+      #       ) "" forwards}
+      #     '';
+      #   };
+      # };
     };
 
     # Chrony NTP server for the internal network
@@ -448,6 +448,7 @@
             stats show-node
             stats show-modules
             stats admin if TRUE  # Enable admin operations
+            http-request use-service prometheus-exporter if { path /metrics }
 
           frontend http-in
             bind *:80
@@ -547,6 +548,28 @@
         ) "" forwards}
       '';
     };
+    vector.settings = {
+      # Scrape HAProxy's built-in Prometheus exporter, exposed by the
+      # `http-request use-service prometheus-exporter` rule in the HAProxy
+      # config above (assumes that frontend binds 127.0.0.1:8404 — TODO confirm).
+      sources.haproxy_metrics = {
+        type = "prometheus_scrape";
+        endpoints = [ "http://127.0.0.1:8404/metrics" ];
+        scrape_interval_secs = 15;
+      };
+      transforms.add_host_label_haproxy = {
+        type = "remap";
+        inputs = [ "haproxy_metrics" ];
+        source = ''
+          .tags.host = get_hostname!()
+        '';
+      };
+    };
   };
 
+  # Feed the HAProxy metrics into the shared mimir sink through the module
+  # option instead of overriding `sinks.mimir.inputs` with lib.mkForce, which
+  # would silently drop inputs contributed by any other module.
+  stura.monitoring.extraMetricInputs = [ "add_host_label_haproxy" ];
+
   environment.systemPackages = with pkgs; [ ];
diff --git a/modules/monitoring.nix b/modules/monitoring.nix
new file mode 100644
index 0000000..c46b660
--- /dev/null
+++ b/modules/monitoring.nix
@@ -0,0 +1,130 @@
+# Shared monitoring module: enables Vector and ships host metrics to Mimir and
+# journald/file logs to Loki.
+{ pkgs, lib, config, ... }:
+let
+  cfg = config.stura.monitoring;
+
+  # Files tailed by the `extra_log_files` source: the nginx access log when
+  # nginx is enabled on this host, plus everything in `extraLogFiles`.
+  logFiles =
+    lib.optional config.services.nginx.enable "/var/log/nginx/access.log"
+    ++ cfg.extraLogFiles;
+
+  # The file source only exists when there is something to tail. The source
+  # definition and the transform consuming it both key off this one flag.
+  hasLogFiles = logFiles != [ ];
+in
+{
+  options.stura.monitoring = {
+    extraLogFiles = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      description = "Additional log file paths for vector to scrape and forward to Loki.";
+      example = [ "/var/log/nginx/access.log" "/var/log/nginx/error.log" ];
+    };
+
+    extraGroups = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      description = "Supplementary groups added to the vector systemd service to allow reading protected log files.";
+      example = [ "nginx" "postfix" ];
+    };
+
+    extraMetricInputs = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      description = "Additional vector component IDs to feed into the mimir sink alongside host_metrics.";
+      example = [ "proxmox_normalize_metrics" ];
+    };
+
+    extraLogInputs = lib.mkOption {
+      type = lib.types.listOf lib.types.str;
+      default = [ ];
+      description = "Additional vector component IDs to feed into the loki sink alongside journald_logs.";
+      example = [ "proxmox_normalize_logs" ];
+    };
+  };
+
+  config = {
+    networking.hosts = {
+      # Pin the monitoring endpoints to the monitoring host on every system so
+      # this traffic does not go through the proxy.
+      "141.56.51.20" = [
+        "mon.adm.htw.stura-dresden.de"
+        "log.adm.htw.stura-dresden.de"
+        "met.adm.htw.stura-dresden.de"
+      ];
+    };
+    services.vector = {
+      enable = true;
+      settings = {
+        sources = {
+          host_metrics = {
+            type = "host_metrics";
+            collectors = [ "cpu" "disk" "filesystem" "load" "memory" "network" ];
+          };
+
+          journald_logs = {
+            type = "journald";
+            include_units = [ ]; # empty = collect all units
+          };
+        } // lib.optionalAttrs hasLogFiles {
+          extra_log_files = {
+            type = "file";
+            include = logFiles;
+          };
+        };
+
+        transforms = {
+          add_host_label_metrics = {
+            type = "remap";
+            inputs = [ "host_metrics" ];
+            source = ''
+              .tags.host = get_hostname!()
+            '';
+          };
+
+          add_host_label_logs = {
+            type = "remap";
+            inputs = [ "journald_logs" ] ++ lib.optional hasLogFiles "extra_log_files";
+            # Events without a string `_SYSTEMD_UNIT` field get unit = "file".
+            source = ''
+              .host = get_hostname!()
+              .unit = string(."_SYSTEMD_UNIT") ?? "file"
+            '';
+          };
+        };
+
+        sinks = {
+          mimir = {
+            type = "prometheus_remote_write";
+            inputs = [ "add_host_label_metrics" ] ++ cfg.extraMetricInputs;
+            endpoint = "https://met.adm.htw.stura-dresden.de/api/v1/push";
+            # NOTE(review): certificate verification is switched off for both
+            # sinks, so the endpoints are not authenticated — confirm whether
+            # the ACME certificates on the monitoring host allow enabling it.
+            tls.verify_certificate = false;
+            healthcheck.enabled = false;
+          };
+
+          loki = {
+            type = "loki";
+            inputs = [ "add_host_label_logs" ] ++ cfg.extraLogInputs;
+            endpoint = "https://log.adm.htw.stura-dresden.de";
+            labels = {
+              host = "{{ host }}";
+              unit = "{{ unit }}";
+            };
+            tls.verify_certificate = false;
+            encoding.codec = "json";
+          };
+        };
+      };
+    };
+
+    systemd.services.vector.serviceConfig.SupplementaryGroups =
+      [ "systemd-journal" ]
+      ++ lib.optional config.services.nginx.enable "nginx"
+      ++ cfg.extraGroups;
+  };
+}