Merge pull request 'monitoring' (#8) from monitoring into master
Reviewed-on: https://codeberg.org/stura-htw-dresden/stura-infra/pulls/8
This commit is contained in:
commit
3c186a9e56
4 changed files with 417 additions and 37 deletions
|
|
@ -193,6 +193,7 @@
|
|||
[
|
||||
./hosts/${input}
|
||||
./default.nix
|
||||
./modules/monitoring.nix
|
||||
disko.nixosModules.disko
|
||||
authentik.nixosModules.default
|
||||
mailserver.nixosModules.mailserver
|
||||
|
|
|
|||
246
hosts/monitoring/default.nix
Normal file
246
hosts/monitoring/default.nix
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
modulesPath,
|
||||
...
|
||||
}:
|
||||
{
|
||||
imports = [
|
||||
"${modulesPath}/virtualisation/proxmox-lxc.nix"
|
||||
];
|
||||
|
||||
# Static network identity of the monitoring container.
networking = {
  hostName = "monitoring";
  fqdn = "monitoring.adm.htw.stura-dresden.de";

  # One fixed IPv4 address on eth0; default route via 141.56.51.254.
  interfaces.eth0.ipv4.addresses = [
    {
      address = "141.56.51.20";
      prefixLength = 24;
    }
  ];
  defaultGateway.address = "141.56.51.254";
  defaultGateway.interface = "eth0";

  # Inbound TCP: the web ports plus the two OTLP receiver ports.
  firewall.enable = true;
  firewall.allowedTCPPorts = [
    80
    443
    4317 # OTLP gRPC (Proxmox)
    4318 # OTLP HTTP (Proxmox)
  ];
};
|
||||
|
||||
# Loki - log aggregation, single instance backed by the local filesystem
services.loki =
  let
    # Root directory for everything Loki persists.
    stateDir = "/var/lib/loki";
  in
  {
    enable = true;

    configuration = {
      auth_enabled = false;

      server.http_listen_port = 3100;
      server.grpc_listen_port = 9096;

      common = {
        path_prefix = stateDir;
        replication_factor = 1;
        storage.filesystem.chunks_directory = "${stateDir}/chunks";
        storage.filesystem.rules_directory = "${stateDir}/rules";
        ring.instance_addr = "127.0.0.1";
        ring.kvstore.store = "inmemory";
      };

      limits_config = {
        ingestion_rate_mb = 32;
        ingestion_burst_size_mb = 64;
        per_stream_rate_limit = "32MB";
        per_stream_rate_limit_burst = "64MB";
        max_query_series = 100000;
        retention_period = "672h"; # 28 days
      };

      # The compactor runs with retention enabled and keeps its state on disk.
      compactor = {
        retention_enabled = true;
        working_directory = "${stateDir}/compactor";
        delete_request_store = "filesystem";
      };

      schema_config.configs = [
        {
          from = "2024-01-01";
          store = "tsdb";
          object_store = "filesystem";
          schema = "v13";
          index.prefix = "index_";
          index.period = "24h";
        }
      ];
    };
  };
|
||||
|
||||
# Mimir - metrics storage, single instance backed by the local filesystem
services.mimir =
  let
    # Root directory for blocks, compactor scratch space and ruler storage.
    stateDir = "/var/lib/mimir";
    # Address this instance advertises and joins (the host's own IPv4).
    ownAddr = "141.56.51.20";
    gossipPort = 7946;
  in
  {
    enable = true;

    configuration = {
      multitenancy_enabled = false;

      server.http_listen_port = 9009;
      server.grpc_listen_port = 9095;

      # The only member listed to join is this instance itself.
      memberlist = {
        bind_addr = [ "0.0.0.0" ];
        bind_port = gossipPort;
        advertise_addr = ownAddr;
        join_members = [ "${ownAddr}:${toString gossipPort}" ];
      };

      blocks_storage.backend = "filesystem";
      blocks_storage.filesystem.dir = "${stateDir}/data";

      compactor.data_dir = "${stateDir}/compactor";

      ruler_storage.backend = "filesystem";
      ruler_storage.filesystem.dir = "${stateDir}/rules";

      # All rings use memberlist as their KV store; replication factor is 1.
      distributor.ring.kvstore.store = "memberlist";
      ingester.ring = {
        kvstore.store = "memberlist";
        replication_factor = 1;
      };
      store_gateway.sharding_ring = {
        kvstore.store = "memberlist";
        replication_factor = 1;
      };

      limits = {
        ingestion_rate = 100000;
        ingestion_burst_size = 200000;
        max_global_series_per_user = 0;
        compactor_blocks_retention_period = "672h"; # 28 days
      };
    };
  };
|
||||
|
||||
# Grafana - dashboards on top of the local Mimir and Loki instances
services.grafana =
  let
    # Public hostname; the nginx vhost of the same name proxies to port 3000.
    publicHost = "mon.adm.htw.stura-dresden.de";
  in
  {
    enable = true;

    # Listen on loopback only.
    settings.server = {
      http_addr = "127.0.0.1";
      http_port = 3000;
      domain = publicHost;
      root_url = "https://${publicHost}";
    };

    settings.security.admin_user = "admin";
    # Uses Grafana's $__file{} syntax to reference a file instead of an inline password.
    settings.security.admin_password = "$__file{/var/lib/grafana/admin_password}";

    provision.enable = true;
    provision.datasources.settings.datasources = [
      {
        name = "Mimir";
        type = "prometheus";
        url = "http://localhost:9009/prometheus";
        isDefault = true;
      }
      {
        name = "Loki";
        type = "loki";
        url = "http://localhost:3100";
      }
    ];
  };
|
||||
|
||||
# Nginx - TLS reverse proxy (ACME certificates) in front of Loki, Mimir and Grafana
services.nginx =
  let
    # Build a vhost that forces HTTPS and forwards "/" to a service on
    # 127.0.0.1:<port>. `proxyWebsockets` is only set when requested.
    proxyTo =
      { port, websockets ? false }:
      {
        forceSSL = true;
        enableACME = true;
        locations."/" = {
          proxyPass = "http://127.0.0.1:${toString port}";
          recommendedProxySettings = true;
        } // (if websockets then { proxyWebsockets = true; } else { });
      };
  in
  {
    enable = true;

    virtualHosts = {
      # Loki
      "log.adm.htw.stura-dresden.de" = proxyTo {
        port = 3100;
        websockets = true;
      };

      # Mimir
      "met.adm.htw.stura-dresden.de" = proxyTo { port = 9009; };

      # Grafana
      "mon.adm.htw.stura-dresden.de" = proxyTo {
        port = 3000;
        websockets = true;
      };
    };
  };
|
||||
|
||||
# Vector - accept OpenTelemetry data from Proxmox and prepare it for Loki/Mimir
services.vector.settings = {
  sources = {
    # OTLP receiver; the ports match the ones opened in the firewall.
    proxmox_otlp = {
      type = "opentelemetry";
      grpc.address = "0.0.0.0:4317";
      http.address = "0.0.0.0:4318";
    };
  };

  transforms = {
    # Set the `host` and `unit` fields on log events, falling back to
    # "proxmox" when the source attribute is not a string.
    proxmox_normalize_logs = {
      type = "remap";
      inputs = [ "proxmox_otlp.logs" ];
      source = ''
        .host = string(.resources."host.name") ?? "proxmox"
        .unit = string(.attributes."service.name") ?? "proxmox"
      '';
    };

    # Set the `host` tag on metric events, with the same fallback.
    proxmox_normalize_metrics = {
      type = "remap";
      inputs = [ "proxmox_otlp.metrics" ];
      source = ''
        .tags.host = string(.resources."host.name") ?? "proxmox"
      '';
    };
  };
};
|
||||
|
||||
# Pass the Proxmox transforms as additional inputs via the stura.monitoring options.
stura.monitoring.extraMetricInputs = [ "proxmox_normalize_metrics" ];
stura.monitoring.extraLogInputs = [ "proxmox_normalize_logs" ];
|
||||
|
||||
services.openssh.enable = true;
|
||||
|
||||
system.stateVersion = "25.11";
|
||||
}
|
||||
|
|
@ -295,43 +295,43 @@
|
|||
"127.0.0.1"
|
||||
];
|
||||
listenOnIpv6 = [ ];
|
||||
zones = {
|
||||
"htw.stura-dresden.de" = {
|
||||
master = true;
|
||||
file = pkgs.writeText "htw.stura-dresden.de.zone" ''
|
||||
$TTL 3600
|
||||
@ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
|
||||
2026031301 ; Serial (YYYYMMDDNN)
|
||||
3600 ; Refresh (1 hour)
|
||||
1800 ; Retry (30 minutes)
|
||||
604800 ; Expire (1 week)
|
||||
86400 ) ; Minimum TTL (1 day)
|
||||
|
||||
; Name servers
|
||||
@ IN NS proxy.htw.stura-dresden.de.
|
||||
|
||||
; Proxy host - main IPv4 gateway
|
||||
proxy IN A 141.56.51.1
|
||||
proxy IN AAAA 2a01:4f8:1c19:96f8::1
|
||||
|
||||
; Auto-generated CNAME records for all subdomains pointing to proxy
|
||||
${lib.foldlAttrs (
|
||||
prev: name: value:
|
||||
let
|
||||
zoneSuffix = ".htw.stura-dresden.de";
|
||||
# Check if this domain belongs to our zone
|
||||
isInZone = lib.hasSuffix zoneSuffix value.domain;
|
||||
# Extract subdomain by removing the zone suffix
|
||||
subdomain = lib.removeSuffix zoneSuffix value.domain;
|
||||
in
|
||||
if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
|
||||
prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
|
||||
else
|
||||
prev
|
||||
) "" forwards}
|
||||
'';
|
||||
};
|
||||
};
|
||||
# zones = {
|
||||
# "htw.stura-dresden.de" = {
|
||||
# master = true;
|
||||
# file = pkgs.writeText "htw.stura-dresden.de.zone" ''
|
||||
# $TTL 3600
|
||||
# @ IN SOA proxy.htw.stura-dresden.de. hostmaster.htw.stura-dresden.de. (
|
||||
# 2026031301 ; Serial (YYYYMMDDNN)
|
||||
# 3600 ; Refresh (1 hour)
|
||||
# 1800 ; Retry (30 minutes)
|
||||
# 604800 ; Expire (1 week)
|
||||
# 86400 ) ; Minimum TTL (1 day)
|
||||
#
|
||||
# ; Name servers
|
||||
# @ IN NS proxy.htw.stura-dresden.de.
|
||||
#
|
||||
# ; Proxy host - main IPv4 gateway
|
||||
# proxy IN A 141.56.51.1
|
||||
# proxy IN AAAA 2a01:4f8:1c19:96f8::1
|
||||
#
|
||||
# ; Auto-generated CNAME records for all subdomains pointing to proxy
|
||||
# ${lib.foldlAttrs (
|
||||
# prev: name: value:
|
||||
# let
|
||||
# zoneSuffix = ".htw.stura-dresden.de";
|
||||
# # Check if this domain belongs to our zone
|
||||
# isInZone = lib.hasSuffix zoneSuffix value.domain;
|
||||
# # Extract subdomain by removing the zone suffix
|
||||
# subdomain = lib.removeSuffix zoneSuffix value.domain;
|
||||
# in
|
||||
# if isInZone && subdomain != "" && subdomain != "htw.stura-dresden.de" then
|
||||
# prev + "${subdomain}${" "}IN${" "}CNAME${" "}proxy.htw.stura-dresden.de.\n"
|
||||
# else
|
||||
# prev
|
||||
# ) "" forwards}
|
||||
# '';
|
||||
# };
|
||||
# };
|
||||
};
|
||||
|
||||
# Chrony NTP server for the internal network
|
||||
|
|
@ -448,6 +448,7 @@
|
|||
stats show-node
|
||||
stats show-modules
|
||||
stats admin if TRUE # Enable admin operations
|
||||
http-request use-service prometheus-exporter if { path /metrics }
|
||||
|
||||
frontend http-in
|
||||
bind *:80
|
||||
|
|
@ -547,8 +548,27 @@
|
|||
) "" forwards}
|
||||
'';
|
||||
};
|
||||
vector.settings = {
  # Scrape the local Prometheus-format endpoint on port 8404 every 15 seconds.
  sources.haproxy_metrics = {
    type = "prometheus_scrape";
    endpoints = [ "http://127.0.0.1:8404/metrics" ];
    scrape_interval_secs = 15;
  };

  # Tag the scraped metrics with this machine's hostname.
  transforms.add_host_label_haproxy = {
    type = "remap";
    inputs = [ "haproxy_metrics" ];
    source = ''
      .tags.host = get_hostname!()
    '';
  };

  # Contribute only the HAProxy transform here. List definitions from
  # different modules are concatenated, so this is appended to the inputs
  # that modules/monitoring.nix already declares (host metrics plus
  # stura.monitoring.extraMetricInputs). The previous lib.mkForce replaced
  # that list wholesale, which dropped any extraMetricInputs and duplicated
  # the module-internal transform name in this host file.
  sinks.mimir.inputs = [ "add_host_label_haproxy" ];
};
|
||||
};
|
||||
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
];
|
||||
|
||||
|
|
|
|||
113
modules/monitoring.nix
Normal file
113
modules/monitoring.nix
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
{ pkgs, lib, config, ... }:
|
||||
let
|
||||
cfg = config.stura.monitoring;
|
||||
in {
|
||||
options.stura.monitoring =
  let
    # Every option in this namespace is a list of strings defaulting to [];
    # only the description and the example differ.
    stringListOption =
      description: example:
      lib.mkOption {
        inherit description example;
        type = lib.types.listOf lib.types.str;
        default = [];
      };
  in
  {
    extraLogFiles =
      stringListOption
        "Additional log file paths for vector to scrape and forward to Loki."
        [ "/var/log/nginx/access.log" "/var/log/nginx/error.log" ];

    extraGroups =
      stringListOption
        "Supplementary groups added to the vector systemd service to allow reading protected log files."
        [ "nginx" "postfix" ];

    extraMetricInputs =
      stringListOption
        "Additional vector component IDs to feed into the mimir sink alongside host_metrics."
        [ "proxmox_normalize_metrics" ];

    extraLogInputs =
      stringListOption
        "Additional vector component IDs to feed into the loki sink alongside journald_logs."
        [ "proxmox_normalize_logs" ];
  };
|
||||
|
||||
config =
  let
    nginxEnabled = config.services.nginx.enable;

    # Files tailed by the `file` source: the nginx access log (when nginx is
    # enabled) followed by the user-supplied paths.
    tailedFiles = lib.optional nginxEnabled "/var/log/nginx/access.log" ++ cfg.extraLogFiles;

    # The `file` source, and its use as a transform input, exist only when
    # there is something to tail.
    tailFiles = cfg.extraLogFiles != [] || nginxEnabled;
  in
  {
    # Resolve the monitoring endpoints straight to the monitoring host on
    # every system; this traffic does not go through the proxy.
    networking.hosts."141.56.51.20" = [
      "mon.adm.htw.stura-dresden.de"
      "log.adm.htw.stura-dresden.de"
      "met.adm.htw.stura-dresden.de"
    ];

    services.vector.enable = true;
    services.vector.settings = {
      sources =
        {
          host_metrics = {
            type = "host_metrics";
            collectors = [ "cpu" "disk" "filesystem" "load" "memory" "network" ];
          };

          journald_logs = {
            type = "journald";
            include_units = []; # empty = collect all units
          };
        }
        // lib.optionalAttrs tailFiles {
          extra_log_files = {
            type = "file";
            include = tailedFiles;
          };
        };

      transforms = {
        # Tag host metrics with the local hostname.
        add_host_label_metrics = {
          type = "remap";
          inputs = [ "host_metrics" ];
          source = ''
            .tags.host = get_hostname!()
          '';
        };

        # Set `host` and `unit` on log events; events without a string
        # _SYSTEMD_UNIT field get the unit "file".
        add_host_label_logs = {
          type = "remap";
          inputs = [ "journald_logs" ] ++ lib.optional tailFiles "extra_log_files";
          source = ''
            .host = get_hostname!()
            .unit = string(."_SYSTEMD_UNIT") ?? "file"
          '';
        };
      };

      # NOTE(review): both sinks disable TLS certificate verification;
      # confirm this is intentional.
      sinks = {
        mimir = {
          type = "prometheus_remote_write";
          inputs = [ "add_host_label_metrics" ] ++ cfg.extraMetricInputs;
          endpoint = "https://met.adm.htw.stura-dresden.de/api/v1/push";
          tls.verify_certificate = false;
          healthcheck.enabled = false;
        };

        loki = {
          type = "loki";
          inputs = [ "add_host_label_logs" ] ++ cfg.extraLogInputs;
          endpoint = "https://log.adm.htw.stura-dresden.de";
          labels.host = "{{ host }}";
          labels.unit = "{{ unit }}";
          tls.verify_certificate = false;
          encoding.codec = "json";
        };
      };
    };

    # Supplementary groups for the vector service: systemd-journal always,
    # nginx when nginx is enabled, plus any configured extras.
    systemd.services.vector.serviceConfig.SupplementaryGroups =
      [ "systemd-journal" ]
      ++ lib.optional nginxEnabled "nginx"
      ++ cfg.extraGroups;
  };
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue