# Flake packaging pinned Kubernetes tooling (kubeadm/kubectl/kubelet, crictl,
# CNI plugins, the fstab/cifs flexvolume driver) and a NixOS module ("jk8s")
# that assembles an etcd + wireguard + containerd + kubelet cluster node.
{
  description = "kubernetes";

  inputs = {
    nixpkgs = { url = "nixpkgs/nixos-24.11"; };
    flake-utils = { url = "github:numtide/flake-utils"; };
  };

  outputs = { self, nixpkgs, flake-utils, ... }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs { inherit system; };

        # Map the Nix system tuple to the GOARCH name used in upstream
        # download URLs. Only these two Linux systems are supported.
        arch = {
          "x86_64-linux" = "amd64";
          "aarch64-linux" = "arm64";
        }.${system};

        # Hashes for the kube binaries come from ./hashes.json (regenerated by
        # the `hashes` app below). NOTE: `//` is right-biased, so the inline
        # crictl/cni entries below override any same-named keys in hashes.json.
        hashes = builtins.fromJSON (builtins.readFile ./hashes.json) // {
          crictl = {
            "amd64" = {
              "1.28.0" = "sha256-jceHdPfL6veHmU04buxmPwo88k3h6kiTWYCWyznvJQg=";
              "1.27.0" = "sha256-0zXW4Wwwn7w/8aKafkm7JTtcm0sDCZC/fGtIaH+YXO4=";
              "1.24.1" = "sha256-QAbTrxELFEhlvg0wgUXh+Q97QN+gsgjYLT0dJfO3v5c=";
              "1.23.0" = "sha256-t1T4PICs3HX5OroZH/Jp2mvkXQ/C0/QHlwTn0UJPHKg=";
            };
          };
          cni = {
            "amd64" = {
              "1.2.0" = "sha256-86hBMkhFymvw1AkbT8f5fhimIxchWLcvw/3NudQtLTc=";
              "0.8.7" = "sha256-l3gkky1WZ8ejeqajy7pAEApoc+e9l+g+i+g34+ev0Kg=";
            };
          };
        };

        # Build one prebuilt Kubernetes release binary (kubeadm, kubectl or
        # kubelet) for the given version, fetching it from dl.k8s.io and
        # verifying it against `hashes`.
        kubePackage = pname: version:
          pkgs.stdenvNoCC.mkDerivation {
            pname = pname;
            version = version;
            src = pkgs.fetchurl {
              url = "https://dl.k8s.io/release/v${version}/bin/linux/${arch}/${pname}";
              sha256 = hashes.${pname}.${arch}.${version};
            };
            # autoPatchelfHook fixes the interpreter/rpath of the prebuilt
            # binary; installShellFiles provides installShellCompletion.
            nativeBuildInputs = [ pkgs.autoPatchelfHook pkgs.installShellFiles ];
            # src is a single raw binary, so every phase except install is a no-op.
            dontUnpack = true;
            dontPatch = true;
            dontConfigure = true;
            dontBuild = true;
            installPhase = ''
              $preInstall
              install -d $out/bin
              install $src -m 0555 $out/bin/${pname}
              $postInstall
            '';
            # NOTE(review): this assumes every packaged binary supports a
            # `completion <shell>` subcommand. kubectl and kubeadm do; confirm
            # that kubelet does too — a failing command inside <(...) will not
            # fail the build and would silently install empty completions.
            postFixup = ''
              installShellCompletion --cmd ${pname} \
                --bash <($out/bin/${pname} completion bash) \
                --fish <($out/bin/${pname} completion fish) \
                --zsh <($out/bin/${pname} completion zsh)
            '';
          };
      in {
        apps = {
          # `nix run .#hashes` regenerates hashes.json via gethashes.py
          # (expects to be run from the flake's source directory).
          hashes = let
            py = pkgs.python3.withPackages (p: with p; [ requests ]);
            program = pkgs.writeScript "program" ''
              ${py}/bin/python3 gethashes.py
            '';
          in {
            type = "app";
            program = "${program}";
          };
        };
        packages = {
          # NOTE: these three outputs are functions (version -> derivation),
          # which is non-standard for flake `packages`; they are consumed by
          # the NixOS module below via cfg.package_versions.
          kubeadm = version: kubePackage "kubeadm" version;
          kubectl = version: kubePackage "kubectl" version;
          kubelet = version: kubePackage "kubelet" version;

          # Flexvolume driver wrapping github.com/fstab/cifs with its runtime
          # tool dependencies on PATH.
          fstab-cifs = let
            pname = "fstab-cifs";
            # Runtime tools the cifs script shells out to; injected via wrapProgram.
            buildInputs = [
              pkgs.bash
              pkgs.cifs-utils
              pkgs.coreutils
              pkgs.jq
              pkgs.keyutils
              pkgs.util-linux
              pkgs.which
            ];
          in pkgs.stdenv.mkDerivation {
            inherit pname;
            name = pname;
            src = pkgs.fetchFromGitHub {
              owner = "fstab";
              repo = "cifs";
              rev = "3b640936ef51614d3c1ad68cba50c4db5da3d61b";
              hash = "sha256-C5ze3CWyDxdWJ9cYWUmjKVPCrrdUYXP2JvlnRcW/lgg=";
            };
            nativeBuildInputs = [ pkgs.makeWrapper ];
            buildInputs = buildInputs;
            # Only the single `cifs` script from the checkout is installed.
            dontUnpack = true;
            dontPatch = true;
            dontConfigure = true;
            dontBuild = true;
            installPhase = ''
              install -d $out/bin
              install $src/cifs -m 0555 $out/bin/cifs
              wrapProgram $out/bin/cifs --prefix PATH : ${pkgs.lib.makeBinPath buildInputs}
            '';
            meta = {
              homepage = "https://github.com/fstab/cifs";
              description = "Driver for CIFS (SMB, Samba, Windows Share) network filesystems as Kubernetes volumes.";
              longDescription = ''
                Docker containers running in Kubernetes have an ephemeral file system:
                Once a container is terminated, all files are gone. In order to store
                persistent data in Kubernetes, you need to mount a Persistent Volume into
                your container. Kubernetes has built-in support for network filesystems
                found in the most common cloud providers, like Amazon's EBS, Microsoft's
                Azure disk, etc. However, some cloud hosting services, like the Hetzner
                cloud, provide network storage using the CIFS (SMB, Samba, Windows Share)
                protocol, which is not natively supported in Kubernetes. Fortunately,
                Kubernetes provides Flexvolume, which is a plugin mechanism enabling users
                to write their own drivers. There are a few flexvolume drivers for CIFS out
                there, but for different reasons none of them seemed to work for me. So I
                wrote my own, which can be found on github.com/fstab/cifs.
              '';
              license = pkgs.lib.licenses.mit;
            };
          };

          # Prebuilt crictl from the cri-tools release tarball.
          crictl = let
            pname = "crictl";
            version = "1.28.0";
          in pkgs.stdenvNoCC.mkDerivation {
            inherit pname version;
            src = pkgs.fetchurl {
              url = "https://github.com/kubernetes-sigs/cri-tools/releases/download/v${version}/${pname}-v${version}-linux-${arch}.tar.gz";
              hash = hashes.${pname}.${arch}.${version};
            };
            nativeBuildInputs = [ pkgs.autoPatchelfHook ];
            # The tarball has no top-level directory; unpack into cwd.
            setSourceRoot = "sourceRoot=`pwd`";
            # NOTE(review): `dontUnpack = false` is the default and a no-op here.
            dontUnpack = false;
            dontPatch = true;
            dontConfigure = true;
            dontBuild = true;
            installPhase = ''
              install -d $out/bin
              install ${pname} -m 0555 $out/bin/${pname}
            '';
          };

          # Prebuilt CNI reference plugins (bridge, host-local, ...).
          cni = let
            pname = "cni";
            version = "1.2.0";
          in pkgs.stdenv.mkDerivation {
            pname = pname;
            version = version;
            src = pkgs.fetchurl {
              url = "https://github.com/containernetworking/plugins/releases/download/v${version}/cni-plugins-linux-${arch}-v${version}.tgz";
              hash = hashes.${pname}.${arch}.${version};
            };
            nativeBuildInputs = [ pkgs.autoPatchelfHook ];
            # The tarball has no top-level directory; unpack into cwd.
            setSourceRoot = "sourceRoot=`pwd`";
            dontPatch = true;
            dontConfigure = true;
            dontBuild = true;
            # env-vars is a build artifact of stdenv's unpack, not a plugin.
            installPhase = ''
              install -d $out/bin
              install -m 0555 `ls | grep -v env-vars` $out/bin
            '';
          };
        };
      }
    ) // {
      # NixOS module configuring one cluster host. `name` is the attribute
      # name of this host in the deployment (used to index cfg.hosts), which
      # suggests a deploy tool such as morph/colmena — see deployment.keys.
      nixosModules.kubernetes = { config, lib, pkgs, name, ... }:
        let
          cfg = config.jk8s;
        in {
          options = with lib; {
            jk8s = mkOption {
              type = with types; submodule {
                options = {
                  enable = mkEnableOption "Kubernetes";
                  # Versions passed to the package functions exported above.
                  package_versions = mkOption {
                    type = submodule {
                      options = {
                        kubeadm = mkOption { type = str; };
                        kubectl = mkOption { type = str; };
                        kubelet = mkOption { type = str; };
                      };
                    };
                  };
                  # Paths to the etcd CA certificate/key used to sign the
                  # server/peer/client certs generated in the preStart scripts.
                  etcd = mkOption {
                    type = submodule {
                      options = {
                        certPath = mkOption { type = path; };
                        keyPath = mkOption { type = path; };
                      };
                    };
                    default = {};
                  };
                  service_subnet = mkOption { type = str; };
                  pod_subnet = mkOption { type = str; };
                  # All hosts of the cluster, keyed by deployment name.
                  hosts = mkOption {
                    type = attrsOf (
                      submodule {
                        options = {
                          role = mkOption { type = types.enum ["etcd" "master+etcd" "master" "worker"]; };
                          domain = mkOption { type = str; };
                          # Public wireguard endpoint of the host.
                          endpoint = mkOption {
                            type = submodule {
                              options = {
                                address = mkOption { type = str; };
                                # Non-privileged ports only.
                                port = mkOption { type = addCheck int (port: port >= 1025 && port <= 65535); };
                              };
                            };
                          };
                          # Host address inside the wireguard overlay.
                          wireguard_subnet = mkOption {
                            type = submodule {
                              options = {
                                address = mkOption { type = str; };
                                mask = mkOption { type = addCheck int (mask: mask >= 0 && mask <= 32); };
                              };
                            };
                          };
                          # NOTE(review): private_key is a plain string option;
                          # it ends up in the deployment key below, not the store,
                          # but consider a path/secret type to avoid accidental leaks.
                          private_key = mkOption { type = str; };
                          public_key = mkOption { type = str; };
                          # Per-host pod subnet routed over wireguard.
                          pod_subnet = mkOption {
                            type = submodule {
                              options = {
                                address = mkOption { type = str; };
                                mask = mkOption { type = addCheck int (mask: mask >= 0 && mask <= 32); };
                                gateway = mkOption { type = str; };
                              };
                            };
                          };
                        };
                      }
                    );
                  };
                };
              };
            };
          };

          config = with lib; mkIf cfg.enable (
            let
              # Instantiate the version-parameterized packages exported by this flake.
              kubeadm = self.packages.${pkgs.system}.kubeadm cfg.package_versions.kubeadm;
              kubectl = self.packages.${pkgs.system}.kubectl cfg.package_versions.kubectl;
              kubelet = self.packages.${pkgs.system}.kubelet cfg.package_versions.kubelet;

              # cfssl signing policy: 10-year expiry, three profiles.
              ca-config-json = pkgs.writeText "ca-config-json" (
                builtins.toJSON {
                  signing = {
                    default = { expiry = "87600h"; };
                    profiles = {
                      server = { expiry = "87600h"; usages = [ "signing" "key encipherment" "server auth" "client auth" ]; };
                      client = { expiry = "87600h"; usages = [ "signing" "key encipherment" "client auth" ]; };
                      peer = { expiry = "87600h"; usages = [ "signing" "key encipherment" "server auth" "client auth" ]; };
                    };
                  };
                }
              );

              # CSR for the apiserver's etcd client cert (kubeadm's expected name).
              apiserver-etcd-client-csr-json = pkgs.writeText "apiserver-etcd-client-csr-json" (
                builtins.toJSON {
                  CN = "kube-apiserver-etcd-client";
                  names = [ { O = "system:masters"; } ];
                  key = { algo = "rsa"; size = 2048; };
                }
              );

              # CSR for the etcd healthcheck client cert used by etcdctl below.
              healthcheck-client-csr-json = pkgs.writeText "healthcheck-client-csr-json" (
                builtins.toJSON {
                  CN = "kube-etcd-healthcheck-client";
                  names = [ { O = "system:masters"; } ];
                  key = { algo = "rsa"; size = 2048; };
                }
              );

              # CSR for this host's etcd server cert, valid for loopback, its
              # FQDN, its public endpoint and its wireguard address.
              server-csr-json = pkgs.writeText "server-csr-json" (
                builtins.toJSON {
                  CN = "${name}.${cfg.hosts.${name}.domain}";
                  hosts = [
                    "localhost"
                    "${name}.${cfg.hosts.${name}.domain}"
                    "127.0.0.1"
                    "0:0:0:0:0:0:0:1"
                    "${cfg.hosts.${name}.endpoint.address}"
                    "${cfg.hosts.${name}.wireguard_subnet.address}"
                  ];
                  key = { algo = "rsa"; size = 2048; };
                }
              );

              # NOTE(review): etcd-ca-csr-json is defined but not referenced by
              # any script below (the CA is supplied via cfg.etcd.certPath/keyPath).
              etcd-ca-csr-json = pkgs.writeText "etcd-ca-csr-json" (
                builtins.toJSON {
                  CN = "etcd-ca";
                  key = { algo = "rsa"; size = 2048; };
                }
              );

              # CSR for this host's etcd peer cert (same SANs as the server cert).
              peer-csr-json = pkgs.writeText "peer-csr-json" (
                builtins.toJSON {
                  CN = "${name}.${cfg.hosts.${name}.domain}";
                  hosts = [
                    "localhost"
                    "${name}.${cfg.hosts.${name}.domain}"
                    "127.0.0.1"
                    "0:0:0:0:0:0:0:1"
                    "${cfg.hosts.${name}.endpoint.address}"
                    "${cfg.hosts.${name}.wireguard_subnet.address}"
                  ];
                  key = { algo = "rsa"; size = 2048; };
                }
              );
            in {
              # Deploy-tool secret upload: the wireguard private key is pushed
              # to /etc/wireguard before activation, readable by systemd-networkd.
              deployment.keys."private_key" = {
                text = cfg.hosts.${name}.private_key;
                destDir = "/etc/wireguard";
                user = "root";
                group = "systemd-network";
                permissions = "0440";
                uploadAt = "pre-activation";
              };

              systemd.tmpfiles.rules = [
                # this is needed so that the cephfs csi module can mount cephfs volumes
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") "L+ /lib/modules - - - - ${pkgs.linux_latest}/lib/modules")
                # link cni plugins to where kubelet expects them
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") "L+ /opt/cni/bin - - - - ${self.packages.${pkgs.system}.cni}/bin")
                # (lib.mkIf (cfg.hosts.${name}.role != "etcd") "d /etc/kubernetes/manifests 0755 root root -")
                # install fstab-cifs plugin
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") "L+ /usr/libexec/kubernetes/kubelet-plugins/volume/exec/fstab~cifs/cifs - - - - ${self.packages.${pkgs.system}.fstab-cifs}/bin/cifs")
                "d /etc/kubernetes 0755 root root -"
                "d /etc/kubernetes/pki 0755 root root -"
                "d /root/.kube 0750 root root -"
                "L+ /root/.kube/config - - - - /etc/kubernetes/admin.conf"
              ];

              # Kernel modules for bridged pod traffic, containerd overlayfs
              # and cephfs volume mounts.
              boot.kernelModules = [ "br_netfilter" "overlay" "ceph" ];
              boot.kernel.sysctl = {
                "fs.inotify.max_user_instances" = 1024;
                "fs.inotify.max_user_watches" = 4064932;
                "net.bridge.bridge-nf-call-arptables" = 1;
                "net.bridge.bridge-nf-call-ip6tables" = 1;
                "net.bridge.bridge-nf-call-iptables" = 1;
                "net.ipv4.ip_forward" = 1;
                "net.ipv6.ip_forward" = 1;
                "user.max_inotify_instances" = 1024;
                "user.max_inotify_watches" = 4064932;
              };

              # NOTE(review): the firewall is disabled entirely; kube-proxy and
              # the wireguard mesh manage their own rules, but confirm this is
              # acceptable for the hosts' public interfaces.
              networking.firewall.enable = false;
              services.openssh.openFirewall = false;
              # systemd.enableUnifiedCgroupHierarchy = true;

              environment.systemPackages = let
                # All client URLs of etcd members (dedicated and stacked).
                # Still referenced by the commented export inside the script
                # below, so ${endpoints} keeps being interpolated there.
                endpoints = concatStringsSep "," (
                  map (
                    n: "https://${cfg.hosts.${n}.wireguard_subnet.address}:2379"
                  ) (
                    filter (n: cfg.hosts.${n}.role == "etcd" || cfg.hosts.${n}.role == "master+etcd") (attrNames cfg.hosts)
                  )
                );
                # Wrapper preconfiguring etcdctl with the right certs for this
                # host's role: dedicated etcd hosts use the cfssl-generated
                # certs in /var/lib/etcd, stacked members use kubeadm's pki dir.
                etcdctl = pkgs.writeShellScriptBin "etcdctl" ''
                  export ETCDCTL_API=3
                  export ETCDCTL_CERT=${
                    if cfg.hosts.${name}.role == "etcd" then "/var/lib/etcd/healthcheck-client.pem" else "/etc/kubernetes/pki/etcd/healthcheck-client.crt"
                  }
                  export ETCDCTL_KEY=${
                    if cfg.hosts.${name}.role == "etcd" then "/var/lib/etcd/healthcheck-client-key.pem" else "/etc/kubernetes/pki/etcd/healthcheck-client.key"
                  }
                  export ETCDCTL_CACERT=${
                    if cfg.hosts.${name}.role == "etcd" then cfg.etcd.certPath else "/etc/kubernetes/pki/etcd/ca.crt"
                  }
                  #export ETCDCTL_ENDPOINTS=${endpoints}
                  export ETCDCTL_ENDPOINTS=https://${cfg.hosts.${name}.wireguard_subnet.address}:2379
                  exec ${pkgs.etcd_3_5}/bin/etcdctl "$@"
                '';
              in [
                # Kubernetes tooling only on hosts that actually run kubelet.
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") self.packages.${pkgs.system}.cni)
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") self.packages.${pkgs.system}.crictl)
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") kubeadm)
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") kubectl)
                (lib.mkIf (cfg.hosts.${name}.role != "etcd") kubelet)
                (lib.mkIf (builtins.elem cfg.hosts.${name}.role ["etcd" "master+etcd"]) etcdctl)
                pkgs.cfssl
                pkgs.cifs-utils
                pkgs.conntrack-tools
                pkgs.ethtool
                pkgs.file
                pkgs.iptables
                pkgs.keyutils
                pkgs.nfs-utils
                pkgs.socat
                pkgs.wireguard-tools
                pkgs.wireshark-cli
              ];

              # containerd as the CRI runtime, with systemd cgroups and the
              # flake's CNI plugins.
              virtualisation.containerd = {
                enable = true;
                settings = {
                  plugins = {
                    "io.containerd.runtime.v1.linux" = { runtime = "runc"; };
                    "io.containerd.grpc.v1.cri" = {
                      cni = {
                        bin_dir = "${self.packages.${pkgs.system}.cni}/bin";
                        conf_dir = "/etc/cni/net.d";
                      };
                      containerd = {
                        runtimes = {
                          runc = {
                            runtime_type = "io.containerd.runc.v2";
                            options = { SystemdCgroup = true; };
                          };
                        };
                      };
                    };
                  };
                };
              };

              environment.etc = {
                # Point crictl at the containerd socket.
                "crictl.yaml" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
                  text = ''
                    runtime-endpoint: "unix:///run/containerd/containerd.sock"
                    image-endpoint: ""
                    timeout: 0
                    debug: false
                    pull-image-on-create: false
                    disable-pull-on-run: false
                  '';
                };
                # Bridge CNI config using this host's pod subnet; the bridge
                # device itself is created by systemd-networkd below.
                "cni/net.d/10-cni0.conf" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
                  text = ''
                    {
                      "cniVersion": "0.4.0",
                      "name": "cni0",
                      "type": "bridge",
                      "bridge": "cni0",
                      "isDefaultGateway": true,
                      "forceAddress": false,
                      "ipMasq": false,
                      "ipam": {
                        "type": "host-local",
                        "subnet": "${cfg.hosts.${name}.pod_subnet.address}/${toString cfg.hosts.${name}.pod_subnet.mask}",
                        "gateway": "${cfg.hosts.${name}.pod_subnet.gateway}"
                      }
                    }
                  '';
                };
              };

              # cni0 bridge managed by networkd so it exists before kubelet starts.
              systemd.network.netdevs."05-cni0" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
                netdevConfig = { Name = "cni0"; Kind = "bridge"; };
              };
              systemd.network.networks."05-cni0" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
                name = "cni0";
                linkConfig = { RequiredForOnline = "no"; };
                networkConfig = {
                  DHCP = "no";
                  LinkLocalAddressing = "no";
                  LLMNR = "no";
                  LLDP = "no";
                  EmitLLDP = "no";
                  IPv4Forwarding = "yes";
                  IPv6Forwarding = "yes";
                  ConfigureWithoutCarrier = "yes";
                };
                addresses = [
                  { Address = "${cfg.hosts.${name}.pod_subnet.gateway}/${toString cfg.hosts.${name}.pod_subnet.mask}"; }
                ];
              };

              # Wireguard mesh: one peer entry per other host, routing the
              # cluster service/pod subnets and each peer's own addresses.
              systemd.network.netdevs."10-kube" = {
                netdevConfig = { Name = "kube"; Kind = "wireguard"; };
                wireguardConfig = {
                  ListenPort = cfg.hosts.${name}.endpoint.port;
                  # Uploaded by deployment.keys."private_key" above.
                  PrivateKeyFile = "/etc/wireguard/private_key";
                };
                wireguardPeers = map (
                  n: {
                    PublicKey = cfg.hosts.${n}.public_key;
                    Endpoint = "${cfg.hosts.${n}.endpoint.address}:${toString cfg.hosts.${n}.endpoint.port}";
                    AllowedIPs = [
                      cfg.service_subnet
                      cfg.pod_subnet
                      cfg.hosts.${n}.wireguard_subnet.address
                      "${cfg.hosts.${n}.pod_subnet.address}/${toString cfg.hosts.${n}.pod_subnet.mask}"
                    ];
                    PersistentKeepalive = 25;
                  }
                ) (
                  filter (n: n != name) (attrNames cfg.hosts)
                );
              };
              systemd.network.networks."10-kube" = {
                name = "kube";
                linkConfig = { RequiredForOnline = "no"; };
                networkConfig = {
                  DHCP = "no";
                  IPv4Forwarding = "yes";
                  IPv6Forwarding = "yes";
                };
                # Own wireguard address (the filter matches exactly this host).
                addresses = map (
                  n: { Address = "${cfg.hosts.${n}.wireguard_subnet.address}/${toString cfg.hosts.${n}.wireguard_subnet.mask}"; }
                ) (
                  filter (n: n == name) (attrNames cfg.hosts)
                );
                # Route each remote pod subnet via that host's wireguard address.
                routes = map (
                  n: {
                    Destination = "${cfg.hosts.${n}.pod_subnet.address}/${toString cfg.hosts.${n}.pod_subnet.mask}";
                    Gateway = cfg.hosts.${n}.wireguard_subnet.address;
                  }
                ) (
                  filter (n: n != name) (attrNames cfg.hosts)
                );
              };

              # On dedicated etcd hosts, generate server/peer/healthcheck certs
              # once (idempotent: only when the .pem files are missing) from the
              # deployed CA before etcd starts.
              systemd.services.etcd.preStart = if (cfg.hosts.${name}.role == "etcd") then ''
                cd /var/lib/etcd
                if [ ! -f server.pem ]
                then
                  cat ${server-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=server - | ${pkgs.cfssl}/bin/cfssljson -bare server
                fi
                if [ ! -f peer.pem ]
                then
                  cat ${peer-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=peer - | ${pkgs.cfssl}/bin/cfssljson -bare peer
                fi
                if [ ! -f healthcheck-client.pem ]
                then
                  cat ${healthcheck-client-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=client - | ${pkgs.cfssl}/bin/cfssljson -bare healthcheck-client
                fi
              '' else "";
              # Override the module's ExecStart so etcd reads everything from
              # the environment file generated by services.etcd.
              # NOTE(review): the else-branch still defines ExecStart = "" on
              # non-etcd hosts, materializing an inert etcd unit — confirm that
              # is intended rather than wrapping the whole binding in mkIf.
              systemd.services.etcd.serviceConfig.ExecStart = if (cfg.hosts.${name}.role == "etcd") then (lib.mkForce "${pkgs.etcd_3_5}/bin/etcd") else "";

              services.etcd = lib.mkIf (cfg.hosts.${name}.role == "etcd") {
                enable = true;
                name = "${name}.${config.networking.domain}";
                advertiseClientUrls = [ "https://${cfg.hosts.${name}.wireguard_subnet.address}:2379" ];
                initialAdvertisePeerUrls = [ "https://${cfg.hosts.${name}.wireguard_subnet.address}:2380" ];
                # NOTE(review): this filter only includes role == "etcd", while
                # the etcdctl `endpoints` list above also counts "master+etcd"
                # members — confirm stacked members really must be absent here.
                initialCluster = map (
                  n: "${n}.${cfg.hosts.${n}.domain}=https://${cfg.hosts.${n}.wireguard_subnet.address}:2380"
                ) (
                  filter (n: cfg.hosts.${n}.role == "etcd") (attrNames cfg.hosts)
                );
                # "existing" means new members must be added via the members API
                # before they can join (bootstrap of a fresh cluster needs "new").
                initialClusterState = "existing";
                listenClientUrls = [ "https://${cfg.hosts.${name}.wireguard_subnet.address}:2379" ];
                listenPeerUrls = [ "https://${cfg.hosts.${name}.wireguard_subnet.address}:2380" ];
                peerClientCertAuth = true;
                clientCertAuth = true;
                # Certs generated by the preStart script above.
                certFile = "/var/lib/etcd/server.pem";
                keyFile = "/var/lib/etcd/server-key.pem";
                trustedCaFile = cfg.etcd.certPath;
                # trustedCaFile = "/var/lib/etcd/etcd-ca.pem";
                peerCertFile = "/var/lib/etcd/peer.pem";
                peerKeyFile = "/var/lib/etcd/peer-key.pem";
                peerTrustedCaFile = cfg.etcd.certPath;
                # peerTrustedCaFile = "/var/lib/etcd/etcd-ca.pem";
                extraConf = {
                  SNAPSHOT_COUNT = "10000";
                  EXPERIMENTAL_INITIAL_CORRUPT_CHECK = "true";
                  LISTEN_METRICS_URLS = "http://${cfg.hosts.${name}.wireguard_subnet.address}:2381";
                };
              };

              # kubelet unit mirroring kubeadm's upstream systemd drop-in
              # (same Environment/EnvironmentFile/ExecStart contract).
              # NOTE(review): the empty `let in` is leftover scaffolding.
              systemd.services."kubelet" = let in lib.mkIf (cfg.hosts.${name}.role != "etcd") {
                enable = true;
                description = "kubelet";
                # Tools kubelet and its volume/CNI plugins shell out to.
                path = [
                  pkgs.ceph-client
                  pkgs.ethtool
                  pkgs.iproute2
                  pkgs.iptables
                  pkgs.kmod
                  pkgs.socat
                  pkgs.thin-provisioning-tools
                  pkgs.util-linux
                ];
                unitConfig = {
                  # Combined with Restart=always: retry forever, never rate-limit.
                  StartLimitIntervalSec = 0;
                  After = ["network-online.target"];
                  Wants = ["network-online.target"];
                };
                # On masters with external etcd, mint the apiserver's etcd
                # client cert (kubeadm expects apiserver-etcd-client.{crt,key}).
                preStart = if (cfg.hosts.${name}.role == "master") then ''
                  mkdir -p /etc/kubernetes/pki
                  cd /etc/kubernetes/pki
                  if [ ! -f apiserver-etcd-client.crt ]
                  then
                    cat ${apiserver-etcd-client-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=client - | ${pkgs.cfssl}/bin/cfssljson -bare apiserver-etcd-client
                    mv apiserver-etcd-client.pem apiserver-etcd-client.crt
                    mv apiserver-etcd-client-key.pem apiserver-etcd-client.key
                  fi
                '' else "";
                serviceConfig = {
                  Slice = "kubernetes.slice";
                  CPUAccounting = true;
                  MemoryAccounting = true;
                  Type = "simple";
                  Environment = [
                    "KUBELET_KUBECONFIG_ARGS=\"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf\""
                    "KUBELET_CONFIG_ARGS=\"--config=/var/lib/kubelet/config.yaml\""
                  ];
                  # kubeadm-flags.env is written by `kubeadm init`/`join`;
                  # both files are optional ("-" prefix).
                  EnvironmentFile = [
                    "-/var/lib/kubelet/kubeadm-flags.env"
                    "-/etc/sysconfig/kubelet"
                  ];
                  ExecStart = "${kubelet}/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS";
                  Restart = "always";
                  RestartSec = "10s";
                };
                wantedBy = ["multi-user.target"];
              };
            }
          );
        };
    };
}