# nixos-kubernetes/flake.nix
{
description = "kubernetes";
# Inputs are pinned to the NixOS 24.11 release channel; flake-utils
# supplies eachDefaultSystem below.
inputs = {
nixpkgs = {
url = "nixpkgs/nixos-24.11";
};
flake-utils = {
url = "github:numtide/flake-utils";
};
};
outputs = {
self,
nixpkgs,
flake-utils,
...
}:
flake-utils.lib.eachDefaultSystem
(
system: let
pkgs = import nixpkgs {
inherit system;
};
# Map the Nix system tuple to the GOARCH-style name used in upstream
# download URLs.  eachDefaultSystem also evaluates Darwin systems, so
# fail with a readable message instead of a bare missing-attribute
# error when there is no Linux binary mapping for the system.
arch =
  {
    "x86_64-linux" = "amd64";
    "aarch64-linux" = "arm64";
  }
  .${system}
  or (throw "unsupported system ${system}: only Linux binaries are packaged here");
# Expected hashes for downloaded binaries, keyed tool -> arch -> version.
# The bulk is read from ./hashes.json (presumably regenerated by
# gethashes.py via the `hashes` app below — verify); crictl and cni
# hashes are maintained inline here.
# NOTE(review): `//` is a shallow merge — any `crictl`/`cni` keys present
# in hashes.json would be clobbered entirely by the inline sets.
hashes =
builtins.fromJSON (builtins.readFile ./hashes.json)
// {
crictl = {
"amd64" = {
"1.28.0" = "sha256-jceHdPfL6veHmU04buxmPwo88k3h6kiTWYCWyznvJQg=";
"1.27.0" = "sha256-0zXW4Wwwn7w/8aKafkm7JTtcm0sDCZC/fGtIaH+YXO4=";
"1.24.1" = "sha256-QAbTrxELFEhlvg0wgUXh+Q97QN+gsgjYLT0dJfO3v5c=";
"1.23.0" = "sha256-t1T4PICs3HX5OroZH/Jp2mvkXQ/C0/QHlwTn0UJPHKg=";
};
};
cni = {
"amd64" = {
"1.2.0" = "sha256-86hBMkhFymvw1AkbT8f5fhimIxchWLcvw/3NudQtLTc=";
"0.8.7" = "sha256-l3gkky1WZ8ejeqajy7pAEApoc+e9l+g+i+g34+ev0Kg=";
};
};
};
# Fetch an official Kubernetes release binary (kubeadm, kubectl,
# kubelet, ...) for the current arch and package it, with shell
# completions.  The expected hash is looked up in `hashes` as
# tool -> arch -> version.
kubePackage = pname: version:
  pkgs.stdenvNoCC.mkDerivation {
    inherit pname version;
    src = pkgs.fetchurl {
      url = "https://dl.k8s.io/release/v${version}/bin/linux/${arch}/${pname}";
      # SRI hashes; `hash =` matches the style of the crictl/cni packages.
      hash = hashes.${pname}.${arch}.${version};
    };
    nativeBuildInputs = [
      pkgs.autoPatchelfHook # fix the prebuilt binary's interpreter/rpath
      pkgs.installShellFiles
    ];
    # src is a single raw binary; every phase before install is a no-op.
    dontUnpack = true;
    dontPatch = true;
    dontConfigure = true;
    dontBuild = true;
    installPhase = ''
      runHook preInstall
      install -d $out/bin
      install $src -m 0555 $out/bin/${pname}
      runHook postInstall
    '';
    # Generate and install completions from the tool itself.
    # NOTE(review): this assumes every packaged tool implements a
    # `completion` subcommand — confirm this holds for kubelet.
    postFixup = ''
      installShellCompletion --cmd ${pname} \
        --bash <($out/bin/${pname} completion bash) \
        --fish <($out/bin/${pname} completion fish) \
        --zsh <($out/bin/${pname} completion zsh)
    '';
  };
in {
apps = {
  # `nix run .#hashes` — regenerate hashes.json via gethashes.py.
  # Note: gethashes.py is resolved relative to the current working
  # directory, so run this from the repository root.
  hashes = let
    py = pkgs.python3.withPackages (p:
      with p; [
        requests
      ]);
    # writeShellScript (unlike writeScript) prepends a shebang, which
    # `nix run` needs in order to exec the program.
    program = pkgs.writeShellScript "program" ''
      exec ${py}/bin/python3 gethashes.py "$@"
    '';
  in {
    type = "app";
    program = "${program}";
  };
};
packages = {
# Version-parameterized builders (partial applications of kubePackage).
# NOTE(review): these attrs are functions (version -> derivation), not
# derivations, so plain `nix build .#kubeadm` will not work; they are
# consumed by the NixOS module below.
kubeadm = kubePackage "kubeadm";
kubectl = kubePackage "kubectl";
kubelet = kubePackage "kubelet";
# fstab/cifs: a Flexvolume driver shell script that lets kubelet mount
# CIFS (SMB) shares as Kubernetes volumes.
fstab-cifs = let
  pname = "fstab-cifs";
  # Tools the driver script invokes at runtime; injected into its PATH
  # via wrapProgram below.
  wrapperDeps = [
    pkgs.bash
    pkgs.cifs-utils
    pkgs.coreutils
    pkgs.jq
    pkgs.keyutils
    pkgs.util-linux
    pkgs.which
  ];
in
  pkgs.stdenv.mkDerivation {
    inherit pname;
    name = pname;
    src = pkgs.fetchFromGitHub {
      owner = "fstab";
      repo = "cifs";
      rev = "3b640936ef51614d3c1ad68cba50c4db5da3d61b";
      hash = "sha256-C5ze3CWyDxdWJ9cYWUmjKVPCrrdUYXP2JvlnRcW/lgg=";
    };
    nativeBuildInputs = [
      pkgs.makeWrapper
    ];
    buildInputs = wrapperDeps;
    # Upstream is a single shell script; skip every phase except install,
    # where the script is copied and wrapped with its runtime PATH.
    dontUnpack = true;
    dontPatch = true;
    dontConfigure = true;
    dontBuild = true;
    installPhase = ''
      install -d $out/bin
      install $src/cifs -m 0555 $out/bin/cifs
      wrapProgram $out/bin/cifs --prefix PATH : ${pkgs.lib.makeBinPath wrapperDeps}
    '';
    meta = {
      homepage = "https://github.com/fstab/cifs";
      description = "Driver for CIFS (SMB, Samba, Windows Share) network filesystems as Kubernetes volumes.";
      longDescription = ''
        Docker containers running in Kubernetes have an
        ephemeral file system: Once a container is
        terminated, all files are gone. In order to store
        persistent data in Kubernetes, you need to mount a
        Persistent Volume into your container. Kubernetes
        has built-in support for network filesystems found
        in the most common cloud providers, like Amazon's
        EBS, Microsoft's Azure disk, etc. However, some
        cloud hosting services, like the Hetzner cloud,
        provide network storage using the CIFS (SMB, Samba,
        Windows Share) protocol, which is not natively
        supported in Kubernetes.
        Fortunately, Kubernetes provides Flexvolume, which
        is a plugin mechanism enabling users to write their
        own drivers. There are a few flexvolume drivers for
        CIFS out there, but for different reasons none of
        them seemed to work for me. So I wrote my own, which
        can be found on github.com/fstab/cifs.
      '';
      license = pkgs.lib.licenses.mit;
    };
  };
# crictl: CLI for CRI-compatible container runtimes, repackaged from the
# prebuilt upstream release tarball.
crictl = let
  pname = "crictl";
  version = "1.28.0";
in
  pkgs.stdenvNoCC.mkDerivation {
    inherit pname version;
    src = pkgs.fetchurl {
      url = "https://github.com/kubernetes-sigs/cri-tools/releases/download/v${version}/${pname}-v${version}-linux-${arch}.tar.gz";
      hash = hashes.${pname}.${arch}.${version};
    };
    nativeBuildInputs = [
      pkgs.autoPatchelfHook # fix the prebuilt binary's interpreter/rpath
    ];
    # The tarball has no top-level directory; unpack in place.
    setSourceRoot = "sourceRoot=`pwd`";
    # (dontUnpack = false was a no-op: unpacking is the default.)
    dontPatch = true;
    dontConfigure = true;
    dontBuild = true;
    installPhase = ''
      runHook preInstall
      install -d $out/bin
      install ${pname} -m 0555 $out/bin/${pname}
      runHook postInstall
    '';
  };
# cni: the reference CNI plugins (bridge, host-local, ...) from the
# upstream release tarball.
cni = let
  pname = "cni";
  version = "1.2.0";
in
  # stdenvNoCC: nothing is compiled here — prebuilt binaries only
  # (consistent with the crictl package above).
  pkgs.stdenvNoCC.mkDerivation {
    inherit pname version;
    src = pkgs.fetchurl {
      url = "https://github.com/containernetworking/plugins/releases/download/v${version}/cni-plugins-linux-${arch}-v${version}.tgz";
      hash = hashes.${pname}.${arch}.${version};
    };
    nativeBuildInputs = [
      pkgs.autoPatchelfHook
    ];
    # The tarball has no top-level directory; unpack in place.
    setSourceRoot = "sourceRoot=`pwd`";
    dontPatch = true;
    dontConfigure = true;
    dontBuild = true;
    installPhase = ''
      runHook preInstall
      install -d $out/bin
      # stdenv drops an env-vars file into the build dir; install
      # everything else (the plugin binaries).
      install -m 0555 `ls | grep -v env-vars` $out/bin
      runHook postInstall
    '';
  };
};
}
)
// {
nixosModules.kubernetes = {
config,
lib,
pkgs,
name,
...
}: let
cfg = config.jk8s;
in {
# All module options live under the `jk8s` namespace.
options = with lib; {
jk8s = mkOption {
type = with types;
submodule {
options = {
enable = mkEnableOption "Kubernetes";
# Kubernetes release versions to install; each must have a matching
# entry in hashes.json.
package_versions = mkOption {
type = submodule {
options = {
kubeadm = mkOption {
type = str;
};
kubectl = mkOption {
type = str;
};
kubelet = mkOption {
type = str;
};
};
};
};
# On-host paths to the shared etcd CA certificate and key used to
# sign per-member certificates.
# NOTE(review): `default = {}` leaves certPath/keyPath undefined;
# evaluating them without setting these options will fail.
etcd = mkOption {
type = submodule {
options = {
certPath = mkOption {
type = path;
};
keyPath = mkOption {
type = path;
};
};
};
default = {};
};
# Cluster-wide CIDRs (strings), routed over the wireguard mesh.
service_subnet = mkOption {
type = str;
};
pod_subnet = mkOption {
type = str;
};
# Per-host settings, keyed by hostname (the module argument `name`
# is used to select this host's entry).
hosts = mkOption {
type =
attrsOf
(
submodule {
options = {
# Which components this host runs.
role = mkOption {
type = types.enum ["etcd" "master+etcd" "master" "worker"];
};
domain = mkOption {
type = str;
};
# Public address/port where this host's wireguard endpoint listens.
endpoint = mkOption {
type = submodule {
options = {
address = mkOption {
type = str;
};
# Non-privileged ports only.
port = mkOption {
type = addCheck int (port: port >= 1025 && port <= 65535);
};
};
};
};
# This host's address inside the wireguard mesh.
wireguard_subnet = mkOption {
type = submodule {
options = {
address = mkOption {
type = str;
};
mask = mkOption {
type = addCheck int (mask: mask >= 0 && mask <= 32);
};
};
};
};
# NOTE(review): the wireguard private key is held as a plain option
# value; confirm the deployment tool keeps it out of world-readable
# store paths (it is uploaded via deployment.keys below).
private_key = mkOption {
type = str;
};
public_key = mkOption {
type = str;
};
# Node-local pod CIDR plus the gateway address given to the cni0 bridge.
pod_subnet = mkOption {
type = submodule {
options = {
address = mkOption {
type = str;
};
mask = mkOption {
type = addCheck int (mask: mask >= 0 && mask <= 32);
};
gateway = mkOption {
type = str;
};
};
};
};
};
}
);
};
};
};
};
};
config = with lib;
mkIf cfg.enable (
let
# Instantiate this flake's version-parameterized packages at the
# versions requested in the module options.
kubeadm = self.packages.${pkgs.system}.kubeadm cfg.package_versions.kubeadm;
kubectl = self.packages.${pkgs.system}.kubectl cfg.package_versions.kubectl;
kubelet = self.packages.${pkgs.system}.kubelet cfg.package_versions.kubelet;
# cfssl signing policy for all host-generated certificates: 10-year
# (87600h) lifetimes, with server/client/peer profiles differing only
# in their allowed key usages.
ca-config-json = pkgs.writeText "ca-config-json" (
builtins.toJSON {
signing = {
default = {
expiry = "87600h";
};
profiles = {
# etcd serving certs (also usable as client certs).
server = {
expiry = "87600h";
usages = [
"signing"
"key encipherment"
"server auth"
"client auth"
];
};
# client-only certs (health checks, apiserver -> etcd).
client = {
expiry = "87600h";
usages = [
"signing"
"key encipherment"
"client auth"
];
};
# etcd member-to-member certs.
peer = {
expiry = "87600h";
usages = [
"signing"
"key encipherment"
"server auth"
"client auth"
];
};
};
};
}
);
# All CSRs share the same RSA-2048 key spec.
rsaKey = {
  algo = "rsa";
  size = 2048;
};
# Build a cfssl CSR JSON file from extra fields plus the shared key spec.
# (builtins.toJSON sorts attribute names, so output is identical to
# writing the full set inline.)
mkCsrJson = fileName: attrs:
  pkgs.writeText fileName (builtins.toJSON (attrs // {key = rsaKey;}));
# Client cert for kube-apiserver -> etcd, in system:masters.
apiserver-etcd-client-csr-json = mkCsrJson "apiserver-etcd-client-csr-json" {
  CN = "kube-apiserver-etcd-client";
  names = [
    {
      O = "system:masters";
    }
  ];
};
# Client cert used by etcdctl / health probes, in system:masters.
healthcheck-client-csr-json = mkCsrJson "healthcheck-client-csr-json" {
  CN = "kube-etcd-healthcheck-client";
  names = [
    {
      O = "system:masters";
    }
  ];
};
# Serving cert for this host's etcd, valid for localhost, its FQDN, and
# its public/wireguard addresses.
server-csr-json = mkCsrJson "server-csr-json" {
  CN = "${name}.${cfg.hosts.${name}.domain}";
  hosts = [
    "localhost"
    "${name}.${cfg.hosts.${name}.domain}"
    "127.0.0.1"
    "0:0:0:0:0:0:0:1"
    "${cfg.hosts.${name}.endpoint.address}"
    "${cfg.hosts.${name}.wireguard_subnet.address}"
  ];
};
# CSR for a self-signed etcd CA.
# NOTE(review): this binding is not referenced anywhere in this file —
# confirm whether it is still needed.
etcd-ca-csr-json = mkCsrJson "etcd-ca-csr-json" {
  CN = "etcd-ca";
};
# Peer cert for etcd member-to-member traffic (same SANs as the server
# cert; only the profile used at signing time differs).
peer-csr-json = mkCsrJson "peer-csr-json" {
  CN = "${name}.${cfg.hosts.${name}.domain}";
  hosts = [
    "localhost"
    "${name}.${cfg.hosts.${name}.domain}"
    "127.0.0.1"
    "0:0:0:0:0:0:0:1"
    "${cfg.hosts.${name}.endpoint.address}"
    "${cfg.hosts.${name}.wireguard_subnet.address}"
  ];
};
in {
# Wireguard private key, uploaded out-of-band by the deployment tool
# (`deployment.keys` is a colmena/NixOps-style option, not plain NixOS).
# Group systemd-network + mode 0440 lets networkd read it for the
# `kube` netdev below; uploadAt pre-activation ensures it exists before
# the new configuration is switched in.
deployment.keys."private_key" = {
text = cfg.hosts.${name}.private_key;
destDir = "/etc/wireguard";
user = "root";
group = "systemd-network";
permissions = "0440";
uploadAt = "pre-activation";
};
systemd.tmpfiles.rules =
  # Entries only needed where kubelet runs (everything but pure etcd
  # hosts); lib.optionals states the shared condition once instead of
  # wrapping each element in lib.mkIf.
  lib.optionals (cfg.hosts.${name}.role != "etcd") [
    # this is needed so that the cephfs csi module can mount cephfs volumes
    "L+ /lib/modules - - - - ${pkgs.linux_latest}/lib/modules"
    # link cni plugins to where kubelet expects them
    "L+ /opt/cni/bin - - - - ${self.packages.${pkgs.system}.cni}/bin"
    # static pod manifest directory
    "d /etc/kubernetes/manifests 0755 root root -"
    # install fstab-cifs plugin
    "L+ /usr/libexec/kubernetes/kubelet-plugins/volume/exec/fstab~cifs/cifs - - - - ${self.packages.${pkgs.system}.fstab-cifs}/bin/cifs"
  ]
  # Entries common to all roles.
  ++ [
    "d /etc/kubernetes 0755 root root -"
    "d /etc/kubernetes/pki 0755 root root -"
    "d /root/.kube 0750 root root -"
    "L+ /root/.kube/config - - - - /etc/kubernetes/admin.conf"
  ];
boot.kernelModules = [
  "br_netfilter" # required for the bridge-nf-call sysctls below
  "overlay" # containerd overlayfs snapshotter
  "ceph" # cephfs volume mounts
];
boot.kernel.sysctl = {
  # Generous inotify limits; kubelet and log tailers consume many watches.
  "fs.inotify.max_user_instances" = 1024;
  "fs.inotify.max_user_watches" = 4064932;
  # Make bridged pod traffic traverse arp/ip/ip6tables.
  "net.bridge.bridge-nf-call-arptables" = 1;
  "net.bridge.bridge-nf-call-ip6tables" = 1;
  "net.bridge.bridge-nf-call-iptables" = 1;
  "net.ipv4.ip_forward" = 1;
  # "net.ipv6.ip_forward" is not a real sysctl; IPv6 forwarding is
  # controlled per-interface under net.ipv6.conf.*.
  "net.ipv6.conf.all.forwarding" = 1;
  "user.max_inotify_instances" = 1024;
  "user.max_inotify_watches" = 4064932;
};
# The host firewall is disabled entirely (kube-proxy manages its own
# iptables rules).  openFirewall=false is then moot, but kept explicit.
# NOTE(review): this exposes every listening port on the public
# interface — confirm external filtering (e.g. a provider firewall).
networking.firewall.enable = false;
services.openssh.openFirewall = false;
# systemd.enableUnifiedCgroupHierarchy = true;
environment.systemPackages = let
  # Client URLs of every etcd member (etcd and master+etcd hosts).
  endpoints = concatStringsSep "," (
    map
    (
      n: "https://${cfg.hosts.${n}.wireguard_subnet.address}:2379"
    )
    (
      filter (n: cfg.hosts.${n}.role == "etcd" || cfg.hosts.${n}.role == "master+etcd") (attrNames cfg.hosts)
    )
  );
  # etcdctl wrapper preconfigured with this host's client certificates;
  # cert locations differ between standalone etcd hosts and kubeadm-run
  # masters.
  etcdctl = pkgs.writeShellScriptBin "etcdctl" ''
    export ETCDCTL_API=3
    export ETCDCTL_CERT=${
      if cfg.hosts.${name}.role == "etcd"
      then "/var/lib/etcd/healthcheck-client.pem"
      else "/etc/kubernetes/pki/etcd/healthcheck-client.crt"
    }
    export ETCDCTL_KEY=${
      if cfg.hosts.${name}.role == "etcd"
      then "/var/lib/etcd/healthcheck-client-key.pem"
      else "/etc/kubernetes/pki/etcd/healthcheck-client.key"
    }
    export ETCDCTL_CACERT=${
      if cfg.hosts.${name}.role == "etcd"
      then cfg.etcd.certPath
      else "/etc/kubernetes/pki/etcd/ca.crt"
    }
    #export ETCDCTL_ENDPOINTS=${endpoints}
    export ETCDCTL_ENDPOINTS=https://${cfg.hosts.${name}.wireguard_subnet.address}:2379
    exec ${pkgs.etcd_3_5}/bin/etcdctl "$@"
  '';
in
  # Kubernetes tooling only where kubelet runs; etcdctl only on etcd
  # members; optionals/optional state each condition once.
  optionals (cfg.hosts.${name}.role != "etcd") [
    self.packages.${pkgs.system}.cni
    self.packages.${pkgs.system}.crictl
    kubeadm
    kubectl
    kubelet
  ]
  ++ optional (builtins.elem cfg.hosts.${name}.role ["etcd" "master+etcd"]) etcdctl
  ++ [
    pkgs.cfssl
    pkgs.cifs-utils
    pkgs.conntrack-tools
    pkgs.ethtool
    pkgs.file
    pkgs.iptables
    pkgs.keyutils
    pkgs.nfs-utils
    pkgs.socat
    pkgs.wireguard-tools
    pkgs.wireshark-cli
  ];
# containerd as the CRI runtime, using this flake's CNI plugins and
# systemd-managed cgroups.
# NOTE(review): enabled on every host, including pure etcd nodes —
# confirm that is intended.
virtualisation.containerd = {
enable = true;
settings = {
plugins = {
# NOTE(review): the io.containerd.runtime.v1.linux plugin is the
# legacy v1 runtime — verify it is still needed alongside runc.v2.
"io.containerd.runtime.v1.linux" = {
runtime = "runc";
};
"io.containerd.grpc.v1.cri" = {
cni = {
bin_dir = "${self.packages.${pkgs.system}.cni}/bin";
conf_dir = "/etc/cni/net.d";
};
containerd = {
runtimes = {
runc = {
runtime_type = "io.containerd.runc.v2";
options = {
# Delegate container cgroups to systemd (matches kubelet's
# systemd cgroup driver).
SystemdCgroup = true;
};
};
};
};
};
};
};
};
environment.etc = {
# Point crictl at the containerd socket (only where kubelet runs).
# NOTE(review): image-endpoint is empty — presumably it falls back to
# runtime-endpoint; confirm.
"crictl.yaml" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
text = ''
runtime-endpoint: "unix:///run/containerd/containerd.sock"
image-endpoint: ""
timeout: 0
debug: false
pull-image-on-create: false
disable-pull-on-run: false
'';
};
# CNI bridge config: host-local IPAM over this node's pod subnet,
# using the cni0 bridge that systemd-networkd creates below.
"cni/net.d/10-cni0.conf" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
text = ''
{
"cniVersion": "0.4.0",
"name": "cni0",
"type": "bridge",
"bridge": "cni0",
"isDefaultGateway": true,
"forceAddress": false,
"ipMasq": false,
"ipam": {
"type": "host-local",
"subnet": "${cfg.hosts.${name}.pod_subnet.address}/${toString cfg.hosts.${name}.pod_subnet.mask}",
"gateway": "${cfg.hosts.${name}.pod_subnet.gateway}"
}
}
'';
};
};
# Pre-create the cni0 bridge with networkd so it exists (with its
# gateway address) before the CNI bridge plugin uses it.
systemd.network.netdevs."05-cni0" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
netdevConfig = {
Name = "cni0";
Kind = "bridge";
};
};
systemd.network.networks."05-cni0" = lib.mkIf (cfg.hosts.${name}.role != "etcd") {
name = "cni0";
linkConfig = {
# The bridge has no carrier until pods attach; don't block boot on it.
RequiredForOnline = "no";
};
networkConfig = {
DHCP = "no";
LinkLocalAddressing = "no";
LLMNR = "no";
LLDP = "no";
EmitLLDP = "no";
IPv4Forwarding = "yes";
IPv6Forwarding = "yes";
ConfigureWithoutCarrier = "yes";
};
addresses = [
{
# Node-local pod gateway address lives on the bridge itself.
Address = "${cfg.hosts.${name}.pod_subnet.gateway}/${toString cfg.hosts.${name}.pod_subnet.mask}";
}
];
};
# Wireguard mesh interface: one peer entry per other host in the cluster.
systemd.network.netdevs."10-kube" = {
netdevConfig = {
Name = "kube";
Kind = "wireguard";
};
wireguardConfig = {
ListenPort = cfg.hosts.${name}.endpoint.port;
# Uploaded by deployment.keys above (readable by systemd-network).
PrivateKeyFile = "/etc/wireguard/private_key";
};
wireguardPeers =
map
(
n: {
PublicKey = cfg.hosts.${n}.public_key;
Endpoint = "${cfg.hosts.${n}.endpoint.address}:${toString cfg.hosts.${n}.endpoint.port}";
# NOTE(review): the cluster-wide service/pod subnets appear in every
# peer's AllowedIPs, so those overlapping ranges can only map to one
# peer — confirm kernel routing (the explicit routes below) is what
# actually steers that traffic.
AllowedIPs = [
cfg.service_subnet
cfg.pod_subnet
cfg.hosts.${n}.wireguard_subnet.address
"${cfg.hosts.${n}.pod_subnet.address}/${toString cfg.hosts.${n}.pod_subnet.mask}"
];
# Keep NAT mappings alive.
PersistentKeepalive = 25;
}
)
(
filter (n: n != name) (attrNames cfg.hosts)
);
};
systemd.network.networks."10-kube" = {
name = "kube";
linkConfig = {
RequiredForOnline = "no";
};
networkConfig = {
DHCP = "no";
IPv4Forwarding = "yes";
IPv6Forwarding = "yes";
};
# This host's own mesh address (the filter n == name yields at most
# one element: this host's entry).
addresses =
map
(
n: {
Address = "${cfg.hosts.${n}.wireguard_subnet.address}/${toString cfg.hosts.${n}.wireguard_subnet.mask}";
}
)
(
filter (n: n == name) (attrNames cfg.hosts)
);
# Route every other node's pod subnet via its mesh address.
routes =
map
(
n: {
Destination = "${cfg.hosts.${n}.pod_subnet.address}/${toString cfg.hosts.${n}.pod_subnet.mask}";
Gateway = cfg.hosts.${n}.wireguard_subnet.address;
}
)
(
filter (n: n != name) (attrNames cfg.hosts)
);
};
# Generate this member's etcd certificates (server, peer, health-check
# client) on first start, signed with the shared etcd CA deployed at
# cfg.etcd.{certPath,keyPath}.  Gated with mkIf so non-etcd hosts get
# no etcd unit drop-in at all (an unconditional `else ""` would still
# define one on every host).
systemd.services.etcd.preStart = lib.mkIf (cfg.hosts.${name}.role == "etcd") ''
  cd /var/lib/etcd
  if [ ! -f server.pem ]
  then
  cat ${server-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=server - | ${pkgs.cfssl}/bin/cfssljson -bare server
  fi
  if [ ! -f peer.pem ]
  then
  cat ${peer-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=peer - | ${pkgs.cfssl}/bin/cfssljson -bare peer
  fi
  if [ ! -f healthcheck-client.pem ]
  then
  cat ${healthcheck-client-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=client - | ${pkgs.cfssl}/bin/cfssljson -bare healthcheck-client
  fi
'';
# Run the pinned etcd 3.5 binary directly; mkForce overrides the
# ExecStart set by the upstream etcd module (configuration is supplied
# via the ETCD_* environment from services.etcd below).
systemd.services.etcd.serviceConfig.ExecStart =
  lib.mkIf (cfg.hosts.${name}.role == "etcd") (lib.mkForce "${pkgs.etcd_3_5}/bin/etcd");
# etcd member configuration: client/peer traffic is TLS-only on the
# wireguard address, using certificates generated in preStart above.
services.etcd = lib.mkIf (cfg.hosts.${name}.role == "etcd") {
enable = true;
# NOTE(review): member name uses config.networking.domain while the
# rest of this module uses cfg.hosts.${"$"}{name}.domain — confirm the
# two always agree, else initialCluster entries won't match.
name = "${name}.${config.networking.domain}";
advertiseClientUrls = [
"https://${cfg.hosts.${name}.wireguard_subnet.address}:2379"
];
initialAdvertisePeerUrls = [
"https://${cfg.hosts.${name}.wireguard_subnet.address}:2380"
];
# NOTE(review): only role == "etcd" hosts are listed here, while the
# etcdctl endpoint list above also includes "master+etcd" — confirm
# which set really forms the cluster.
initialCluster =
map
(
n: "${n}.${cfg.hosts.${n}.domain}=https://${cfg.hosts.${n}.wireguard_subnet.address}:2380"
)
(
filter (n: cfg.hosts.${n}.role == "etcd") (attrNames cfg.hosts)
);
# "existing" means members join an already-bootstrapped cluster; the
# very first bootstrap must be handled out-of-band.
initialClusterState = "existing";
listenClientUrls = [
"https://${cfg.hosts.${name}.wireguard_subnet.address}:2379"
];
listenPeerUrls = [
"https://${cfg.hosts.${name}.wireguard_subnet.address}:2380"
];
# Require client certificates on both peer and client interfaces.
peerClientCertAuth = true;
clientCertAuth = true;
certFile = "/var/lib/etcd/server.pem";
keyFile = "/var/lib/etcd/server-key.pem";
trustedCaFile = cfg.etcd.certPath;
# trustedCaFile = "/var/lib/etcd/etcd-ca.pem";
peerCertFile = "/var/lib/etcd/peer.pem";
peerKeyFile = "/var/lib/etcd/peer-key.pem";
peerTrustedCaFile = cfg.etcd.certPath;
# peerTrustedCaFile = "/var/lib/etcd/etcd-ca.pem";
extraConf = {
SNAPSHOT_COUNT = "10000";
EXPERIMENTAL_INITIAL_CORRUPT_CHECK = "true";
# Plain-HTTP metrics, reachable only over the wireguard mesh.
LISTEN_METRICS_URLS = "http://${cfg.hosts.${name}.wireguard_subnet.address}:2381";
};
};
# kubelet unit mirroring the kubeadm-style systemd drop-in
# (KUBELET_*_ARGS environment plus kubeadm-flags.env).
systemd.services."kubelet" = let
in
lib.mkIf (cfg.hosts.${name}.role != "etcd") {
enable = true;
description = "kubelet";
# Tools kubelet shells out to at runtime (mounts, iptables, ceph, ...).
path = [
pkgs.ceph-client
pkgs.ethtool
pkgs.iproute2
pkgs.iptables
pkgs.kmod
pkgs.socat
pkgs.thin-provisioning-tools
pkgs.util-linux
];
unitConfig = {
# Never rate-limit restarts; Restart=always below keeps it up.
StartLimitIntervalSec = 0;
After = ["network-online.target"];
Wants = ["network-online.target"];
};
# On masters, generate the apiserver -> etcd client certificate from
# the shared etcd CA before kubelet (and thus the static-pod
# apiserver) starts.
# NOTE(review): this runs only for role == "master", not
# "master+etcd" — confirm master+etcd hosts don't also need it.
preStart =
if (cfg.hosts.${name}.role == "master")
then ''
mkdir -p /etc/kubernetes/pki
cd /etc/kubernetes/pki
if [ ! -f apiserver-etcd-client.crt ]
then
cat ${apiserver-etcd-client-csr-json} | ${pkgs.cfssl}/bin/cfssl gencert -ca=${cfg.etcd.certPath} -ca-key=${cfg.etcd.keyPath} -config=${ca-config-json} -profile=client - | ${pkgs.cfssl}/bin/cfssljson -bare apiserver-etcd-client
mv apiserver-etcd-client.pem apiserver-etcd-client.crt
mv apiserver-etcd-client-key.pem apiserver-etcd-client.key
fi
''
else "";
serviceConfig = {
Slice = "kubernetes.slice";
CPUAccounting = true;
MemoryAccounting = true;
Type = "simple";
Environment = [
"KUBELET_KUBECONFIG_ARGS=\"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf\""
"KUBELET_CONFIG_ARGS=\"--config=/var/lib/kubelet/config.yaml\""
];
# Leading "-" lets systemd ignore these files when absent (e.g.
# before `kubeadm join` has written kubeadm-flags.env).
EnvironmentFile = [
"-/var/lib/kubelet/kubeadm-flags.env"
"-/etc/sysconfig/kubelet"
];
ExecStart = "${kubelet}/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS";
Restart = "always";
RestartSec = "10s";
};
wantedBy = ["multi-user.target"];
};
}
);
};
};
}