Build a custom Tiny Core 17.0 kernel (6.18.2) with missing configs that the
stock kernel lacks for container workloads:

- CONFIG_CGROUP_BPF=y (cgroup v2 device control via BPF)
- CONFIG_DEVTMPFS=y (auto-create /dev device nodes)
- CONFIG_DEVTMPFS_MOUNT=y (auto-mount devtmpfs)
- CONFIG_MEMCG=y (memory cgroup controller for memory.max)
- CONFIG_CFS_BANDWIDTH=y (CPU bandwidth throttling for cpu.max)

Also strips unnecessary subsystems (sound, GPU, wireless, Bluetooth, KVM,
etc.) for a minimal footprint on a headless K8s edge appliance.

Init system fixes for successful boot-to-running-pods:

- Add switch_root in init.sh to escape initramfs (runc pivot_root)
- Add mountpoint guards in 00-early-mount.sh (skip if already mounted)
- Create essential device nodes after switch_root (kmsg, console, etc.)
- Enable cgroup v2 controller delegation with init process isolation
- Mount BPF filesystem for cgroup v2 device control
- Add mknod fallback from sysfs in 20-persistent-mount.sh for /dev/vda
- Move KubeSolo binary to /usr/bin (avoid /usr/local bind mount hiding)
- Generate /etc/machine-id in 60-hostname.sh (kubelet requires it)
- Pre-initialize iptables tables before kube-proxy starts
- Add nft_reject, nft_fib, xt_nfacct to kernel modules list

Build system changes:

- New build-kernel.sh script for custom kernel compilation
- Dockerfile.builder adds kernel build deps (flex, bison, libelf, etc.)
- Selective kernel module install (only modules.list + transitive deps)
- Install iptables-nft (xtables-nft-multi) + shared libs in rootfs

Tested: ISO boots in QEMU, node reaches Ready in ~35s, CoreDNS and
local-path-provisioner pods start and run successfully.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
50 lines
1.6 KiB
Bash
Executable File
#!/bin/sh
# 90-kubesolo.sh — Start KubeSolo (final init stage)
#
# This stage exec's KubeSolo as PID 1 (replacing init).
# KubeSolo manages containerd, kubelet, API server, and all K8s components.
#
# NOTE(review): this script appears to be *sourced* by the init framework
# ('return 1' only works in a sourced script) and relies on the log/log_err
# helpers being provided by the caller — confirm against init.sh.

KUBESOLO_BIN="/usr/bin/kubesolo"

# Abort the stage early if the binary is missing or not executable.
if [ ! -x "$KUBESOLO_BIN" ]; then
  log_err "KubeSolo binary not found at $KUBESOLO_BIN"
  return 1
fi

# Build KubeSolo command line.
KUBESOLO_ARGS="--path /var/lib/kubesolo --local-storage"

# Add an extra API-server SAN for the node's hostname, if one is set.
# Use a dedicated variable instead of clobbering HOSTNAME, which is
# conventionally set/exported by the shell and may be read elsewhere.
node_name="$(hostname)"
if [ -n "$node_name" ]; then
  KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $node_name"
fi

# Add any extra flags from boot parameters (expected in the environment).
if [ -n "$KUBESOLO_EXTRA_FLAGS" ]; then
  KUBESOLO_ARGS="$KUBESOLO_ARGS $KUBESOLO_EXTRA_FLAGS"
fi

# Add flags from the persistent config file, if present.
if [ -f /etc/kubesolo/extra-flags ]; then
  KUBESOLO_ARGS="$KUBESOLO_ARGS $(cat /etc/kubesolo/extra-flags)"
fi

# Pre-initialize iptables filter/nat/mangle tables and base chains.
# KubeSolo's kube-proxy uses iptables-restore (nf_tables backend) which needs
# the filter table to exist. Without this, the first iptables-restore fails
# with "RULE_APPEND failed (No such file or directory)".
# Merely listing a table is enough to create it; failures are deliberately
# non-fatal (|| true) — KubeSolo retries on its own.
if command -v iptables >/dev/null 2>&1; then
  iptables -t filter -L -n >/dev/null 2>&1 || true
  iptables -t nat -L -n >/dev/null 2>&1 || true
  iptables -t mangle -L -n >/dev/null 2>&1 || true
  log "Pre-initialized iptables tables (filter, nat, mangle)"
fi

log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS"
log "Kubeconfig will be at: /var/lib/kubesolo/pki/admin/admin.kubeconfig"

# exec replaces this init process — KubeSolo becomes PID 1.
# The binary path is quoted; KUBESOLO_ARGS is intentionally unquoted so it
# word-splits into individual arguments.
# shellcheck disable=SC2086
exec "$KUBESOLO_BIN" $KUBESOLO_ARGS