From d900fa920e8545c99afbbf75d054cdc0f7e31bd1 Mon Sep 17 00:00:00 2001 From: Adolfo Delorenzo Date: Wed, 11 Feb 2026 10:39:05 -0600 Subject: [PATCH] feat: add cloud-init Go parser (Phase 2) Implement a lightweight cloud-init system for first-boot configuration: - Go parser for YAML config (hostname, network, KubeSolo settings) - Static/DHCP network modes with DNS override - KubeSolo extra flags and API server SAN configuration - Portainer Edge Agent and air-gapped deployment support - New init stage 45-cloud-init.sh runs before network/hostname stages - Stages 50/60 skip gracefully when cloud-init has already applied - Build script compiles static Linux/amd64 binary (~2.7 MB) - 17 unit tests covering parsing, validation, and example files - Full documentation at docs/cloud-init.md Co-Authored-By: Claude Opus 4.6 --- Makefile | 16 ++- build/scripts/build-cloudinit.sh | 39 +++++ build/scripts/inject-kubesolo.sh | 10 ++ cloud-init/cmd/main.go | 132 +++++++++++++++++ cloud-init/config.go | 62 ++++++++ cloud-init/go.mod | 5 + cloud-init/go.sum | 4 + cloud-init/hostname.go | 80 +++++++++++ cloud-init/kubesolo.go | 79 ++++++++++ cloud-init/kubesolo_test.go | 118 +++++++++++++++ cloud-init/network.go | 174 ++++++++++++++++++++++ cloud-init/parser.go | 54 +++++++ cloud-init/parser_test.go | 238 +++++++++++++++++++++++++++++++ docs/cloud-init.md | 156 ++++++++++++++++++++ init/lib/45-cloud-init.sh | 35 +++++ init/lib/50-network.sh | 17 +-- init/lib/60-hostname.sh | 10 ++ 17 files changed, 1217 insertions(+), 12 deletions(-) create mode 100755 build/scripts/build-cloudinit.sh create mode 100644 cloud-init/cmd/main.go create mode 100644 cloud-init/config.go create mode 100644 cloud-init/go.mod create mode 100644 cloud-init/go.sum create mode 100644 cloud-init/hostname.go create mode 100644 cloud-init/kubesolo.go create mode 100644 cloud-init/kubesolo_test.go create mode 100644 cloud-init/network.go create mode 100644 cloud-init/parser.go create mode 100644 
cloud-init/parser_test.go create mode 100644 docs/cloud-init.md create mode 100644 init/lib/45-cloud-init.sh diff --git a/Makefile b/Makefile index 11ccb4c..57013b3 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ -.PHONY: all fetch rootfs initramfs iso disk-image \ +.PHONY: all fetch build-cloudinit rootfs initramfs iso disk-image \ test-boot test-k8s test-persistence test-deploy test-storage test-all \ + test-cloudinit \ dev-vm dev-vm-shell quick docker-build shellcheck \ kernel-audit clean distclean help @@ -27,7 +28,11 @@ fetch: # ============================================================================= # Build stages # ============================================================================= -rootfs: fetch +build-cloudinit: + @echo "==> Building cloud-init binary..." + $(BUILD_DIR)/scripts/build-cloudinit.sh + +rootfs: fetch build-cloudinit @echo "==> Preparing rootfs..." $(BUILD_DIR)/scripts/extract-core.sh $(BUILD_DIR)/scripts/inject-kubesolo.sh @@ -78,6 +83,11 @@ test-storage: iso test-all: test-boot test-k8s test-persistence +# Cloud-init Go tests +test-cloudinit: + @echo "==> Testing cloud-init parser..." + cd cloud-init && go test ./... 
-v -count=1 + # Full integration test suite (requires more time) test-integration: test-k8s test-deploy test-storage @@ -148,6 +158,7 @@ help: @echo "" @echo "Build targets:" @echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies" + @echo " make build-cloudinit Build cloud-init Go binary" @echo " make rootfs Extract + prepare rootfs with KubeSolo" @echo " make initramfs Repack rootfs into kubesolo-os.gz" @echo " make iso Create bootable ISO (default target)" @@ -161,6 +172,7 @@ help: @echo " make test-persist Reboot disk image, verify state persists" @echo " make test-deploy Deploy nginx pod, verify Running" @echo " make test-storage Test PVC with local-path provisioner" + @echo " make test-cloudinit Run cloud-init Go unit tests" @echo " make test-all Run core tests (boot + k8s + persistence)" @echo " make test-integ Run full integration suite" @echo "" diff --git a/build/scripts/build-cloudinit.sh b/build/scripts/build-cloudinit.sh new file mode 100755 index 0000000..5016d4e --- /dev/null +++ b/build/scripts/build-cloudinit.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# build-cloudinit.sh — Compile the cloud-init binary as a static Linux binary +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}" +CLOUDINIT_SRC="$PROJECT_ROOT/cloud-init" + +OUTPUT="$CACHE_DIR/kubesolo-cloudinit" + +echo "==> Building cloud-init binary..." + +if ! command -v go >/dev/null 2>&1; then + echo "ERROR: Go is not installed. Install Go 1.22+ to build cloud-init." + echo " https://go.dev/dl/" + exit 1 +fi + +mkdir -p "$CACHE_DIR" + +cd "$CLOUDINIT_SRC" + +# Run tests first +echo " Running tests..." +go test ./... -count=1 || { + echo "ERROR: Tests failed. Fix tests before building." + exit 1 +} + +# Build static binary for Linux amd64 +echo " Compiling (CGO_ENABLED=0 GOOS=linux GOARCH=amd64)..." 
+CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags='-s -w' \ + -o "$OUTPUT" \ + ./cmd/ + +echo " Built: $OUTPUT ($(du -h "$OUTPUT" | cut -f1))" +echo "" diff --git a/build/scripts/inject-kubesolo.sh b/build/scripts/inject-kubesolo.sh index 39ef4e5..a52edac 100755 --- a/build/scripts/inject-kubesolo.sh +++ b/build/scripts/inject-kubesolo.sh @@ -63,6 +63,16 @@ for lib in network.sh health.sh; do [ -f "$src" ] && cp "$src" "$ROOTFS/usr/lib/kubesolo-os/$lib" done +# Cloud-init binary (Go, built separately) +CLOUDINIT_BIN="$CACHE_DIR/kubesolo-cloudinit" +if [ -f "$CLOUDINIT_BIN" ]; then + cp "$CLOUDINIT_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit" + chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit" + echo " Installed cloud-init binary ($(du -h "$CLOUDINIT_BIN" | cut -f1))" +else + echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' to build)" +fi + # --- 3. Kernel modules list --- cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list" diff --git a/cloud-init/cmd/main.go b/cloud-init/cmd/main.go new file mode 100644 index 0000000..95ba01e --- /dev/null +++ b/cloud-init/cmd/main.go @@ -0,0 +1,132 @@ +// kubesolo-cloudinit is a lightweight cloud-init parser for KubeSolo OS. +// +// It reads a YAML configuration file and applies hostname, network, and +// KubeSolo settings during the init sequence. Designed to run as a static +// binary on BusyBox-based systems. 
+//
+// Usage:
+//
+//	kubesolo-cloudinit apply [config.yaml]
+//	kubesolo-cloudinit validate [config.yaml]
+//	kubesolo-cloudinit dump [config.yaml]
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"os"
+
+	cloudinit "github.com/portainer/kubesolo-os/cloud-init"
+)
+
+const (
+	defaultConfigPath = "/mnt/data/etc-kubesolo/cloud-init.yaml"
+	persistDataDir    = "/mnt/data"
+	configDir         = "/etc/kubesolo"
+)
+
+// main dispatches on the first argument (the command) and uses the optional
+// second argument as the config path, falling back to defaultConfigPath.
+// Any failure, including a missing or unknown command, exits with status 1.
+func main() {
+	// Set up structured logging to stderr (captured by init)
+	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
+		Level: slog.LevelInfo,
+	})))
+
+	if len(os.Args) < 2 {
+		usage()
+		os.Exit(1)
+	}
+
+	cmd := os.Args[1]
+
+	// Determine config path
+	configPath := defaultConfigPath
+	if len(os.Args) >= 3 {
+		configPath = os.Args[2]
+	}
+
+	switch cmd {
+	case "apply":
+		if err := cmdApply(configPath); err != nil {
+			slog.Error("cloud-init apply failed", "error", err)
+			os.Exit(1)
+		}
+	case "validate":
+		if err := cmdValidate(configPath); err != nil {
+			fmt.Fprintf(os.Stderr, "validation failed: %s\n", err)
+			os.Exit(1)
+		}
+		fmt.Println("OK")
+	case "dump":
+		if err := cmdDump(configPath); err != nil {
+			fmt.Fprintf(os.Stderr, "error: %s\n", err)
+			os.Exit(1)
+		}
+	default:
+		fmt.Fprintf(os.Stderr, "unknown command: %s\n", cmd)
+		usage()
+		os.Exit(1)
+	}
+}
+
+// cmdApply parses the config and applies hostname, network and KubeSolo
+// settings in that order; the first failure aborts and is returned.
+// Persisting hostname/network for later boots is best-effort: failures there
+// are logged as warnings and do not fail the command.
+func cmdApply(configPath string) error {
+	slog.Info("applying cloud-init", "config", configPath)
+
+	cfg, err := cloudinit.Parse(configPath)
+	if err != nil {
+		return err
+	}
+
+	// 1. Apply hostname
+	if err := cloudinit.ApplyHostname(cfg); err != nil {
+		return fmt.Errorf("hostname: %w", err)
+	}
+
+	// 2. Apply network configuration
+	if err := cloudinit.ApplyNetwork(cfg); err != nil {
+		return fmt.Errorf("network: %w", err)
+	}
+
+	// 3. Apply KubeSolo settings
+	if err := cloudinit.ApplyKubeSolo(cfg, configDir); err != nil {
+		return fmt.Errorf("kubesolo config: %w", err)
+	}
+
+	// 4. Save persistent configs for next boot
+	if err := cloudinit.SaveHostname(cfg, persistDataDir+"/etc-kubesolo"); err != nil {
+		slog.Warn("failed to save hostname", "error", err)
+	}
+	if err := cloudinit.SaveNetworkConfig(cfg, persistDataDir+"/network"); err != nil {
+		slog.Warn("failed to save network config", "error", err)
+	}
+
+	slog.Info("cloud-init applied successfully")
+	return nil
+}
+
+// cmdValidate parses (and thereby validates) the config without applying it.
+func cmdValidate(configPath string) error {
+	_, err := cloudinit.Parse(configPath)
+	return err
+}
+
+// cmdDump parses the config and prints it to stdout as indented JSON.
+func cmdDump(configPath string) error {
+	cfg, err := cloudinit.Parse(configPath)
+	if err != nil {
+		return err
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", "  ")
+	return enc.Encode(cfg)
+}
+
+// usage prints the command synopsis to stderr. The command argument is
+// mandatory (main exits without it); only the config path is optional.
+func usage() {
+	fmt.Fprintf(os.Stderr, `Usage: kubesolo-cloudinit COMMAND [config.yaml]
+
+Commands:
+  apply      Parse and apply cloud-init configuration
+  validate   Check config file for errors
+  dump       Parse and print config as JSON
+
+If config path is omitted, defaults to %s
+`, defaultConfigPath)
+}
diff --git a/cloud-init/config.go b/cloud-init/config.go
new file mode 100644
index 0000000..1387d2b
--- /dev/null
+++ b/cloud-init/config.go
@@ -0,0 +1,62 @@
+// Package cloudinit implements a lightweight cloud-init parser for KubeSolo OS.
+//
+// It reads a simplified cloud-init YAML config and applies:
+//   - hostname
+//   - network configuration (static IP or DHCP)
+//   - KubeSolo extra flags and settings
+//   - NTP servers (parsed into Config only; no apply step in this package — TODO confirm consumer)
+//
+// The config file is typically at /mnt/data/etc-kubesolo/cloud-init.yaml
+// or specified via the kubesolo.cloudinit=PATH boot parameter.
+package cloudinit
+
+// Config is the top-level cloud-init configuration.
+type Config struct {
+	Hostname  string          `yaml:"hostname"`
+	Network   NetworkConfig   `yaml:"network"`
+	KubeSolo  KubeSoloConfig  `yaml:"kubesolo"`
+	NTP       NTPConfig       `yaml:"ntp"`
+	Airgap    AirgapConfig    `yaml:"airgap"`
+	Portainer PortainerConfig `yaml:"portainer"`
+}
+
+// NetworkConfig defines network settings.
+type NetworkConfig struct {
+	Mode      string   `yaml:"mode"`      // "dhcp" or "static"; empty is defaulted to "dhcp" by ParseBytes
+	Interface string   `yaml:"interface"` // e.g. "eth0" (auto-detected if empty)
+	Address   string   `yaml:"address"`   // CIDR notation, e.g. "192.168.1.100/24"; required in static mode
+	Gateway   string   `yaml:"gateway"`   // e.g. "192.168.1.1"; required in static mode
+	DNS       []string `yaml:"dns"`       // nameservers; written to /etc/resolv.conf when non-empty
+}
+
+// KubeSoloConfig defines KubeSolo-specific settings.
+type KubeSoloConfig struct {
+	ExtraFlags   string   `yaml:"extra-flags"`          // copied verbatim into the extra-flags file
+	LocalStorage *bool    `yaml:"local-storage"`        // nil (unset) is written out as "local-storage: true"
+	ExtraSANs    []string `yaml:"apiserver-extra-sans"` // each entry becomes one "--apiserver-extra-sans" flag
+}
+
+// NTPConfig defines NTP settings.
+type NTPConfig struct {
+	Servers []string `yaml:"servers"` // parsed only; no reader in this package — consumer TODO confirm
+}
+
+// AirgapConfig defines air-gapped deployment settings.
+type AirgapConfig struct {
+	ImportImages bool   `yaml:"import-images"` // parsed only; no reader in this package — consumer TODO confirm
+	ImagesDir    string `yaml:"images-dir"`    // parsed only; presumably a directory of image archives — TODO confirm
+}
+
+// PortainerConfig defines Portainer Edge Agent settings.
+type PortainerConfig struct {
+	EdgeAgent EdgeAgentConfig `yaml:"edge-agent"` // parsed only; no reader in this package — consumer TODO confirm
+}
+
+// EdgeAgentConfig holds Portainer Edge Agent connection details.
+type EdgeAgentConfig struct { + Enabled bool `yaml:"enabled"` + EdgeID string `yaml:"edge-id"` + EdgeKey string `yaml:"edge-key"` + PortainerURL string `yaml:"portainer-url"` + Image string `yaml:"image"` +} diff --git a/cloud-init/go.mod b/cloud-init/go.mod new file mode 100644 index 0000000..fe4cfb3 --- /dev/null +++ b/cloud-init/go.mod @@ -0,0 +1,5 @@ +module github.com/portainer/kubesolo-os/cloud-init + +go 1.25.5 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/cloud-init/go.sum b/cloud-init/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/cloud-init/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cloud-init/hostname.go b/cloud-init/hostname.go new file mode 100644 index 0000000..e1fbc2c --- /dev/null +++ b/cloud-init/hostname.go @@ -0,0 +1,80 @@ +package cloudinit + +import ( + "fmt" + "log/slog" + "os" + "strings" +) + +// ApplyHostname sets the system hostname and updates /etc/hostname and /etc/hosts. 
+func ApplyHostname(cfg *Config) error {
+	hostname := cfg.Hostname
+	if hostname == "" {
+		slog.Info("no hostname in cloud-init, skipping")
+		return nil
+	}
+
+	// Set the running hostname
+	if err := os.WriteFile("/proc/sys/kernel/hostname", []byte(hostname), 0o644); err != nil {
+		// Fallback: use the hostname command
+		if err := run("hostname", hostname); err != nil {
+			return fmt.Errorf("setting hostname: %w", err)
+		}
+	}
+
+	// Write /etc/hostname
+	if err := os.WriteFile("/etc/hostname", []byte(hostname+"\n"), 0o644); err != nil {
+		return fmt.Errorf("writing /etc/hostname: %w", err)
+	}
+
+	// Ensure hostname is in /etc/hosts
+	if err := ensureHostsEntry(hostname); err != nil {
+		return fmt.Errorf("updating /etc/hosts: %w", err)
+	}
+
+	slog.Info("hostname set", "hostname", hostname)
+	return nil
+}
+
+// SaveHostname writes the hostname to the persistent data partition so it
+// survives reboots (even without cloud-init on next boot).
+// It is a no-op when cfg.Hostname is empty; otherwise destDir is created if
+// needed and the name is written to destDir/hostname.
+func SaveHostname(cfg *Config, destDir string) error {
+	if cfg.Hostname == "" {
+		return nil
+	}
+	if err := os.MkdirAll(destDir, 0o755); err != nil {
+		return fmt.Errorf("creating hostname dir: %w", err)
+	}
+	dest := destDir + "/hostname"
+	if err := os.WriteFile(dest, []byte(cfg.Hostname+"\n"), 0o644); err != nil {
+		return fmt.Errorf("writing persistent hostname: %w", err)
+	}
+	slog.Info("hostname saved", "path", dest)
+	return nil
+}
+
+// ensureHostsEntry appends "127.0.0.1 <hostname>" to /etc/hosts unless the
+// file already names the host. A missing /etc/hosts is treated as empty and
+// created; any other read error is returned.
+func ensureHostsEntry(hostname string) error {
+	data, err := os.ReadFile("/etc/hosts")
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+
+	content := string(data)
+
+	// Whole-name match: a substring test would treat "local" as already
+	// present because of "localhost" and silently skip the entry.
+	if hostsHasName(content, hostname) {
+		return nil
+	}
+
+	// Append, making sure the existing content ends with a newline first.
+	if !strings.HasSuffix(content, "\n") && content != "" {
+		content += "\n"
+	}
+	content += "127.0.0.1 " + hostname + "\n"
+
+	return os.WriteFile("/etc/hosts", []byte(content), 0o644)
+}
+
+// hostsHasName reports whether content (hosts-file syntax) already lists
+// hostname as a complete name on some non-comment line. Names are compared
+// case-insensitively; the address field is never treated as a name.
+func hostsHasName(content, hostname string) bool {
+	for _, line := range strings.Split(content, "\n") {
+		// Strip comments so a name mentioned only after '#' does not count.
+		if i := strings.IndexByte(line, '#'); i >= 0 {
+			line = line[:i]
+		}
+		fields := strings.Fields(line)
+		if len(fields) < 2 {
+			continue // blank, comment-only, or an address without names
+		}
+		// fields[0] is the address; the rest are the canonical name and aliases.
+		for _, name := range fields[1:] {
+			if strings.EqualFold(name, hostname) {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/cloud-init/kubesolo.go 
b/cloud-init/kubesolo.go new file mode 100644 index 0000000..105d307 --- /dev/null +++ b/cloud-init/kubesolo.go @@ -0,0 +1,79 @@ +package cloudinit + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" +) + +// ApplyKubeSolo writes KubeSolo configuration files based on cloud-init config. +// These files are read by init stage 90-kubesolo.sh when building the +// KubeSolo command line. +func ApplyKubeSolo(cfg *Config, configDir string) error { + if err := os.MkdirAll(configDir, 0o755); err != nil { + return fmt.Errorf("creating config dir %s: %w", configDir, err) + } + + // Write extra flags file (consumed by 90-kubesolo.sh) + flags := buildExtraFlags(cfg) + if flags != "" { + flagsPath := filepath.Join(configDir, "extra-flags") + if err := os.WriteFile(flagsPath, []byte(flags+"\n"), 0o644); err != nil { + return fmt.Errorf("writing extra-flags: %w", err) + } + slog.Info("wrote KubeSolo extra flags", "path", flagsPath, "flags", flags) + } + + // Write config.yaml for KubeSolo if we have settings beyond defaults + if err := writeKubeSoloConfig(cfg, configDir); err != nil { + return err + } + + return nil +} + +func buildExtraFlags(cfg *Config) string { + var parts []string + + if cfg.KubeSolo.ExtraFlags != "" { + parts = append(parts, cfg.KubeSolo.ExtraFlags) + } + + // Add extra SANs from cloud-init + for _, san := range cfg.KubeSolo.ExtraSANs { + parts = append(parts, "--apiserver-extra-sans", san) + } + + return strings.Join(parts, " ") +} + +func writeKubeSoloConfig(cfg *Config, configDir string) error { + var lines []string + lines = append(lines, "# Generated by KubeSolo OS cloud-init") + lines = append(lines, "data-dir: /var/lib/kubesolo") + + if cfg.KubeSolo.LocalStorage != nil { + if *cfg.KubeSolo.LocalStorage { + lines = append(lines, "local-storage: true") + } else { + lines = append(lines, "local-storage: false") + } + } else { + lines = append(lines, "local-storage: true") + } + + lines = append(lines, "bind-address: 0.0.0.0") + lines = 
append(lines, "cluster-cidr: 10.42.0.0/16") + lines = append(lines, "service-cidr: 10.43.0.0/16") + + dest := filepath.Join(configDir, "config.yaml") + content := strings.Join(lines, "\n") + "\n" + if err := os.WriteFile(dest, []byte(content), 0o644); err != nil { + return fmt.Errorf("writing config.yaml: %w", err) + } + + slog.Info("wrote KubeSolo config", "path", dest) + return nil +} diff --git a/cloud-init/kubesolo_test.go b/cloud-init/kubesolo_test.go new file mode 100644 index 0000000..6b0e72e --- /dev/null +++ b/cloud-init/kubesolo_test.go @@ -0,0 +1,118 @@ +package cloudinit + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestBuildExtraFlags(t *testing.T) { + tests := []struct { + name string + cfg Config + want string + }{ + { + name: "empty", + cfg: Config{}, + want: "", + }, + { + name: "extra flags only", + cfg: Config{ + KubeSolo: KubeSoloConfig{ExtraFlags: "--disable traefik"}, + }, + want: "--disable traefik", + }, + { + name: "extra sans only", + cfg: Config{ + KubeSolo: KubeSoloConfig{ + ExtraSANs: []string{"node.local", "192.168.1.100"}, + }, + }, + want: "--apiserver-extra-sans node.local --apiserver-extra-sans 192.168.1.100", + }, + { + name: "flags and sans", + cfg: Config{ + KubeSolo: KubeSoloConfig{ + ExtraFlags: "--disable servicelb", + ExtraSANs: []string{"edge.local"}, + }, + }, + want: "--disable servicelb --apiserver-extra-sans edge.local", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := buildExtraFlags(&tt.cfg) + if got != tt.want { + t.Errorf("buildExtraFlags() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestApplyKubeSolo(t *testing.T) { + dir := t.TempDir() + tr := true + cfg := &Config{ + KubeSolo: KubeSoloConfig{ + ExtraFlags: "--disable traefik", + LocalStorage: &tr, + ExtraSANs: []string{"test.local"}, + }, + } + + if err := ApplyKubeSolo(cfg, dir); err != nil { + t.Fatalf("ApplyKubeSolo error: %v", err) + } + + // Check extra-flags file + flagsData, err := 
os.ReadFile(filepath.Join(dir, "extra-flags")) + if err != nil { + t.Fatalf("reading extra-flags: %v", err) + } + flags := strings.TrimSpace(string(flagsData)) + if !strings.Contains(flags, "--disable traefik") { + t.Errorf("extra-flags missing '--disable traefik': %q", flags) + } + if !strings.Contains(flags, "--apiserver-extra-sans test.local") { + t.Errorf("extra-flags missing SANs: %q", flags) + } + + // Check config.yaml + configData, err := os.ReadFile(filepath.Join(dir, "config.yaml")) + if err != nil { + t.Fatalf("reading config.yaml: %v", err) + } + config := string(configData) + if !strings.Contains(config, "local-storage: true") { + t.Errorf("config.yaml missing local-storage: %q", config) + } + if !strings.Contains(config, "data-dir: /var/lib/kubesolo") { + t.Errorf("config.yaml missing data-dir: %q", config) + } +} + +func TestApplyKubeSoloNoFlags(t *testing.T) { + dir := t.TempDir() + cfg := &Config{} + + if err := ApplyKubeSolo(cfg, dir); err != nil { + t.Fatalf("ApplyKubeSolo error: %v", err) + } + + // extra-flags should not exist when empty + if _, err := os.Stat(filepath.Join(dir, "extra-flags")); !os.IsNotExist(err) { + t.Error("extra-flags file should not exist when no flags configured") + } + + // config.yaml should still be created with defaults + if _, err := os.Stat(filepath.Join(dir, "config.yaml")); err != nil { + t.Error("config.yaml should be created even with empty config") + } +} diff --git a/cloud-init/network.go b/cloud-init/network.go new file mode 100644 index 0000000..bae2508 --- /dev/null +++ b/cloud-init/network.go @@ -0,0 +1,174 @@ +package cloudinit + +import ( + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// ApplyNetwork configures the network interface based on cloud-init config. +// For static mode, it sets the IP, gateway, and DNS directly. +// For DHCP mode, it runs udhcpc on the target interface. 
+func ApplyNetwork(cfg *Config) error {
+	iface := cfg.Network.Interface
+	if iface == "" {
+		var err error
+		iface, err = detectPrimaryInterface()
+		if err != nil {
+			return fmt.Errorf("detecting primary interface: %w", err)
+		}
+	}
+
+	slog.Info("configuring network", "interface", iface, "mode", cfg.Network.Mode)
+
+	// Bring up the interface
+	if err := run("ip", "link", "set", iface, "up"); err != nil {
+		return fmt.Errorf("bringing up %s: %w", iface, err)
+	}
+
+	switch cfg.Network.Mode {
+	case "static":
+		return applyStatic(iface, cfg)
+	case "dhcp", "":
+		return applyDHCP(iface, cfg)
+	default:
+		return fmt.Errorf("unknown network mode: %s", cfg.Network.Mode)
+	}
+}
+
+// applyStatic assigns cfg.Network.Address to iface, installs a default route
+// via cfg.Network.Gateway, and rewrites /etc/resolv.conf when DNS servers are
+// configured. The first failing step is returned.
+func applyStatic(iface string, cfg *Config) error {
+	// Set IP address
+	if err := run("ip", "addr", "add", cfg.Network.Address, "dev", iface); err != nil {
+		return fmt.Errorf("setting address %s on %s: %w", cfg.Network.Address, iface, err)
+	}
+
+	// Set default gateway
+	if err := run("ip", "route", "add", "default", "via", cfg.Network.Gateway, "dev", iface); err != nil {
+		return fmt.Errorf("setting gateway %s: %w", cfg.Network.Gateway, err)
+	}
+
+	// Write DNS configuration
+	if len(cfg.Network.DNS) > 0 {
+		if err := writeDNS(cfg.Network.DNS); err != nil {
+			return fmt.Errorf("writing DNS config: %w", err)
+		}
+	}
+
+	slog.Info("static network configured",
+		"interface", iface,
+		"address", cfg.Network.Address,
+		"gateway", cfg.Network.Gateway,
+	)
+	return nil
+}
+
+// applyDHCP runs the first DHCP client found on PATH (udhcpc, then dhcpcd) on
+// iface. DNS servers from the config, if any, are written afterwards so they
+// replace whatever the client put in /etc/resolv.conf.
+func applyDHCP(iface string, cfg *Config) error {
+	// Try udhcpc (BusyBox), then dhcpcd
+	if path, err := exec.LookPath("udhcpc"); err == nil {
+		args := []string{"-i", iface, "-s", "/usr/share/udhcpc/default.script",
+			"-t", "10", "-T", "3", "-A", "5", "-b", "-q"}
+		if err := run(path, args...); err != nil {
+			return fmt.Errorf("udhcpc on %s: %w", iface, err)
+		}
+	} else if path, err := exec.LookPath("dhcpcd"); err == nil {
+		if err := run(path, iface); err != nil {
+			return fmt.Errorf("dhcpcd on %s: %w", iface, err)
+		}
+	} else {
+		return fmt.Errorf("no DHCP client available (need udhcpc or dhcpcd)")
+	}
+
+	// Override DNS if specified in config
+	if len(cfg.Network.DNS) > 0 {
+		if err := writeDNS(cfg.Network.DNS); err != nil {
+			return fmt.Errorf("writing DNS config: %w", err)
+		}
+	}
+
+	slog.Info("DHCP network configured", "interface", iface)
+	return nil
+}
+
+// SaveNetworkConfig writes a shell script that restores the current network
+// config on subsequent boots (before cloud-init runs).
+//
+// The script is written to destDir/interfaces.sh (mode 0755); destDir is
+// created if needed. When cfg.Network.Interface is empty the interface is
+// auto-detected, and a detection failure is returned unchanged.
+func SaveNetworkConfig(cfg *Config, destDir string) error {
+	iface := cfg.Network.Interface
+	if iface == "" {
+		var err error
+		iface, err = detectPrimaryInterface()
+		if err != nil {
+			return err
+		}
+	}
+
+	if err := os.MkdirAll(destDir, 0o755); err != nil {
+		return fmt.Errorf("creating network config dir: %w", err)
+	}
+
+	dest := filepath.Join(destDir, "interfaces.sh")
+	script := renderNetworkScript(iface, cfg.Network.Mode, cfg.Network.Address, cfg.Network.Gateway, cfg.Network.DNS)
+	if err := os.WriteFile(dest, []byte(script), 0o755); err != nil {
+		return fmt.Errorf("writing network config: %w", err)
+	}
+
+	slog.Info("network config saved", "path", dest)
+	return nil
+}
+
+// renderNetworkScript builds the body of interfaces.sh for the given settings.
+// It mirrors what applyStatic/applyDHCP do at apply time, including the DNS
+// override in DHCP mode, which the saved script previously dropped so the
+// override was lost on the next boot. Modes other than "static", "dhcp" or ""
+// only bring the link up.
+func renderNetworkScript(iface, mode, address, gateway string, dns []string) string {
+	var sb strings.Builder
+	sb.WriteString("#!/bin/sh\n")
+	sb.WriteString("# Auto-generated by KubeSolo OS cloud-init\n")
+	fmt.Fprintf(&sb, "ip link set %s up\n", iface)
+
+	switch mode {
+	case "static":
+		fmt.Fprintf(&sb, "ip addr add %s dev %s\n", address, iface)
+		fmt.Fprintf(&sb, "ip route add default via %s dev %s\n", gateway, iface)
+	case "dhcp", "":
+		sb.WriteString("udhcpc -i " + iface + " -s /usr/share/udhcpc/default.script -t 10 -T 3 -A 5 -b -q 2>/dev/null\n")
+	default:
+		return sb.String()
+	}
+
+	// Written last so it overrides anything the DHCP client script produced.
+	if len(dns) > 0 {
+		sb.WriteString(": > /etc/resolv.conf\n")
+		for _, ns := range dns {
+			fmt.Fprintf(&sb, "echo 'nameserver %s' >> /etc/resolv.conf\n", ns)
+		}
+	}
+	return sb.String()
+}
+
+func writeDNS(servers []string) error {
+	var sb strings.Builder
+	for _, ns := range servers {
+		sb.WriteString("nameserver " + ns + "\n")
+	}
+	return 
os.WriteFile("/etc/resolv.conf", []byte(sb.String()), 0o644) +} + +func detectPrimaryInterface() (string, error) { + entries, err := os.ReadDir("/sys/class/net") + if err != nil { + return "", fmt.Errorf("reading /sys/class/net: %w", err) + } + for _, e := range entries { + name := e.Name() + switch { + case name == "lo", + strings.HasPrefix(name, "docker"), + strings.HasPrefix(name, "veth"), + strings.HasPrefix(name, "br"), + strings.HasPrefix(name, "cni"), + strings.HasPrefix(name, "flannel"), + strings.HasPrefix(name, "cali"): + continue + } + return name, nil + } + return "", fmt.Errorf("no suitable network interface found") +} + +func run(name string, args ...string) error { + cmd := exec.Command(name, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} diff --git a/cloud-init/parser.go b/cloud-init/parser.go new file mode 100644 index 0000000..f3b45e9 --- /dev/null +++ b/cloud-init/parser.go @@ -0,0 +1,54 @@ +package cloudinit + +import ( + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +// Parse reads a cloud-init YAML file and returns the parsed config. +func Parse(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading cloud-init file %s: %w", path, err) + } + return ParseBytes(data) +} + +// ParseBytes parses cloud-init YAML from a byte slice. 
+func ParseBytes(data []byte) (*Config, error) { + var cfg Config + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("parsing cloud-init YAML: %w", err) + } + + if err := validate(&cfg); err != nil { + return nil, fmt.Errorf("validating cloud-init config: %w", err) + } + + // Apply defaults + if cfg.Network.Mode == "" { + cfg.Network.Mode = "dhcp" + } + + return &cfg, nil +} + +func validate(cfg *Config) error { + switch cfg.Network.Mode { + case "", "dhcp": + // valid + case "static": + if cfg.Network.Address == "" { + return fmt.Errorf("static network mode requires 'address' field") + } + if cfg.Network.Gateway == "" { + return fmt.Errorf("static network mode requires 'gateway' field") + } + default: + return fmt.Errorf("unknown network mode: %q (expected 'dhcp' or 'static')", cfg.Network.Mode) + } + + return nil +} diff --git a/cloud-init/parser_test.go b/cloud-init/parser_test.go new file mode 100644 index 0000000..9c20388 --- /dev/null +++ b/cloud-init/parser_test.go @@ -0,0 +1,238 @@ +package cloudinit + +import ( + "testing" +) + +func TestParseDHCP(t *testing.T) { + yaml := []byte(` +hostname: test-node +network: + mode: dhcp +kubesolo: + local-storage: true +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if cfg.Hostname != "test-node" { + t.Errorf("hostname = %q, want %q", cfg.Hostname, "test-node") + } + if cfg.Network.Mode != "dhcp" { + t.Errorf("network.mode = %q, want %q", cfg.Network.Mode, "dhcp") + } +} + +func TestParseStatic(t *testing.T) { + yaml := []byte(` +hostname: edge-01 +network: + mode: static + interface: eth0 + address: 192.168.1.100/24 + gateway: 192.168.1.1 + dns: + - 8.8.8.8 + - 8.8.4.4 +kubesolo: + extra-flags: "--disable traefik" + local-storage: true + apiserver-extra-sans: + - edge-01.local +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if cfg.Hostname != "edge-01" { + t.Errorf("hostname = %q, want 
%q", cfg.Hostname, "edge-01") + } + if cfg.Network.Mode != "static" { + t.Errorf("network.mode = %q, want %q", cfg.Network.Mode, "static") + } + if cfg.Network.Address != "192.168.1.100/24" { + t.Errorf("network.address = %q, want %q", cfg.Network.Address, "192.168.1.100/24") + } + if cfg.Network.Gateway != "192.168.1.1" { + t.Errorf("network.gateway = %q, want %q", cfg.Network.Gateway, "192.168.1.1") + } + if len(cfg.Network.DNS) != 2 { + t.Fatalf("dns count = %d, want 2", len(cfg.Network.DNS)) + } + if cfg.Network.DNS[0] != "8.8.8.8" { + t.Errorf("dns[0] = %q, want %q", cfg.Network.DNS[0], "8.8.8.8") + } + if cfg.KubeSolo.ExtraFlags != "--disable traefik" { + t.Errorf("extra-flags = %q, want %q", cfg.KubeSolo.ExtraFlags, "--disable traefik") + } + if len(cfg.KubeSolo.ExtraSANs) != 1 || cfg.KubeSolo.ExtraSANs[0] != "edge-01.local" { + t.Errorf("extra-sans = %v, want [edge-01.local]", cfg.KubeSolo.ExtraSANs) + } +} + +func TestParseDefaultMode(t *testing.T) { + yaml := []byte(` +hostname: default-node +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Network.Mode != "dhcp" { + t.Errorf("network.mode = %q, want %q (default)", cfg.Network.Mode, "dhcp") + } +} + +func TestParseStaticMissingAddress(t *testing.T) { + yaml := []byte(` +network: + mode: static + gateway: 192.168.1.1 +`) + _, err := ParseBytes(yaml) + if err == nil { + t.Fatal("expected error for static mode without address") + } +} + +func TestParseStaticMissingGateway(t *testing.T) { + yaml := []byte(` +network: + mode: static + address: 192.168.1.100/24 +`) + _, err := ParseBytes(yaml) + if err == nil { + t.Fatal("expected error for static mode without gateway") + } +} + +func TestParseUnknownMode(t *testing.T) { + yaml := []byte(` +network: + mode: ppp +`) + _, err := ParseBytes(yaml) + if err == nil { + t.Fatal("expected error for unknown network mode") + } +} + +func TestParseAirgap(t *testing.T) { + yaml := []byte(` +hostname: airgap-node 
+network: + mode: static + address: 10.0.0.50/24 + gateway: 10.0.0.1 +airgap: + import-images: true + images-dir: /mnt/data/images +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cfg.Airgap.ImportImages { + t.Error("airgap.import-images should be true") + } + if cfg.Airgap.ImagesDir != "/mnt/data/images" { + t.Errorf("airgap.images-dir = %q, want %q", cfg.Airgap.ImagesDir, "/mnt/data/images") + } +} + +func TestParsePortainer(t *testing.T) { + yaml := []byte(` +hostname: edge-node +network: + mode: dhcp +portainer: + edge-agent: + enabled: true + edge-id: test-id + edge-key: test-key + portainer-url: https://portainer.example.com +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cfg.Portainer.EdgeAgent.Enabled { + t.Error("portainer.edge-agent.enabled should be true") + } + if cfg.Portainer.EdgeAgent.EdgeID != "test-id" { + t.Errorf("edge-id = %q, want %q", cfg.Portainer.EdgeAgent.EdgeID, "test-id") + } + if cfg.Portainer.EdgeAgent.PortainerURL != "https://portainer.example.com" { + t.Errorf("portainer-url = %q", cfg.Portainer.EdgeAgent.PortainerURL) + } +} + +func TestParseNTP(t *testing.T) { + yaml := []byte(` +hostname: ntp-node +ntp: + servers: + - pool.ntp.org + - time.google.com +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(cfg.NTP.Servers) != 2 { + t.Fatalf("ntp.servers count = %d, want 2", len(cfg.NTP.Servers)) + } + if cfg.NTP.Servers[0] != "pool.ntp.org" { + t.Errorf("ntp.servers[0] = %q, want %q", cfg.NTP.Servers[0], "pool.ntp.org") + } +} + +func TestParseBoolPointer(t *testing.T) { + yaml := []byte(` +kubesolo: + local-storage: false +`) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.KubeSolo.LocalStorage == nil { + t.Fatal("local-storage should not be nil") + } + if *cfg.KubeSolo.LocalStorage { + t.Error("local-storage should be 
false") + } +} + +func TestParseEmptyConfig(t *testing.T) { + yaml := []byte(``) + cfg, err := ParseBytes(yaml) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.Network.Mode != "dhcp" { + t.Errorf("empty config should default to dhcp, got %q", cfg.Network.Mode) + } +} + +func TestParseExampleFiles(t *testing.T) { + examples := []string{ + "examples/dhcp.yaml", + "examples/static-ip.yaml", + "examples/portainer-edge.yaml", + "examples/airgapped.yaml", + } + + for _, path := range examples { + t.Run(path, func(t *testing.T) { + _, err := Parse(path) + if err != nil { + t.Errorf("failed to parse %s: %v", path, err) + } + }) + } +} diff --git a/docs/cloud-init.md b/docs/cloud-init.md new file mode 100644 index 0000000..28e013b --- /dev/null +++ b/docs/cloud-init.md @@ -0,0 +1,156 @@ +# KubeSolo OS — Cloud-Init Configuration + +KubeSolo OS uses a lightweight cloud-init system to configure the node on first boot. The configuration is a YAML file placed on the data partition before the first boot. + +## Configuration File Location + +The cloud-init config is loaded from (in priority order): + +1. Path specified by `kubesolo.cloudinit=` boot parameter +2. `/mnt/data/etc-kubesolo/cloud-init.yaml` (default) + +## Boot Sequence Integration + +Cloud-init runs as **init stage 45**, before network (stage 50) and hostname (stage 60). When cloud-init applies successfully, stages 50 and 60 detect this and skip their default behavior. 
+
+```
+Stage 20: Mount persistent storage
+Stage 30: Load kernel modules
+Stage 40: Apply sysctl
+Stage 45: Cloud-init (parse YAML, apply hostname + network + KubeSolo config)  <--
+Stage 50: Network fallback (skipped if cloud-init handled it)
+Stage 60: Hostname fallback (skipped if cloud-init handled it)
+Stage 70: Clock sync
+Stage 80: Containerd prerequisites
+Stage 90: Start KubeSolo
+```
+
+## YAML Schema
+
+```yaml
+# Hostname for the node
+hostname: kubesolo-node
+
+# Network configuration
+network:
+  mode: dhcp | static          # Default: dhcp
+  interface: eth0              # Optional: auto-detected if omitted
+  address: 192.168.1.100/24    # Required for static mode (CIDR notation)
+  gateway: 192.168.1.1         # Required for static mode
+  dns:                         # Optional: DNS nameservers
+    - 8.8.8.8
+    - 1.1.1.1
+
+# KubeSolo settings
+kubesolo:
+  extra-flags: "--disable traefik"  # Extra CLI flags for KubeSolo binary
+  local-storage: true               # Enable local-path provisioner (default: true)
+  apiserver-extra-sans:             # Extra SANs for API server certificate
+    - node.example.com
+    - 10.0.0.50
+
+# NTP servers (optional)
+ntp:
+  servers:
+    - pool.ntp.org
+
+# Air-gapped deployment (optional)
+airgap:
+  import-images: true           # Import container images from data partition
+  images-dir: /mnt/data/images  # Directory containing .tar image files
+
+# Portainer Edge Agent (optional)
+portainer:
+  edge-agent:
+    enabled: true
+    edge-id: "your-edge-id"
+    edge-key: "your-edge-key"
+    portainer-url: "https://portainer.example.com"
+```
+
+## Network Modes
+
+### DHCP (Default)
+
+```yaml
+network:
+  mode: dhcp
+```
+
+Uses BusyBox `udhcpc` on the first non-virtual interface. Optionally override DNS:
+
+```yaml
+network:
+  mode: dhcp
+  dns:
+    - 10.0.0.1
+```
+
+### Static IP
+
+```yaml
+network:
+  mode: static
+  interface: eth0
+  address: 192.168.1.100/24
+  gateway: 192.168.1.1
+  dns:
+    - 8.8.8.8
+    - 8.8.4.4
+```
+
+Both `address` (CIDR) and `gateway` are required for static mode.
+
+## Persistence
+
+After applying, cloud-init saves its configuration to the data partition:
+
+| File | Purpose |
+|------|---------|
+| `/mnt/data/network/interfaces.sh` | Shell script to restore network config on next boot |
+| `/mnt/data/etc-kubesolo/hostname` | Saved hostname |
+| `/etc/kubesolo/extra-flags` | KubeSolo CLI flags |
+| `/etc/kubesolo/config.yaml` | KubeSolo configuration |
+
+On subsequent boots, stage 45 runs `kubesolo-cloudinit apply` again whenever the config file is still present, and stage 50 is skipped if it succeeds. If stage 45 does not apply a config (no config file, binary missing, or `apply` failed), stage 50 (network) sources the saved `interfaces.sh` directly, skipping cloud-init parsing entirely. This is faster and doesn't require the cloud-init binary.
+
+## CLI Usage
+
+The cloud-init binary supports three commands:
+
+```bash
+# Apply configuration (run during boot by stage 45)
+kubesolo-cloudinit apply /path/to/cloud-init.yaml
+
+# Validate a config file
+kubesolo-cloudinit validate /path/to/cloud-init.yaml
+
+# Dump parsed config as JSON (for debugging)
+kubesolo-cloudinit dump /path/to/cloud-init.yaml
+```
+
+## Examples
+
+See `cloud-init/examples/` for complete configuration examples:
+
+- `dhcp.yaml` — DHCP with defaults
+- `static-ip.yaml` — Static IP configuration
+- `portainer-edge.yaml` — Portainer Edge Agent integration
+- `airgapped.yaml` — Air-gapped deployment with pre-loaded images
+
+## Building
+
+The cloud-init binary is built as part of the normal build process:
+
+```bash
+# Build just the cloud-init binary
+make build-cloudinit
+
+# Run cloud-init unit tests
+make test-cloudinit
+
+# Full build (includes cloud-init)
+make iso
+```
+
+The binary is compiled as a static Linux/amd64 binary (`CGO_ENABLED=0`) and is approximately 2.7 MB.
diff --git a/init/lib/45-cloud-init.sh b/init/lib/45-cloud-init.sh
new file mode 100644
index 0000000..896fa19
--- /dev/null
+++ b/init/lib/45-cloud-init.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# 45-cloud-init.sh — Apply cloud-init configuration
+#
+# Runs the kubesolo-cloudinit binary to parse cloud-init.yaml and apply:
+#   - hostname (/etc/hostname, /etc/hosts)
+#   - network (static IP or DHCP)
+#   - KubeSolo settings (/etc/kubesolo/extra-flags, config.yaml)
+#   - persistent configs saved to data partition
+#
+# If no cloud-init file is found, this stage is a no-op and later stages
+# (50-network, 60-hostname) handle defaults.
+
+CLOUDINIT_BIN="/usr/lib/kubesolo-os/kubesolo-cloudinit"
+CLOUDINIT_FILE="${KUBESOLO_CLOUDINIT:-$DATA_MOUNT/etc-kubesolo/cloud-init.yaml}"
+
+if [ ! -x "$CLOUDINIT_BIN" ]; then
+    log_warn "cloud-init binary not found at $CLOUDINIT_BIN — skipping"
+    return 0
+fi
+
+if [ ! -f "$CLOUDINIT_FILE" ]; then
+    log "No cloud-init config found at $CLOUDINIT_FILE — skipping"
+    return 0
+fi
+
+log "Applying cloud-init from: $CLOUDINIT_FILE"
+
+if "$CLOUDINIT_BIN" apply "$CLOUDINIT_FILE"; then
+    log_ok "cloud-init applied successfully"
+    # Signal to later stages that cloud-init handled network/hostname
+    CLOUDINIT_APPLIED=1
+    export CLOUDINIT_APPLIED
+else
+    log_err "cloud-init apply failed — later stages will use defaults"
+fi
diff --git a/init/lib/50-network.sh b/init/lib/50-network.sh
index 73146e6..918f940 100755
--- a/init/lib/50-network.sh
+++ b/init/lib/50-network.sh
@@ -1,6 +1,12 @@
 #!/bin/sh
 # 50-network.sh — Configure networking
-# Priority: persistent config > cloud-init > DHCP fallback
+# Priority: cloud-init (stage 45) > saved config > DHCP fallback
+
+# If cloud-init already configured networking, skip this stage
+if [ "${CLOUDINIT_APPLIED:-}" = "1" ]; then
+    log "Network already configured by cloud-init — skipping"
+    return 0
+fi
 
 # Check for saved network config (from previous boot or cloud-init)
 if [ -f "$DATA_MOUNT/network/interfaces.sh" ]; then
@@ -9,15 +15,6 @@ if [ -f "$DATA_MOUNT/network/interfaces.sh" ]; then
     return 0
 fi
 
-# Check for cloud-init network config
-CLOUDINIT_FILE="${KUBESOLO_CLOUDINIT:-$DATA_MOUNT/etc-kubesolo/cloud-init.yaml}"
-if [ -f "$CLOUDINIT_FILE" ]; then
-    log "Cloud-init found: $CLOUDINIT_FILE"
-    # Phase 1: simple parsing — extract network stanza
-    # TODO: Replace with proper cloud-init parser (Go binary) in Phase 2
-    log_warn "Cloud-init network parsing not yet implemented — falling back to DHCP"
-fi
-
 # Fallback: DHCP on first non-loopback interface
 log "Configuring network via DHCP"
 
diff --git a/init/lib/60-hostname.sh b/init/lib/60-hostname.sh
index 72fc7b6..9331813 100755
--- a/init/lib/60-hostname.sh
+++ b/init/lib/60-hostname.sh
@@ -1,5 +1,15 @@
 #!/bin/sh
 # 60-hostname.sh — Set system hostname
+# If cloud-init (stage 45) already set the hostname, skip this stage.
+
+# Cloud-init writes /etc/hostname and saves to data partition
+if [ "${CLOUDINIT_APPLIED:-}" = "1" ] && [ -f /etc/hostname ]; then
+    HOSTNAME="$(cat /etc/hostname)"
+    if [ -n "$HOSTNAME" ]; then
+        log "Hostname already set by cloud-init: $HOSTNAME"
+        return 0
+    fi
+fi
 
 if [ -f "$DATA_MOUNT/etc-kubesolo/hostname" ]; then
     HOSTNAME="$(cat "$DATA_MOUNT/etc-kubesolo/hostname")"