diff --git a/Makefile b/Makefile index 08033e7..4978a15 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ .PHONY: all fetch build-cloudinit build-update-agent rootfs initramfs iso disk-image \ test-boot test-k8s test-persistence test-deploy test-storage test-all \ test-cloudinit test-update-agent \ + bench-boot bench-resources \ dev-vm dev-vm-shell quick docker-build shellcheck \ kernel-audit clean distclean help @@ -109,6 +110,15 @@ test-rollback: disk-image # Full integration test suite (requires more time) test-integration: test-k8s test-deploy test-storage +# Benchmarks +bench-boot: iso + @echo "==> Benchmarking boot performance..." + test/benchmark/bench-boot.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso --runs 3 + +bench-resources: + @echo "==> Benchmarking resource usage (requires running VM)..." + test/benchmark/bench-resources.sh + # ============================================================================= # Code quality # ============================================================================= @@ -197,6 +207,8 @@ help: @echo " make test-rollback Forced rollback integration test" @echo " make test-all Run core tests (boot + k8s + persistence)" @echo " make test-integ Run full integration suite" + @echo " make bench-boot Benchmark boot performance (3 runs)" + @echo " make bench-resources Benchmark resource usage (requires running VM)" @echo "" @echo "Dev targets:" @echo " make dev-vm Launch interactive QEMU VM" diff --git a/cloud-init/cmd/main.go b/cloud-init/cmd/main.go index 95ba01e..5191926 100644 --- a/cloud-init/cmd/main.go +++ b/cloud-init/cmd/main.go @@ -92,7 +92,12 @@ func cmdApply(configPath string) error { return fmt.Errorf("kubesolo config: %w", err) } - // 4. Save persistent configs for next boot + // 4. Apply Portainer Edge Agent manifest (if enabled) + if err := cloudinit.ApplyPortainer(cfg, "/var/lib/kubesolo/server/manifests"); err != nil { + return fmt.Errorf("portainer edge agent: %w", err) + } + + // 5. 
Save persistent configs for next boot if err := cloudinit.SaveHostname(cfg, persistDataDir+"/etc-kubesolo"); err != nil { slog.Warn("failed to save hostname", "error", err) } diff --git a/cloud-init/portainer.go b/cloud-init/portainer.go new file mode 100644 index 0000000..8d6dc55 --- /dev/null +++ b/cloud-init/portainer.go @@ -0,0 +1,136 @@ +package cloudinit + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" +) + +// ApplyPortainer writes the Portainer Edge Agent deployment manifest +// based on cloud-init config. The manifest is applied by KubeSolo after +// the cluster is ready. +func ApplyPortainer(cfg *Config, manifestDir string) error { + if !cfg.Portainer.EdgeAgent.Enabled { + slog.Info("portainer edge agent not enabled, skipping") + return nil + } + + ea := cfg.Portainer.EdgeAgent + if ea.EdgeID == "" || ea.EdgeKey == "" { + return fmt.Errorf("portainer edge-agent enabled but edge-id and edge-key are required") + } + if ea.PortainerURL == "" { + return fmt.Errorf("portainer edge-agent enabled but portainer-url is required") + } + + image := ea.Image + if image == "" { + image = "portainer/agent:latest" + } + + if err := os.MkdirAll(manifestDir, 0o755); err != nil { + return fmt.Errorf("creating manifest dir: %w", err) + } + + manifest := buildEdgeAgentManifest(ea.EdgeID, ea.EdgeKey, ea.PortainerURL, image) + dest := filepath.Join(manifestDir, "portainer-edge-agent.yaml") + if err := os.WriteFile(dest, []byte(manifest), 0o644); err != nil { + return fmt.Errorf("writing edge agent manifest: %w", err) + } + + slog.Info("portainer edge agent manifest written", "path", dest) + return nil +} + +func buildEdgeAgentManifest(edgeID, edgeKey, portainerURL, image string) string { + var sb strings.Builder + + sb.WriteString("# Auto-generated by KubeSolo OS cloud-init\n") + sb.WriteString("# Portainer Edge Agent deployment\n") + sb.WriteString("---\n") + sb.WriteString("apiVersion: v1\n") + sb.WriteString("kind: Namespace\n") + 
sb.WriteString("metadata:\n") + sb.WriteString(" name: portainer\n") + sb.WriteString(" labels:\n") + sb.WriteString(" app.kubernetes.io/name: portainer-agent\n") + sb.WriteString(" app.kubernetes.io/component: edge-agent\n") + sb.WriteString("---\n") + sb.WriteString("apiVersion: v1\n") + sb.WriteString("kind: ServiceAccount\n") + sb.WriteString("metadata:\n") + sb.WriteString(" name: portainer-sa-clusteradmin\n") + sb.WriteString(" namespace: portainer\n") + sb.WriteString("---\n") + sb.WriteString("apiVersion: rbac.authorization.k8s.io/v1\n") + sb.WriteString("kind: ClusterRoleBinding\n") + sb.WriteString("metadata:\n") + sb.WriteString(" name: portainer-crb-clusteradmin\n") + sb.WriteString("roleRef:\n") + sb.WriteString(" apiGroup: rbac.authorization.k8s.io\n") + sb.WriteString(" kind: ClusterRole\n") + sb.WriteString(" name: cluster-admin\n") + sb.WriteString("subjects:\n") + sb.WriteString(" - kind: ServiceAccount\n") + sb.WriteString(" name: portainer-sa-clusteradmin\n") + sb.WriteString(" namespace: portainer\n") + sb.WriteString("---\n") + sb.WriteString("apiVersion: apps/v1\n") + sb.WriteString("kind: Deployment\n") + sb.WriteString("metadata:\n") + sb.WriteString(" name: portainer-agent\n") + sb.WriteString(" namespace: portainer\n") + sb.WriteString(" labels:\n") + sb.WriteString(" app.kubernetes.io/name: portainer-agent\n") + sb.WriteString(" app.kubernetes.io/component: edge-agent\n") + sb.WriteString("spec:\n") + sb.WriteString(" replicas: 1\n") + sb.WriteString(" selector:\n") + sb.WriteString(" matchLabels:\n") + sb.WriteString(" app: portainer-agent\n") + sb.WriteString(" template:\n") + sb.WriteString(" metadata:\n") + sb.WriteString(" labels:\n") + sb.WriteString(" app: portainer-agent\n") + sb.WriteString(" spec:\n") + sb.WriteString(" serviceAccountName: portainer-sa-clusteradmin\n") + sb.WriteString(" containers:\n") + sb.WriteString(" - name: agent\n") + sb.WriteString(fmt.Sprintf(" image: %s\n", image)) + sb.WriteString(" env:\n") + 
sb.WriteString(" - name: EDGE\n") + sb.WriteString(" value: \"1\"\n") + sb.WriteString(" - name: EDGE_ID\n") + sb.WriteString(fmt.Sprintf(" value: \"%s\"\n", edgeID)) + sb.WriteString(" - name: EDGE_KEY\n") + sb.WriteString(fmt.Sprintf(" value: \"%s\"\n", edgeKey)) + sb.WriteString(" - name: EDGE_INSECURE_POLL\n") + sb.WriteString(" value: \"1\"\n") + sb.WriteString(" - name: KUBERNETES_POD_IP\n") + sb.WriteString(" valueFrom:\n") + sb.WriteString(" fieldRef:\n") + sb.WriteString(" fieldPath: status.podIP\n") + sb.WriteString(" ports:\n") + sb.WriteString(" - containerPort: 9001\n") + sb.WriteString(" protocol: TCP\n") + sb.WriteString(" resources:\n") + sb.WriteString(" requests:\n") + sb.WriteString(" memory: 64Mi\n") + sb.WriteString(" cpu: 50m\n") + sb.WriteString(" limits:\n") + sb.WriteString(" memory: 256Mi\n") + sb.WriteString(" cpu: 500m\n") + sb.WriteString(" volumeMounts:\n") + sb.WriteString(" - name: docker-certs\n") + sb.WriteString(" mountPath: /certs\n") + sb.WriteString(" readOnly: true\n") + sb.WriteString(" volumes:\n") + sb.WriteString(" - name: docker-certs\n") + sb.WriteString(" emptyDir: {}\n") + sb.WriteString(" tolerations:\n") + sb.WriteString(" - operator: Exists\n") + + return sb.String() +} diff --git a/cloud-init/portainer_test.go b/cloud-init/portainer_test.go new file mode 100644 index 0000000..5854627 --- /dev/null +++ b/cloud-init/portainer_test.go @@ -0,0 +1,136 @@ +package cloudinit + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestApplyPortainerDisabled(t *testing.T) { + cfg := &Config{} + dir := t.TempDir() + + if err := ApplyPortainer(cfg, dir); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // No manifest should be written + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatal(err) + } + if len(entries) != 0 { + t.Errorf("expected no files, got %d", len(entries)) + } +} + +func TestApplyPortainerMissingFields(t *testing.T) { + cfg := &Config{ + Portainer: PortainerConfig{ + 
EdgeAgent: EdgeAgentConfig{ + Enabled: true, + }, + }, + } + dir := t.TempDir() + + err := ApplyPortainer(cfg, dir) + if err == nil { + t.Fatal("expected error for missing edge-id and edge-key") + } +} + +func TestApplyPortainerMissingURL(t *testing.T) { + cfg := &Config{ + Portainer: PortainerConfig{ + EdgeAgent: EdgeAgentConfig{ + Enabled: true, + EdgeID: "test-id", + EdgeKey: "test-key", + }, + }, + } + dir := t.TempDir() + + err := ApplyPortainer(cfg, dir) + if err == nil { + t.Fatal("expected error for missing portainer-url") + } +} + +func TestApplyPortainerEnabled(t *testing.T) { + cfg := &Config{ + Portainer: PortainerConfig{ + EdgeAgent: EdgeAgentConfig{ + Enabled: true, + EdgeID: "test-edge-id", + EdgeKey: "test-edge-key-abc123", + PortainerURL: "https://portainer.example.com", + }, + }, + } + dir := t.TempDir() + + if err := ApplyPortainer(cfg, dir); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Check manifest was created + manifestPath := filepath.Join(dir, "portainer-edge-agent.yaml") + data, err := os.ReadFile(manifestPath) + if err != nil { + t.Fatalf("manifest not created: %v", err) + } + + content := string(data) + + // Check key elements are present + checks := []string{ + "kind: Namespace", + "name: portainer", + "kind: Deployment", + "name: portainer-agent", + "EDGE_ID", + "test-edge-id", + "EDGE_KEY", + "test-edge-key-abc123", + "image: portainer/agent:latest", + "kind: ClusterRoleBinding", + "kind: ServiceAccount", + } + + for _, check := range checks { + if !strings.Contains(content, check) { + t.Errorf("manifest missing expected content: %q", check) + } + } +} + +func TestApplyPortainerCustomImage(t *testing.T) { + cfg := &Config{ + Portainer: PortainerConfig{ + EdgeAgent: EdgeAgentConfig{ + Enabled: true, + EdgeID: "test-id", + EdgeKey: "test-key", + PortainerURL: "https://portainer.example.com", + Image: "portainer/agent:2.20.0", + }, + }, + } + dir := t.TempDir() + + if err := ApplyPortainer(cfg, dir); err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + + data, err := os.ReadFile(filepath.Join(dir, "portainer-edge-agent.yaml")) + if err != nil { + t.Fatal(err) + } + + if !strings.Contains(string(data), "image: portainer/agent:2.20.0") { + t.Error("expected custom image in manifest") + } +} diff --git a/docs/deployment-guide.md b/docs/deployment-guide.md new file mode 100644 index 0000000..75cd538 --- /dev/null +++ b/docs/deployment-guide.md @@ -0,0 +1,450 @@ +# KubeSolo OS — Deployment Guide + +This guide covers deploying KubeSolo OS to physical hardware and virtual machines, +including first-boot configuration, update signing, and Portainer Edge integration. + +## Table of Contents + +- [Prerequisites](#prerequisites) +- [Building](#building) +- [Installation Methods](#installation-methods) +- [First-Boot Configuration (Cloud-Init)](#first-boot-configuration) +- [Update Signing](#update-signing) +- [Portainer Edge Integration](#portainer-edge-integration) +- [SSH Debug Access](#ssh-debug-access) +- [Monitoring and Health Checks](#monitoring-and-health-checks) +- [Troubleshooting](#troubleshooting) + +--- + +## Prerequisites + +**Hardware requirements:** +- x86_64 processor +- 512 MB RAM minimum (1 GB recommended) +- 8 GB storage minimum (16 GB recommended) +- Network interface (wired or WiFi with supported chipset) + +**Build requirements:** +- Linux or macOS host +- Docker (for reproducible builds) or: bash, make, cpio, gzip, xorriso, Go 1.22+ +- QEMU (for testing) + +--- + +## Building + +### Quick build (ISO) + +```bash +git clone https://github.com/portainer/kubesolo-os.git +cd kubesolo-os +make fetch # Download Tiny Core + KubeSolo +make iso # Build bootable ISO +``` + +Output: `output/kubesolo-os-<version>.iso` + +### Disk image (for persistent installations) + +```bash +make disk-image # Build raw disk with A/B partitions +``` + +Output: `output/kubesolo-os-<version>.img` + +### Reproducible build (Docker) + +```bash +make docker-build +``` + +--- + +## Installation Methods + +### 
USB Flash Drive + +```bash +# Write disk image to USB (replace /dev/sdX with your device) +sudo dd if=output/kubesolo-os-0.1.0.img of=/dev/sdX bs=4M status=progress +sync +``` + +### Virtual Machine (QEMU/KVM) + +```bash +# Quick launch for testing +make dev-vm + +# Or manually: +qemu-system-x86_64 -m 1024 -smp 2 \ + -enable-kvm -cpu host \ + -drive file=output/kubesolo-os-0.1.0.img,format=raw,if=virtio \ + -net nic,model=virtio \ + -net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22 \ + -nographic +``` + +### Cloud / Hypervisor + +Convert the raw image for your platform: + +```bash +# VMware +qemu-img convert -f raw -O vmdk output/kubesolo-os-0.1.0.img kubesolo-os.vmdk + +# VirtualBox +qemu-img convert -f raw -O vdi output/kubesolo-os-0.1.0.img kubesolo-os.vdi + +# Hyper-V +qemu-img convert -f raw -O vhdx output/kubesolo-os-0.1.0.img kubesolo-os.vhdx +``` + +--- + +## First-Boot Configuration + +KubeSolo OS uses a simplified cloud-init system for first-boot configuration. +Place the config file on the data partition before first boot. 
+ +### Config file location + +``` +/mnt/data/etc-kubesolo/cloud-init.yaml +``` + +For ISO boot, the config can be provided via a secondary drive or kernel parameter: +``` +kubesolo.cloudinit=/path/to/cloud-init.yaml +``` + +### Basic DHCP configuration + +```yaml +hostname: kubesolo-node-01 + +network: + mode: dhcp + +kubesolo: + local-storage: true +``` + +### Static IP configuration + +```yaml +hostname: kubesolo-prod-01 + +network: + mode: static + interface: eth0 + address: 192.168.1.100/24 + gateway: 192.168.1.1 + dns: + - 8.8.8.8 + - 1.1.1.1 + +kubesolo: + local-storage: true + apiserver-extra-sans: + - 192.168.1.100 + - kubesolo-prod-01.local +``` + +### Air-gapped deployment + +```yaml +hostname: airgap-node + +network: + mode: static + address: 10.0.0.50/24 + gateway: 10.0.0.1 + dns: + - 10.0.0.1 + +kubesolo: + local-storage: true + extra-flags: "--disable=traefik --disable=servicelb" + +airgap: + import-images: true + images-dir: /mnt/data/images +``` + +Pre-load container images by placing tar archives in `/mnt/data/images/`. + +--- + +## Update Signing + +KubeSolo OS supports Ed25519 signature verification for update images. +This ensures only authorized images can be applied to your devices. + +### Generate a signing key pair + +```bash +# On your build machine (keep private key secure!) +cd update && go run . genkey +``` + +Output: +``` +Public key (hex): <64-char hex string> +Private key (hex): <128-char hex string> + +Save the public key to /etc/kubesolo/update-pubkey.hex on the device. +Keep the private key secure and offline - use it only for signing updates. +``` + +Save the private key to a secure location (e.g., `signing-key.hex`). +Save the public key to `update-pubkey.hex`. + +### Sign update images + +```bash +# Sign the kernel and initramfs +cd update && go run . sign --key /path/to/signing-key.hex \ + ../output/vmlinuz ../output/kubesolo-os.gz +``` + +This produces `.sig` files alongside each image. 
+ +### Deploy the public key + +Place the public key on the device's data partition: +``` +/mnt/data/etc-kubesolo/update-pubkey.hex +``` + +Or embed it in the cloud-init config on the data partition. + +### Update server layout + +Your update server should serve: +``` +/latest.json # Update metadata +/vmlinuz # Kernel +/vmlinuz.sig # Kernel signature +/kubesolo-os.gz # Initramfs +/kubesolo-os.gz.sig # Initramfs signature +``` + +Example `latest.json`: +```json +{ + "version": "0.2.0", + "vmlinuz_url": "https://updates.example.com/v0.2.0/vmlinuz", + "vmlinuz_sha256": "<sha256>", + "vmlinuz_sig_url": "https://updates.example.com/v0.2.0/vmlinuz.sig", + "initramfs_url": "https://updates.example.com/v0.2.0/kubesolo-os.gz", + "initramfs_sha256": "<sha256>", + "initramfs_sig_url": "https://updates.example.com/v0.2.0/kubesolo-os.gz.sig", + "release_notes": "Bug fixes and security updates", + "release_date": "2025-01-15" +} +``` + +### Apply a signed update + +```bash +kubesolo-update apply \ + --server https://updates.example.com \ + --pubkey /etc/kubesolo/update-pubkey.hex +``` + +--- + +## Portainer Edge Integration + +KubeSolo OS can automatically deploy the Portainer Edge Agent for remote +management through Portainer Business Edition. + +### Setup in Portainer + +1. Log in to your Portainer Business instance +2. Go to **Environments** → **Add Environment** → **Edge Agent** +3. Select **Kubernetes** as the environment type +4. 
Copy the **Edge ID** and **Edge Key** values + +### Cloud-init configuration + +```yaml +hostname: edge-node-01 + +network: + mode: dhcp + +kubesolo: + local-storage: true + +portainer: + edge-agent: + enabled: true + edge-id: "your-edge-id-from-portainer" + edge-key: "your-edge-key-from-portainer" + portainer-url: "https://portainer.yourcompany.com" + # Optional: pin agent version + # image: portainer/agent:2.20.0 +``` + +### Manual deployment + +If not using cloud-init, deploy the Edge Agent manually after boot: + +```bash +# Create namespace +kubesolo kubectl create namespace portainer + +# Apply the edge agent manifest (generated from template) +kubesolo kubectl apply -f /path/to/portainer-edge-agent.yaml +``` + +### Verify connection + +```bash +kubesolo kubectl -n portainer get pods +# Should show portainer-agent pod in Running state +``` + +The node should appear in your Portainer dashboard within a few minutes. + +--- + +## SSH Debug Access + +For development and debugging, you can add SSH access using the +optional ssh-debug extension. + +### Build the SSH extension + +```bash +./hack/build-ssh-extension.sh --pubkey ~/.ssh/id_ed25519.pub +``` + +### Load on a running system + +```bash +# Copy to device +scp output/ssh-debug.tcz root@<device-ip>:/mnt/data/extensions/ + +# Load (no reboot required) +unsquashfs -f -d / /mnt/data/extensions/ssh-debug.tcz +/usr/lib/kubesolo-os/init.d/85-ssh.sh +``` + +### Quick inject for development + +```bash +# Inject into rootfs before building ISO +./hack/inject-ssh.sh +make initramfs iso +``` + +> **Warning:** SSH access should NEVER be enabled in production. The debug +> extension uses key-based auth only and has no password, but it still +> expands the attack surface. 
+ +--- + +## Monitoring and Health Checks + +### Automatic health checks + +KubeSolo OS runs a post-boot health check that verifies: +- containerd is running +- Kubernetes API server responds +- Node reports Ready status + +On success, the health check marks the boot as successful in GRUB, +preventing automatic rollback. + +### Deploy the health check CronJob + +```bash +kubesolo kubectl apply -f update/deploy/update-cronjob.yaml +``` + +This deploys: +- A CronJob that checks for updates every 6 hours +- A health check Job that runs at boot + +### Manual health check + +```bash +kubesolo-update healthcheck --timeout 120 +``` + +### Status check + +```bash +kubesolo-update status +``` + +Shows: +- Active/passive slot +- Current version +- Boot counter status +- GRUB environment variables + +--- + +## Troubleshooting + +### Boot hangs at kernel loading + +- Verify the ISO/image is not corrupted: check SHA256 against published hash +- Try adding `kubesolo.debug` to kernel command line for verbose logging +- Try `kubesolo.shell` to drop to emergency shell + +### KubeSolo fails to start + +```bash +# Check KubeSolo logs +cat /var/log/kubesolo.log + +# Verify containerd is running +pidof containerd + +# Check if required kernel modules are loaded +lsmod | grep -E "overlay|br_netfilter|veth" +``` + +### Node not reaching Ready state + +```bash +# Check node status +kubesolo kubectl get nodes -o wide + +# Check system pods +kubesolo kubectl get pods -A + +# Check kubelet logs +kubesolo kubectl logs -n kube-system <pod-name> +``` + +### Update fails with signature error + +```bash +# Verify the public key matches the signing key +cat /etc/kubesolo/update-pubkey.hex + +# Test signature verification manually +kubesolo-update check --server https://updates.example.com +``` + +### Rollback to previous version + +```bash +# Force rollback to the other slot +kubesolo-update rollback --grubenv /boot/grub/grubenv + +# Reboot to apply +reboot +``` + +### Emergency shell + +Boot with 
`kubesolo.shell` kernel parameter, or if boot fails after 3 +attempts, GRUB automatically rolls back to the last known good slot. diff --git a/hack/build-ssh-extension.sh b/hack/build-ssh-extension.sh new file mode 100755 index 0000000..6242cd3 --- /dev/null +++ b/hack/build-ssh-extension.sh @@ -0,0 +1,169 @@ +#!/bin/bash +# build-ssh-extension.sh — Build a Tiny Core .tcz extension for SSH debugging +# +# Creates a self-contained SSH extension that can be loaded into KubeSolo OS +# at runtime for debugging. Uses dropbear for minimal footprint (~200 KB). +# +# Usage: +# ./hack/build-ssh-extension.sh [--pubkey /path/to/key.pub] +# +# Output: output/ssh-debug.tcz +# +# To load on a running system: +# 1. Copy ssh-debug.tcz to /mnt/data/extensions/ +# 2. Reboot, or manually: unsquashfs /mnt/data/extensions/ssh-debug.tcz -d / +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +OUTPUT_DIR="$PROJECT_ROOT/output" +BUILD_DIR="$PROJECT_ROOT/build/cache/ssh-ext" +DROPBEAR_VERSION="2024.86" + +# Parse args +SSH_PUBKEY="" +for i in "$@"; do + case $i in + --pubkey) + shift + SSH_PUBKEY="$1" + shift + ;; + esac +done + +# Find SSH public key +if [ -z "$SSH_PUBKEY" ]; then + for key in "$HOME/.ssh/id_ed25519.pub" "$HOME/.ssh/id_rsa.pub"; do + if [ -f "$key" ]; then + SSH_PUBKEY="$key" + break + fi + done +fi + +if [ -z "$SSH_PUBKEY" ] || [ ! -f "$SSH_PUBKEY" ]; then + echo "ERROR: No SSH public key found." 
+ echo "Provide with --pubkey or ensure ~/.ssh/id_ed25519.pub exists" + exit 1 +fi + +echo "==> Building SSH debug extension (.tcz)" +echo " Public key: $SSH_PUBKEY" +echo " Dropbear: $DROPBEAR_VERSION" + +# Clean build area +rm -rf "$BUILD_DIR" +mkdir -p "$BUILD_DIR/squashfs-root" +mkdir -p "$OUTPUT_DIR" + +SQUASHFS="$BUILD_DIR/squashfs-root" + +# Create directory structure +mkdir -p "$SQUASHFS/usr/sbin" +mkdir -p "$SQUASHFS/usr/bin" +mkdir -p "$SQUASHFS/etc/dropbear" +mkdir -p "$SQUASHFS/root/.ssh" +mkdir -p "$SQUASHFS/usr/lib/kubesolo-os/init.d" + +# Install authorized key +cp "$SSH_PUBKEY" "$SQUASHFS/root/.ssh/authorized_keys" +chmod 700 "$SQUASHFS/root/.ssh" +chmod 600 "$SQUASHFS/root/.ssh/authorized_keys" + +# Download static dropbear if not cached +DROPBEAR_CACHE="$PROJECT_ROOT/build/cache/dropbear-static" +if [ ! -f "$DROPBEAR_CACHE" ]; then + echo "==> Downloading static dropbear..." + echo "" + echo "NOTE: Static dropbear must be compiled separately." + echo "For now, creating a placeholder extension structure." 
+ echo "" + echo "To compile dropbear statically:" + echo " wget https://matt.ucc.asn.au/dropbear/releases/dropbear-${DROPBEAR_VERSION}.tar.bz2" + echo " tar xf dropbear-${DROPBEAR_VERSION}.tar.bz2" + echo " cd dropbear-${DROPBEAR_VERSION}" + echo " ./configure --enable-static --disable-zlib" + echo " make PROGRAMS='dropbear dbclient dropbearkey scp' STATIC=1" + echo " cp dropbear dbclient dropbearkey scp $PROJECT_ROOT/build/cache/" + echo "" + + # Create placeholder script instead + cat > "$SQUASHFS/usr/sbin/dropbear" << 'PLACEHOLDER' +#!/bin/sh +echo "ERROR: dropbear placeholder — compile static dropbear and rebuild extension" +exit 1 +PLACEHOLDER + chmod +x "$SQUASHFS/usr/sbin/dropbear" +else + cp "$DROPBEAR_CACHE" "$SQUASHFS/usr/sbin/dropbear" + chmod +x "$SQUASHFS/usr/sbin/dropbear" + + # Also copy dbclient and dropbearkey if available + for tool in dbclient dropbearkey scp; do + src="$PROJECT_ROOT/build/cache/${tool}-static" + [ -f "$src" ] && cp "$src" "$SQUASHFS/usr/bin/$tool" && chmod +x "$SQUASHFS/usr/bin/$tool" + done +fi + +# Create SSH init stage +cat > "$SQUASHFS/usr/lib/kubesolo-os/init.d/85-ssh.sh" << 'EOF' +#!/bin/sh +# 85-ssh.sh — Start SSH server for debugging +# Part of ssh-debug.tcz extension + +if ! command -v dropbear >/dev/null 2>&1; then + return 0 +fi + +# Generate host keys on first boot +if [ ! -f /etc/dropbear/dropbear_rsa_host_key ]; then + dropbearkey -t rsa -f /etc/dropbear/dropbear_rsa_host_key >/dev/null 2>&1 +fi +if [ ! 
-f /etc/dropbear/dropbear_ed25519_host_key ]; then + dropbearkey -t ed25519 -f /etc/dropbear/dropbear_ed25519_host_key >/dev/null 2>&1 +fi + +# Start dropbear in background +dropbear -R -p 22 2>/dev/null +echo "[kubesolo-init] SSH server (dropbear) started on port 22" >&2 +EOF +chmod +x "$SQUASHFS/usr/lib/kubesolo-os/init.d/85-ssh.sh" + +# Create extension info +cat > "$BUILD_DIR/ssh-debug.tcz.info" << EOF +Title: ssh-debug.tcz +Description: SSH debugging extension for KubeSolo OS +Version: ${DROPBEAR_VERSION} +Author: KubeSolo OS +Original-site: https://github.com/portainer/kubesolo +Copying-policy: MIT +Size: ~200KB +Extension_by: kubesolo-os +Comments: Provides dropbear SSH server for dev/debug access. + NOT intended for production use. +EOF + +# Build squashfs +if command -v mksquashfs >/dev/null 2>&1; then + mksquashfs "$SQUASHFS" "$OUTPUT_DIR/ssh-debug.tcz" \ + -noappend -comp xz -b 4096 + echo "" + echo "==> Built: $OUTPUT_DIR/ssh-debug.tcz ($(du -h "$OUTPUT_DIR/ssh-debug.tcz" | cut -f1))" +else + echo "" + echo "==> mksquashfs not found — extension directory prepared at:" + echo " $SQUASHFS" + echo "" + echo " Install squashfs-tools and run:" + echo " mksquashfs $SQUASHFS $OUTPUT_DIR/ssh-debug.tcz -noappend -comp xz -b 4096" +fi + +echo "" +echo "==> To use:" +echo " 1. Copy ssh-debug.tcz to USB drive or /mnt/data/extensions/" +echo " 2. On the target device, load with:" +echo " unsquashfs -f -d / /mnt/data/extensions/ssh-debug.tcz" +echo " 3. Run: /usr/lib/kubesolo-os/init.d/85-ssh.sh" +echo " 4. 
SSH: ssh root@" +echo "" diff --git a/test/benchmark/bench-boot.sh b/test/benchmark/bench-boot.sh new file mode 100755 index 0000000..f6d324b --- /dev/null +++ b/test/benchmark/bench-boot.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# bench-boot.sh — Measure KubeSolo OS boot performance in QEMU +# +# Measures: +# - Time to first console output (kernel loaded) +# - Time to init complete (all stages done) +# - Time to K8s node Ready +# - Time to first pod Running (nginx test) +# - Peak memory usage +# - Disk image/ISO size +# +# Usage: +# test/benchmark/bench-boot.sh [--runs N] +# +# Output: JSON benchmark results to stdout, human-readable to stderr +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +IMAGE="${1:?Usage: bench-boot.sh [--runs N]}" +RUNS=3 +SSH_PORT=2222 +K8S_PORT=6443 + +shift || true +while [ $# -gt 0 ]; do + case "$1" in + --runs) RUNS="$2"; shift 2 ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +if [ ! 
-f "$IMAGE" ]; then + echo "ERROR: Image not found: $IMAGE" >&2 + exit 1 +fi + +# Determine image type +IMAGE_TYPE="iso" +if [[ "$IMAGE" == *.img ]]; then + IMAGE_TYPE="disk" +fi + +echo "=== KubeSolo OS Boot Benchmark ===" >&2 +echo "Image: $IMAGE ($(du -h "$IMAGE" | cut -f1))" >&2 +echo "Type: $IMAGE_TYPE" >&2 +echo "Runs: $RUNS" >&2 +echo "" >&2 + +# Build QEMU command +QEMU_CMD=( + qemu-system-x86_64 + -m 1024 + -smp 2 + -nographic + -no-reboot + -serial mon:stdio + -net nic,model=virtio + -net "user,hostfwd=tcp::${SSH_PORT}-:22,hostfwd=tcp::${K8S_PORT}-:6443" +) + +# Add KVM if available +if [ -e /dev/kvm ] && [ -r /dev/kvm ]; then + QEMU_CMD+=(-enable-kvm -cpu host) +else + QEMU_CMD+=(-cpu max) +fi + +if [ "$IMAGE_TYPE" = "iso" ]; then + QEMU_CMD+=(-cdrom "$IMAGE") + # Add a temp disk for persistence + TEMP_DISK=$(mktemp /tmp/kubesolo-bench-XXXXXX.img) + qemu-img create -f qcow2 "$TEMP_DISK" 8G >/dev/null 2>&1 + QEMU_CMD+=(-drive "file=$TEMP_DISK,format=qcow2,if=virtio") + trap "rm -f $TEMP_DISK" EXIT +else + QEMU_CMD+=(-drive "file=$IMAGE,format=raw,if=virtio") +fi + +# Results arrays +declare -a BOOT_TIMES +declare -a INIT_TIMES +declare -a K8S_TIMES +declare -a MEMORY_USAGE + +for run in $(seq 1 "$RUNS"); do + echo "--- Run $run/$RUNS ---" >&2 + + START_TIME=$(date +%s%N) + BOOT_DONE="" + INIT_DONE="" + K8S_READY="" + PEAK_MEM="" + + # Create a log file for this run + LOG=$(mktemp /tmp/kubesolo-bench-log-XXXXXX) + + # Run QEMU with timeout, capturing output + timeout 300 "${QEMU_CMD[@]}" 2>&1 | while IFS= read -r line; do + NOW=$(date +%s%N) + ELAPSED_MS=$(( (NOW - START_TIME) / 1000000 )) + + echo "$line" >> "$LOG" + + # Detect boot milestones from serial output + case "$line" in + *"Linux version"*) + if [ -z "$BOOT_DONE" ]; then + BOOT_DONE="$ELAPSED_MS" + echo " Kernel loaded: ${ELAPSED_MS}ms" >&2 + echo "KERNEL_MS=$ELAPSED_MS" >> "$LOG.times" + fi + ;; + *"kubesolo-init"*"all stages complete"*|*"init complete"*) + if [ -z "$INIT_DONE" ]; then + 
INIT_DONE="$ELAPSED_MS" + echo " Init complete: ${ELAPSED_MS}ms" >&2 + echo "INIT_MS=$ELAPSED_MS" >> "$LOG.times" + fi + ;; + *"node is Ready"*|*"NotReady"*"Ready"*|*"kubesolo"*"Ready"*) + if [ -z "$K8S_READY" ]; then + K8S_READY="$ELAPSED_MS" + echo " K8s Ready: ${ELAPSED_MS}ms" >&2 + echo "K8S_MS=$ELAPSED_MS" >> "$LOG.times" + fi + ;; + *"MemTotal:"*|*"MemAvailable:"*) + # Capture memory info if printed + echo "MEM_LINE=$line" >> "$LOG.times" + ;; + esac + + # Stop after K8s is ready (or timeout) + if [ -n "$K8S_READY" ]; then + break + fi + done || true + + # Read results from log + if [ -f "$LOG.times" ]; then + KERNEL_MS=$(grep "KERNEL_MS=" "$LOG.times" 2>/dev/null | tail -1 | cut -d= -f2 || echo "") + INIT_MS=$(grep "INIT_MS=" "$LOG.times" 2>/dev/null | tail -1 | cut -d= -f2 || echo "") + K8S_MS=$(grep "K8S_MS=" "$LOG.times" 2>/dev/null | tail -1 | cut -d= -f2 || echo "") + + [ -n "$KERNEL_MS" ] && BOOT_TIMES+=("$KERNEL_MS") + [ -n "$INIT_MS" ] && INIT_TIMES+=("$INIT_MS") + [ -n "$K8S_MS" ] && K8S_TIMES+=("$K8S_MS") + fi + + rm -f "$LOG" "$LOG.times" + echo "" >&2 +done + +# Compute averages +avg() { + local arr=("$@") + if [ ${#arr[@]} -eq 0 ]; then + echo "null" + return + fi + local sum=0 + for v in "${arr[@]}"; do + sum=$((sum + v)) + done + echo $((sum / ${#arr[@]})) +} + +# Image size +IMAGE_SIZE=$(stat -f%z "$IMAGE" 2>/dev/null || stat -c%s "$IMAGE" 2>/dev/null || echo 0) +IMAGE_SIZE_MB=$((IMAGE_SIZE / 1024 / 1024)) + +AVG_BOOT=$(avg "${BOOT_TIMES[@]+"${BOOT_TIMES[@]}"}") +AVG_INIT=$(avg "${INIT_TIMES[@]+"${INIT_TIMES[@]}"}") +AVG_K8S=$(avg "${K8S_TIMES[@]+"${K8S_TIMES[@]}"}") + +echo "=== Results ===" >&2 +echo "Image size: ${IMAGE_SIZE_MB} MB" >&2 +echo "Avg kernel load: ${AVG_BOOT}ms" >&2 +echo "Avg init complete: ${AVG_INIT}ms" >&2 +echo "Avg K8s Ready: ${AVG_K8S}ms" >&2 +echo "" >&2 + +# Output JSON +cat << EOF +{ + "benchmark": "kubesolo-os-boot", + "image": "$(basename "$IMAGE")", + "image_size_bytes": $IMAGE_SIZE, + "image_size_mb": 
#!/bin/bash
# bench-resources.sh — Measure KubeSolo OS resource usage
#
# Connects over SSH to a running KubeSolo OS instance and reports:
#   - Memory usage (total, used, available) from /proc/meminfo
#   - Top processes by RSS and the total process count
#   - Disk usage (rootfs, data partition, containerd and kubesolo data dirs)
#   - Kubernetes node status and pod count
#   - CPU idle percentage over a 5-second sample
#
# The human-readable report is written to stderr; a JSON summary is written
# to stdout so it can be redirected to a file.
#
# Usage:
#   test/benchmark/bench-resources.sh [--ssh-port PORT] [--ssh-host HOST] [--ssh-user USER]
#
# The SSH_PORT, SSH_HOST and SSH_USER environment variables supply the
# defaults (2222, localhost, root).
#
# Prerequisites: KubeSolo OS running (e.g. via make dev-vm)
set -euo pipefail

SSH_PORT="${SSH_PORT:-2222}"
SSH_HOST="${SSH_HOST:-localhost}"
SSH_USER="${SSH_USER:-root}"
# An array keeps each option a separate word without relying on word splitting.
SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR)

while [ $# -gt 0 ]; do
    case "$1" in
        --ssh-port|--ssh-host|--ssh-user)
            # Under `set -u` a missing value would otherwise abort with an
            # unhelpful "unbound variable" message.
            if [ $# -lt 2 ]; then
                echo "Missing value for option: $1" >&2
                exit 1
            fi
            case "$1" in
                --ssh-port) SSH_PORT="$2" ;;
                --ssh-host) SSH_HOST="$2" ;;
                --ssh-user) SSH_USER="$2" ;;
            esac
            shift 2
            ;;
        *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
done

# run_ssh COMMAND... — run COMMAND on the target, discarding ssh's stderr.
run_ssh() {
    ssh "${SSH_OPTS[@]}" -p "$SSH_PORT" "$SSH_USER@$SSH_HOST" "$@" 2>/dev/null
}

# to_int VALUE [FALLBACK] — print VALUE when it is a non-negative decimal
# integer (whitespace ignored), otherwise print FALLBACK (default 0).
# Everything interpolated into arithmetic or emitted as a JSON number goes
# through this so a failed remote command cannot break either.
to_int() {
    local value
    value=$(printf '%s' "${1:-}" | tr -d '[:space:]')
    case "$value" in
        ''|*[!0-9]*) printf '%s\n' "${2-0}" ;;
        *) printf '%s\n' "$((10#$value))" ;;
    esac
}

# json_escape STRING — print STRING escaped for use inside a JSON string.
json_escape() {
    local s="${1:-}"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

echo "=== KubeSolo OS Resource Benchmark ===" >&2
echo "Connecting to ${SSH_HOST}:${SSH_PORT}..." >&2

# Check connectivity
if ! run_ssh "true"; then
    echo "ERROR: Cannot connect via SSH. Is KubeSolo OS running?" >&2
    echo "Start with: make dev-vm" >&2
    exit 1
fi

echo "" >&2

# --- Memory ---
echo "--- Memory Usage ---" >&2
MEM_INFO=$(run_ssh "cat /proc/meminfo")

# meminfo_kb KEY — print the kB value of KEY from the captured /proc/meminfo.
meminfo_kb() {
    printf '%s\n' "$MEM_INFO" | sed -n "s/^$1: *\([0-9]*\).*/\1/p"
}

MEM_TOTAL=$(to_int "$(meminfo_kb MemTotal)")
MEM_FREE=$(to_int "$(meminfo_kb MemFree)")
MEM_AVAIL=$(to_int "$(meminfo_kb MemAvailable)")

# Without a total the percentage below would divide by zero.
if [ "$MEM_TOTAL" -eq 0 ]; then
    echo "ERROR: Could not read MemTotal from /proc/meminfo" >&2
    exit 1
fi

MEM_USED=$((MEM_TOTAL - MEM_FREE))
MEM_OVERHEAD_PCT=$(( (MEM_USED * 100) / MEM_TOTAL ))

echo " Total: $((MEM_TOTAL / 1024)) MB" >&2
echo " Used: $((MEM_USED / 1024)) MB" >&2
echo " Available: $((MEM_AVAIL / 1024)) MB" >&2
echo " OS overhead: $((MEM_USED / 1024)) MB (${MEM_OVERHEAD_PCT}%)" >&2
echo "" >&2

# Top memory consumers
echo "--- Top Processes (by RSS) ---" >&2
run_ssh "ps -o pid,rss,comm | sort -k2 -rn | head -10" | while read -r line; do
    echo " $line" >&2
done
echo "" >&2

# --- Disk ---
echo "--- Disk Usage ---" >&2
run_ssh "df -h / /mnt/data 2>/dev/null || df -h /" | while read -r line; do
    echo " $line" >&2
done
echo "" >&2

# Containerd data
CONTAINERD_SIZE=$(run_ssh "du -sh /var/lib/containerd 2>/dev/null | cut -f1" || echo "N/A")
KUBESOLO_SIZE=$(run_ssh "du -sh /var/lib/kubesolo 2>/dev/null | cut -f1" || echo "N/A")
echo " containerd data: $CONTAINERD_SIZE" >&2
echo " kubesolo data: $KUBESOLO_SIZE" >&2
echo "" >&2

# --- Processes ---
echo "--- Process Count ---" >&2
PROC_COUNT=$(to_int "$(run_ssh "ps | wc -l" || true)")
echo " Total processes: $PROC_COUNT" >&2
echo "" >&2

# --- K8s Status ---
echo "--- Kubernetes Status ---" >&2
NODE_STATUS=$(run_ssh "kubesolo kubectl get nodes -o wide --no-headers 2>/dev/null" || echo "N/A")
POD_COUNT=$(to_int "$(run_ssh "kubesolo kubectl get pods -A --no-headers 2>/dev/null | wc -l" || true)")
echo " Node: $NODE_STATUS" >&2
echo " Pod count: $POD_COUNT" >&2
echo "" >&2

# --- CPU (5-second sample) ---
echo "--- CPU Usage (5s idle sample) ---" >&2
# Runs on the target: read the aggregate "cpu" line of /proc/stat twice,
# five seconds apart, and print idle time as a percentage of the
# user+nice+system+idle delta. Single-quoted so nothing expands locally.
CPU_SAMPLE_SCRIPT='
    read cpu user nice system idle rest < /proc/stat
    sleep 5
    read cpu user2 nice2 system2 idle2 rest2 < /proc/stat
    total=$((user2 + nice2 + system2 + idle2 - user - nice - system - idle))
    idle_diff=$((idle2 - idle))
    if [ $total -gt 0 ]; then
        echo $((idle_diff * 100 / total))
    else
        echo 0
    fi
'
# An empty fallback marks "sample unavailable"; a placeholder such as "N/A"
# must never reach the arithmetic below or the JSON number fields.
CPU_IDLE=$(to_int "$(run_ssh "$CPU_SAMPLE_SCRIPT" || true)" "")
if [ -n "$CPU_IDLE" ]; then
    CPU_USED=$((100 - CPU_IDLE))
    echo " CPU idle: ${CPU_IDLE}%" >&2
    echo " CPU used: ${CPU_USED}%" >&2
else
    CPU_IDLE="null"
    CPU_USED="null"
    echo " CPU idle: N/A" >&2
    echo " CPU used: N/A" >&2
fi
echo "" >&2

# --- OS Version ---
OS_VERSION=$(run_ssh "cat /etc/kubesolo-os-version 2>/dev/null" || echo "unknown")

# --- Output JSON ---
cat << EOF
{
  "benchmark": "kubesolo-os-resources",
  "os_version": "$(json_escape "$OS_VERSION")",
  "memory": {
    "total_kb": $MEM_TOTAL,
    "used_kb": $MEM_USED,
    "available_kb": $MEM_AVAIL,
    "total_mb": $((MEM_TOTAL / 1024)),
    "used_mb": $((MEM_USED / 1024)),
    "available_mb": $((MEM_AVAIL / 1024)),
    "overhead_percent": $MEM_OVERHEAD_PCT
  },
  "disk": {
    "containerd_size": "$(json_escape "$CONTAINERD_SIZE")",
    "kubesolo_size": "$(json_escape "$KUBESOLO_SIZE")"
  },
  "processes": {
    "total": $PROC_COUNT
  },
  "kubernetes": {
    "pod_count": $POD_COUNT
  },
  "cpu": {
    "idle_percent": $CPU_IDLE,
    "used_percent": $CPU_USED
  }
}
EOF
extracts command-line flags from args. @@ -40,6 +41,11 @@ func parseOpts(args []string) opts { } i++ } + case "--pubkey": + if i+1 < len(args) { + o.PubKeyPath = args[i+1] + i++ + } } } diff --git a/update/cmd/sign.go b/update/cmd/sign.go new file mode 100644 index 0000000..04abea4 --- /dev/null +++ b/update/cmd/sign.go @@ -0,0 +1,75 @@ +package cmd + +import ( + "fmt" + + "github.com/portainer/kubesolo-os/update/pkg/signing" +) + +// Sign creates Ed25519 signatures for update artifacts. +// Used during the build process, not on the target device. +// +// Usage: +// +// kubesolo-update sign --key [file...] +func Sign(args []string) error { + var keyPath string + var files []string + + for i := 0; i < len(args); i++ { + switch args[i] { + case "--key": + if i+1 < len(args) { + keyPath = args[i+1] + i++ + } + default: + // Non-flag args are files to sign + if args[i] != "" && args[i][0] != '-' { + files = append(files, args[i]) + } + } + } + + if keyPath == "" { + return fmt.Errorf("--key is required (path to Ed25519 private key hex file)") + } + if len(files) == 0 { + return fmt.Errorf("at least one file to sign is required") + } + + signer, err := signing.NewSignerFromFile(keyPath) + if err != nil { + return fmt.Errorf("loading private key: %w", err) + } + + for _, f := range files { + sigPath := f + ".sig" + if err := signer.SignFile(f, sigPath); err != nil { + return fmt.Errorf("signing %s: %w", f, err) + } + fmt.Printf("Signed: %s → %s\n", f, sigPath) + } + + return nil +} + +// GenKey generates a new Ed25519 key pair for signing updates. 
+// +// Usage: +// +// kubesolo-update genkey +func GenKey(args []string) error { + pub, priv, err := signing.GenerateKeyPair() + if err != nil { + return err + } + + fmt.Printf("Public key (hex): %s\n", pub) + fmt.Printf("Private key (hex): %s\n", priv) + fmt.Println() + fmt.Println("Save the public key to /etc/kubesolo/update-pubkey.hex on the device.") + fmt.Println("Keep the private key secure and offline — use it only for signing updates.") + + return nil +} diff --git a/update/main.go b/update/main.go index 03e3c15..b2fef6e 100644 --- a/update/main.go +++ b/update/main.go @@ -8,6 +8,8 @@ // kubesolo-update rollback Force rollback to other partition // kubesolo-update healthcheck Post-boot health verification // kubesolo-update status Show current A/B slot and boot status +// kubesolo-update sign Sign update artifacts with Ed25519 key +// kubesolo-update genkey Generate new Ed25519 signing key pair package main import ( @@ -42,6 +44,10 @@ func main() { err = cmd.Healthcheck(os.Args[2:]) case "status": err = cmd.Status(os.Args[2:]) + case "sign": + err = cmd.Sign(os.Args[2:]) + case "genkey": + err = cmd.GenKey(os.Args[2:]) default: fmt.Fprintf(os.Stderr, "unknown command: %s\n\n", os.Args[1]) usage() @@ -64,15 +70,18 @@ Commands: rollback Force rollback to other partition healthcheck Post-boot health verification (marks boot successful) status Show current A/B slot and boot status + sign Sign artifacts with Ed25519 private key (build system) + genkey Generate new Ed25519 signing key pair Options: --server URL Update server URL (default: from /etc/kubesolo/update.conf) --grubenv PATH Path to grubenv file (default: /boot/grub/grubenv) --timeout SECS Health check timeout in seconds (default: 120) + --pubkey PATH Ed25519 public key for signature verification (optional) Examples: kubesolo-update check --server https://updates.example.com - kubesolo-update apply --server https://updates.example.com + kubesolo-update apply --server https://updates.example.com 
--pubkey /etc/kubesolo/update-pubkey.hex kubesolo-update healthcheck kubesolo-update status `) diff --git a/update/pkg/image/image.go b/update/pkg/image/image.go index 54e15ce..7428b67 100644 --- a/update/pkg/image/image.go +++ b/update/pkg/image/image.go @@ -19,17 +19,22 @@ import ( "os" "path/filepath" "time" + + "github.com/portainer/kubesolo-os/update/pkg/signing" ) // UpdateMetadata describes an available update from the update server. type UpdateMetadata struct { - Version string `json:"version"` - VmlinuzURL string `json:"vmlinuz_url"` - VmlinuzSHA256 string `json:"vmlinuz_sha256"` - InitramfsURL string `json:"initramfs_url"` + Version string `json:"version"` + VmlinuzURL string `json:"vmlinuz_url"` + VmlinuzSHA256 string `json:"vmlinuz_sha256"` + VmlinuzSigURL string `json:"vmlinuz_sig_url,omitempty"` + InitramfsURL string `json:"initramfs_url"` InitramfsSHA256 string `json:"initramfs_sha256"` - ReleaseNotes string `json:"release_notes,omitempty"` - ReleaseDate string `json:"release_date,omitempty"` + InitramfsSigURL string `json:"initramfs_sig_url,omitempty"` + MetadataSigURL string `json:"metadata_sig_url,omitempty"` + ReleaseNotes string `json:"release_notes,omitempty"` + ReleaseDate string `json:"release_date,omitempty"` } // StagedImage represents downloaded and verified update files. @@ -41,9 +46,10 @@ type StagedImage struct { // Client handles communication with the update server. type Client struct { - serverURL string - httpClient *http.Client - stageDir string + serverURL string + httpClient *http.Client + stageDir string + pubKeyPath string // path to Ed25519 public key for signature verification } // NewClient creates a new update image client. @@ -57,6 +63,13 @@ func NewClient(serverURL, stageDir string) *Client { } } +// SetPublicKeyPath sets the path to the Ed25519 public key used +// for verifying update signatures. If set, downloaded images will +// be verified against their .sig files from the update server. 
+func (c *Client) SetPublicKeyPath(path string) { + c.pubKeyPath = path +} + // CheckForUpdate fetches the latest update metadata from the server. func (c *Client) CheckForUpdate() (*UpdateMetadata, error) { url := c.serverURL + "/latest.json" @@ -103,6 +116,15 @@ func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) { return nil, fmt.Errorf("downloading initramfs: %w", err) } + // Verify signatures if public key is configured + if c.pubKeyPath != "" { + if err := c.verifySignatures(meta, vmlinuzPath, initramfsPath); err != nil { + os.Remove(vmlinuzPath) + os.Remove(initramfsPath) + return nil, fmt.Errorf("signature verification: %w", err) + } + } + return &StagedImage{ VmlinuzPath: vmlinuzPath, InitramfsPath: initramfsPath, @@ -159,6 +181,60 @@ func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error { return nil } +// verifySignatures downloads .sig files and verifies them against the staged images. +func (c *Client) verifySignatures(meta *UpdateMetadata, vmlinuzPath, initramfsPath string) error { + verifier, err := signing.NewVerifierFromFile(c.pubKeyPath) + if err != nil { + return fmt.Errorf("loading public key: %w", err) + } + + // Verify vmlinuz signature + if meta.VmlinuzSigURL != "" { + sigPath := vmlinuzPath + ".sig" + if err := c.downloadToFile(meta.VmlinuzSigURL, sigPath); err != nil { + return fmt.Errorf("downloading vmlinuz signature: %w", err) + } + if err := verifier.VerifyFile(vmlinuzPath, sigPath); err != nil { + return fmt.Errorf("vmlinuz: %w", err) + } + slog.Info("vmlinuz signature verified") + } + + // Verify initramfs signature + if meta.InitramfsSigURL != "" { + sigPath := initramfsPath + ".sig" + if err := c.downloadToFile(meta.InitramfsSigURL, sigPath); err != nil { + return fmt.Errorf("downloading initramfs signature: %w", err) + } + if err := verifier.VerifyFile(initramfsPath, sigPath); err != nil { + return fmt.Errorf("initramfs: %w", err) + } + slog.Info("initramfs signature verified") + } + + return 
nil +} + +// downloadToFile downloads a URL to a local file (used for signature files). +func (c *Client) downloadToFile(url, dest string) error { + resp, err := c.httpClient.Get(url) + if err != nil { + return fmt.Errorf("downloading %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("server returned %d for %s", resp.StatusCode, url) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response: %w", err) + } + + return os.WriteFile(dest, data, 0o644) +} + // VerifyFile checks the SHA256 checksum of an existing file. func VerifyFile(path, expectedSHA256 string) error { f, err := os.Open(path) diff --git a/update/pkg/signing/signing.go b/update/pkg/signing/signing.go new file mode 100644 index 0000000..2b13d93 --- /dev/null +++ b/update/pkg/signing/signing.go @@ -0,0 +1,188 @@ +// Package signing provides Ed25519 signature verification for update images. +// +// KubeSolo OS uses Ed25519 signatures to ensure update integrity and +// authenticity. The update server signs both the metadata (latest.json) +// and individual image files. The update agent verifies signatures using +// a trusted public key embedded at build time or loaded from disk. +// +// Signature workflow: +// 1. Build system signs images with private key (offline) +// 2. Signatures stored alongside images on update server (.sig files) +// 3. Update agent downloads signatures and verifies before applying +// +// Key format: raw 32-byte Ed25519 public keys, hex-encoded for config files. +package signing + +import ( + "crypto/ed25519" + "encoding/hex" + "fmt" + "log/slog" + "os" +) + +// Verifier checks Ed25519 signatures on update artifacts. +type Verifier struct { + publicKey ed25519.PublicKey +} + +// NewVerifier creates a verifier from a hex-encoded Ed25519 public key string. 
+func NewVerifier(hexPubKey string) (*Verifier, error) { + keyBytes, err := hex.DecodeString(hexPubKey) + if err != nil { + return nil, fmt.Errorf("decoding public key hex: %w", err) + } + if len(keyBytes) != ed25519.PublicKeySize { + return nil, fmt.Errorf("invalid public key size: got %d bytes, want %d", len(keyBytes), ed25519.PublicKeySize) + } + return &Verifier{publicKey: ed25519.PublicKey(keyBytes)}, nil +} + +// NewVerifierFromFile reads an Ed25519 public key from a file. +// The file should contain the hex-encoded public key (64 hex chars). +func NewVerifierFromFile(path string) (*Verifier, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading public key file: %w", err) + } + // Trim whitespace + hexKey := trimWhitespace(string(data)) + return NewVerifier(hexKey) +} + +// VerifyFile checks that a file's signature matches. +// The signature file should contain the raw Ed25519 signature (64 bytes) +// or a hex-encoded signature (128 hex chars). +func (v *Verifier) VerifyFile(filePath, sigPath string) error { + // Read the file content + message, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("reading file: %w", err) + } + + // Read the signature + sigData, err := os.ReadFile(sigPath) + if err != nil { + return fmt.Errorf("reading signature: %w", err) + } + + sig, err := decodeSignature(sigData) + if err != nil { + return fmt.Errorf("decoding signature: %w", err) + } + + if !ed25519.Verify(v.publicKey, message, sig) { + return fmt.Errorf("signature verification failed for %s", filePath) + } + + slog.Debug("signature verified", "file", filePath) + return nil +} + +// VerifyBytes checks a signature against raw bytes. +func (v *Verifier) VerifyBytes(message, signature []byte) error { + if !ed25519.Verify(v.publicKey, message, signature) { + return fmt.Errorf("signature verification failed") + } + return nil +} + +// Signer creates Ed25519 signatures for update artifacts. 
+// Used by the build system, not the update agent on the device. +type Signer struct { + privateKey ed25519.PrivateKey +} + +// NewSigner creates a signer from a hex-encoded Ed25519 private key. +func NewSigner(hexPrivKey string) (*Signer, error) { + keyBytes, err := hex.DecodeString(hexPrivKey) + if err != nil { + return nil, fmt.Errorf("decoding private key hex: %w", err) + } + if len(keyBytes) != ed25519.PrivateKeySize { + return nil, fmt.Errorf("invalid private key size: got %d bytes, want %d", len(keyBytes), ed25519.PrivateKeySize) + } + return &Signer{privateKey: ed25519.PrivateKey(keyBytes)}, nil +} + +// NewSignerFromFile reads an Ed25519 private key from a file. +func NewSignerFromFile(path string) (*Signer, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading private key file: %w", err) + } + hexKey := trimWhitespace(string(data)) + return NewSigner(hexKey) +} + +// SignFile creates a signature for a file and writes it to sigPath. +func (s *Signer) SignFile(filePath, sigPath string) error { + message, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("reading file: %w", err) + } + + sig := ed25519.Sign(s.privateKey, message) + + // Write hex-encoded signature + hexSig := hex.EncodeToString(sig) + if err := os.WriteFile(sigPath, []byte(hexSig+"\n"), 0o644); err != nil { + return fmt.Errorf("writing signature: %w", err) + } + + slog.Debug("signed", "file", filePath, "sig", sigPath) + return nil +} + +// SignBytes creates a signature for raw bytes. +func (s *Signer) SignBytes(message []byte) []byte { + return ed25519.Sign(s.privateKey, message) +} + +// PublicKeyHex returns the hex-encoded public key corresponding to this signer. +func (s *Signer) PublicKeyHex() string { + pubKey := s.privateKey.Public().(ed25519.PublicKey) + return hex.EncodeToString(pubKey) +} + +// GenerateKeyPair creates a new Ed25519 key pair and returns hex-encoded strings. 
// GenerateKeyPair creates a new Ed25519 key pair and returns both keys as
// hex strings. Passing a nil reader to ed25519.GenerateKey selects
// crypto/rand.Reader as the entropy source.
func GenerateKeyPair() (publicKeyHex, privateKeyHex string, err error) {
	publicKey, privateKey, genErr := ed25519.GenerateKey(nil)
	if genErr != nil {
		return "", "", fmt.Errorf("generating key pair: %w", genErr)
	}

	publicKeyHex = hex.EncodeToString(publicKey)
	privateKeyHex = hex.EncodeToString(privateKey)
	return publicKeyHex, privateKeyHex, nil
}

// decodeSignature turns the contents of a signature file into raw signature
// bytes. Input that is exactly ed25519.SignatureSize bytes long is returned
// unchanged as a raw signature; anything else must be the hex encoding of a
// signature, optionally containing whitespace.
func decodeSignature(data []byte) ([]byte, error) {
	// Raw form: the length alone identifies it.
	if len(data) == ed25519.SignatureSize {
		return data, nil
	}

	// Hex form: drop whitespace such as a trailing newline before decoding.
	decoded, err := hex.DecodeString(trimWhitespace(string(data)))
	if err != nil {
		return nil, fmt.Errorf("invalid signature format (not raw or hex): %w", err)
	}
	if got := len(decoded); got != ed25519.SignatureSize {
		return nil, fmt.Errorf("invalid signature size: got %d bytes, want %d", got, ed25519.SignatureSize)
	}

	return decoded, nil
}

// trimWhitespace returns s with every space, tab, carriage return and
// newline removed, including those in the middle of the string.
func trimWhitespace(s string) string {
	kept := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case ' ', '\n', '\r', '\t':
			// dropped
		default:
			kept = append(kept, c)
		}
	}
	return string(kept)
}
t.Errorf("expected 128 hex chars for private key, got %d", len(priv)) + } + + // Keys should be valid hex + if _, err := hex.DecodeString(pub); err != nil { + t.Errorf("public key is not valid hex: %v", err) + } + if _, err := hex.DecodeString(priv); err != nil { + t.Errorf("private key is not valid hex: %v", err) + } +} + +func TestNewVerifier(t *testing.T) { + pub, _ := generateTestKeyPair(t) + + v, err := NewVerifier(pub) + if err != nil { + t.Fatal(err) + } + if v == nil { + t.Fatal("verifier should not be nil") + } +} + +func TestNewVerifierInvalid(t *testing.T) { + // Invalid hex + _, err := NewVerifier("not-hex") + if err == nil { + t.Fatal("expected error for invalid hex") + } + + // Wrong length + _, err = NewVerifier("abcd") + if err == nil { + t.Fatal("expected error for wrong key length") + } +} + +func TestNewSigner(t *testing.T) { + _, priv := generateTestKeyPair(t) + + s, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + if s == nil { + t.Fatal("signer should not be nil") + } +} + +func TestSignAndVerifyBytes(t *testing.T) { + pub, priv := generateTestKeyPair(t) + + signer, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + + verifier, err := NewVerifier(pub) + if err != nil { + t.Fatal(err) + } + + message := []byte("KubeSolo OS update v1.0.0") + sig := signer.SignBytes(message) + + // Verify should succeed + if err := verifier.VerifyBytes(message, sig); err != nil { + t.Errorf("verification should succeed: %v", err) + } + + // Tampered message should fail + tampered := []byte("KubeSolo OS update v1.0.1") + if err := verifier.VerifyBytes(tampered, sig); err == nil { + t.Error("verification should fail for tampered message") + } + + // Tampered signature should fail + badSig := make([]byte, len(sig)) + copy(badSig, sig) + badSig[0] ^= 0xff + if err := verifier.VerifyBytes(message, badSig); err == nil { + t.Error("verification should fail for tampered signature") + } +} + +func TestSignAndVerifyFile(t *testing.T) { + pub, priv 
:= generateTestKeyPair(t) + dir := t.TempDir() + + signer, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + + verifier, err := NewVerifier(pub) + if err != nil { + t.Fatal(err) + } + + // Create a test file + filePath := filepath.Join(dir, "test-image.gz") + content := []byte("fake OS image content for signing test") + if err := os.WriteFile(filePath, content, 0o644); err != nil { + t.Fatal(err) + } + + // Sign + sigPath := filePath + ".sig" + if err := signer.SignFile(filePath, sigPath); err != nil { + t.Fatal(err) + } + + // Verify signature file was created + if _, err := os.Stat(sigPath); err != nil { + t.Fatalf("signature file not created: %v", err) + } + + // Verify + if err := verifier.VerifyFile(filePath, sigPath); err != nil { + t.Errorf("verification should succeed: %v", err) + } +} + +func TestVerifyFileTampered(t *testing.T) { + pub, priv := generateTestKeyPair(t) + dir := t.TempDir() + + signer, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + + verifier, err := NewVerifier(pub) + if err != nil { + t.Fatal(err) + } + + // Create and sign a file + filePath := filepath.Join(dir, "test-image.gz") + if err := os.WriteFile(filePath, []byte("original content"), 0o644); err != nil { + t.Fatal(err) + } + + sigPath := filePath + ".sig" + if err := signer.SignFile(filePath, sigPath); err != nil { + t.Fatal(err) + } + + // Tamper with the file + if err := os.WriteFile(filePath, []byte("tampered content"), 0o644); err != nil { + t.Fatal(err) + } + + // Verification should fail + if err := verifier.VerifyFile(filePath, sigPath); err == nil { + t.Error("verification should fail for tampered file") + } +} + +func TestVerifyFileWrongKey(t *testing.T) { + _, priv := generateTestKeyPair(t) + otherPub, _ := generateTestKeyPair(t) // different key pair + dir := t.TempDir() + + signer, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + + wrongVerifier, err := NewVerifier(otherPub) + if err != nil { + t.Fatal(err) + } + + // Create and 
sign + filePath := filepath.Join(dir, "test.gz") + if err := os.WriteFile(filePath, []byte("test content"), 0o644); err != nil { + t.Fatal(err) + } + + sigPath := filePath + ".sig" + if err := signer.SignFile(filePath, sigPath); err != nil { + t.Fatal(err) + } + + // Verify with wrong key should fail + if err := wrongVerifier.VerifyFile(filePath, sigPath); err == nil { + t.Error("verification should fail with wrong public key") + } +} + +func TestNewVerifierFromFile(t *testing.T) { + pub, _ := generateTestKeyPair(t) + dir := t.TempDir() + + keyFile := filepath.Join(dir, "pubkey.hex") + if err := os.WriteFile(keyFile, []byte(pub+"\n"), 0o644); err != nil { + t.Fatal(err) + } + + v, err := NewVerifierFromFile(keyFile) + if err != nil { + t.Fatal(err) + } + if v == nil { + t.Fatal("verifier should not be nil") + } +} + +func TestNewSignerFromFile(t *testing.T) { + _, priv := generateTestKeyPair(t) + dir := t.TempDir() + + keyFile := filepath.Join(dir, "privkey.hex") + if err := os.WriteFile(keyFile, []byte(priv+"\n"), 0o644); err != nil { + t.Fatal(err) + } + + s, err := NewSignerFromFile(keyFile) + if err != nil { + t.Fatal(err) + } + if s == nil { + t.Fatal("signer should not be nil") + } +} + +func TestSignerPublicKeyHex(t *testing.T) { + pub, priv := generateTestKeyPair(t) + + signer, err := NewSigner(priv) + if err != nil { + t.Fatal(err) + } + + got := signer.PublicKeyHex() + if got != pub { + t.Errorf("public key mismatch: got %s, want %s", got, pub) + } +} + +func TestDecodeSignature(t *testing.T) { + // Create a valid signature + _, priv, _ := ed25519.GenerateKey(nil) + message := []byte("test") + rawSig := ed25519.Sign(priv, message) + hexSig := hex.EncodeToString(rawSig) + + // Raw signature (64 bytes) + decoded, err := decodeSignature(rawSig) + if err != nil { + t.Fatalf("raw sig decode failed: %v", err) + } + if len(decoded) != ed25519.SignatureSize { + t.Errorf("expected %d bytes, got %d", ed25519.SignatureSize, len(decoded)) + } + + // Hex-encoded 
signature + decoded, err = decodeSignature([]byte(hexSig)) + if err != nil { + t.Fatalf("hex sig decode failed: %v", err) + } + if len(decoded) != ed25519.SignatureSize { + t.Errorf("expected %d bytes, got %d", ed25519.SignatureSize, len(decoded)) + } + + // Hex with trailing newline + decoded, err = decodeSignature([]byte(hexSig + "\n")) + if err != nil { + t.Fatalf("hex sig with newline decode failed: %v", err) + } + if len(decoded) != ed25519.SignatureSize { + t.Errorf("expected %d bytes, got %d", ed25519.SignatureSize, len(decoded)) + } + + // Invalid data + _, err = decodeSignature([]byte("not valid")) + if err == nil { + t.Error("expected error for invalid signature data") + } +} + +func TestTrimWhitespace(t *testing.T) { + tests := []struct { + input string + expect string + }{ + {"hello", "hello"}, + {" hello ", "hello"}, + {"hello\n", "hello"}, + {"\thello\r\n", "hello"}, + {" he llo ", "hello"}, + } + + for _, tt := range tests { + got := trimWhitespace(tt.input) + if got != tt.expect { + t.Errorf("trimWhitespace(%q) = %q, want %q", tt.input, got, tt.expect) + } + } +}