diff --git a/build/scripts/inject-kubesolo.sh b/build/scripts/inject-kubesolo.sh
index f1236bf..fb39615 100755
--- a/build/scripts/inject-kubesolo.sh
+++ b/build/scripts/inject-kubesolo.sh
@@ -350,7 +350,19 @@ mkdir -p "$ROOTFS/usr/local"
 mkdir -p "$ROOTFS/mnt/data"
 mkdir -p "$ROOTFS/run/containerd"
 
-# --- 8. Ensure /etc/hosts and /etc/resolv.conf exist ---
+# --- 8. CA certificates (required for containerd to pull from registries) ---
+mkdir -p "$ROOTFS/etc/ssl/certs"
+if [ -f /etc/ssl/certs/ca-certificates.crt ]; then
+    cp /etc/ssl/certs/ca-certificates.crt "$ROOTFS/etc/ssl/certs/ca-certificates.crt"
+    echo " Installed CA certificates bundle"
+elif [ -f /etc/pki/tls/certs/ca-bundle.crt ]; then
+    cp /etc/pki/tls/certs/ca-bundle.crt "$ROOTFS/etc/ssl/certs/ca-certificates.crt"
+    echo " Installed CA certificates bundle (from ca-bundle.crt)"
+else
+    echo " WARN: No CA certificates found in builder — TLS verification will fail"
+fi
+
+# --- 9. Ensure /etc/hosts and /etc/resolv.conf exist ---
 if [ ! -f "$ROOTFS/etc/hosts" ]; then
     cat > "$ROOTFS/etc/hosts" << EOF
 127.0.0.1 localhost
diff --git a/cloud-init/portainer.go b/cloud-init/portainer.go
index 8d6dc55..13af8ec 100644
--- a/cloud-init/portainer.go
+++ b/cloud-init/portainer.go
@@ -77,6 +77,21 @@ func buildEdgeAgentManifest(edgeID, edgeKey, portainerURL, image string) string
 	sb.WriteString("  name: portainer-sa-clusteradmin\n")
 	sb.WriteString("  namespace: portainer\n")
 	sb.WriteString("---\n")
+	sb.WriteString("apiVersion: v1\n")
+	sb.WriteString("kind: Service\n")
+	sb.WriteString("metadata:\n")
+	sb.WriteString("  name: portainer-agent\n")
+	sb.WriteString("  namespace: portainer\n")
+	sb.WriteString("spec:\n")
+	sb.WriteString("  clusterIP: None\n")
+	sb.WriteString("  selector:\n")
+	sb.WriteString("    app: portainer-agent\n")
+	sb.WriteString("  ports:\n")
+	sb.WriteString("    - name: agent\n")
+	sb.WriteString("      port: 9001\n")
+	sb.WriteString("      targetPort: 9001\n")
+	sb.WriteString("      protocol: TCP\n")
+	sb.WriteString("---\n")
 	sb.WriteString("apiVersion: apps/v1\n")
 	sb.WriteString("kind: Deployment\n")
 	sb.WriteString("metadata:\n")
diff --git a/hack/dev-vm.sh b/hack/dev-vm.sh
index 516f14d..c6e748e 100755
--- a/hack/dev-vm.sh
+++ b/hack/dev-vm.sh
@@ -1,24 +1,31 @@
 #!/bin/bash
 # dev-vm.sh — Launch a QEMU VM for development and testing
-# Usage: ./hack/dev-vm.sh [path-to-iso-or-img] [--shell] [--debug]
+# Usage: ./hack/dev-vm.sh [path-to-iso-or-img] [--shell] [--debug] [--edge-id=ID] [--edge-key=KEY]
+#
+# Works on both Linux (with KVM) and macOS (TCG emulation).
+# On macOS/Apple Silicon, x86_64 guests run under TCG (~5-15x slower than KVM).
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 VERSION="$(cat "$PROJECT_ROOT/VERSION")"
+ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
 
 DEFAULT_ISO="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.iso"
 DEFAULT_IMG="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.img"
 
-IMAGE="${1:-}"
+IMAGE=""
 EXTRA_APPEND=""
-SERIAL_OPTS="-serial stdio"
 
-# Parse flags
-shift || true
+# Parse all arguments — flags and optional image path (the last path given wins).
+# Unknown options are rejected instead of being mistaken for the image path.
 for arg in "$@"; do
     case "$arg" in
-        --shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
-        --debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
+        --shell)      EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
+        --debug)      EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
+        --edge-id=*)  EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_id=${arg#--edge-id=}" ;;
+        --edge-key=*) EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_key=${arg#--edge-key=}" ;;
+        -*)           echo "ERROR: unknown option: $arg" >&2; exit 2 ;;
+        *)            IMAGE="$arg" ;;
     esac
 done
@@ -39,42 +46,103 @@ echo "==> Launching QEMU with: $IMAGE"
 echo " Press Ctrl+A, X to exit"
 echo ""
 
-# Create a temporary data disk for persistence testing
-DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
-dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
-mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
+DATA_APPEND=""
+DATA_DISK=""
 
-cleanup() { rm -f "$DATA_DISK"; }
+# Find mkfs.ext4 (Homebrew on macOS installs to a non-PATH location)
+MKFS_EXT4=""
+if command -v mkfs.ext4 >/dev/null 2>&1; then
+    MKFS_EXT4="mkfs.ext4"
+elif [ -x "/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
+    MKFS_EXT4="/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4"
+elif [ -x "/usr/local/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
+    MKFS_EXT4="/usr/local/opt/e2fsprogs/sbin/mkfs.ext4"
+fi
+
+# Create and attach a formatted data disk for persistent K8s state.
+# (Cleanup trap is installed first so a dd/mkfs failure still removes temp files.)
+EXTRACT_DIR=""
+cleanup() {
+    # `[ -z X ] || rm` keeps the exit status 0 when there is nothing to remove;
+    # stripping ".img" also covers a failed mktemp, which would leave just ".img".
+    [ -z "${DATA_DISK%.img}" ] || rm -f "$DATA_DISK" "${DATA_DISK%.img}"
+    [ -z "$EXTRACT_DIR" ] || rm -rf "$EXTRACT_DIR"
+}
+trap cleanup EXIT
+
+if [ -n "$MKFS_EXT4" ]; then
+    DATA_DISK="$(mktemp /tmp/kubesolo-data-XXXXXX).img"
+    dd if=/dev/zero of="$DATA_DISK" bs=1048576 count=2048 2>/dev/null # bytes: the M suffix is not POSIX
+    "$MKFS_EXT4" -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
+    DATA_APPEND="kubesolo.data=/dev/vda"
+    echo " Data disk: 2 GB ext4 (persistent)"
+else
+    echo "ERROR: mkfs.ext4 not found. Install e2fsprogs:"
+    echo " brew install e2fsprogs"
+    exit 1
+fi
-trap cleanup EXIT
 
-COMMON_OPTS=(
-    -m 2048
-    -smp 2
-    -nographic
-    -net nic,model=virtio
-    -net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22
-    -drive "file=$DATA_DISK,format=raw,if=virtio"
-)
+# Build QEMU command
+QEMU_ARGS=(-m 2048 -smp 2 -nographic -cpu max)
+QEMU_ARGS+=(-net nic,model=virtio)
+QEMU_ARGS+=(-net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080)
 
-# Enable KVM if available
+if [ -n "$DATA_DISK" ]; then
+    QEMU_ARGS+=(-drive "file=$DATA_DISK,format=raw,if=virtio")
+fi
+
+# Enable KVM on Linux, fall back to TCG everywhere else
 if [ -w /dev/kvm ] 2>/dev/null; then
-    COMMON_OPTS+=(-enable-kvm)
+    QEMU_ARGS+=(-accel kvm)
     echo " KVM acceleration: enabled"
 else
-    echo " KVM acceleration: not available (using TCG)"
+    QEMU_ARGS+=(-accel tcg)
+    echo " TCG emulation (no KVM — expect slower boot)"
 fi
 
 case "$IMAGE" in
     *.iso)
+        # -append only works with -kernel, not -cdrom.
+        # Extract kernel + initramfs and use direct kernel boot.
+        VMLINUZ=""
+        INITRAMFS=""
+
+        # Prefer build artifacts if present (no extraction needed)
+        if [ -f "$ROOTFS_DIR/vmlinuz" ] && [ -f "$ROOTFS_DIR/kubesolo-os.gz" ]; then
+            VMLINUZ="$ROOTFS_DIR/vmlinuz"
+            INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
+            echo " Using kernel/initramfs from build directory"
+        else
+            # Extract from ISO using bsdtar (works on macOS + Linux, no mount needed)
+            EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
+            echo " Extracting kernel/initramfs from ISO..."
+            bsdtar -xf "$IMAGE" -C "$EXTRACT_DIR" boot/vmlinuz boot/kubesolo-os.gz 2>/dev/null || {
+                echo "ERROR: Failed to extract kernel/initramfs from ISO."
+                echo " Ensure bsdtar is available (ships with macOS, install libarchive on Linux)."
+                echo " Or run 'make rootfs initramfs' to produce build artifacts."
+                exit 1
+            }
+
+            VMLINUZ="$EXTRACT_DIR/boot/vmlinuz"
+            INITRAMFS="$EXTRACT_DIR/boot/kubesolo-os.gz"
+            if [ ! -f "$VMLINUZ" ] || [ ! -f "$INITRAMFS" ]; then
+                echo "ERROR: ISO does not contain expected boot/vmlinuz and boot/kubesolo-os.gz"
+                echo " ISO contents:"
+                bsdtar -tf "$IMAGE" 2>/dev/null || true
+                exit 1
+            fi
+            echo " Extracted kernel/initramfs from ISO"
+        fi
+
         qemu-system-x86_64 \
-            "${COMMON_OPTS[@]}" \
-            -cdrom "$IMAGE" \
-            -boot d \
-            -append "console=ttyS0,115200n8 kubesolo.data=/dev/vda $EXTRA_APPEND"
+            "${QEMU_ARGS[@]}" \
+            -kernel "$VMLINUZ" \
+            -initrd "$INITRAMFS" \
+            -append "console=ttyS0,115200n8 $DATA_APPEND $EXTRA_APPEND"
         ;;
     *.img)
         qemu-system-x86_64 \
-            "${COMMON_OPTS[@]}" \
+            "${QEMU_ARGS[@]}" \
             -drive "file=$IMAGE,format=raw,if=virtio"
         ;;
     *)
diff --git a/hack/fix-portainer-service.sh b/hack/fix-portainer-service.sh
new file mode 100755
index 0000000..faaf626
--- /dev/null
+++ b/hack/fix-portainer-service.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# fix-portainer-service.sh — Create the missing headless Service for Portainer agent
+# Usage: ./hack/fix-portainer-service.sh
+#
+# The Portainer agent does a DNS lookup for "portainer-agent" to discover peers.
+# Without a Service, this lookup fails and the agent crashes.
+
+set -euo pipefail
+
+KUBECONFIG_URL="http://localhost:8080"
+
+echo "==> Fetching kubeconfig from $KUBECONFIG_URL..."
+KUBECONFIG_FILE=$(mktemp)
+trap 'rm -f "$KUBECONFIG_FILE"' EXIT
+
+# `|| true`: let the check below report failure instead of `set -e` aborting silently.
+curl -fsS "$KUBECONFIG_URL" > "$KUBECONFIG_FILE" || true
+if [ ! -s "$KUBECONFIG_FILE" ]; then
+    echo "ERROR: Failed to fetch kubeconfig. Is the VM running?"
+    exit 1
+fi
+
+echo "==> Creating headless Service for portainer-agent..."
+kubectl --kubeconfig "$KUBECONFIG_FILE" apply -f - <<'EOF'
+apiVersion: v1
+kind: Service
+metadata:
+  name: portainer-agent
+  namespace: portainer
+spec:
+  clusterIP: None
+  selector:
+    app: portainer-agent
+  ports:
+    - name: agent
+      port: 9001
+      targetPort: 9001
+      protocol: TCP
+EOF
+
+echo "==> Restarting portainer-agent deployment..."
+kubectl --kubeconfig "$KUBECONFIG_FILE" rollout restart -n portainer deployment/portainer-agent
+
+echo "==> Waiting for rollout..."
+kubectl --kubeconfig "$KUBECONFIG_FILE" rollout status -n portainer deployment/portainer-agent --timeout=120s
+
+echo "==> Done. Checking pod status:"
+kubectl --kubeconfig "$KUBECONFIG_FILE" get pods -n portainer
diff --git a/init/init.sh b/init/init.sh
index 0b87de6..ec6f8af 100755
--- a/init/init.sh
+++ b/init/init.sh
@@ -62,6 +62,8 @@ export KUBESOLO_SHELL=""
 export KUBESOLO_NOPERSIST=""
 export KUBESOLO_CLOUDINIT=""
 export KUBESOLO_EXTRA_FLAGS=""
+export KUBESOLO_PORTAINER_EDGE_ID=""
+export KUBESOLO_PORTAINER_EDGE_KEY=""
 
 # --- Logging ---
 log() {
diff --git a/init/lib/10-parse-cmdline.sh b/init/lib/10-parse-cmdline.sh
index 226e857..6488795 100755
--- a/init/lib/10-parse-cmdline.sh
+++ b/init/lib/10-parse-cmdline.sh
@@ -9,6 +9,8 @@ for arg in $(cat /proc/cmdline); do
         kubesolo.nopersist) KUBESOLO_NOPERSIST=1 ;;
         kubesolo.cloudinit=*) KUBESOLO_CLOUDINIT="${arg#kubesolo.cloudinit=}" ;;
         kubesolo.flags=*) KUBESOLO_EXTRA_FLAGS="${arg#kubesolo.flags=}" ;;
+        kubesolo.edge_id=*) KUBESOLO_PORTAINER_EDGE_ID="${arg#kubesolo.edge_id=}" ;;
+        kubesolo.edge_key=*) KUBESOLO_PORTAINER_EDGE_KEY="${arg#kubesolo.edge_key=}" ;;
     esac
 done
 
diff --git a/init/lib/20-persistent-mount.sh b/init/lib/20-persistent-mount.sh
index 415c424..fbffbbc 100755
--- a/init/lib/20-persistent-mount.sh
+++ b/init/lib/20-persistent-mount.sh
@@ -36,12 +36,26 @@ if [ ! -b "$KUBESOLO_DATA_DEV" ]; then
     return 1
 fi
 
-# Mount data partition
+# Mount data partition. Format only on first boot, i.e. when the device holds
+# no recognisable filesystem; never reformat a device that merely failed to mount.
 mkdir -p "$DATA_MOUNT"
-mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
-    log_err "Failed to mount $KUBESOLO_DATA_DEV"
-    return 1
-}
+if ! mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" 2>/dev/null; then
+    # blkid prints nothing for a device without a known signature. If blkid is
+    # missing its output is empty too, so the format path below is still taken.
+    if [ -n "$(blkid "$KUBESOLO_DATA_DEV" 2>/dev/null)" ]; then
+        log_err "Failed to mount $KUBESOLO_DATA_DEV (existing filesystem found, refusing to format)"
+        return 1
+    fi
+    log "Formatting $KUBESOLO_DATA_DEV as ext4 (first boot)"
+    mkfs.ext4 -q -L KSOLODATA "$KUBESOLO_DATA_DEV" || {
+        log_err "Failed to format $KUBESOLO_DATA_DEV"
+        return 1
+    }
+    mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
+        log_err "Failed to mount $KUBESOLO_DATA_DEV after format"
+        return 1
+    }
+fi
 log_ok "Mounted $KUBESOLO_DATA_DEV at $DATA_MOUNT"
 
 # Create persistent directory structure (first boot)
diff --git a/init/lib/50-network.sh b/init/lib/50-network.sh
index fcdf2be..0b8bf94 100755
--- a/init/lib/50-network.sh
+++ b/init/lib/50-network.sh
@@ -58,4 +58,16 @@ else
     return 1
 fi
 
-log_ok "Network configured on $ETH_DEV"
+# Ensure /etc/resolv.conf has valid DNS (udhcpc should have written it,
+# but verify and add fallbacks if missing)
+if [ ! -s /etc/resolv.conf ]; then
+    log_warn "/etc/resolv.conf is empty — adding fallback DNS"
+    echo "nameserver 10.0.2.3" > /etc/resolv.conf
+    echo "nameserver 8.8.8.8" >> /etc/resolv.conf
+elif ! grep -q nameserver /etc/resolv.conf 2>/dev/null; then
+    log_warn "No nameserver in /etc/resolv.conf — adding fallback DNS"
+    echo "nameserver 10.0.2.3" >> /etc/resolv.conf
+    echo "nameserver 8.8.8.8" >> /etc/resolv.conf
+fi
+
+log_ok "Network configured on $ETH_DEV (DNS: $(grep nameserver /etc/resolv.conf 2>/dev/null | head -1))"
diff --git a/init/lib/90-kubesolo.sh b/init/lib/90-kubesolo.sh
index 840d5d3..884ead6 100755
--- a/init/lib/90-kubesolo.sh
+++ b/init/lib/90-kubesolo.sh
@@ -1,8 +1,8 @@
 #!/bin/sh
 # 90-kubesolo.sh — Start KubeSolo (final init stage)
 #
-# This stage exec's KubeSolo as PID 1 (replacing init).
-# KubeSolo manages containerd, kubelet, API server, and all K8s components.
+# Starts KubeSolo, waits for it to generate its kubeconfig, then serves that
+# kubeconfig over HTTP (port 8080) so it can be fetched for remote kubectl access.
KUBESOLO_BIN="/usr/bin/kubesolo" @@ -14,11 +14,13 @@ fi # Build KubeSolo command line KUBESOLO_ARGS="--path /var/lib/kubesolo --local-storage" -# Add extra SANs if hostname resolves +# Add SANs for remote access (127.0.0.1 for QEMU port forwarding, 10.0.2.15 for QEMU NAT) +EXTRA_SANS="127.0.0.1,10.0.2.15" HOSTNAME="$(hostname)" if [ -n "$HOSTNAME" ]; then - KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $HOSTNAME" + EXTRA_SANS="$EXTRA_SANS,$HOSTNAME" fi +KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $EXTRA_SANS" # Add any extra flags from boot parameters if [ -n "$KUBESOLO_EXTRA_FLAGS" ]; then @@ -41,9 +43,66 @@ if command -v iptables >/dev/null 2>&1; then log "Pre-initialized iptables tables (filter, nat, mangle)" fi -log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS" -log "Kubeconfig will be at: /var/lib/kubesolo/pki/admin/admin.kubeconfig" +# Export Portainer Edge env vars if set (via boot params or cloud-init) +if [ -n "${KUBESOLO_PORTAINER_EDGE_ID:-}" ]; then + export KUBESOLO_PORTAINER_EDGE_ID + log "Portainer Edge ID configured" +fi +if [ -n "${KUBESOLO_PORTAINER_EDGE_KEY:-}" ]; then + export KUBESOLO_PORTAINER_EDGE_KEY + log "Portainer Edge Key configured" +fi -# exec replaces this init process — KubeSolo becomes PID 1 +log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS" + +KUBECONFIG_PATH="/var/lib/kubesolo/pki/admin/admin.kubeconfig" + +# Start KubeSolo in background so we can wait for readiness and print kubeconfig # shellcheck disable=SC2086 -exec $KUBESOLO_BIN $KUBESOLO_ARGS +$KUBESOLO_BIN $KUBESOLO_ARGS & +KUBESOLO_PID=$! + +# Wait for kubeconfig to appear (KubeSolo generates it during startup) +log "Waiting for KubeSolo to generate kubeconfig..." +WAIT=0 +while [ ! -f "$KUBECONFIG_PATH" ] && [ $WAIT -lt 120 ]; do + sleep 2 + WAIT=$((WAIT + 2)) + # Check KubeSolo is still running + if ! 
kill -0 $KUBESOLO_PID 2>/dev/null; then + log_err "KubeSolo exited unexpectedly" + wait $KUBESOLO_PID 2>/dev/null || true + return 1 + fi +done + +if [ -f "$KUBECONFIG_PATH" ]; then + log_ok "KubeSolo is running (PID $KUBESOLO_PID)" + + # Rewrite server URL for external access and serve via HTTP. + # Serial console truncates long base64 cert lines, so we serve + # the kubeconfig over HTTP for reliable retrieval. + EXTERNAL_KC="/tmp/kubeconfig-external.yaml" + sed 's|server: https://.*:6443|server: https://localhost:6443|' "$KUBECONFIG_PATH" > "$EXTERNAL_KC" + + # Serve kubeconfig via HTTP on port 8080 using BusyBox nc + (while true; do + printf "HTTP/1.1 200 OK\r\nContent-Type: text/yaml\r\nConnection: close\r\n\r\n" | cat - "$EXTERNAL_KC" | nc -l -p 8080 2>/dev/null + done) & + + log_ok "Kubeconfig available via HTTP" + echo "" + echo "============================================================" + echo " From your host machine, run:" + echo "" + echo " curl -s http://localhost:8080 > ~/.kube/kubesolo-config" + echo " kubectl --kubeconfig ~/.kube/kubesolo-config get nodes" + echo "============================================================" + echo "" +else + log_warn "Kubeconfig not found after ${WAIT}s — KubeSolo may still be starting" + log_warn "Check manually: cat $KUBECONFIG_PATH" +fi + +# Keep init alive — wait on KubeSolo process +wait $KUBESOLO_PID