fix: macOS dev VM, CA certs, DNS fallback, Portainer Edge integration
Some checks failed
CI / Go Tests (push) Has been cancelled
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Has been cancelled
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Has been cancelled
CI / Shellcheck (push) Has been cancelled

- dev-vm.sh: rewrite for macOS (bsdtar ISO extraction, Homebrew mkfs.ext4
  detection, direct kernel boot, TCG acceleration, port 8080 forwarding)
- inject-kubesolo.sh: add CA certificates bundle from builder so containerd
  can verify TLS when pulling from registries (Docker Hub, etc.)
- 50-network.sh: add DNS fallback (10.0.2.3 + 8.8.8.8) when DHCP client
  doesn't populate /etc/resolv.conf
- 90-kubesolo.sh: serve kubeconfig via HTTP on port 8080 for reliable
  retrieval from host, add 127.0.0.1 and 10.0.2.15 to API server SANs
- portainer.go: add headless Service to Edge Agent manifest (required for
  agent peer discovery DNS lookup)
- 10-parse-cmdline.sh + init.sh: add kubesolo.edge_id/edge_key boot params
- 20-persistent-mount.sh: auto-format unformatted data disks on first boot
- hack/fix-portainer-service.sh: helper to patch running cluster

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-12 02:11:31 -06:00
parent 36311ed4f4
commit d9ac58418d
9 changed files with 265 additions and 42 deletions

View File

@@ -350,7 +350,19 @@ mkdir -p "$ROOTFS/usr/local"
mkdir -p "$ROOTFS/mnt/data"
mkdir -p "$ROOTFS/run/containerd"
# --- 8. Ensure /etc/hosts and /etc/resolv.conf exist ---
# --- 8. CA certificates (required for containerd to pull from registries) ---
# Pick the first bundle the builder provides, then copy it to the single
# location used inside the rootfs.
mkdir -p "$ROOTFS/etc/ssl/certs"
ca_bundle_src=""
ca_bundle_note=""
if [ -f /etc/ssl/certs/ca-certificates.crt ]; then
    ca_bundle_src="/etc/ssl/certs/ca-certificates.crt"
elif [ -f /etc/pki/tls/certs/ca-bundle.crt ]; then
    ca_bundle_src="/etc/pki/tls/certs/ca-bundle.crt"
    ca_bundle_note=" (from ca-bundle.crt)"
fi
if [ -z "$ca_bundle_src" ]; then
    echo " WARN: No CA certificates found in builder — TLS verification will fail"
else
    cp "$ca_bundle_src" "$ROOTFS/etc/ssl/certs/ca-certificates.crt"
    echo " Installed CA certificates bundle$ca_bundle_note"
fi
# --- 9. Ensure /etc/hosts and /etc/resolv.conf exist ---
if [ ! -f "$ROOTFS/etc/hosts" ]; then
cat > "$ROOTFS/etc/hosts" << EOF
127.0.0.1 localhost

View File

@@ -77,6 +77,21 @@ func buildEdgeAgentManifest(edgeID, edgeKey, portainerURL, image string) string
sb.WriteString(" name: portainer-sa-clusteradmin\n")
sb.WriteString(" namespace: portainer\n")
sb.WriteString("---\n")
sb.WriteString("apiVersion: v1\n")
sb.WriteString("kind: Service\n")
sb.WriteString("metadata:\n")
sb.WriteString(" name: portainer-agent\n")
sb.WriteString(" namespace: portainer\n")
sb.WriteString("spec:\n")
sb.WriteString(" clusterIP: None\n")
sb.WriteString(" selector:\n")
sb.WriteString(" app: portainer-agent\n")
sb.WriteString(" ports:\n")
sb.WriteString(" - name: agent\n")
sb.WriteString(" port: 9001\n")
sb.WriteString(" targetPort: 9001\n")
sb.WriteString(" protocol: TCP\n")
sb.WriteString("---\n")
sb.WriteString("apiVersion: apps/v1\n")
sb.WriteString("kind: Deployment\n")
sb.WriteString("metadata:\n")

View File

@@ -1,24 +1,29 @@
#!/bin/bash
# dev-vm.sh — Launch a QEMU VM for development and testing
# Usage: ./hack/dev-vm.sh [path-to-iso-or-img] [--shell] [--debug]
#
# Works on both Linux (with KVM) and macOS (TCG emulation).
# On macOS/Apple Silicon, x86_64 guests run under TCG (~5-15x slower than KVM).
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
DEFAULT_ISO="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.iso"
DEFAULT_IMG="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.img"
IMAGE="${1:-}"
IMAGE=""
EXTRA_APPEND=""
SERIAL_OPTS="-serial stdio"
# Parse flags
shift || true
# Parse all arguments — flags and optional image path
for arg in "$@"; do
case "$arg" in
--shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
--debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
--shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
--debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
--edge-id=*) EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_id=${arg#--edge-id=}" ;;
--edge-key=*) EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_key=${arg#--edge-key=}" ;;
*) IMAGE="$arg" ;;
esac
done
@@ -39,42 +44,103 @@ echo "==> Launching QEMU with: $IMAGE"
echo " Press Ctrl+A, X to exit"
echo ""
# Create a temporary data disk for persistence testing
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
DATA_APPEND=""
DATA_DISK=""
cleanup() { rm -f "$DATA_DISK"; }
# Locate mkfs.ext4. A copy on PATH wins; otherwise probe the Homebrew
# e2fsprogs locations (Homebrew on macOS installs to a non-PATH location).
# MKFS_EXT4 stays empty when nothing is found.
MKFS_EXT4=""
if command -v mkfs.ext4 >/dev/null 2>&1; then
  MKFS_EXT4="mkfs.ext4"
else
  for _mkfs_candidate in \
    "/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4" \
    "/usr/local/opt/e2fsprogs/sbin/mkfs.ext4"; do
    if [ -x "$_mkfs_candidate" ]; then
      MKFS_EXT4="$_mkfs_candidate"
      break
    fi
  done
  unset _mkfs_candidate
fi
# Create and attach a formatted data disk for persistent K8s state.
# The disk cannot be prepared without mkfs.ext4, so bail out early.
if [ -z "$MKFS_EXT4" ]; then
  echo "ERROR: mkfs.ext4 not found. Install e2fsprogs:" >&2
  echo " brew install e2fsprogs" >&2
  exit 1
fi

# mktemp reserves a unique name; the image itself lives next to it with an
# ".img" suffix, so two paths have to be removed on any failure below.
DATA_DISK="$(mktemp /tmp/kubesolo-data-XXXXXX).img"

# Block size is given in plain bytes: POSIX dd only defines the "k" and "b"
# multipliers, and the uppercase "M" suffix is rejected by some BSD dd builds.
# Failures are reported explicitly because stderr of the tools is silenced.
if ! dd if=/dev/zero of="$DATA_DISK" bs=1048576 count=2048 2>/dev/null; then
  echo "ERROR: Failed to create data disk image $DATA_DISK" >&2
  rm -f "$DATA_DISK" "${DATA_DISK%.img}"
  exit 1
fi
if ! "$MKFS_EXT4" -q -L KSOLODATA "$DATA_DISK" 2>/dev/null; then
  echo "ERROR: $MKFS_EXT4 failed to format $DATA_DISK" >&2
  rm -f "$DATA_DISK" "${DATA_DISK%.img}"
  exit 1
fi

DATA_APPEND="kubesolo.data=/dev/vda"
echo " Data disk: 2 GB ext4 (persistent)"
EXTRACT_DIR=""
# Remove temporary artifacts created by this script.
# Globals:  DATA_DISK (read)   - data disk image path, may be empty
#           EXTRACT_DIR (read) - ISO extraction directory, may be empty
# Returns:  always 0. Written with `if` rather than `[ ... ] && rm` so that
#           an empty variable does not turn into a non-zero return status.
cleanup() {
  if [ -n "$DATA_DISK" ]; then
    # The image is "<mktemp name>.img"; the bare mktemp file exists as well.
    rm -f "$DATA_DISK" "${DATA_DISK%.img}"
  fi
  if [ -n "$EXTRACT_DIR" ]; then
    rm -rf "$EXTRACT_DIR"
  fi
  return 0
}
trap cleanup EXIT
COMMON_OPTS=(
-m 2048
-smp 2
-nographic
-net nic,model=virtio
-net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22
-drive "file=$DATA_DISK,format=raw,if=virtio"
)
# Build QEMU command
QEMU_ARGS=(-m 2048 -smp 2 -nographic -cpu max)
QEMU_ARGS+=(-net nic,model=virtio)
QEMU_ARGS+=(-net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080)
# Enable KVM if available
if [ -n "$DATA_DISK" ]; then
QEMU_ARGS+=(-drive "file=$DATA_DISK,format=raw,if=virtio")
fi
# Enable KVM on Linux, fall back to TCG everywhere else
if [ -w /dev/kvm ] 2>/dev/null; then
COMMON_OPTS+=(-enable-kvm)
QEMU_ARGS+=(-accel kvm)
echo " KVM acceleration: enabled"
else
echo " KVM acceleration: not available (using TCG)"
QEMU_ARGS+=(-accel tcg)
echo " TCG emulation (no KVM — expect slower boot)"
fi
case "$IMAGE" in
*.iso)
# -append only works with -kernel, not -cdrom.
# Extract kernel + initramfs and use direct kernel boot.
VMLINUZ=""
INITRAMFS=""
# Prefer build artifacts if present (no extraction needed)
if [ -f "$ROOTFS_DIR/vmlinuz" ] && [ -f "$ROOTFS_DIR/kubesolo-os.gz" ]; then
VMLINUZ="$ROOTFS_DIR/vmlinuz"
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
echo " Using kernel/initramfs from build directory"
else
# Extract from ISO using bsdtar (works on macOS + Linux, no mount needed)
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
echo " Extracting kernel/initramfs from ISO..."
bsdtar -xf "$IMAGE" -C "$EXTRACT_DIR" boot/vmlinuz boot/kubesolo-os.gz 2>/dev/null || {
echo "ERROR: Failed to extract kernel/initramfs from ISO."
echo " Ensure bsdtar is available (ships with macOS, install libarchive on Linux)."
echo " Or run 'make rootfs initramfs' to produce build artifacts."
exit 1
}
VMLINUZ="$EXTRACT_DIR/boot/vmlinuz"
INITRAMFS="$EXTRACT_DIR/boot/kubesolo-os.gz"
if [ ! -f "$VMLINUZ" ] || [ ! -f "$INITRAMFS" ]; then
echo "ERROR: ISO does not contain expected boot/vmlinuz and boot/kubesolo-os.gz"
echo " ISO contents:"
bsdtar -tf "$IMAGE" 2>/dev/null || true
exit 1
fi
echo " Extracted kernel/initramfs from ISO"
fi
qemu-system-x86_64 \
"${COMMON_OPTS[@]}" \
-cdrom "$IMAGE" \
-boot d \
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda $EXTRA_APPEND"
"${QEMU_ARGS[@]}" \
-kernel "$VMLINUZ" \
-initrd "$INITRAMFS" \
-append "console=ttyS0,115200n8 $DATA_APPEND $EXTRA_APPEND"
;;
*.img)
qemu-system-x86_64 \
"${COMMON_OPTS[@]}" \
"${QEMU_ARGS[@]}" \
-drive "file=$IMAGE,format=raw,if=virtio"
;;
*)

48
hack/fix-portainer-service.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# fix-portainer-service.sh — Create the missing headless Service for Portainer agent
# Usage: ./hack/fix-portainer-service.sh
#
# The Portainer agent does a DNS lookup for "portainer-agent" to discover peers.
# Without a Service, this lookup fails and the agent crashes.
#
# Steps: fetch the kubeconfig over HTTP from localhost:8080, apply the headless
# Service, restart the portainer-agent deployment, wait for the rollout.
# Requires curl and kubectl on the host.
set -euo pipefail

KUBECONFIG_URL="http://localhost:8080"

echo "==> Fetching kubeconfig from $KUBECONFIG_URL..."
KUBECONFIG_FILE=$(mktemp)
trap 'rm -f "$KUBECONFIG_FILE"' EXIT

# curl runs inside the condition: as a bare command, `set -e` would abort the
# script on a refused connection before the hint below is printed.
# -f makes HTTP error responses fail instead of saving the error body.
if ! curl -fsS "$KUBECONFIG_URL" > "$KUBECONFIG_FILE" || [ ! -s "$KUBECONFIG_FILE" ]; then
  echo "ERROR: Failed to fetch kubeconfig. Is the VM running?" >&2
  exit 1
fi

echo "==> Creating headless Service for portainer-agent..."
# Quoted heredoc delimiter: the manifest is passed to kubectl verbatim.
kubectl --kubeconfig "$KUBECONFIG_FILE" apply -f - <<'EOF'
apiVersion: v1
kind: Service
metadata:
  name: portainer-agent
  namespace: portainer
spec:
  clusterIP: None
  selector:
    app: portainer-agent
  ports:
    - name: agent
      port: 9001
      targetPort: 9001
      protocol: TCP
EOF

echo "==> Restarting portainer-agent deployment..."
kubectl --kubeconfig "$KUBECONFIG_FILE" rollout restart -n portainer deployment/portainer-agent

echo "==> Waiting for rollout..."
kubectl --kubeconfig "$KUBECONFIG_FILE" rollout status -n portainer deployment/portainer-agent --timeout=120s

echo "==> Done. Checking pod status:"
kubectl --kubeconfig "$KUBECONFIG_FILE" get pods -n portainer

View File

@@ -62,6 +62,8 @@ export KUBESOLO_SHELL=""
export KUBESOLO_NOPERSIST=""
export KUBESOLO_CLOUDINIT=""
export KUBESOLO_EXTRA_FLAGS=""
export KUBESOLO_PORTAINER_EDGE_ID=""
export KUBESOLO_PORTAINER_EDGE_KEY=""
# --- Logging ---
log() {

View File

@@ -9,6 +9,8 @@ for arg in $(cat /proc/cmdline); do
kubesolo.nopersist) KUBESOLO_NOPERSIST=1 ;;
kubesolo.cloudinit=*) KUBESOLO_CLOUDINIT="${arg#kubesolo.cloudinit=}" ;;
kubesolo.flags=*) KUBESOLO_EXTRA_FLAGS="${arg#kubesolo.flags=}" ;;
kubesolo.edge_id=*) KUBESOLO_PORTAINER_EDGE_ID="${arg#kubesolo.edge_id=}" ;;
kubesolo.edge_key=*) KUBESOLO_PORTAINER_EDGE_KEY="${arg#kubesolo.edge_key=}" ;;
esac
done

View File

@@ -36,12 +36,19 @@ if [ ! -b "$KUBESOLO_DATA_DEV" ]; then
return 1
fi
# Mount data partition
# Mount data partition (format on first boot if unformatted)
mkdir -p "$DATA_MOUNT"
mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
log_err "Failed to mount $KUBESOLO_DATA_DEV"
return 1
}
# Try to mount the data device. If that fails, format it only when it carries
# no recognizable filesystem signature (a blank first-boot disk). A device
# that already holds a filesystem but cannot be mounted is left untouched so
# existing data is never destroyed by an automatic mkfs.
if ! mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" 2>/dev/null; then
    # Best effort: when blkid is unavailable its output is empty and the
    # device is treated as unformatted.
    _ksolo_existing_sig="$(blkid "$KUBESOLO_DATA_DEV" 2>/dev/null || true)"
    if [ -n "$_ksolo_existing_sig" ]; then
        log_err "Failed to mount $KUBESOLO_DATA_DEV (existing filesystem detected, refusing to format)"
        unset _ksolo_existing_sig
        return 1
    fi
    unset _ksolo_existing_sig
    log "Formatting $KUBESOLO_DATA_DEV as ext4 (first boot)"
    mkfs.ext4 -q -L KSOLODATA "$KUBESOLO_DATA_DEV" || {
        log_err "Failed to format $KUBESOLO_DATA_DEV"
        return 1
    }
    mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
        log_err "Failed to mount $KUBESOLO_DATA_DEV after format"
        return 1
    }
fi
log_ok "Mounted $KUBESOLO_DATA_DEV at $DATA_MOUNT"
# Create persistent directory structure (first boot)

View File

@@ -58,4 +58,16 @@ else
return 1
fi
log_ok "Network configured on $ETH_DEV"
# Ensure /etc/resolv.conf has valid DNS (udhcpc should have written it,
# but verify and add fallbacks if missing).
# Only real "nameserver" directives count: the pattern is anchored to the
# start of the line so commented-out entries are not mistaken for a resolver.
if [ ! -s /etc/resolv.conf ]; then
    log_warn "/etc/resolv.conf is empty — adding fallback DNS"
    printf 'nameserver %s\n' 10.0.2.3 8.8.8.8 > /etc/resolv.conf
elif ! grep -q '^[[:space:]]*nameserver[[:space:]]' /etc/resolv.conf 2>/dev/null; then
    log_warn "No nameserver in /etc/resolv.conf — adding fallback DNS"
    # Leading newline guards against an existing last line without a
    # terminating newline; blank lines in the file are harmless.
    printf '\n' >> /etc/resolv.conf
    printf 'nameserver %s\n' 10.0.2.3 8.8.8.8 >> /etc/resolv.conf
fi
log_ok "Network configured on $ETH_DEV (DNS: $(grep '^[[:space:]]*nameserver[[:space:]]' /etc/resolv.conf 2>/dev/null | head -1))"

View File

@@ -1,8 +1,8 @@
#!/bin/sh
# 90-kubesolo.sh — Start KubeSolo (final init stage)
#
# This stage exec's KubeSolo as PID 1 (replacing init).
# KubeSolo manages containerd, kubelet, API server, and all K8s components.
# Starts KubeSolo in the background, waits for it to generate the kubeconfig,
# then serves that kubeconfig over HTTP (port 8080) for remote kubectl access.
KUBESOLO_BIN="/usr/bin/kubesolo"
@@ -14,11 +14,13 @@ fi
# Build KubeSolo command line
KUBESOLO_ARGS="--path /var/lib/kubesolo --local-storage"
# Add extra SANs if hostname resolves
# Add SANs for remote access (127.0.0.1 for QEMU port forwarding, 10.0.2.15 for QEMU NAT)
EXTRA_SANS="127.0.0.1,10.0.2.15"
HOSTNAME="$(hostname)"
if [ -n "$HOSTNAME" ]; then
KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $HOSTNAME"
EXTRA_SANS="$EXTRA_SANS,$HOSTNAME"
fi
KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $EXTRA_SANS"
# Add any extra flags from boot parameters
if [ -n "$KUBESOLO_EXTRA_FLAGS" ]; then
@@ -41,9 +43,66 @@ if command -v iptables >/dev/null 2>&1; then
log "Pre-initialized iptables tables (filter, nat, mangle)"
fi
log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS"
log "Kubeconfig will be at: /var/lib/kubesolo/pki/admin/admin.kubeconfig"
# Portainer Edge credentials (from boot params or cloud-init): export each
# one that is non-empty and log that it is configured. The values themselves
# are never logged.
[ -z "${KUBESOLO_PORTAINER_EDGE_ID:-}" ] || {
    export KUBESOLO_PORTAINER_EDGE_ID
    log "Portainer Edge ID configured"
}
[ -z "${KUBESOLO_PORTAINER_EDGE_KEY:-}" ] || {
    export KUBESOLO_PORTAINER_EDGE_KEY
    log "Portainer Edge Key configured"
}
# exec replaces this init process — KubeSolo becomes PID 1
log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS"
KUBECONFIG_PATH="/var/lib/kubesolo/pki/admin/admin.kubeconfig"
# Start KubeSolo in background so we can wait for readiness and print kubeconfig
# shellcheck disable=SC2086
exec $KUBESOLO_BIN $KUBESOLO_ARGS
$KUBESOLO_BIN $KUBESOLO_ARGS &
KUBESOLO_PID=$!
# Wait for kubeconfig to appear (KubeSolo generates it during startup).
# Polls every 2 seconds for up to 120 seconds and stops early if the
# KubeSolo process is gone.
log "Waiting for KubeSolo to generate kubeconfig..."
WAIT=0
while [ ! -f "$KUBECONFIG_PATH" ] && [ "$WAIT" -lt 120 ]; do
    sleep 2
    WAIT=$((WAIT + 2))
    # Check KubeSolo is still running
    if ! kill -0 "$KUBESOLO_PID" 2>/dev/null; then
        log_err "KubeSolo exited unexpectedly"
        # Reap the child; its exit status is not used here.
        wait "$KUBESOLO_PID" 2>/dev/null || true
        return 1
    fi
done

if [ -f "$KUBECONFIG_PATH" ]; then
    log_ok "KubeSolo is running (PID $KUBESOLO_PID)"

    # Rewrite server URL for external access and serve via HTTP.
    # Serial console truncates long base64 cert lines, so we serve
    # the kubeconfig over HTTP for reliable retrieval.
    EXTERNAL_KC="/tmp/kubeconfig-external.yaml"
    sed 's|server: https://.*:6443|server: https://localhost:6443|' "$KUBECONFIG_PATH" > "$EXTERNAL_KC"

    # Serve kubeconfig via HTTP on port 8080 using BusyBox nc.
    # Each nc invocation answers one connection, hence the loop. If nc fails
    # (e.g. it cannot listen on the port) pause briefly instead of spinning.
    (while true; do
        printf "HTTP/1.1 200 OK\r\nContent-Type: text/yaml\r\nConnection: close\r\n\r\n" | cat - "$EXTERNAL_KC" | nc -l -p 8080 2>/dev/null || sleep 1
    done) &

    log_ok "Kubeconfig available via HTTP"
    echo ""
    echo "============================================================"
    echo " From your host machine, run:"
    echo ""
    echo " curl -s http://localhost:8080 > ~/.kube/kubesolo-config"
    echo " kubectl --kubeconfig ~/.kube/kubesolo-config get nodes"
    echo "============================================================"
    echo ""
else
    log_warn "Kubeconfig not found after ${WAIT}s — KubeSolo may still be starting"
    log_warn "Check manually: cat $KUBECONFIG_PATH"
fi

# Keep init alive — wait on KubeSolo process
wait "$KUBESOLO_PID"