feat: add production hardening — Ed25519 signing, Portainer Edge, SSH extension (Phase 4)

Image signing:
- Ed25519 sign/verify package (pure Go stdlib, zero deps)
- genkey and sign CLI subcommands for build system
- Optional --pubkey flag for verifying updates on apply
- Signature URLs in update metadata (latest.json)

Portainer Edge Agent:
- cloud-init portainer.go module writes K8s manifest
- Auto-deploys Edge Agent when portainer.edge-agent.enabled
- Full RBAC (ServiceAccount, ClusterRoleBinding, Deployment)
- 5 Portainer tests in portainer_test.go

Production tooling:
- SSH debug extension builder (hack/build-ssh-extension.sh)
- Boot performance benchmark (test/benchmark/bench-boot.sh)
- Resource usage benchmark (test/benchmark/bench-resources.sh)
- Deployment guide (docs/deployment-guide.md)

Test results: 50 update agent tests + 22 cloud-init tests passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 11:26:23 -06:00
parent 8d25e1890e
commit 49a37e30e8
15 changed files with 1965 additions and 11 deletions

206
test/benchmark/bench-boot.sh Executable file
View File

@@ -0,0 +1,206 @@
#!/bin/bash
# bench-boot.sh — Measure KubeSolo OS boot performance in QEMU
#
# Measures:
# - Time to first console output (kernel loaded)
# - Time to init complete (all stages done)
# - Time to K8s node Ready
# - Disk image/ISO size
#
# Not yet measured (planned): time to first pod Running (nginx test)
# and peak memory usage.
#
# Usage:
# test/benchmark/bench-boot.sh <iso-or-img> [--runs N]
#
# Output: JSON benchmark results to stdout, human-readable to stderr
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# First positional argument is the image to boot; abort with usage if missing.
IMAGE="${1:?Usage: bench-boot.sh <iso-or-img> [--runs N]}"
RUNS=3        # number of boot iterations (override with --runs N)
SSH_PORT=2222 # host port forwarded to guest port 22
K8S_PORT=6443 # host port forwarded to guest port 6443
shift || true

while [ $# -gt 0 ]; do
  case "$1" in
    --runs)
      # Check for the value before touching $2: under `set -u` a bare "$2"
      # aborts with "unbound variable" instead of a usable error message.
      if [ $# -lt 2 ]; then
        echo "ERROR: --runs requires a value" >&2
        exit 1
      fi
      RUNS="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# RUNS drives the run loop and is emitted as a bare JSON number, so it must
# be a positive decimal integer without leading zeros.
if ! [[ "$RUNS" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: --runs must be a positive integer (got: $RUNS)" >&2
  exit 1
fi

if [ ! -f "$IMAGE" ]; then
  echo "ERROR: Image not found: $IMAGE" >&2
  exit 1
fi
# Classify the image by file extension: "*.img" boots as a disk, anything
# else is treated as an ISO.
case "$IMAGE" in
  *.img) IMAGE_TYPE="disk" ;;
  *) IMAGE_TYPE="iso" ;;
esac

# Run banner goes to stderr; stdout is reserved for the JSON result.
{
  echo "=== KubeSolo OS Boot Benchmark ==="
  echo "Image: $IMAGE ($(du -h "$IMAGE" | cut -f1))"
  echo "Type: $IMAGE_TYPE"
  echo "Runs: $RUNS"
  echo ""
} >&2
# Build the QEMU command line as an array so that arguments containing
# spaces (e.g. image paths) are passed through intact.
QEMU_CMD=(
  qemu-system-x86_64
  -m 1024
  -smp 2
  -nographic
  -no-reboot
  -serial mon:stdio
  -net nic,model=virtio
  -net "user,hostfwd=tcp::${SSH_PORT}-:22,hostfwd=tcp::${K8S_PORT}-:6443"
)

# Use KVM when the device node exists and is readable; otherwise fall back
# to emulation with the "max" CPU model.
if [ -e /dev/kvm ] && [ -r /dev/kvm ]; then
  QEMU_CMD+=(-enable-kvm -cpu host)
else
  QEMU_CMD+=(-cpu max)
fi

if [ "$IMAGE_TYPE" = "iso" ]; then
  QEMU_CMD+=(-cdrom "$IMAGE")
  # Attach a scratch qcow2 disk so the ISO has somewhere to persist data.
  TEMP_DISK=$(mktemp /tmp/kubesolo-bench-XXXXXX.img)
  # Register cleanup before qemu-img runs, so a failed create cannot leak the
  # temp file. A function is used so the path is expanded, quoted, at exit
  # time rather than word-split when the trap is defined.
  cleanup_temp_disk() { rm -f -- "$TEMP_DISK"; }
  trap cleanup_temp_disk EXIT
  if ! qemu-img create -f qcow2 "$TEMP_DISK" 8G >/dev/null 2>&1; then
    echo "ERROR: qemu-img failed to create scratch disk: $TEMP_DISK" >&2
    exit 1
  fi
  QEMU_CMD+=(-drive "file=$TEMP_DISK,format=qcow2,if=virtio")
else
  # NOTE(review): the image is attached writable and without -snapshot, so
  # every run after the first boots an image already modified by the
  # previous run. Confirm whether that is intended for the benchmark.
  QEMU_CMD+=(-drive "file=$IMAGE,format=raw,if=virtio")
fi
# Results arrays: one sample (milliseconds) per run that reached the milestone.
declare -a BOOT_TIMES
declare -a INIT_TIMES
declare -a K8S_TIMES
# NOTE(review): MEMORY_USAGE is declared but nothing in this script fills it.
declare -a MEMORY_USAGE

#######################################
# Classify one line of serial-console output.
# Arguments: $1 - a console line
# Outputs:   sets the global MILESTONE to KERNEL, INIT, K8S, MEM or ""
#            (no milestone). A global is used instead of stdout so the
#            per-line path does not fork a subshell.
#######################################
classify_console_line() {
  # Mask every "NotReady" first: it contains the substring "Ready", so a
  # line such as "kubesolo   NotReady" would otherwise satisfy the Ready
  # patterns and record the K8s milestone too early.
  local mark=$'\001'
  local masked=${1//NotReady/$mark}
  case "$masked" in
    *"Linux version"*)
      MILESTONE="KERNEL"
      ;;
    *"kubesolo-init"*"all stages complete"* | *"init complete"*)
      MILESTONE="INIT"
      ;;
    # "<NotReady> ... Ready" keeps the original NotReady-to-Ready transition
    # match, but now requires a real "Ready" after the masked "NotReady".
    *"node is Ready"* | *"$mark"*"Ready"* | *"kubesolo"*"Ready"*)
      MILESTONE="K8S"
      ;;
    *"MemTotal:"* | *"MemAvailable:"*)
      MILESTONE="MEM"
      ;;
    *)
      MILESTONE=""
      ;;
  esac
}

for ((run = 1; run <= RUNS; run++)); do
  echo "--- Run $run/$RUNS ---" >&2
  START_TIME=$(date +%s%N)
  BOOT_DONE=""
  INIT_DONE=""
  K8S_READY=""

  # Per-run console log; milestone times go to the sibling "$LOG.times".
  LOG=$(mktemp /tmp/kubesolo-bench-log-XXXXXX)

  # Run QEMU with a 300 s cap. The while loop is a pipeline stage and runs
  # in a subshell, so milestone times are handed back to the parent through
  # "$LOG.times" rather than through shell variables.
  timeout 300 "${QEMU_CMD[@]}" 2>&1 | while IFS= read -r line; do
    NOW=$(date +%s%N)
    ELAPSED_MS=$(((NOW - START_TIME) / 1000000))
    printf '%s\n' "$line" >> "$LOG"

    # Record only the first occurrence of each milestone.
    classify_console_line "$line"
    case "$MILESTONE" in
      KERNEL)
        if [ -z "$BOOT_DONE" ]; then
          BOOT_DONE="$ELAPSED_MS"
          echo " Kernel loaded: ${ELAPSED_MS}ms" >&2
          echo "KERNEL_MS=$ELAPSED_MS" >> "$LOG.times"
        fi
        ;;
      INIT)
        if [ -z "$INIT_DONE" ]; then
          INIT_DONE="$ELAPSED_MS"
          echo " Init complete: ${ELAPSED_MS}ms" >&2
          echo "INIT_MS=$ELAPSED_MS" >> "$LOG.times"
        fi
        ;;
      K8S)
        if [ -z "$K8S_READY" ]; then
          K8S_READY="$ELAPSED_MS"
          echo " K8s Ready: ${ELAPSED_MS}ms" >&2
          echo "K8S_MS=$ELAPSED_MS" >> "$LOG.times"
        fi
        ;;
      MEM)
        # Capture memory info if printed (nothing reads it back yet).
        echo "MEM_LINE=$line" >> "$LOG.times"
        ;;
    esac

    # Stop after K8s is ready (or timeout).
    if [ -n "$K8S_READY" ]; then
      break
    fi
  done || true

  # Read results back. The matches are anchored so a captured MEM_LINE can
  # never be mistaken for a timing entry.
  if [ -f "$LOG.times" ]; then
    KERNEL_MS=$(sed -n 's/^KERNEL_MS=//p' "$LOG.times" | tail -n 1)
    INIT_MS=$(sed -n 's/^INIT_MS=//p' "$LOG.times" | tail -n 1)
    K8S_MS=$(sed -n 's/^K8S_MS=//p' "$LOG.times" | tail -n 1)
    if [ -n "$KERNEL_MS" ]; then BOOT_TIMES+=("$KERNEL_MS"); fi
    if [ -n "$INIT_MS" ]; then INIT_TIMES+=("$INIT_MS"); fi
    if [ -n "$K8S_MS" ]; then K8S_TIMES+=("$K8S_MS"); fi
  fi

  rm -f "$LOG" "$LOG.times"
  echo "" >&2
done
#######################################
# Print the integer mean of the given samples.
# Arguments: zero or more integers (millisecond samples); empty-string
#            arguments are ignored
# Outputs:   the mean, truncated by integer division, or "null" when no
#            samples were given (so the result can be embedded in JSON)
#######################################
avg() {
  local sum=0 count=0 sample
  for sample in "$@"; do
    # Skip empty arguments: a guarded empty-array expansion can arrive as a
    # single "" and must not be averaged as a zero sample.
    [ -n "$sample" ] || continue
    sum=$((sum + sample))
    count=$((count + 1))
  done
  if [ "$count" -eq 0 ]; then
    echo "null"
    return
  fi
  echo $((sum / count))
}
# Image size in bytes: try the BSD/macOS stat syntax first, then GNU stat;
# fall back to 0 if both fail.
IMAGE_SIZE=$(stat -f%z "$IMAGE" 2>/dev/null || stat -c%s "$IMAGE" 2>/dev/null || echo 0)
IMAGE_SIZE_MB=$((IMAGE_SIZE / 1024 / 1024))

# Join the arguments with commas (prints an empty line for no arguments).
join_csv() {
  local IFS=,
  echo "$*"
}

# The ${arr[@]+...} guard keeps `set -u` quiet for empty arrays. It is
# deliberately not wrapped in outer quotes, so an empty array always expands
# to zero arguments.
AVG_BOOT=$(avg ${BOOT_TIMES[@]+"${BOOT_TIMES[@]}"})
AVG_INIT=$(avg ${INIT_TIMES[@]+"${INIT_TIMES[@]}"})
AVG_K8S=$(avg ${K8S_TIMES[@]+"${K8S_TIMES[@]}"})
RAW_KERNEL=$(join_csv ${BOOT_TIMES[@]+"${BOOT_TIMES[@]}"})
RAW_INIT=$(join_csv ${INIT_TIMES[@]+"${INIT_TIMES[@]}"})
RAW_K8S=$(join_csv ${K8S_TIMES[@]+"${K8S_TIMES[@]}"})

# Report KVM using the same condition that selects -enable-kvm when the QEMU
# command is built (device exists AND is readable); checking existence alone
# could report "true" for a run that was actually emulated.
if [ -e /dev/kvm ] && [ -r /dev/kvm ]; then
  KVM_USED="true"
else
  KVM_USED="false"
fi

# Escape backslashes and double quotes so the file name is a valid JSON string.
IMAGE_NAME=$(basename "$IMAGE")
IMAGE_NAME_JSON=${IMAGE_NAME//\\/\\\\}
IMAGE_NAME_JSON=${IMAGE_NAME_JSON//\"/\\\"}

# Human-readable summary on stderr.
echo "=== Results ===" >&2
echo "Image size: ${IMAGE_SIZE_MB} MB" >&2
echo "Avg kernel load: ${AVG_BOOT}ms" >&2
echo "Avg init complete: ${AVG_INIT}ms" >&2
echo "Avg K8s Ready: ${AVG_K8S}ms" >&2
echo "" >&2

# Machine-readable result on stdout.
cat << EOF
{
  "benchmark": "kubesolo-os-boot",
  "image": "$IMAGE_NAME_JSON",
  "image_size_bytes": $IMAGE_SIZE,
  "image_size_mb": $IMAGE_SIZE_MB,
  "runs": $RUNS,
  "results": {
    "kernel_load_ms": $AVG_BOOT,
    "init_complete_ms": $AVG_INIT,
    "k8s_ready_ms": $AVG_K8S
  },
  "raw_kernel_ms": [$RAW_KERNEL],
  "raw_init_ms": [$RAW_INIT],
  "raw_k8s_ms": [$RAW_K8S],
  "qemu_config": {
    "memory_mb": 1024,
    "cpus": 2,
    "kvm": $KVM_USED
  }
}
EOF

146
test/benchmark/bench-resources.sh Executable file
View File

@@ -0,0 +1,146 @@
#!/bin/bash
# bench-resources.sh — Measure KubeSolo OS resource usage
#
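# Connects to a running KubeSolo OS instance over SSH and measures:
# - Memory usage (total, used, available) and the top processes by RSS
# - Disk usage (rootfs, data partition, containerd and kubesolo data)
# - CPU idle percentage over a 5-second sample
# - Process count
# - Kubernetes node status and pod count
#
# Not yet measured: CPU usage under load, container count, network overhead.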
#
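# Usage:
# test/benchmark/bench-resources.sh [--ssh-port PORT] [--ssh-host HOST]
#
# Defaults come from the SSH_PORT (2222), SSH_HOST (localhost) and
# SSH_USER (root) environment variables.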
#
# Prerequisites: KubeSolo OS running (e.g. via make dev-vm)
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Connection settings; each can be preset through the environment.
SSH_PORT="${SSH_PORT:-2222}"
SSH_HOST="${SSH_HOST:-localhost}"
SSH_USER="${SSH_USER:-root}"
# Host-key checking and known_hosts recording are switched off.
# NOTE(review): presumably because the target is a throwaway dev VM whose
# host key changes on every rebuild — confirm before pointing this at
# anything long-lived.
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"

#######################################
# Parse command-line options into SSH_PORT / SSH_HOST.
# Arguments: the script's arguments ("$@")
# Returns:   exits 1 on an unknown option, a missing option value, or a
#            non-numeric port
#######################################
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --ssh-port | --ssh-host)
        # Check for the value before touching $2: under `set -u` a bare
        # "$2" aborts with "unbound variable" instead of a usable message.
        if [ $# -lt 2 ]; then
          echo "ERROR: $1 requires a value" >&2
          exit 1
        fi
        if [ "$1" = "--ssh-port" ]; then
          SSH_PORT="$2"
        else
          SSH_HOST="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done

  if ! [[ "$SSH_PORT" =~ ^[0-9]+$ ]]; then
    echo "ERROR: SSH port must be numeric (got: $SSH_PORT)" >&2
    exit 1
  fi
}

parse_args "$@"
#######################################
# Run a command on the target host over SSH.
# Globals:   SSH_OPTS, SSH_PORT, SSH_USER, SSH_HOST (read)
# Arguments: the remote command and its arguments
# Outputs:   the remote command's stdout; stderr is discarded
# Returns:   ssh's exit status
#######################################
run_ssh() {
  local target="${SSH_USER}@${SSH_HOST}"
  # SSH_OPTS is a flat option string and is intentionally left unquoted so
  # that it word-splits into separate arguments.
  # shellcheck disable=SC2086
  ssh $SSH_OPTS \
    -p "$SSH_PORT" \
    "$target" \
    "$@" 2>/dev/null
}
# Banner on stderr; stdout is reserved for the JSON result.
{
  echo "=== KubeSolo OS Resource Benchmark ==="
  echo "Connecting to ${SSH_HOST}:${SSH_PORT}..."
} >&2

# Probe the connection with a no-op remote command and bail out early with
# a hint if the host cannot be reached.
run_ssh "true" 2>/dev/null || {
  echo "ERROR: Cannot connect via SSH. Is KubeSolo OS running?" >&2
  echo "Start with: make dev-vm" >&2
  exit 1
}
echo "" >&2
# --- Memory ---
echo "--- Memory Usage ---" >&2
MEM_INFO=$(run_ssh "cat /proc/meminfo")

# Print the numeric value of one /proc/meminfo field ($1) from MEM_INFO.
meminfo_field() {
  sed -n "s/$1: *\([0-9]*\).*/\1/p" <<< "$MEM_INFO"
}

MEM_TOTAL=$(meminfo_field MemTotal)
MEM_FREE=$(meminfo_field MemFree)
MEM_AVAIL=$(meminfo_field MemAvailable)

# Fail with a clear message instead of a cryptic "division by 0" further
# down when the remote output could not be parsed.
if ! [[ "$MEM_TOTAL" =~ ^[1-9][0-9]*$ && "$MEM_FREE" =~ ^[0-9]+$ ]]; then
  echo "ERROR: Could not parse MemTotal/MemFree from /proc/meminfo" >&2
  exit 1
fi
# MemAvailable is treated as optional; a missing value is reported as 0.
if ! [[ "$MEM_AVAIL" =~ ^[0-9]+$ ]]; then
  MEM_AVAIL=0
fi

# NOTE(review): "used" is MemTotal - MemFree, which counts buffers and page
# cache as used. Confirm whether MemTotal - MemAvailable is the intended
# "OS overhead" figure.
MEM_USED=$((MEM_TOTAL - MEM_FREE))

echo " Total: $((MEM_TOTAL / 1024)) MB" >&2
echo " Used: $((MEM_USED / 1024)) MB" >&2
echo " Available: $((MEM_AVAIL / 1024)) MB" >&2
echo " OS overhead: $((MEM_USED / 1024)) MB ($(((MEM_USED * 100) / MEM_TOTAL))%)" >&2
echo "" >&2
# Largest processes by resident set size: the ten highest values in the RSS
# column, sorted remotely and echoed to stderr one row at a time.
echo "--- Top Processes (by RSS) ---" >&2
run_ssh "ps -o pid,rss,comm | sort -k2 -rn | head -10" 2>/dev/null |
  while read -r proc_row; do
    printf ' %s\n' "$proc_row" >&2
  done
echo "" >&2
# --- Disk ---
echo "--- Disk Usage ---" >&2
# Show rootfs and the data partition; fall back to rootfs only when
# /mnt/data cannot be reported.
run_ssh "df -h / /mnt/data 2>/dev/null || df -h /" | while read -r line; do
  echo " $line" >&2
done
echo "" >&2

# Containerd and KubeSolo data sizes.
# The remote command is a pipeline whose status is that of `cut`, so a
# missing directory yields empty output with a zero exit status. An
# `|| echo "N/A"` fallback therefore never fires in that case; default the
# empty result explicitly instead.
CONTAINERD_SIZE=$(run_ssh "du -sh /var/lib/containerd 2>/dev/null | cut -f1" || true)
CONTAINERD_SIZE=${CONTAINERD_SIZE:-N/A}
KUBESOLO_SIZE=$(run_ssh "du -sh /var/lib/kubesolo 2>/dev/null | cut -f1" || true)
KUBESOLO_SIZE=${KUBESOLO_SIZE:-N/A}

echo " containerd data: $CONTAINERD_SIZE" >&2
echo " kubesolo data: $KUBESOLO_SIZE" >&2
echo "" >&2
# --- Processes ---
echo "--- Process Count ---" >&2
# `ps` prints a header line before the process rows, so the raw line count
# is one higher than the number of processes. The subtraction is done
# locally to avoid adding another process to the remote pipeline.
# NOTE(review): the figure may still include the ps/wc processes and the
# SSH session used to take the measurement.
PROC_LINES=$(run_ssh "ps | wc -l")
PROC_LINES=${PROC_LINES//[[:space:]]/}
if [[ "$PROC_LINES" =~ ^[0-9]+$ ]] && [ "$PROC_LINES" -gt 0 ]; then
  PROC_COUNT=$((PROC_LINES - 1))
else
  PROC_COUNT=0
fi
echo " Total processes: $PROC_COUNT" >&2
echo "" >&2
# --- K8s Status ---
# Node line and pod total come from the kubectl bundled with kubesolo; each
# query falls back to a placeholder when the SSH command fails.
printf '%s\n' "--- Kubernetes Status ---" >&2
NODE_STATUS="$(run_ssh "kubesolo kubectl get nodes -o wide --no-headers 2>/dev/null" || printf '%s\n' "N/A")"
POD_COUNT="$(run_ssh "kubesolo kubectl get pods -A --no-headers 2>/dev/null | wc -l" || printf '%s\n' "0")"
{
  printf '%s\n' " Node: $NODE_STATUS"
  printf '%s\n' " Pod count: $POD_COUNT"
  printf '%s\n' ""
} >&2
# --- CPU (5-second sample) ---
echo "--- CPU Usage (5s idle sample) ---" >&2

# Remote script: read the first line of /proc/stat twice, 5 seconds apart,
# and print the idle share of the elapsed ticks as an integer percentage.
# Single-quoted so every expansion happens on the remote side.
# NOTE(review): only user, nice, system and idle enter the total; the
# remaining fields (captured in "rest") are ignored, so the idle share is
# slightly overstated when those are non-zero.
CPU_SAMPLE_SCRIPT='
read cpu user nice system idle rest < /proc/stat
sleep 5
read cpu user2 nice2 system2 idle2 rest2 < /proc/stat
total=$((user2 + nice2 + system2 + idle2 - user - nice - system - idle))
idle_diff=$((idle2 - idle))
if [ $total -gt 0 ]; then
echo $((idle_diff * 100 / total))
else
echo 0
fi
'
CPU_IDLE=$(run_ssh "$CPU_SAMPLE_SCRIPT" 2>/dev/null || true)

# Only a value in 0..100 is usable. Anything else (SSH failure, empty
# output) is reported as N/A on stderr and null in JSON; feeding a literal
# "N/A" into $((100 - ...)) aborts under `set -u` and is not valid JSON.
if [[ "$CPU_IDLE" =~ ^(0|[1-9][0-9]?|100)$ ]]; then
  CPU_IDLE_JSON="$CPU_IDLE"
  CPU_USED_JSON=$((100 - CPU_IDLE))
  echo " CPU idle: ${CPU_IDLE}%" >&2
  echo " CPU used: ${CPU_USED_JSON}%" >&2
else
  CPU_IDLE="N/A"
  CPU_IDLE_JSON="null"
  CPU_USED_JSON="null"
  echo " CPU idle: N/A" >&2
  echo " CPU used: N/A" >&2
fi
echo "" >&2

# --- OS Version ---
OS_VERSION=$(run_ssh "cat /etc/kubesolo-os-version 2>/dev/null" || echo "unknown")

# Print $1 as a JSON number if it is a non-negative integer (surrounding
# whitespace ignored), otherwise print null.
json_int() {
  local value=${1-}
  value=${value//[[:space:]]/}
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%s' "$value"
  else
    printf 'null'
  fi
}

# Print $1 with backslashes, double quotes, newlines and tabs escaped, for
# embedding inside a JSON string literal.
json_escape() {
  local text=${1-}
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# --- Output JSON ---
cat << EOF
{
  "benchmark": "kubesolo-os-resources",
  "os_version": "$(json_escape "$OS_VERSION")",
  "memory": {
    "total_kb": $MEM_TOTAL,
    "used_kb": $MEM_USED,
    "available_kb": ${MEM_AVAIL:-0},
    "total_mb": $((MEM_TOTAL / 1024)),
    "used_mb": $((MEM_USED / 1024)),
    "available_mb": $((${MEM_AVAIL:-0} / 1024)),
    "overhead_percent": $(((MEM_USED * 100) / MEM_TOTAL))
  },
  "disk": {
    "containerd_size": "$(json_escape "$CONTAINERD_SIZE")",
    "kubesolo_size": "$(json_escape "$KUBESOLO_SIZE")"
  },
  "processes": {
    "total": $(json_int "$PROC_COUNT")
  },
  "kubernetes": {
    "pod_count": $(json_int "$POD_COUNT")
  },
  "cpu": {
    "idle_percent": $CPU_IDLE_JSON,
    "used_percent": $CPU_USED_JSON
  }
}
EOF