feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic rollback. GRUB bootloader manages slot selection with a 3-attempt boot counter that auto-rolls back on repeated health check failures. GRUB boot config: - A/B slot selection with boot_counter/boot_success env vars - Automatic rollback when counter reaches 0 (3 failed boots) - Debug, emergency shell, and manual slot-switch menu entries Disk image (refactored): - 4-partition GPT layout: EFI + System A + System B + Data - GRUB EFI/BIOS installation with graceful fallbacks - Both system partitions populated during image creation Update agent (Go, zero external deps): - pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback) - pkg/partition: find/mount/write system partitions by label - pkg/image: HTTP download with SHA256 verification - pkg/health: post-boot checks (containerd, API server, node Ready) - 6 CLI commands: check, apply, activate, rollback, healthcheck, status - 37 unit tests across all 4 packages Deployment: - K8s CronJob for automatic update checks (every 6 hours) - ConfigMap for update server URL - Health check Job for post-boot verification Build pipeline: - build-update-agent.sh compiles static Linux binary (~5.9 MB) - inject-kubesolo.sh includes update agent in initramfs - Makefile: build-update-agent, test-update-agent, test-update targets Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
11
build/grub/grub-env-defaults
Normal file
11
build/grub/grub-env-defaults
Normal file
@@ -0,0 +1,11 @@
|
||||
# KubeSolo OS — Default GRUB Environment Variables
|
||||
# These are written to grubenv on first install.
|
||||
# Format: key=value (one per line, grub-editenv compatible)
|
||||
#
|
||||
# active_slot: Which system partition to boot (A or B)
|
||||
# boot_counter: Attempts remaining before rollback (3 = fresh, 0 = rollback)
|
||||
# boot_success: Set to 1 by health check after successful boot
|
||||
|
||||
active_slot=A
|
||||
boot_counter=3
|
||||
boot_success=1
|
||||
95
build/grub/grub.cfg
Normal file
95
build/grub/grub.cfg
Normal file
@@ -0,0 +1,95 @@
|
||||
# KubeSolo OS — GRUB Configuration
|
||||
# A/B partition boot with automatic rollback
|
||||
#
|
||||
# Partition layout:
|
||||
# (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
|
||||
# (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
|
||||
#
|
||||
# Environment variables (in grubenv):
|
||||
# active_slot — "A" or "B" (which partition to boot)
|
||||
# boot_counter — 3→2→1→0 (decremented at boot whenever the previous boot was not marked successful)
|
||||
# boot_success — 0 or 1 (set to 1 by health check post-boot)
|
||||
|
||||
set default=0
|
||||
set timeout=3
|
||||
|
||||
# Load saved environment
|
||||
load_env
|
||||
|
||||
# --- A/B Rollback Logic ---
# Runs on every boot, before the menu is shown.
#
# boot_success is "1" only when the post-boot health check marked the previous
# boot as good. Any other value means the previous attempt did not complete
# successfully, so one attempt is consumed from boot_counter. When the counter
# is already "0", the active slot is swapped and the counter is re-armed.

if [ "${boot_success}" != "1" ]; then
    # Last boot failed — check counter
    if [ "${boot_counter}" = "0" ]; then
        # Counter exhausted — rollback to other slot
        if [ "${active_slot}" = "A" ]; then
            set active_slot=B
        else
            set active_slot=A
        fi
        set boot_counter=3
        # Persist both variables with ONE save_env call. If they were written
        # separately and the machine lost power in between, grubenv would hold
        # the new slot together with boot_counter=0, and the next boot would
        # immediately swap back to the slot that just failed.
        save_env active_slot boot_counter
    else
        # Decrement counter. GRUB script has no arithmetic, so each step is
        # spelled out explicitly.
        if [ "${boot_counter}" = "3" ]; then
            set boot_counter=2
        elif [ "${boot_counter}" = "2" ]; then
            set boot_counter=1
        elif [ "${boot_counter}" = "1" ]; then
            set boot_counter=0
        else
            # Unset or unrecognised value (e.g. a damaged grubenv). Treat it
            # as exhausted so the next failed boot triggers the rollback
            # instead of retrying the same slot forever.
            set boot_counter=0
        fi
        save_env boot_counter
    fi
fi

# Reset boot_success for this boot attempt — health check must set it to 1
set boot_success=0
save_env boot_success
|
||||
|
||||
# --- Resolve boot partition ---
# Translate active_slot into the GRUB root device and a label for the menu.
# Only the exact value "A" selects System A; every other value selects
# System B.
if [ "${active_slot}" != "A" ]; then
    set root='(hd0,gpt3)'
    set slot_label="System B"
else
    set root='(hd0,gpt2)'
    set slot_label="System A"
fi
|
||||
|
||||
# --- Menu Entries ---
# The first entry is the default ("set default=0"). Kernel and initramfs paths
# are relative to $root, which was pointed at the active system partition
# above.

# Normal boot of the active slot.
menuentry "KubeSolo OS (${slot_label})" {
    echo "Booting KubeSolo OS from ${slot_label}..."
    echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA quiet
    initrd /kubesolo-os.gz
}

# Active slot again, with kubesolo.debug and a serial console instead of quiet.
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
    echo "Booting KubeSolo OS (debug) from ${slot_label}..."
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}

# Active slot with kubesolo.shell; no data partition argument is passed.
menuentry "KubeSolo OS — Emergency Shell" {
    echo "Booting to emergency shell..."
    linux /vmlinuz kubesolo.shell console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}

# Manually boot the passive slot (for testing). Only $root is overridden for
# this entry; active_slot in grubenv is not modified here.
menuentry "KubeSolo OS — Boot Other Slot" {
    if [ "${active_slot}" != "A" ]; then
        set root='(hd0,gpt2)'
        echo "Booting from System A (passive)..."
    else
        set root='(hd0,gpt3)'
        echo "Booting from System B (passive)..."
    fi
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}
|
||||
29
build/scripts/build-update-agent.sh
Executable file
29
build/scripts/build-update-agent.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# build-update-agent.sh — Compile the KubeSolo OS update agent
#
# Runs the update agent's test suite, then builds a static Linux binary
# (CGO disabled, GOOS=linux, GOARCH=amd64, symbols stripped).
# Output: build/cache/kubesolo-update
#
# Requires: a Go toolchain on PATH.
set -euo pipefail

# BASH_SOURCE (rather than $0) keeps the path correct when the script is
# sourced, and matches create-disk-image.sh.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
UPDATE_DIR="$PROJECT_ROOT/update"
CACHE_DIR="$PROJECT_ROOT/build/cache"
OUTPUT="$CACHE_DIR/kubesolo-update"

echo "=== Building KubeSolo Update Agent ==="

# Fail early with a clear message instead of a bare "command not found" or a
# failed cd inside the subshells below.
command -v go >/dev/null 2>&1 || {
    echo "ERROR: Go toolchain not found in PATH" >&2
    exit 1
}
[ -d "$UPDATE_DIR" ] || {
    echo "ERROR: Missing $UPDATE_DIR" >&2
    exit 1
}

# Ensure output dir exists
mkdir -p "$CACHE_DIR"

# Run tests first; set -e aborts the build if they fail.
echo "--- Running tests ---"
(cd "$UPDATE_DIR" && go test ./... -count=1)

# Build static binary
echo "--- Compiling static binary ---"
(cd "$UPDATE_DIR" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
    go build -ldflags='-s -w' -o "$OUTPUT" .)

# Report the size without parsing ls output.
SIZE="$(du -h "$OUTPUT" | cut -f1)"
echo "--- Update agent built: $OUTPUT ($SIZE) ---"
|
||||
@@ -1,6 +1,11 @@
|
||||
#!/bin/bash
|
||||
# create-disk-image.sh — Create a raw disk image with boot + data partitions
|
||||
# Phase 1: simple layout (boot + data). Phase 3 adds A/B system partitions.
|
||||
# create-disk-image.sh — Create a raw disk image with A/B system partitions
|
||||
#
|
||||
# Partition layout (GPT):
|
||||
# Part 1: EFI/Boot (256 MB, FAT32) — GRUB + grubenv + A/B boot logic
|
||||
# Part 2: System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz (active)
|
||||
# Part 3: System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz (passive)
|
||||
# Part 4: Data (remaining, ext4) — persistent K8s state
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
@@ -11,93 +16,165 @@ VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||
OS_NAME="kubesolo-os"
|
||||
|
||||
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.img"
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-2048}" # 2 GB default
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-4096}" # 4 GB default (larger for A/B)
|
||||
|
||||
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
||||
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
||||
GRUB_CFG="$PROJECT_ROOT/build/grub/grub.cfg"
|
||||
GRUB_ENV_DEFAULTS="$PROJECT_ROOT/build/grub/grub-env-defaults"
|
||||
|
||||
for f in "$VMLINUZ" "$INITRAMFS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f — run 'make initramfs'"; exit 1; }
|
||||
for f in "$VMLINUZ" "$INITRAMFS" "$GRUB_CFG" "$GRUB_ENV_DEFAULTS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f"; exit 1; }
|
||||
done
|
||||
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image..."
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image with A/B partitions..."
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
# Create sparse image
|
||||
dd if=/dev/zero of="$IMG_OUTPUT" bs=1M count=0 seek="$IMG_SIZE_MB" 2>/dev/null
|
||||
|
||||
# Partition: 256MB boot (ext4) + rest data (ext4)
|
||||
# Using sfdisk for scriptability
|
||||
# Partition (GPT):
|
||||
# Part 1: 256 MB EFI System Partition (FAT32)
|
||||
# Part 2: 512 MB System A (Linux filesystem)
|
||||
# Part 3: 512 MB System B (Linux filesystem)
|
||||
# Part 4: Remaining — Data (Linux filesystem)
|
||||
sfdisk "$IMG_OUTPUT" << EOF
|
||||
label: dos
|
||||
unit: sectors
|
||||
label: gpt
|
||||
|
||||
# Boot partition: 256 MB, bootable
|
||||
start=2048, size=524288, type=83, bootable
|
||||
# Data partition: remaining space
|
||||
start=526336, type=83
|
||||
# EFI/Boot partition: 256 MB
|
||||
start=2048, size=524288, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, name="EFI"
|
||||
# System A partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemA"
|
||||
# System B partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemB"
|
||||
# Data partition: remaining
|
||||
type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="Data"
|
||||
EOF
|
||||
|
||||
# Set up loop device
|
||||
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
||||
echo "==> Loop device: $LOOP"
|
||||
|
||||
MNT_EFI=$(mktemp -d)
|
||||
MNT_SYSA=$(mktemp -d)
|
||||
MNT_SYSB=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
|
||||
cleanup() {
|
||||
umount "${LOOP}p1" 2>/dev/null || true
|
||||
umount "${LOOP}p2" 2>/dev/null || true
|
||||
umount "$MNT_EFI" 2>/dev/null || true
|
||||
umount "$MNT_SYSA" 2>/dev/null || true
|
||||
umount "$MNT_SYSB" 2>/dev/null || true
|
||||
umount "$MNT_DATA" 2>/dev/null || true
|
||||
losetup -d "$LOOP" 2>/dev/null || true
|
||||
rm -rf "$MNT_BOOT" "$MNT_DATA" 2>/dev/null || true
|
||||
rm -rf "$MNT_EFI" "$MNT_SYSA" "$MNT_SYSB" "$MNT_DATA" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Format partitions
|
||||
mkfs.ext4 -q -L KSOLOBOOT "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p2"
|
||||
mkfs.vfat -F 32 -n KSOLOEFI "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLOA "${LOOP}p2"
|
||||
mkfs.ext4 -q -L KSOLOB "${LOOP}p3"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p4"
|
||||
|
||||
# Mount and populate boot partition
|
||||
MNT_BOOT=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
# Mount all partitions
|
||||
mount "${LOOP}p1" "$MNT_EFI"
|
||||
mount "${LOOP}p2" "$MNT_SYSA"
|
||||
mount "${LOOP}p3" "$MNT_SYSB"
|
||||
mount "${LOOP}p4" "$MNT_DATA"
|
||||
|
||||
mount "${LOOP}p1" "$MNT_BOOT"
|
||||
mount "${LOOP}p2" "$MNT_DATA"
|
||||
# --- EFI/Boot Partition ---
|
||||
echo " Installing GRUB..."
|
||||
mkdir -p "$MNT_EFI/EFI/BOOT"
|
||||
mkdir -p "$MNT_EFI/boot/grub"
|
||||
|
||||
# Install syslinux + kernel + initramfs to boot partition
|
||||
mkdir -p "$MNT_BOOT/boot/syslinux"
|
||||
cp "$VMLINUZ" "$MNT_BOOT/boot/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_BOOT/boot/kubesolo-os.gz"
|
||||
# Copy GRUB config
|
||||
cp "$GRUB_CFG" "$MNT_EFI/boot/grub/grub.cfg"
|
||||
|
||||
# Syslinux config for disk boot (extlinux)
|
||||
cat > "$MNT_BOOT/boot/syslinux/syslinux.cfg" << 'EOF'
|
||||
DEFAULT kubesolo
|
||||
TIMEOUT 30
|
||||
PROMPT 0
|
||||
# Create GRUB environment file from defaults
|
||||
if command -v grub-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub-editenv
|
||||
elif command -v grub2-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub2-editenv
|
||||
else
|
||||
GRUB_EDITENV=""
|
||||
fi
|
||||
|
||||
LABEL kubesolo
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND quiet kubesolo.data=LABEL=KSOLODATA
|
||||
GRUBENV_FILE="$MNT_EFI/boot/grub/grubenv"
|
||||
|
||||
LABEL kubesolo-debug
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
# Write a GRUB environment block by hand, for build hosts without grub-editenv.
#
# Arguments:
#   $1 - defaults file with key=value lines ('#' comments and blank lines
#        are skipped)
#   $2 - path of the grubenv file to create
# Returns: 0 on success, 1 if the defaults file is unreadable or the entries
#          do not fit into the block.
#
# The block is the signature line, the key=value lines, then '#' characters up
# to exactly 1024 bytes. The padding must be '#', not NUL bytes: GRUB's
# save_env reuses the '#' filler as free space when it rewrites a variable.
write_grubenv_fallback() {
    local defaults_file="$1" out_file="$2"
    local block_size=1024
    local tmp_file="$out_file.tmp"
    local key value used pad

    [ -r "$defaults_file" ] || {
        echo "ERROR: cannot read GRUB env defaults: $defaults_file" >&2
        return 1
    }

    {
        echo "# GRUB Environment Block"
        # "|| [ -n "$key" ]" keeps a final line that has no trailing newline.
        while IFS='=' read -r key value || [ -n "$key" ]; do
            case "$key" in
                '#'*|'') continue ;;
            esac
            echo "$key=$value"
        done < "$defaults_file"
    } > "$tmp_file"

    # Arithmetic expansion strips the padding some wc implementations print.
    used=$(( $(wc -c < "$tmp_file") ))
    if [ "$used" -gt "$block_size" ]; then
        echo "ERROR: GRUB env defaults exceed $block_size bytes" >&2
        rm -f "$tmp_file"
        return 1
    fi

    pad=$(( block_size - used ))
    if [ "$pad" -gt 0 ]; then
        printf '%*s' "$pad" '' | tr ' ' '#' >> "$tmp_file"
    fi

    mv "$tmp_file" "$out_file"
}

if [ -n "$GRUB_EDITENV" ]; then
    # Create grubenv with defaults
    "$GRUB_EDITENV" "$GRUBENV_FILE" create
    while IFS='=' read -r key value || [ -n "$key" ]; do
        # Skip comments and empty lines
        case "$key" in
            '#'*|'') continue ;;
        esac
        "$GRUB_EDITENV" "$GRUBENV_FILE" set "$key=$value"
    done < "$GRUB_ENV_DEFAULTS"
    echo " GRUB environment created with grub-editenv"
else
    # Fallback: write grubenv file manually (1024 bytes, padded with '#')
    echo " WARN: grub-editenv not found — writing grubenv manually"
    write_grubenv_fallback "$GRUB_ENV_DEFAULTS" "$GRUBENV_FILE"
fi
|
||||
|
||||
LABEL kubesolo-shell
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.shell console=ttyS0,115200n8
|
||||
EOF
|
||||
# Install GRUB EFI binary if available.
# Distros ship the tool as grub-mkimage or grub2-mkimage; use the first one
# found. A failed or missing tool only produces a warning, so the image build
# continues either way.
grub_mkimage_tool=""
for mkimage_candidate in grub-mkimage grub2-mkimage; do
    if command -v "$mkimage_candidate" >/dev/null 2>&1; then
        grub_mkimage_tool="$mkimage_candidate"
        break
    fi
done

if [ -z "$grub_mkimage_tool" ]; then
    echo " WARN: grub-mkimage not found — EFI boot image not created"
    echo " Install grub2-tools or use QEMU -kernel/-initrd flags"
elif ! "$grub_mkimage_tool" -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
    -p /boot/grub \
    part_gpt ext2 fat normal linux echo all_video test search \
    search_fs_uuid search_label configfile loadenv \
    2>/dev/null; then
    echo " WARN: $grub_mkimage_tool failed — use QEMU -bios flag"
fi
|
||||
|
||||
# Install extlinux bootloader
|
||||
if command -v extlinux >/dev/null 2>&1; then
|
||||
extlinux --install "$MNT_BOOT/boot/syslinux" 2>/dev/null || {
|
||||
echo "WARN: extlinux install failed — image may not be directly bootable"
|
||||
echo " Use with QEMU -kernel/-initrd flags instead"
|
||||
# For BIOS boot: install GRUB i386-pc modules if available.
# Tries grub-install first, then grub2-install. If neither exists the step is
# skipped silently; a failed install only prints a warning.
grub_install_tool=""
for install_candidate in grub-install grub2-install; do
    if command -v "$install_candidate" >/dev/null 2>&1; then
        grub_install_tool="$install_candidate"
        break
    fi
done

if [ -n "$grub_install_tool" ]; then
    if ! "$grub_install_tool" --target=i386-pc --boot-directory="$MNT_EFI/boot" \
        --no-floppy "$LOOP" 2>/dev/null; then
        echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
    fi
fi
|
||||
|
||||
# Prepare data partition structure
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network; do
|
||||
# --- System A Partition (active) ---
|
||||
echo " Populating System A (active)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSA/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSA/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSA/version"
|
||||
|
||||
# --- System B Partition (passive, initially same as A) ---
|
||||
echo " Populating System B (passive)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSB/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSB/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSB/version"
|
||||
|
||||
# --- Data Partition ---
|
||||
echo " Preparing data partition..."
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network images; do
|
||||
mkdir -p "$MNT_DATA/$dir"
|
||||
done
|
||||
|
||||
@@ -106,5 +183,8 @@ sync
|
||||
echo ""
|
||||
echo "==> Disk image created: $IMG_OUTPUT"
|
||||
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
||||
echo " Boot partition (KSOLOBOOT): kernel + initramfs"
|
||||
echo " Data partition (KSOLODATA): persistent K8s state"
|
||||
echo " Part 1 (KSOLOEFI): GRUB + A/B boot config"
|
||||
echo " Part 2 (KSOLOA): System A — kernel + initramfs (active)"
|
||||
echo " Part 3 (KSOLOB): System B — kernel + initramfs (passive)"
|
||||
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
||||
echo ""
|
||||
|
||||
@@ -73,6 +73,16 @@ else
|
||||
echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' to build)"
|
||||
fi
|
||||
|
||||
# Update agent binary (Go, built separately).
# Optional component: when the binary is absent from the cache directory, only
# a warning is printed and the script carries on.
UPDATE_BIN="$CACHE_DIR/kubesolo-update"
if [ ! -f "$UPDATE_BIN" ]; then
    echo " WARN: Update agent not found (run 'make build-update-agent' to build)"
else
    cp "$UPDATE_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
    chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
    echo " Installed update agent ($(du -h "$UPDATE_BIN" | cut -f1))"
fi
|
||||
|
||||
# --- 3. Kernel modules list ---
|
||||
cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user