Implement atomic OS updates via A/B partition scheme with automatic rollback.

GRUB bootloader manages slot selection with a 3-attempt boot counter that
auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
96 lines
2.9 KiB
INI
96 lines
2.9 KiB
INI
# KubeSolo OS — GRUB Configuration
# A/B partition boot with automatic rollback
#
# Partition layout:
#   (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
#   (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
#   (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
#   (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
#
# Environment variables (in grubenv):
#   active_slot  — "A" or "B" (which system partition to boot)
#   boot_counter — 3→2→1→0 (boot attempts left; decremented on each boot that
#                  follows one not marked successful; at 0 the slots are swapped)
#   boot_success — 0 or 1 (reset to 0 by this script on every boot; set to 1 by
#                  the health check post-boot)

# Boot the first menu entry automatically after a 3-second timeout.
set default=0
set timeout=3

# Load saved environment (active_slot, boot_counter, boot_success).
# load_env is only called when the environment block exists and is non-empty,
# so a missing or empty grubenv does not raise an error during startup; in that
# case the variables simply stay unset.
if [ -s $prefix/grubenv ]; then
  load_env
fi
# --- A/B Rollback Logic ---
# Evaluated every time this configuration is processed (i.e. on every boot).
#   boot_success = 1   -> the previous boot was marked successful; nothing to do.
#   boot_success != 1  -> the previous boot was not marked successful: consume
#                         one attempt from boot_counter, or swap slots once the
#                         counter has already reached 0.

if [ "${boot_success}" != "1" ]; then
  if [ "${boot_counter}" = "0" ]; then
    # Counter exhausted — roll back to the other slot and re-arm the counter.
    # Any active_slot value other than "A" is treated as slot B.
    if [ "${active_slot}" = "A" ]; then
      set active_slot=B
    else
      set active_slot=A
    fi
    set boot_counter=3
    # Persist both variables with a single save_env call. Written separately,
    # a power loss between the two writes would leave the new slot stored
    # together with boot_counter=0, and the next boot would immediately swap
    # back to the slot that just failed.
    save_env active_slot boot_counter
  else
    # Decrement the counter by hand (GRUB script has no arithmetic).
    if [ "${boot_counter}" = "3" ]; then
      set boot_counter=2
    elif [ "${boot_counter}" = "2" ]; then
      set boot_counter=1
    elif [ "${boot_counter}" = "1" ]; then
      set boot_counter=0
    else
      # Unset or unrecognised value (e.g. the variable is missing from
      # grubenv). Treat it as a fresh counter of 3 and consume one attempt;
      # otherwise the counter could never reach 0 and rollback would never
      # trigger.
      set boot_counter=2
    fi
    save_env boot_counter
  fi
fi
# Mark the boot attempt that is about to start as unconfirmed and write that
# to grubenv. It stays 0 until the post-boot health check sets it to 1.
set boot_success=0; save_env boot_success
# --- Resolve boot partition ---
# Select the partition GRUB loads files from ($root) and the label shown in
# the menu titles. Only the exact value "A" selects System A; every other
# value of active_slot, including unset, selects System B.
if [ "${active_slot}" != "A" ]; then
  set slot_label="System B"
  set root='(hd0,gpt3)'
else
  set slot_label="System A"
  set root='(hd0,gpt2)'
fi
# --- Menu Entries ---

# Default entry (index 0). Prints the slot label and the current boot_counter /
# boot_success values, then loads the kernel and initramfs. Both paths have no
# device prefix, so they are read from the partition currently held in $root.
menuentry "KubeSolo OS (${slot_label})" {
  echo "Booting KubeSolo OS from ${slot_label}..."
  echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"

  # kubesolo.data names the data partition by filesystem label
  # (presumably consumed by the initramfs — confirm against the init script).
  linux /vmlinuz kubesolo.data=LABEL=KSOLODATA quiet
  initrd /kubesolo-os.gz
}
# Debug variant of the default entry: same kernel, initramfs and data
# partition argument, but without "quiet", with the kubesolo.debug flag added,
# and with the kernel console on the first serial port (115200 baud, 8N1).
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
  echo "Booting KubeSolo OS (debug) from ${slot_label}..."

  linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
  initrd /kubesolo-os.gz
}
# Emergency entry: loads the same kernel and initramfs from $root, passing
# kubesolo.shell and a serial console. No kubesolo.data argument is given here.
# NOTE(review): kubesolo.shell presumably makes the initramfs drop to a shell
# instead of starting KubeSolo — confirm against the init script.
menuentry "KubeSolo OS — Emergency Shell" {
  echo "Booting to emergency shell..."

  linux /vmlinuz kubesolo.shell console=ttyS0,115200n8
  initrd /kubesolo-os.gz
}
# Manually boot the passive slot (for testing). Uses the same kernel arguments
# as the debug entry. active_slot in grubenv is not modified by this entry.
menuentry "KubeSolo OS — Boot Other Slot" {
  # The passive partition is addressed explicitly in the file paths instead of
  # via `set root=...`. Variables assigned inside a menu entry are global, so
  # overwriting $root here would leave every other entry loading from the
  # passive slot if this entry fails and GRUB returns to the menu.
  if [ "${active_slot}" = "A" ]; then
    set other_slot_dev='(hd0,gpt3)'
    echo "Booting from System B (passive)..."
  else
    set other_slot_dev='(hd0,gpt2)'
    echo "Booting from System A (passive)..."
  fi

  # NOTE(review): with an explicit device in the path, the BOOT_IMAGE= value
  # GRUB passes to the kernel should now include the device prefix — confirm
  # nothing in the initramfs parses it.
  linux ${other_slot_dev}/vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
  initrd ${other_slot_dev}/kubesolo-os.gz
}