feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via an A/B partition scheme with automatic rollback. The GRUB bootloader manages slot selection with a 3-attempt boot counter and rolls back automatically after repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when the counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles a static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes the update agent in the initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
95
build/grub/grub.cfg
Normal file
95
build/grub/grub.cfg
Normal file
@@ -0,0 +1,95 @@
|
||||
# KubeSolo OS — GRUB Configuration
|
||||
# A/B partition boot with automatic rollback
|
||||
#
|
||||
# Partition layout:
|
||||
# (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
|
||||
# (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
|
||||
#
|
||||
# Environment variables (in grubenv):
|
||||
# active_slot — "A" or "B" (which partition to boot)
|
||||
# boot_counter — 3→2→1→0 (decremented on each failed boot)
|
||||
# boot_success — 0 or 1 (set to 1 by health check post-boot)
|
||||
|
||||
# Menu behaviour: pre-select the first entry and auto-boot it after 3 seconds.
set default=0
set timeout=3

# Read the saved variables (active_slot, boot_counter, boot_success)
# from the GRUB environment block before any of them is inspected.
load_env
|
||||
|
||||
# --- A/B Rollback Logic ---
# Evaluated on every boot, before the menu is shown.
#
#   boot_success = 1   previous boot was marked healthy; state is left alone.
#   otherwise          previous attempt counts as failed:
#     boot_counter = 0       attempts exhausted -> switch active_slot and
#                            re-arm the counter to 3
#     boot_counter = 3/2/1   consume one attempt
#     anything else          unset or corrupt counter -> treat it as a fresh
#                            counter of 3 and consume one attempt, so the
#                            countdown (and therefore rollback) still works

if [ "${boot_success}" != "1" ]; then
    if [ "${boot_counter}" = "0" ]; then
        # Counter exhausted — roll back to the other slot.
        # Any value other than "A" (including unset) flips to A.
        if [ "${active_slot}" = "A" ]; then
            set active_slot=B
        else
            set active_slot=A
        fi
        set boot_counter=3
        # Persist both variables in a single environment-block write so an
        # interruption cannot leave the new slot paired with a counter of 0
        # (which would swap straight back on the next boot).
        save_env active_slot boot_counter
    else
        # Decrement the counter. GRUB script has no arithmetic, so each
        # step is spelled out explicitly.
        if [ "${boot_counter}" = "3" ]; then
            set boot_counter=2
        elif [ "${boot_counter}" = "2" ]; then
            set boot_counter=1
        elif [ "${boot_counter}" = "1" ]; then
            set boot_counter=0
        else
            # Unset or unrecognised value: without this branch the counter
            # would never reach 0 and rollback could never trigger.
            set boot_counter=2
        fi
        save_env boot_counter
    fi
fi
|
||||
|
||||
# Every boot attempt starts out unverified: clear boot_success and persist it.
# The post-boot health check is what sets it back to 1; if that never happens,
# the next boot sees a value other than 1 and counts this attempt as failed.
set boot_success=0
save_env boot_success
|
||||
|
||||
# --- Resolve boot partition ---
# Translate active_slot into the GRUB root device and a label for the menu.
# Only the exact value "A" selects System A; every other value (including an
# unset variable) selects System B.
if [ "${active_slot}" != "A" ]; then
    set root='(hd0,gpt3)'
    set slot_label="System B"
else
    set root='(hd0,gpt2)'
    set slot_label="System A"
fi
|
||||
|
||||
# --- Menu Entries ---
|
||||
|
||||
# Default entry: boot the kernel and initramfs from the resolved slot.
# ${slot_label} in the title is expanded when the menu is built.
menuentry "KubeSolo OS (${slot_label})" {
    # Show which slot is booting and the current rollback state.
    echo "Booting KubeSolo OS from ${slot_label}..."
    echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"
    # Both paths are resolved against $root, i.e. the selected system partition.
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA quiet
    initrd /kubesolo-os.gz
}
|
||||
|
||||
# Debug entry: same slot as the default entry, but without "quiet" and with
# kubesolo.debug plus a serial console (ttyS0, 115200 8N1) on the command line.
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
    echo "Booting KubeSolo OS (debug) from ${slot_label}..."
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}
|
||||
|
||||
# Emergency entry: boots the resolved slot with kubesolo.shell on the kernel
# command line and a serial console; no kubesolo.data argument is passed.
menuentry "KubeSolo OS — Emergency Shell" {
    echo "Booting to emergency shell..."
    linux /vmlinuz kubesolo.shell console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}
|
||||
|
||||
# Manual one-off boot of the passive slot (for testing). Only $root is
# overridden inside this entry; active_slot itself is not modified or saved.
menuentry "KubeSolo OS — Boot Other Slot" {
    # Pick the partition opposite to active_slot. Anything other than "A"
    # is handled as slot B being active, so System A becomes the target.
    if [ "${active_slot}" != "A" ]; then
        set root='(hd0,gpt2)'
        echo "Booting from System A (passive)..."
    else
        set root='(hd0,gpt3)'
        echo "Booting from System B (passive)..."
    fi
    linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
    initrd /kubesolo-os.gz
}
|
||||
Reference in New Issue
Block a user