feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic rollback. GRUB bootloader manages slot selection with a 3-attempt boot counter that auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
79
update/main.go
Normal file
79
update/main.go
Normal file
@@ -0,0 +1,79 @@
|
||||
// kubesolo-update is the atomic update agent for KubeSolo OS.
//
// It manages A/B partition updates with automatic rollback:
//
//	kubesolo-update check        Check for available updates
//	kubesolo-update apply        Download + write update to passive partition
//	kubesolo-update activate     Set passive partition as next boot target
//	kubesolo-update rollback     Force rollback to other partition
//	kubesolo-update healthcheck  Post-boot health verification
//	kubesolo-update status       Show current A/B slot and boot status
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
})))
|
||||
|
||||
if len(os.Args) < 2 {
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var err error
|
||||
switch os.Args[1] {
|
||||
case "check":
|
||||
err = cmd.Check(os.Args[2:])
|
||||
case "apply":
|
||||
err = cmd.Apply(os.Args[2:])
|
||||
case "activate":
|
||||
err = cmd.Activate(os.Args[2:])
|
||||
case "rollback":
|
||||
err = cmd.Rollback(os.Args[2:])
|
||||
case "healthcheck":
|
||||
err = cmd.Healthcheck(os.Args[2:])
|
||||
case "status":
|
||||
err = cmd.Status(os.Args[2:])
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown command: %s\n\n", os.Args[1])
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
slog.Error("command failed", "command", os.Args[1], "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// usage writes the command-line help text to standard error: the synopsis
// line, the list of subcommands, the supported options with their stated
// defaults, and a few example invocations. It only prints; it does not
// terminate the process, so callers decide the exit status.
func usage() {
|
||||
fmt.Fprintf(os.Stderr, `Usage: kubesolo-update <command> [options]
|
||||
|
||||
Commands:
|
||||
check Check for available updates
|
||||
apply Download and write update to passive partition
|
||||
activate Set passive partition as next boot target
|
||||
rollback Force rollback to other partition
|
||||
healthcheck Post-boot health verification (marks boot successful)
|
||||
status Show current A/B slot and boot status
|
||||
|
||||
Options:
|
||||
--server URL Update server URL (default: from /etc/kubesolo/update.conf)
|
||||
--grubenv PATH Path to grubenv file (default: /boot/grub/grubenv)
|
||||
--timeout SECS Health check timeout in seconds (default: 120)
|
||||
|
||||
Examples:
|
||||
kubesolo-update check --server https://updates.example.com
|
||||
|
||||
kubesolo-update apply --server https://updates.example.com
|
||||
|
||||
kubesolo-update healthcheck
|
||||
|
||||
kubesolo-update status
|
||||
`)
|
||||
}
|
||||
Reference in New Issue
Block a user