Implement atomic OS updates via A/B partition scheme with automatic rollback.

GRUB bootloader manages slot selection with a 3-attempt boot counter that
auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
240 lines
5.8 KiB
Go
240 lines
5.8 KiB
Go
// Package grubenv provides read/write access to GRUB environment variables.
//
// GRUB stores its environment in a 1024-byte file (grubenv) located at
// /boot/grub/grubenv on the EFI partition. This package manipulates
// those variables for A/B boot slot management.
//
// Key variables:
//   - active_slot: "A" or "B"
//   - boot_counter: "3" (fresh) down to "0" (triggers rollback)
//   - boot_success: "0" (pending) or "1" (healthy boot confirmed)
|
|
package grubenv
|
|
|
|
import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"sort"
	"strings"
)
|
|
|
|
const (
	// DefaultGrubenvPath is the grubenv location that New falls back to when
	// it is given an empty path.
	DefaultGrubenvPath = "/boot/grub/grubenv"

	// SlotA is the slot name for system partition A.
	SlotA = "A"

	// SlotB is the slot name for system partition B.
	SlotB = "B"
)
|
|
|
|
// Env is a handle on a single GRUB environment file. All reads and writes
// performed through its methods target the file at path.
type Env struct {
	// path is the filesystem location of the grubenv file.
	path string
}
|
|
|
|
// New creates a new Env for the given grubenv file path.
|
|
func New(path string) *Env {
|
|
if path == "" {
|
|
path = DefaultGrubenvPath
|
|
}
|
|
return &Env{path: path}
|
|
}
|
|
|
|
// Get reads a variable from the GRUB environment.
|
|
func (e *Env) Get(key string) (string, error) {
|
|
vars, err := e.ReadAll()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
val, ok := vars[key]
|
|
if !ok {
|
|
return "", fmt.Errorf("grubenv: key %q not found", key)
|
|
}
|
|
return val, nil
|
|
}
|
|
|
|
// Set writes a variable to the GRUB environment.
|
|
func (e *Env) Set(key, value string) error {
|
|
editenv, err := findEditenv()
|
|
if err != nil {
|
|
return e.setManual(key, value)
|
|
}
|
|
|
|
cmd := exec.Command(editenv, e.path, "set", key+"="+value)
|
|
if output, err := cmd.CombinedOutput(); err != nil {
|
|
return fmt.Errorf("grub-editenv set %s=%s: %w\n%s", key, value, err, output)
|
|
}
|
|
|
|
slog.Debug("grubenv set", "key", key, "value", value)
|
|
return nil
|
|
}
|
|
|
|
// ReadAll reads all variables from the GRUB environment.
|
|
func (e *Env) ReadAll() (map[string]string, error) {
|
|
editenv, err := findEditenv()
|
|
if err != nil {
|
|
return e.readManual()
|
|
}
|
|
|
|
cmd := exec.Command(editenv, e.path, "list")
|
|
output, err := cmd.Output()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("grub-editenv list: %w", err)
|
|
}
|
|
|
|
return parseEnvOutput(string(output)), nil
|
|
}
|
|
|
|
// ActiveSlot returns the currently active boot slot ("A" or "B").
|
|
func (e *Env) ActiveSlot() (string, error) {
|
|
return e.Get("active_slot")
|
|
}
|
|
|
|
// PassiveSlot returns the currently passive boot slot.
|
|
func (e *Env) PassiveSlot() (string, error) {
|
|
active, err := e.ActiveSlot()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if active == SlotA {
|
|
return SlotB, nil
|
|
}
|
|
return SlotA, nil
|
|
}
|
|
|
|
// BootCounter returns the current boot counter value.
|
|
func (e *Env) BootCounter() (int, error) {
|
|
val, err := e.Get("boot_counter")
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
switch val {
|
|
case "0":
|
|
return 0, nil
|
|
case "1":
|
|
return 1, nil
|
|
case "2":
|
|
return 2, nil
|
|
case "3":
|
|
return 3, nil
|
|
default:
|
|
return -1, fmt.Errorf("grubenv: invalid boot_counter: %q", val)
|
|
}
|
|
}
|
|
|
|
// BootSuccess returns whether the last boot was marked successful.
|
|
func (e *Env) BootSuccess() (bool, error) {
|
|
val, err := e.Get("boot_success")
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return val == "1", nil
|
|
}
|
|
|
|
// MarkBootSuccess sets boot_success=1 and boot_counter=3.
|
|
// Called by the health check after a successful boot.
|
|
func (e *Env) MarkBootSuccess() error {
|
|
if err := e.Set("boot_success", "1"); err != nil {
|
|
return fmt.Errorf("setting boot_success: %w", err)
|
|
}
|
|
if err := e.Set("boot_counter", "3"); err != nil {
|
|
return fmt.Errorf("setting boot_counter: %w", err)
|
|
}
|
|
slog.Info("boot marked successful")
|
|
return nil
|
|
}
|
|
|
|
// ActivateSlot switches the active slot and resets the boot counter.
|
|
// Used after writing a new image to the passive partition.
|
|
func (e *Env) ActivateSlot(slot string) error {
|
|
if slot != SlotA && slot != SlotB {
|
|
return fmt.Errorf("invalid slot: %q (must be A or B)", slot)
|
|
}
|
|
if err := e.Set("active_slot", slot); err != nil {
|
|
return err
|
|
}
|
|
if err := e.Set("boot_counter", "3"); err != nil {
|
|
return err
|
|
}
|
|
if err := e.Set("boot_success", "0"); err != nil {
|
|
return err
|
|
}
|
|
slog.Info("activated slot", "slot", slot)
|
|
return nil
|
|
}
|
|
|
|
// ForceRollback switches to the other slot immediately.
|
|
func (e *Env) ForceRollback() error {
|
|
passive, err := e.PassiveSlot()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return e.ActivateSlot(passive)
|
|
}
|
|
|
|
// findEditenv locates the GRUB environment editor on PATH. It tries
// "grub-editenv" first and then "grub2-editenv", returning the resolved path
// of the first one found, or an error if neither is available.
func findEditenv() (string, error) {
	for _, name := range []string{"grub-editenv", "grub2-editenv"} {
		if resolved, err := exec.LookPath(name); err == nil {
			return resolved, nil
		}
	}
	return "", fmt.Errorf("grub-editenv not found")
}
|
|
|
|
// parseEnvOutput turns "key=value" text into a map.
//
// The input is processed line by line. Each line is trimmed of surrounding
// whitespace; blank lines and lines starting with '#' are skipped, as are
// lines without an '='. A line is split at its first '=', so values may
// themselves contain '='.
func parseEnvOutput(output string) map[string]string {
	parsed := map[string]string{}
	for _, raw := range strings.Split(output, "\n") {
		entry := strings.TrimSpace(raw)
		if entry == "" || entry[0] == '#' {
			continue
		}
		fields := strings.SplitN(entry, "=", 2)
		if len(fields) < 2 {
			continue
		}
		parsed[fields[0]] = fields[1]
	}
	return parsed
}
|
|
|
|
// setManual writes to grubenv without grub-editenv (fallback).
|
|
func (e *Env) setManual(key, value string) error {
|
|
vars, err := e.readManual()
|
|
if err != nil {
|
|
vars = make(map[string]string)
|
|
}
|
|
vars[key] = value
|
|
return e.writeManual(vars)
|
|
}
|
|
|
|
// readManual reads grubenv without grub-editenv.
|
|
func (e *Env) readManual() (map[string]string, error) {
|
|
data, err := os.ReadFile(e.path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("reading grubenv: %w", err)
|
|
}
|
|
return parseEnvOutput(string(data)), nil
|
|
}
|
|
|
|
// writeManual writes grubenv without grub-editenv.
|
|
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
|
|
func (e *Env) writeManual(vars map[string]string) error {
|
|
var sb strings.Builder
|
|
sb.WriteString("# GRUB Environment Block\n")
|
|
for k, v := range vars {
|
|
sb.WriteString(k + "=" + v + "\n")
|
|
}
|
|
|
|
content := sb.String()
|
|
if len(content) > 1024 {
|
|
return fmt.Errorf("grubenv content exceeds 1024 bytes")
|
|
}
|
|
|
|
// Pad to 1024 bytes with '#'
|
|
padding := 1024 - len(content)
|
|
content += strings.Repeat("#", padding)
|
|
|
|
return os.WriteFile(e.path, []byte(content), 0o644)
|
|
}
|