Files
kubesolo-os/update/pkg/grubenv/grubenv.go
Adolfo Delorenzo 8d25e1890e feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic
rollback. GRUB bootloader manages slot selection with a 3-attempt
boot counter that auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 11:12:46 -06:00

240 lines
5.8 KiB
Go

// Package grubenv provides read/write access to GRUB environment variables.
//
// GRUB stores its environment in a 1024-byte file (grubenv) located at
// /boot/grub/grubenv on the EFI partition. This package manipulates
// those variables for A/B boot slot management.
//
// Key variables:
// - active_slot: "A" or "B"
// - boot_counter: "3" (fresh) down to "0" (triggers rollback)
// - boot_success: "0" (pending) or "1" (healthy boot confirmed)
package grubenv
import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"sort"
	"strings"
)
// DefaultGrubenvPath is where the GRUB environment file lives when no
// explicit path is supplied.
const DefaultGrubenvPath = "/boot/grub/grubenv"

// Boot slot identifiers, one per system partition.
const (
	// SlotA names system partition A.
	SlotA = "A"
	// SlotB names system partition B.
	SlotB = "B"
)
// Env provides access to GRUB environment variables stored in a single
// grubenv file.
type Env struct {
	// path is the grubenv file that every read and write operates on.
	path string
}
// New returns an Env bound to the grubenv file at path.
// An empty path selects DefaultGrubenvPath.
func New(path string) *Env {
	env := &Env{path: path}
	if env.path == "" {
		env.path = DefaultGrubenvPath
	}
	return env
}
// Get looks up a single variable in the GRUB environment.
//
// It returns the error from ReadAll unchanged when the environment cannot be
// read, and a "key not found" error when the variable is absent.
func (e *Env) Get(key string) (string, error) {
	all, err := e.ReadAll()
	if err != nil {
		return "", err
	}
	if value, found := all[key]; found {
		return value, nil
	}
	return "", fmt.Errorf("grubenv: key %q not found", key)
}
// Set stores key=value in the GRUB environment.
//
// When a grub-editenv binary is found it is invoked as
// "<tool> <path> set key=value"; a failing invocation is reported together
// with the tool's combined output. When no binary is found the file is
// rewritten directly via setManual.
func (e *Env) Set(key, value string) error {
	tool, lookupErr := findEditenv()
	if lookupErr != nil {
		// No grub-editenv available: rewrite the file ourselves.
		return e.setManual(key, value)
	}

	assignment := key + "=" + value
	out, runErr := exec.Command(tool, e.path, "set", assignment).CombinedOutput()
	if runErr != nil {
		return fmt.Errorf("grub-editenv set %s=%s: %w\n%s", key, value, runErr, out)
	}

	slog.Debug("grubenv set", "key", key, "value", value)
	return nil
}
// ReadAll returns every variable in the GRUB environment as a key/value map.
//
// When a grub-editenv binary is found, the output of "<tool> <path> list" is
// parsed; otherwise the grubenv file is read and parsed directly.
func (e *Env) ReadAll() (map[string]string, error) {
	tool, lookupErr := findEditenv()
	if lookupErr != nil {
		return e.readManual()
	}

	listing, runErr := exec.Command(tool, e.path, "list").Output()
	if runErr != nil {
		return nil, fmt.Errorf("grub-editenv list: %w", runErr)
	}
	return parseEnvOutput(string(listing)), nil
}
// ActiveSlot reports the value of the active_slot variable, which names the
// boot slot currently in use ("A" or "B").
func (e *Env) ActiveSlot() (string, error) {
	slot, err := e.Get("active_slot")
	return slot, err
}
// PassiveSlot reports the slot that is not currently active.
//
// It returns SlotB when the active slot is SlotA, and SlotA for every other
// active_slot value. An error reading the active slot is passed through.
func (e *Env) PassiveSlot() (string, error) {
	current, err := e.ActiveSlot()
	if err != nil {
		return "", err
	}
	switch current {
	case SlotA:
		return SlotB, nil
	default:
		// Anything other than A (including B) maps to A.
		return SlotA, nil
	}
}
// BootCounter reports the boot_counter variable as an integer.
//
// Only the exact strings "0", "1", "2" and "3" are valid. On a read failure
// or any other stored value it returns -1 and an error.
func (e *Env) BootCounter() (int, error) {
	raw, err := e.Get("boot_counter")
	if err != nil {
		return -1, err
	}
	// A valid counter is exactly one ASCII digit in the range 0..3.
	if len(raw) == 1 && raw[0] >= '0' && raw[0] <= '3' {
		return int(raw[0] - '0'), nil
	}
	return -1, fmt.Errorf("grubenv: invalid boot_counter: %q", raw)
}
// BootSuccess reports whether boot_success is set to "1", i.e. whether the
// last boot was marked successful. Any other stored value yields false.
func (e *Env) BootSuccess() (bool, error) {
	flag, err := e.Get("boot_success")
	if err != nil {
		return false, err
	}
	confirmed := flag == "1"
	return confirmed, nil
}
// MarkBootSuccess records a healthy boot by writing boot_success=1 and then
// boot_counter=3. Called by the health check after a successful boot.
//
// The writes happen in that order; the first failure aborts and is returned
// wrapped with the name of the variable being set.
func (e *Env) MarkBootSuccess() error {
	updates := [...]struct{ key, value string }{
		{"boot_success", "1"},
		{"boot_counter", "3"},
	}
	for _, u := range updates {
		if err := e.Set(u.key, u.value); err != nil {
			return fmt.Errorf("setting %s: %w", u.key, err)
		}
	}
	slog.Info("boot marked successful")
	return nil
}
// ActivateSlot makes slot the active boot slot and re-arms boot verification
// by writing active_slot, then boot_counter="3", then boot_success="0".
// Used after writing a new image to the passive partition.
//
// slot must be SlotA or SlotB; any other value is rejected before anything is
// written. The three variables are written one at a time, so a failure part
// way through leaves the earlier writes in place. The returned error names
// the variable whose write failed and wraps the underlying error.
func (e *Env) ActivateSlot(slot string) error {
	if slot != SlotA && slot != SlotB {
		return fmt.Errorf("invalid slot: %q (must be A or B)", slot)
	}
	if err := e.Set("active_slot", slot); err != nil {
		return fmt.Errorf("setting active_slot: %w", err)
	}
	if err := e.Set("boot_counter", "3"); err != nil {
		return fmt.Errorf("setting boot_counter: %w", err)
	}
	if err := e.Set("boot_success", "0"); err != nil {
		return fmt.Errorf("setting boot_success: %w", err)
	}
	slog.Info("activated slot", "slot", slot)
	return nil
}
// ForceRollback activates the currently passive slot right away, using
// ActivateSlot, so the boot counter and success flag are reset as well.
func (e *Env) ForceRollback() error {
	target, err := e.PassiveSlot()
	if err == nil {
		err = e.ActivateSlot(target)
	}
	return err
}
// findEditenv locates the GRUB environment editor on PATH, preferring
// "grub-editenv" over "grub2-editenv". It returns an error when neither
// binary can be found.
func findEditenv() (string, error) {
	for _, name := range []string{"grub-editenv", "grub2-editenv"} {
		resolved, err := exec.LookPath(name)
		if err == nil {
			return resolved, nil
		}
	}
	return "", fmt.Errorf("grub-editenv not found")
}
// parseEnvOutput turns newline-separated "key=value" text into a map.
//
// Each line is trimmed of surrounding whitespace. Blank lines, lines starting
// with '#', and lines without '=' are ignored. The split happens at the first
// '=', so values may themselves contain '='.
func parseEnvOutput(output string) map[string]string {
	result := make(map[string]string)
	for _, raw := range strings.Split(output, "\n") {
		entry := strings.TrimSpace(raw)
		if entry == "" || strings.HasPrefix(entry, "#") {
			continue
		}
		if name, value, found := strings.Cut(entry, "="); found {
			result[name] = value
		}
	}
	return result
}
// setManual updates a single variable by rewriting the grubenv file directly,
// as the fallback for systems without grub-editenv.
//
// A missing file is treated as an empty environment so the first write can
// create it. Any other read failure is returned to the caller: continuing
// with an empty map would rewrite the file containing only this key and
// silently drop every other variable.
func (e *Env) setManual(key, value string) error {
	vars, err := e.readManual()
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			return err
		}
		vars = make(map[string]string)
	}
	vars[key] = value
	return e.writeManual(vars)
}
// readManual loads the grubenv file from disk and parses it without using
// grub-editenv. A read failure is wrapped and returned.
func (e *Env) readManual() (map[string]string, error) {
	raw, err := os.ReadFile(e.path)
	if err == nil {
		return parseEnvOutput(string(raw)), nil
	}
	return nil, fmt.Errorf("reading grubenv: %w", err)
}
// writeManual writes grubenv without grub-editenv.
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
//
// The file consists of the "# GRUB Environment Block" header line, one
// "key=value" line per variable, and '#' padding up to 1024 bytes. Variables
// are written in sorted key order so that the same map always produces the
// same file; map iteration order is otherwise random.
//
// It returns an error, without touching the file, when the header plus
// variables exceed 1024 bytes; otherwise it returns the result of the write.
func (e *Env) writeManual(vars map[string]string) error {
	const blockSize = 1024

	keys := make([]string, 0, len(vars))
	for k := range vars {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var sb strings.Builder
	sb.Grow(blockSize)
	sb.WriteString("# GRUB Environment Block\n")
	for _, k := range keys {
		sb.WriteString(k)
		sb.WriteByte('=')
		sb.WriteString(vars[k])
		sb.WriteByte('\n')
	}
	if sb.Len() > blockSize {
		return fmt.Errorf("grubenv content exceeds 1024 bytes")
	}

	// Pad to exactly blockSize bytes with '#'.
	sb.WriteString(strings.Repeat("#", blockSize-sb.Len()))
	return os.WriteFile(e.path, []byte(sb.String()), 0o644)
}