Files
kubesolo-os/update/pkg/image/image.go
Adolfo Delorenzo 8d25e1890e feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic
rollback. GRUB bootloader manages slot selection with a 3-attempt
boot counter that auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 11:12:46 -06:00

181 lines
4.7 KiB
Go

// Package image handles downloading, verifying, and staging OS update images.
//
// Update images are distributed as pairs of files:
// - vmlinuz (kernel)
// - kubesolo-os.gz (initramfs)
//
// These are fetched from an HTTP(S) server that provides a metadata file
// (latest.json) describing available updates.
package image
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"path/filepath"
"time"
)
// UpdateMetadata is the JSON document published by the update server to
// describe an available release. Client.CheckForUpdate decodes it from
// latest.json and Client.Download consumes the URLs and digests it carries.
type UpdateMetadata struct {
	// Version identifies the release. CheckForUpdate rejects metadata in
	// which this field is empty.
	Version string `json:"version"`

	// VmlinuzURL is the address the kernel image is fetched from.
	VmlinuzURL string `json:"vmlinuz_url"`

	// VmlinuzSHA256 is the hex-encoded SHA256 digest the downloaded kernel
	// is compared against. When empty, the download is not verified.
	VmlinuzSHA256 string `json:"vmlinuz_sha256"`

	// InitramfsURL is the address the initramfs image is fetched from.
	InitramfsURL string `json:"initramfs_url"`

	// InitramfsSHA256 is the hex-encoded SHA256 digest the downloaded
	// initramfs is compared against. When empty, the download is not verified.
	InitramfsSHA256 string `json:"initramfs_sha256"`

	// ReleaseNotes is optional text supplied by the server; it is omitted
	// from encoded JSON when empty.
	ReleaseNotes string `json:"release_notes,omitempty"`

	// ReleaseDate is optional and kept as the raw string from the server;
	// nothing in this file parses it. It is omitted from encoded JSON when
	// empty.
	ReleaseDate string `json:"release_date,omitempty"`
}
// StagedImage records where a downloaded update pair lives on disk.
// Client.Download returns one once both files have been fetched.
type StagedImage struct {
	// VmlinuzPath is the location of the staged kernel file.
	VmlinuzPath string

	// InitramfsPath is the location of the staged initramfs file.
	InitramfsPath string

	// Version is the release version, copied from the UpdateMetadata the
	// files were downloaded for.
	Version string
}
// Client talks to the update server over HTTP and stages downloaded files in
// a local directory. Use NewClient to construct one.
type Client struct {
	// serverURL is the base address of the update server; CheckForUpdate
	// requests latest.json beneath it.
	serverURL string

	// httpClient performs every request issued by this Client.
	httpClient *http.Client

	// stageDir is the directory Download writes into and Cleanup removes.
	stageDir string
}
// NewClient returns a Client bound to the given update server and staging
// directory. The Client gets its own http.Client configured with a 5 minute
// Timeout. Neither argument is validated here.
func NewClient(serverURL, stageDir string) *Client {
	transport := &http.Client{Timeout: 5 * time.Minute}

	client := new(Client)
	client.serverURL = serverURL
	client.httpClient = transport
	client.stageDir = stageDir
	return client
}
// CheckForUpdate fetches and decodes latest.json from the update server.
//
// Trailing slashes on the configured server URL are ignored, so
// "https://host/updates" and "https://host/updates/" request the same
// document. The response body is read through a 1 MiB cap so that a
// misbehaving server cannot make the agent consume an unbounded amount of
// data while parsing metadata.
//
// An error is returned when the request fails, when the server answers with
// anything other than 200 OK, when the body cannot be decoded into
// UpdateMetadata, or when the decoded metadata has an empty version.
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
	// Upper bound on the metadata document; latest.json is expected to be a
	// small JSON object.
	const maxMetadataBytes = 1 << 20

	// Strip trailing slashes so the joined URL never contains "//latest.json".
	base := c.serverURL
	for len(base) > 0 && base[len(base)-1] == '/' {
		base = base[:len(base)-1]
	}
	url := base + "/latest.json"
	slog.Info("checking for update", "url", url)

	resp, err := c.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("fetching update metadata: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
	}

	var meta UpdateMetadata
	body := io.LimitReader(resp.Body, maxMetadataBytes)
	if err := json.NewDecoder(body).Decode(&meta); err != nil {
		return nil, fmt.Errorf("parsing update metadata: %w", err)
	}

	// A document without a version cannot identify a release.
	if meta.Version == "" {
		return nil, fmt.Errorf("update metadata missing version")
	}
	return &meta, nil
}
// Download fetches the kernel and initramfs described by meta into the
// staging directory and verifies each against its SHA256 digest.
//
// The staging directory is created if needed. The files are written as
// "vmlinuz" and "kubesolo-os.gz" inside it, kernel first. On success the
// returned StagedImage carries both paths and meta.Version.
//
// If either download fails, both staged files are removed before the error
// is returned, so the staging directory never holds a partial pair or a new
// kernel next to an initramfs left over from an earlier run. A nil meta is
// reported as an error rather than dereferenced.
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
	if meta == nil {
		return nil, fmt.Errorf("update metadata is nil")
	}
	if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
		return nil, fmt.Errorf("creating stage dir: %w", err)
	}

	vmlinuzPath := filepath.Join(c.stageDir, "vmlinuz")
	initramfsPath := filepath.Join(c.stageDir, "kubesolo-os.gz")

	// discardStaged drops whatever is staged. Removal is best-effort: the
	// download error is what the caller needs to see, and a missing file is
	// already the desired state.
	discardStaged := func() {
		_ = os.Remove(vmlinuzPath)
		_ = os.Remove(initramfsPath)
	}

	slog.Info("downloading vmlinuz", "url", meta.VmlinuzURL)
	if err := c.downloadAndVerify(meta.VmlinuzURL, vmlinuzPath, meta.VmlinuzSHA256); err != nil {
		discardStaged()
		return nil, fmt.Errorf("downloading vmlinuz: %w", err)
	}

	slog.Info("downloading initramfs", "url", meta.InitramfsURL)
	if err := c.downloadAndVerify(meta.InitramfsURL, initramfsPath, meta.InitramfsSHA256); err != nil {
		discardStaged()
		return nil, fmt.Errorf("downloading initramfs: %w", err)
	}

	return &StagedImage{
		VmlinuzPath:   vmlinuzPath,
		InitramfsPath: initramfsPath,
		Version:       meta.Version,
	}, nil
}
// Cleanup deletes the staging directory together with everything inside it.
// Any error reported by os.RemoveAll is handed back unchanged.
func (c *Client) Cleanup() error {
	if err := os.RemoveAll(c.stageDir); err != nil {
		return err
	}
	return nil
}
// downloadAndVerify streams url into the file at dest, hashing the bytes as
// they are written, and then checks the SHA256 digest against expectedSHA256.
//
// expectedSHA256 is a hex-encoded digest; upper- and lower-case hex digits
// are treated as equal. When it is empty the checksum is not verified and a
// warning is logged instead.
//
// dest is created (or truncated) only after the server has answered 200 OK.
// If writing, closing, or verification fails, dest is removed so that no
// partial or unverified file is left behind.
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
	resp, err := c.httpClient.Get(url)
	if err != nil {
		return fmt.Errorf("downloading %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
	}

	f, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("creating %s: %w", dest, err)
	}

	// Feed the same bytes to the file and the hasher in a single pass.
	hasher := sha256.New()
	written, copyErr := io.Copy(io.MultiWriter(f, hasher), resp.Body)

	// Close exactly once, and before any removal: removing an open file is
	// not portable, and a failed close can mean the data never reached disk.
	closeErr := f.Close()
	if copyErr != nil {
		_ = os.Remove(dest) // best-effort cleanup; the copy error is what matters
		return fmt.Errorf("writing %s: %w", dest, copyErr)
	}
	if closeErr != nil {
		_ = os.Remove(dest) // best-effort cleanup; the close error is what matters
		return fmt.Errorf("closing %s: %w", dest, closeErr)
	}

	if expectedSHA256 == "" {
		slog.Warn("no checksum provided, skipping verification", "file", dest)
	} else {
		actual := hex.EncodeToString(hasher.Sum(nil))

		// Round-trip the expected digest through hex so the comparison does
		// not depend on letter case. A value that is not valid hex is left
		// as-is and therefore reported as a mismatch below.
		want := expectedSHA256
		if raw, err := hex.DecodeString(want); err == nil {
			want = hex.EncodeToString(raw)
		}
		if actual != want {
			_ = os.Remove(dest) // never leave an unverified image staged
			return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
		}
		slog.Debug("checksum verified", "file", dest, "sha256", actual)
	}

	slog.Info("downloaded", "file", dest, "size", written)
	return nil
}
// VerifyFile hashes the file at path with SHA256 and compares the result with
// expectedSHA256.
//
// expectedSHA256 is a hex-encoded digest; upper- and lower-case hex digits
// are treated as equal. It returns nil when the digests match and a
// "checksum mismatch" error when they do not. Errors from opening or reading
// the file are returned unwrapped so callers can keep inspecting them
// directly (for example with os.IsNotExist).
func VerifyFile(path, expectedSHA256 string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	hasher := sha256.New()
	if _, err := io.Copy(hasher, f); err != nil {
		return err
	}
	actual := hex.EncodeToString(hasher.Sum(nil))

	// Round-trip the expected digest through hex so the comparison does not
	// depend on letter case. A value that is not valid hex is left as-is and
	// therefore reported as a mismatch.
	want := expectedSHA256
	if raw, err := hex.DecodeString(want); err == nil {
		want = hex.EncodeToString(raw)
	}
	if actual != want {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, actual)
	}
	return nil
}