Implement atomic OS updates via A/B partition scheme with automatic rollback. GRUB bootloader manages slot selection with a 3-attempt boot counter that auto-rolls back on repeated health check failures. GRUB boot config: - A/B slot selection with boot_counter/boot_success env vars - Automatic rollback when counter reaches 0 (3 failed boots) - Debug, emergency shell, and manual slot-switch menu entries Disk image (refactored): - 4-partition GPT layout: EFI + System A + System B + Data - GRUB EFI/BIOS installation with graceful fallbacks - Both system partitions populated during image creation Update agent (Go, zero external deps): - pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback) - pkg/partition: find/mount/write system partitions by label - pkg/image: HTTP download with SHA256 verification - pkg/health: post-boot checks (containerd, API server, node Ready) - 6 CLI commands: check, apply, activate, rollback, healthcheck, status - 37 unit tests across all 4 packages Deployment: - K8s CronJob for automatic update checks (every 6 hours) - ConfigMap for update server URL - Health check Job for post-boot verification Build pipeline: - build-update-agent.sh compiles static Linux binary (~5.9 MB) - inject-kubesolo.sh includes update agent in initramfs - Makefile: build-update-agent, test-update-agent, test-update targets Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
181 lines
4.7 KiB
Go
181 lines
4.7 KiB
Go
// Package image handles downloading, verifying, and staging OS update images.
|
|
//
|
|
// Update images are distributed as pairs of files:
|
|
// - vmlinuz (kernel)
|
|
// - kubesolo-os.gz (initramfs)
|
|
//
|
|
// These are fetched from an HTTP(S) server that provides a metadata file
|
|
// (latest.json) describing available updates.
|
|
package image
|
|
|
|
import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"
)
|
|
|
|
// UpdateMetadata is the decoded form of the update server's metadata
// document. It names one release and says where its kernel and initramfs can
// be fetched and which SHA256 digest each download is expected to have.
type UpdateMetadata struct {
	// Version identifies the release being offered.
	Version string `json:"version"`

	// VmlinuzURL is where the kernel image is fetched from, and
	// VmlinuzSHA256 is its expected hex-encoded SHA256 digest.
	VmlinuzURL    string `json:"vmlinuz_url"`
	VmlinuzSHA256 string `json:"vmlinuz_sha256"`

	// InitramfsURL is where the initramfs is fetched from, and
	// InitramfsSHA256 is its expected hex-encoded SHA256 digest.
	InitramfsURL    string `json:"initramfs_url"`
	InitramfsSHA256 string `json:"initramfs_sha256"`

	// ReleaseNotes and ReleaseDate are optional; both are left out of the
	// encoded JSON when empty.
	ReleaseNotes string `json:"release_notes,omitempty"`
	ReleaseDate  string `json:"release_date,omitempty"`
}
|
|
|
|
// StagedImage records where the files of a downloaded update were placed on
// disk, together with the version they belong to.
type StagedImage struct {
	// VmlinuzPath is the location of the staged kernel file.
	VmlinuzPath string

	// InitramfsPath is the location of the staged initramfs file.
	InitramfsPath string

	// Version is the release version the staged files were downloaded for.
	Version string
}
|
|
|
|
// Client talks to an update server over HTTP and stages the files it
// downloads in a local directory.
type Client struct {
	// serverURL is the base URL of the update server; the metadata document
	// is requested relative to it.
	serverURL string

	// httpClient performs every request issued by this Client.
	httpClient *http.Client

	// stageDir is the directory that receives downloaded files.
	stageDir string
}
|
|
|
|
// NewClient creates a new update image client.
|
|
func NewClient(serverURL, stageDir string) *Client {
|
|
return &Client{
|
|
serverURL: serverURL,
|
|
httpClient: &http.Client{
|
|
Timeout: 5 * time.Minute,
|
|
},
|
|
stageDir: stageDir,
|
|
}
|
|
}
|
|
|
|
// CheckForUpdate fetches the latest update metadata from the server.
|
|
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
|
|
url := c.serverURL + "/latest.json"
|
|
slog.Info("checking for update", "url", url)
|
|
|
|
resp, err := c.httpClient.Get(url)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetching update metadata: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
|
|
}
|
|
|
|
var meta UpdateMetadata
|
|
if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
|
|
return nil, fmt.Errorf("parsing update metadata: %w", err)
|
|
}
|
|
|
|
if meta.Version == "" {
|
|
return nil, fmt.Errorf("update metadata missing version")
|
|
}
|
|
|
|
return &meta, nil
|
|
}
|
|
|
|
// Download fetches the update files and verifies their checksums.
|
|
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
|
|
if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
|
|
return nil, fmt.Errorf("creating stage dir: %w", err)
|
|
}
|
|
|
|
vmlinuzPath := filepath.Join(c.stageDir, "vmlinuz")
|
|
initramfsPath := filepath.Join(c.stageDir, "kubesolo-os.gz")
|
|
|
|
slog.Info("downloading vmlinuz", "url", meta.VmlinuzURL)
|
|
if err := c.downloadAndVerify(meta.VmlinuzURL, vmlinuzPath, meta.VmlinuzSHA256); err != nil {
|
|
return nil, fmt.Errorf("downloading vmlinuz: %w", err)
|
|
}
|
|
|
|
slog.Info("downloading initramfs", "url", meta.InitramfsURL)
|
|
if err := c.downloadAndVerify(meta.InitramfsURL, initramfsPath, meta.InitramfsSHA256); err != nil {
|
|
return nil, fmt.Errorf("downloading initramfs: %w", err)
|
|
}
|
|
|
|
return &StagedImage{
|
|
VmlinuzPath: vmlinuzPath,
|
|
InitramfsPath: initramfsPath,
|
|
Version: meta.Version,
|
|
}, nil
|
|
}
|
|
|
|
// Cleanup removes staged update files.
|
|
func (c *Client) Cleanup() error {
|
|
return os.RemoveAll(c.stageDir)
|
|
}
|
|
|
|
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
|
|
resp, err := c.httpClient.Get(url)
|
|
if err != nil {
|
|
return fmt.Errorf("downloading %s: %w", url, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
|
|
}
|
|
|
|
f, err := os.Create(dest)
|
|
if err != nil {
|
|
return fmt.Errorf("creating %s: %w", dest, err)
|
|
}
|
|
defer f.Close()
|
|
|
|
hasher := sha256.New()
|
|
writer := io.MultiWriter(f, hasher)
|
|
|
|
written, err := io.Copy(writer, resp.Body)
|
|
if err != nil {
|
|
os.Remove(dest)
|
|
return fmt.Errorf("writing %s: %w", dest, err)
|
|
}
|
|
|
|
if err := f.Close(); err != nil {
|
|
return fmt.Errorf("closing %s: %w", dest, err)
|
|
}
|
|
|
|
// Verify checksum
|
|
if expectedSHA256 != "" {
|
|
actual := hex.EncodeToString(hasher.Sum(nil))
|
|
if actual != expectedSHA256 {
|
|
os.Remove(dest)
|
|
return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
|
|
}
|
|
slog.Debug("checksum verified", "file", dest, "sha256", actual)
|
|
}
|
|
|
|
slog.Info("downloaded", "file", dest, "size", written)
|
|
return nil
|
|
}
|
|
|
|
// VerifyFile computes the SHA256 digest of the file at path and compares it
// with expectedSHA256, a hex-encoded digest. The comparison ignores letter
// case, so upper- and lower-case hex are both accepted.
//
// It returns nil when the digests match. Errors from opening or reading the
// file are returned unwrapped, so callers can keep inspecting them with
// helpers such as os.IsNotExist. A mismatch, including an empty
// expectedSHA256, yields an error naming both digests.
func VerifyFile(path, expectedSHA256 string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	hasher := sha256.New()
	if _, err := io.Copy(hasher, f); err != nil {
		return err
	}

	actual := hex.EncodeToString(hasher.Sum(nil))
	if !strings.EqualFold(actual, expectedSHA256) {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, actual)
	}
	return nil
}
|