feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via an A/B partition scheme with automatic rollback. The GRUB bootloader manages slot selection with a 3-attempt boot counter that rolls back automatically on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when the counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles a static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes the update agent in the initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
180
update/pkg/image/image.go
Normal file
180
update/pkg/image/image.go
Normal file
@@ -0,0 +1,180 @@
|
||||
// Package image handles downloading, verifying, and staging OS update images.
//
// Update images are distributed as pairs of files:
//   - vmlinuz (kernel)
//   - kubesolo-os.gz (initramfs)
//
// These are fetched from an HTTP(S) server that provides a metadata file
// (latest.json) describing available updates.
|
||||
package image
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// UpdateMetadata describes an available update from the update server.
//
// It is the decoded form of the latest.json document fetched by
// Client.CheckForUpdate; the struct tags give the JSON key of each field.
type UpdateMetadata struct {
	// Version identifies the release. CheckForUpdate rejects metadata in
	// which it is empty.
	Version string `json:"version"`

	// VmlinuzURL is the URL the kernel image is downloaded from.
	VmlinuzURL string `json:"vmlinuz_url"`
	// VmlinuzSHA256 is the hex-encoded SHA-256 digest expected for the
	// kernel image. When empty, the download is not checksum-verified.
	VmlinuzSHA256 string `json:"vmlinuz_sha256"`

	// InitramfsURL is the URL the initramfs image is downloaded from.
	InitramfsURL string `json:"initramfs_url"`
	// InitramfsSHA256 is the hex-encoded SHA-256 digest expected for the
	// initramfs image. When empty, the download is not checksum-verified.
	InitramfsSHA256 string `json:"initramfs_sha256"`

	// ReleaseNotes is optional free-form text; it is omitted from encoded
	// JSON when empty.
	ReleaseNotes string `json:"release_notes,omitempty"`
	// ReleaseDate is an optional string that this package neither parses
	// nor validates; it is omitted from encoded JSON when empty.
	ReleaseDate string `json:"release_date,omitempty"`
}
|
||||
|
||||
// StagedImage represents downloaded and verified update files.
//
// Client.Download returns one after both files have been written to the
// stage directory.
type StagedImage struct {
	// VmlinuzPath is the path of the staged kernel image.
	VmlinuzPath string
	// InitramfsPath is the path of the staged initramfs image.
	InitramfsPath string
	// Version is the version string copied from the update metadata.
	Version string
}
|
||||
|
||||
// Client handles communication with the update server.
//
// Construct one with NewClient; the fields are unexported.
type Client struct {
	// serverURL is the base URL of the update server; CheckForUpdate
	// requests "/latest.json" beneath it.
	serverURL string
	// httpClient performs every request made by this Client.
	httpClient *http.Client
	// stageDir is the directory downloaded files are written to. Download
	// creates it if needed and Cleanup removes it recursively.
	stageDir string
}
|
||||
|
||||
// NewClient creates a new update image client.
|
||||
func NewClient(serverURL, stageDir string) *Client {
|
||||
return &Client{
|
||||
serverURL: serverURL,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 5 * time.Minute,
|
||||
},
|
||||
stageDir: stageDir,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckForUpdate fetches the latest update metadata from the server.
|
||||
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
|
||||
url := c.serverURL + "/latest.json"
|
||||
slog.Info("checking for update", "url", url)
|
||||
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetching update metadata: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var meta UpdateMetadata
|
||||
if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
|
||||
return nil, fmt.Errorf("parsing update metadata: %w", err)
|
||||
}
|
||||
|
||||
if meta.Version == "" {
|
||||
return nil, fmt.Errorf("update metadata missing version")
|
||||
}
|
||||
|
||||
return &meta, nil
|
||||
}
|
||||
|
||||
// Download fetches the update files and verifies their checksums.
|
||||
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
|
||||
if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("creating stage dir: %w", err)
|
||||
}
|
||||
|
||||
vmlinuzPath := filepath.Join(c.stageDir, "vmlinuz")
|
||||
initramfsPath := filepath.Join(c.stageDir, "kubesolo-os.gz")
|
||||
|
||||
slog.Info("downloading vmlinuz", "url", meta.VmlinuzURL)
|
||||
if err := c.downloadAndVerify(meta.VmlinuzURL, vmlinuzPath, meta.VmlinuzSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading vmlinuz: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("downloading initramfs", "url", meta.InitramfsURL)
|
||||
if err := c.downloadAndVerify(meta.InitramfsURL, initramfsPath, meta.InitramfsSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading initramfs: %w", err)
|
||||
}
|
||||
|
||||
return &StagedImage{
|
||||
VmlinuzPath: vmlinuzPath,
|
||||
InitramfsPath: initramfsPath,
|
||||
Version: meta.Version,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Cleanup removes staged update files.
|
||||
func (c *Client) Cleanup() error {
|
||||
return os.RemoveAll(c.stageDir)
|
||||
}
|
||||
|
||||
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
|
||||
}
|
||||
|
||||
f, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating %s: %w", dest, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
hasher := sha256.New()
|
||||
writer := io.MultiWriter(f, hasher)
|
||||
|
||||
written, err := io.Copy(writer, resp.Body)
|
||||
if err != nil {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("writing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return fmt.Errorf("closing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
// Verify checksum
|
||||
if expectedSHA256 != "" {
|
||||
actual := hex.EncodeToString(hasher.Sum(nil))
|
||||
if actual != expectedSHA256 {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
|
||||
}
|
||||
slog.Debug("checksum verified", "file", dest, "sha256", actual)
|
||||
}
|
||||
|
||||
slog.Info("downloaded", "file", dest, "size", written)
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyFile checks the SHA256 checksum of an existing file.
|
||||
func VerifyFile(path, expectedSHA256 string) error {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
hasher := sha256.New()
|
||||
if _, err := io.Copy(hasher, f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
actual := hex.EncodeToString(hasher.Sum(nil))
|
||||
if actual != expectedSHA256 {
|
||||
return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, actual)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user