feat: add A/B partition updates with GRUB and Go update agent (Phase 3)

Implement atomic OS updates via A/B partition scheme with automatic
rollback. GRUB bootloader manages slot selection with a 3-attempt
boot counter that auto-rolls back on repeated health check failures.

GRUB boot config:
- A/B slot selection with boot_counter/boot_success env vars
- Automatic rollback when counter reaches 0 (3 failed boots)
- Debug, emergency shell, and manual slot-switch menu entries

Disk image (refactored):
- 4-partition GPT layout: EFI + System A + System B + Data
- GRUB EFI/BIOS installation with graceful fallbacks
- Both system partitions populated during image creation

Update agent (Go, zero external deps):
- pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback)
- pkg/partition: find/mount/write system partitions by label
- pkg/image: HTTP download with SHA256 verification
- pkg/health: post-boot checks (containerd, API server, node Ready)
- 6 CLI commands: check, apply, activate, rollback, healthcheck, status
- 37 unit tests across all 4 packages

Deployment:
- K8s CronJob for automatic update checks (every 6 hours)
- ConfigMap for update server URL
- Health check Job for post-boot verification

Build pipeline:
- build-update-agent.sh compiles static Linux binary (~5.9 MB)
- inject-kubesolo.sh includes update agent in initramfs
- Makefile: build-update-agent, test-update-agent, test-update targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 11:12:46 -06:00
parent d900fa920e
commit 8d25e1890e
25 changed files with 2807 additions and 74 deletions

180
update/pkg/image/image.go Normal file
View File

@@ -0,0 +1,180 @@
// Package image handles downloading, verifying, and staging OS update images.
//
// Update images are distributed as pairs of files:
// - vmlinuz (kernel)
// - kubesolo-os.gz (initramfs)
//
// These are fetched from an HTTP(S) server that provides a metadata file
// (latest.json) describing available updates.
package image
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"path/filepath"
"time"
)
// UpdateMetadata is the JSON document published by the update server to
// describe an available release.
type UpdateMetadata struct {
	// Version identifies the release. CheckForUpdate rejects metadata whose
	// version is empty.
	Version string `json:"version"`

	// VmlinuzURL is where the kernel image is downloaded from.
	VmlinuzURL string `json:"vmlinuz_url"`
	// VmlinuzSHA256 is the expected hex-encoded SHA-256 of the kernel image.
	VmlinuzSHA256 string `json:"vmlinuz_sha256"`

	// InitramfsURL is where the initramfs is downloaded from.
	InitramfsURL string `json:"initramfs_url"`
	// InitramfsSHA256 is the expected hex-encoded SHA-256 of the initramfs.
	InitramfsSHA256 string `json:"initramfs_sha256"`

	// ReleaseNotes and ReleaseDate are optional and omitted from the encoded
	// JSON when empty.
	ReleaseNotes string `json:"release_notes,omitempty"`
	ReleaseDate  string `json:"release_date,omitempty"`
}
// StagedImage is the result of a successful Download: the on-disk locations
// of the fetched files and the version they belong to.
type StagedImage struct {
	// VmlinuzPath is the path of the staged kernel image.
	VmlinuzPath string
	// InitramfsPath is the path of the staged initramfs.
	InitramfsPath string
	// Version is copied from the metadata the files were downloaded for.
	Version string
}
// Client talks to the update server and stages downloaded files on disk.
type Client struct {
	// serverURL is the base URL of the update server; "/latest.json" is
	// appended to it to locate the metadata document.
	serverURL string
	// httpClient performs every request issued by this Client.
	httpClient *http.Client
	// stageDir is the directory downloads are written to and that Cleanup
	// removes.
	stageDir string
}
// NewClient returns a Client that fetches updates from serverURL and stages
// downloaded files under stageDir. Its HTTP client is configured with a
// five-minute timeout.
func NewClient(serverURL, stageDir string) *Client {
	client := new(Client)
	client.serverURL = serverURL
	client.stageDir = stageDir
	client.httpClient = &http.Client{Timeout: 5 * time.Minute}
	return client
}
// CheckForUpdate fetches <serverURL>/latest.json and decodes it into an
// UpdateMetadata.
//
// Trailing slashes on the configured server URL are ignored, so
// "https://host/updates" and "https://host/updates/" request the same
// document. An error is returned when the request fails, the server answers
// with anything other than 200 OK, the body cannot be decoded as JSON, or the
// decoded metadata has an empty version.
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
	// Strip trailing slashes so the join below never produces
	// "//latest.json", which some static file hosts treat as a different
	// (missing) object.
	base := c.serverURL
	for len(base) > 0 && base[len(base)-1] == '/' {
		base = base[:len(base)-1]
	}
	url := base + "/latest.json"
	slog.Info("checking for update", "url", url)

	resp, err := c.httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("fetching update metadata: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
	}

	var meta UpdateMetadata
	if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
		return nil, fmt.Errorf("parsing update metadata: %w", err)
	}
	// A document without a version cannot be acted on by the caller.
	if meta.Version == "" {
		return nil, fmt.Errorf("update metadata missing version")
	}
	return &meta, nil
}
// Download stages the kernel and initramfs described by meta.
//
// The stage directory is created if needed, then the kernel is written to
// <stageDir>/vmlinuz and the initramfs to <stageDir>/kubesolo-os.gz, each
// checked against its checksum from meta. The first failure aborts the
// download and is returned; no StagedImage is returned in that case.
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
	if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
		return nil, fmt.Errorf("creating stage dir: %w", err)
	}

	staged := &StagedImage{
		VmlinuzPath:   filepath.Join(c.stageDir, "vmlinuz"),
		InitramfsPath: filepath.Join(c.stageDir, "kubesolo-os.gz"),
		Version:       meta.Version,
	}

	// Kernel first, then initramfs. The action text is used both as the log
	// message and as the prefix of the returned error.
	steps := []struct {
		action, url, dest, sum string
	}{
		{"downloading vmlinuz", meta.VmlinuzURL, staged.VmlinuzPath, meta.VmlinuzSHA256},
		{"downloading initramfs", meta.InitramfsURL, staged.InitramfsPath, meta.InitramfsSHA256},
	}
	for _, step := range steps {
		slog.Info(step.action, "url", step.url)
		if err := c.downloadAndVerify(step.url, step.dest, step.sum); err != nil {
			return nil, fmt.Errorf("%s: %w", step.action, err)
		}
	}
	return staged, nil
}
// Cleanup deletes the stage directory together with everything inside it and
// reports any error from the removal.
func (c *Client) Cleanup() error {
	err := os.RemoveAll(c.stageDir)
	return err
}
// downloadAndVerify streams url into dest while hashing the bytes, then
// compares the hex-encoded SHA-256 digest with expectedSHA256.
//
// An empty expectedSHA256 skips verification; a warning is logged so the gap
// is visible. Once dest has been created, any failure (a write error, a close
// error, or a checksum mismatch) removes the file again, so a partial or
// unverified image is never left behind in the stage directory.
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
	resp, err := c.httpClient.Get(url)
	if err != nil {
		return fmt.Errorf("downloading %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
	}

	f, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("creating %s: %w", dest, err)
	}

	// Write and hash in a single pass over the response body.
	hasher := sha256.New()
	written, copyErr := io.Copy(io.MultiWriter(f, hasher), resp.Body)

	// Close exactly once, and before any removal, so the handle is released
	// on every path and a close failure is never masked by a deferred close.
	closeErr := f.Close()

	if copyErr != nil {
		_ = os.Remove(dest) // best-effort cleanup; the copy error is what gets reported
		return fmt.Errorf("writing %s: %w", dest, copyErr)
	}
	if closeErr != nil {
		_ = os.Remove(dest) // buffered data may not have reached disk; do not keep the file
		return fmt.Errorf("closing %s: %w", dest, closeErr)
	}

	if expectedSHA256 == "" {
		slog.Warn("no checksum provided, skipping verification", "file", dest)
	} else {
		actual := hex.EncodeToString(hasher.Sum(nil))
		if actual != expectedSHA256 {
			_ = os.Remove(dest) // never leave an image that failed verification
			return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
		}
		slog.Debug("checksum verified", "file", dest, "sha256", actual)
	}

	slog.Info("downloaded", "file", dest, "size", written)
	return nil
}
// VerifyFile hashes the file at path with SHA-256 and compares the
// hex-encoded digest with expectedSHA256.
//
// It returns nil when the two match, the underlying error when the file
// cannot be opened or read, and a "checksum mismatch" error otherwise.
func VerifyFile(path, expectedSHA256 string) error {
	file, openErr := os.Open(path)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	// Stream the file through the hash rather than loading it into memory.
	digest := sha256.New()
	_, readErr := io.Copy(digest, file)
	if readErr != nil {
		return readErr
	}

	if got := hex.EncodeToString(digest.Sum(nil)); got != expectedSHA256 {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, got)
	}
	return nil
}

View File

@@ -0,0 +1,241 @@
package image
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
)
// TestCheckForUpdate serves a populated metadata document at /latest.json and
// checks that the version, kernel checksum and release notes are decoded.
func TestCheckForUpdate(t *testing.T) {
	served := UpdateMetadata{
		Version:         "1.2.0",
		VmlinuzURL:      "/vmlinuz",
		VmlinuzSHA256:   "abc123",
		InitramfsURL:    "/kubesolo-os.gz",
		InitramfsSHA256: "def456",
		ReleaseNotes:    "Bug fixes",
		ReleaseDate:     "2025-01-15",
	}

	// Only the metadata path is served; anything else is a 404.
	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/latest.json" {
			json.NewEncoder(w).Encode(served)
			return
		}
		http.NotFound(w, r)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	got, err := NewClient(srv.URL, "").CheckForUpdate()
	if err != nil {
		t.Fatal(err)
	}

	if v := got.Version; v != "1.2.0" {
		t.Errorf("expected version 1.2.0, got %s", v)
	}
	if sum := got.VmlinuzSHA256; sum != "abc123" {
		t.Errorf("expected vmlinuz sha abc123, got %s", sum)
	}
	if notes := got.ReleaseNotes; notes != "Bug fixes" {
		t.Errorf("expected release notes, got %s", notes)
	}
}
// TestCheckForUpdateMissingVersion checks that metadata with an empty version
// is rejected.
func TestCheckForUpdateMissingVersion(t *testing.T) {
	emptyMetadata := func(w http.ResponseWriter, r *http.Request) {
		json.NewEncoder(w).Encode(UpdateMetadata{})
	}
	srv := httptest.NewServer(http.HandlerFunc(emptyMetadata))
	defer srv.Close()

	if _, err := NewClient(srv.URL, "").CheckForUpdate(); err == nil {
		t.Fatal("expected error for missing version")
	}
}
// TestCheckForUpdateServerError checks that a 500 response from the update
// server is reported as an error.
func TestCheckForUpdateServerError(t *testing.T) {
	alwaysFail := func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}
	srv := httptest.NewServer(http.HandlerFunc(alwaysFail))
	defer srv.Close()

	if _, err := NewClient(srv.URL, "").CheckForUpdate(); err == nil {
		t.Fatal("expected error for server error")
	}
}
// TestDownloadAndVerify runs the full happy path against a test server:
// fetch metadata, download both files with correct checksums, and confirm
// that each staged file holds exactly the bytes that were served.
func TestDownloadAndVerify(t *testing.T) {
	// Create test content
	vmlinuzContent := []byte("fake vmlinuz content for testing")
	initramfsContent := []byte("fake initramfs content for testing")
	vmlinuzHash := sha256.Sum256(vmlinuzContent)
	initramfsHash := sha256.Sum256(initramfsContent)

	meta := UpdateMetadata{
		Version:         "2.0.0",
		VmlinuzSHA256:   hex.EncodeToString(vmlinuzHash[:]),
		InitramfsSHA256: hex.EncodeToString(initramfsHash[:]),
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/latest.json":
			// The download URLs must point back at this server, whose
			// address is only known once it is running.
			m := meta
			m.VmlinuzURL = "http://" + r.Host + "/vmlinuz"
			m.InitramfsURL = "http://" + r.Host + "/kubesolo-os.gz"
			json.NewEncoder(w).Encode(m)
		case "/vmlinuz":
			w.Write(vmlinuzContent)
		case "/kubesolo-os.gz":
			w.Write(initramfsContent)
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	stageDir := filepath.Join(t.TempDir(), "stage")
	client := NewClient(server.URL, stageDir)
	defer client.Cleanup()

	// First get metadata
	gotMeta, err := client.CheckForUpdate()
	if err != nil {
		t.Fatal(err)
	}

	// Download
	staged, err := client.Download(gotMeta)
	if err != nil {
		t.Fatal(err)
	}
	if staged.Version != "2.0.0" {
		t.Errorf("expected version 2.0.0, got %s", staged.Version)
	}

	// Verify that both files exist and carry the served content. Reading the
	// file covers the existence check, and the read error is reported rather
	// than discarded.
	staging := []struct {
		name string
		path string
		want []byte
	}{
		{"vmlinuz", staged.VmlinuzPath, vmlinuzContent},
		{"initramfs", staged.InitramfsPath, initramfsContent},
	}
	for _, file := range staging {
		data, err := os.ReadFile(file.path)
		if err != nil {
			t.Errorf("%s not found: %v", file.name, err)
			continue
		}
		if string(data) != string(file.want) {
			t.Errorf("%s content mismatch", file.name)
		}
	}
}
// TestDownloadChecksumMismatch checks that a kernel whose digest differs from
// the metadata is rejected with a checksum-mismatch error and that the
// rejected file is not left in the stage directory.
func TestDownloadChecksumMismatch(t *testing.T) {
	content := []byte("actual content")

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/vmlinuz":
			w.Write(content)
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	stageDir := filepath.Join(t.TempDir(), "stage")
	client := NewClient(server.URL, stageDir)
	meta := &UpdateMetadata{
		Version:       "1.0.0",
		VmlinuzURL:    server.URL + "/vmlinuz",
		VmlinuzSHA256: "wrong_checksum_value",
		InitramfsURL:  server.URL + "/initramfs",
	}

	_, err := client.Download(meta)
	if err == nil {
		t.Fatal("expected checksum mismatch error")
	}

	// Pin the failure to the checksum comparison, so an unrelated error
	// (network, file system) cannot make this test pass.
	vmlinuzPath := filepath.Join(stageDir, "vmlinuz")
	sum := sha256.Sum256(content)
	wantErr := "downloading vmlinuz: checksum mismatch for " + vmlinuzPath +
		": expected wrong_checksum_value, got " + hex.EncodeToString(sum[:])
	if err.Error() != wantErr {
		t.Errorf("unexpected error:\n got: %v\nwant: %s", err, wantErr)
	}

	// A file that failed verification must not remain staged.
	if _, statErr := os.Stat(vmlinuzPath); !os.IsNotExist(statErr) {
		t.Errorf("vmlinuz with bad checksum should be removed, stat error: %v", statErr)
	}
}
// TestVerifyFile checks that VerifyFile accepts a file's real SHA-256 digest
// and rejects a different one.
func TestVerifyFile(t *testing.T) {
	payload := []byte("test file content for verification")

	target := filepath.Join(t.TempDir(), "testfile")
	if err := os.WriteFile(target, payload, 0o644); err != nil {
		t.Fatal(err)
	}

	digest := sha256.Sum256(payload)
	correct := hex.EncodeToString(digest[:])

	// The real digest is accepted.
	err := VerifyFile(target, correct)
	if err != nil {
		t.Errorf("expected verification to pass: %v", err)
	}

	// Any other digest is rejected.
	err = VerifyFile(target, "deadbeef")
	if err == nil {
		t.Error("expected verification to fail with wrong hash")
	}
}
// TestVerifyFileNotFound checks that verifying a path that does not exist
// returns an error.
func TestVerifyFileNotFound(t *testing.T) {
	if err := VerifyFile("/nonexistent/file", "abc123"); err == nil {
		t.Error("expected error for nonexistent file")
	}
}
// TestCleanup checks that Cleanup removes a populated stage directory.
func TestCleanup(t *testing.T) {
	stageDir := filepath.Join(t.TempDir(), "stage")

	// Fail fast if the fixture cannot be created; otherwise the final
	// assertion could pass without Cleanup having removed anything.
	if err := os.MkdirAll(stageDir, 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(stageDir, "test"), []byte("data"), 0o644); err != nil {
		t.Fatal(err)
	}

	client := NewClient("http://unused", stageDir)
	if err := client.Cleanup(); err != nil {
		t.Fatal(err)
	}

	if _, err := os.Stat(stageDir); !os.IsNotExist(err) {
		t.Error("stage dir should be removed after cleanup")
	}
}
// TestUpdateMetadataJSON checks that an UpdateMetadata value survives a JSON
// encode/decode round trip with every field intact.
func TestUpdateMetadataJSON(t *testing.T) {
	meta := UpdateMetadata{
		Version:         "1.0.0",
		VmlinuzURL:      "https://example.com/vmlinuz",
		VmlinuzSHA256:   "abc",
		InitramfsURL:    "https://example.com/kubesolo-os.gz",
		InitramfsSHA256: "def",
		ReleaseNotes:    "Initial release",
		ReleaseDate:     "2025-01-01",
	}

	data, err := json.Marshal(meta)
	if err != nil {
		t.Fatal(err)
	}

	var decoded UpdateMetadata
	if err := json.Unmarshal(data, &decoded); err != nil {
		t.Fatal(err)
	}

	// The struct holds only strings, so it is comparable; comparing the
	// whole value covers every field rather than a hand-picked subset.
	if decoded != meta {
		t.Errorf("round trip mismatch:\n got: %+v\nwant: %+v", decoded, meta)
	}
}