feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic rollback. GRUB bootloader manages slot selection with a 3-attempt boot counter that auto-rolls back on repeated health check failures. GRUB boot config: - A/B slot selection with boot_counter/boot_success env vars - Automatic rollback when counter reaches 0 (3 failed boots) - Debug, emergency shell, and manual slot-switch menu entries Disk image (refactored): - 4-partition GPT layout: EFI + System A + System B + Data - GRUB EFI/BIOS installation with graceful fallbacks - Both system partitions populated during image creation Update agent (Go, zero external deps): - pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback) - pkg/partition: find/mount/write system partitions by label - pkg/image: HTTP download with SHA256 verification - pkg/health: post-boot checks (containerd, API server, node Ready) - 6 CLI commands: check, apply, activate, rollback, healthcheck, status - 37 unit tests across all 4 packages Deployment: - K8s CronJob for automatic update checks (every 6 hours) - ConfigMap for update server URL - Health check Job for post-boot verification Build pipeline: - build-update-agent.sh compiles static Linux binary (~5.9 MB) - inject-kubesolo.sh includes update agent in initramfs - Makefile: build-update-agent, test-update-agent, test-update targets Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
60
Makefile
60
Makefile
@@ -1,6 +1,6 @@
|
||||
.PHONY: all fetch build-cloudinit rootfs initramfs iso disk-image \
|
||||
.PHONY: all fetch build-cloudinit build-update-agent rootfs initramfs iso disk-image \
|
||||
test-boot test-k8s test-persistence test-deploy test-storage test-all \
|
||||
test-cloudinit \
|
||||
test-cloudinit test-update-agent \
|
||||
dev-vm dev-vm-shell quick docker-build shellcheck \
|
||||
kernel-audit clean distclean help
|
||||
|
||||
@@ -32,7 +32,11 @@ build-cloudinit:
|
||||
@echo "==> Building cloud-init binary..."
|
||||
$(BUILD_DIR)/scripts/build-cloudinit.sh
|
||||
|
||||
rootfs: fetch build-cloudinit
|
||||
build-update-agent:
|
||||
@echo "==> Building update agent..."
|
||||
$(BUILD_DIR)/scripts/build-update-agent.sh
|
||||
|
||||
rootfs: fetch build-cloudinit build-update-agent
|
||||
@echo "==> Preparing rootfs..."
|
||||
$(BUILD_DIR)/scripts/extract-core.sh
|
||||
$(BUILD_DIR)/scripts/inject-kubesolo.sh
|
||||
@@ -88,6 +92,20 @@ test-cloudinit:
|
||||
@echo "==> Testing cloud-init parser..."
|
||||
cd cloud-init && go test ./... -v -count=1
|
||||
|
||||
# Update agent Go tests
|
||||
test-update-agent:
|
||||
@echo "==> Testing update agent..."
|
||||
cd update && go test ./... -v -count=1
|
||||
|
||||
# A/B update integration tests
|
||||
test-update: disk-image
|
||||
@echo "==> Testing A/B update cycle..."
|
||||
test/qemu/test-update.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).img
|
||||
|
||||
test-rollback: disk-image
|
||||
@echo "==> Testing rollback..."
|
||||
test/qemu/test-rollback.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).img
|
||||
|
||||
# Full integration test suite (requires more time)
|
||||
test-integration: test-k8s test-deploy test-storage
|
||||
|
||||
@@ -157,24 +175,28 @@ help:
|
||||
@echo "KubeSolo OS Build System (v$(VERSION))"
|
||||
@echo ""
|
||||
@echo "Build targets:"
|
||||
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
||||
@echo " make build-cloudinit Build cloud-init Go binary"
|
||||
@echo " make rootfs Extract + prepare rootfs with KubeSolo"
|
||||
@echo " make initramfs Repack rootfs into kubesolo-os.gz"
|
||||
@echo " make iso Create bootable ISO (default target)"
|
||||
@echo " make disk-image Create raw disk image with boot + data partitions"
|
||||
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
||||
@echo " make docker-build Reproducible build inside Docker"
|
||||
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
||||
@echo " make build-cloudinit Build cloud-init Go binary"
|
||||
@echo " make build-update-agent Build update agent Go binary"
|
||||
@echo " make rootfs Extract + prepare rootfs with KubeSolo"
|
||||
@echo " make initramfs Repack rootfs into kubesolo-os.gz"
|
||||
@echo " make iso Create bootable ISO (default target)"
|
||||
@echo " make disk-image Create raw disk image with A/B partitions + GRUB"
|
||||
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
||||
@echo " make docker-build Reproducible build inside Docker"
|
||||
@echo ""
|
||||
@echo "Test targets:"
|
||||
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
||||
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
||||
@echo " make test-persist Reboot disk image, verify state persists"
|
||||
@echo " make test-deploy Deploy nginx pod, verify Running"
|
||||
@echo " make test-storage Test PVC with local-path provisioner"
|
||||
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
||||
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
||||
@echo " make test-integ Run full integration suite"
|
||||
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
||||
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
||||
@echo " make test-persist Reboot disk image, verify state persists"
|
||||
@echo " make test-deploy Deploy nginx pod, verify Running"
|
||||
@echo " make test-storage Test PVC with local-path provisioner"
|
||||
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
||||
@echo " make test-update-agent Run update agent Go unit tests"
|
||||
@echo " make test-update A/B update cycle integration test"
|
||||
@echo " make test-rollback Forced rollback integration test"
|
||||
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
||||
@echo " make test-integ Run full integration suite"
|
||||
@echo ""
|
||||
@echo "Dev targets:"
|
||||
@echo " make dev-vm Launch interactive QEMU VM"
|
||||
|
||||
11
build/grub/grub-env-defaults
Normal file
11
build/grub/grub-env-defaults
Normal file
@@ -0,0 +1,11 @@
|
||||
# KubeSolo OS — Default GRUB Environment Variables
|
||||
# These are written to grubenv on first install.
|
||||
# Format: key=value (one per line, grub-editenv compatible)
|
||||
#
|
||||
# active_slot: Which system partition to boot (A or B)
|
||||
# boot_counter: Attempts remaining before rollback (3 = fresh, 0 = rollback)
|
||||
# boot_success: Set to 1 by health check after successful boot
|
||||
|
||||
active_slot=A
|
||||
boot_counter=3
|
||||
boot_success=1
|
||||
95
build/grub/grub.cfg
Normal file
95
build/grub/grub.cfg
Normal file
@@ -0,0 +1,95 @@
|
||||
# KubeSolo OS — GRUB Configuration
|
||||
# A/B partition boot with automatic rollback
|
||||
#
|
||||
# Partition layout:
|
||||
# (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
|
||||
# (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
|
||||
#
|
||||
# Environment variables (in grubenv):
|
||||
# active_slot — "A" or "B" (which partition to boot)
|
||||
# boot_counter — 3→2→1→0 (decremented on each failed boot)
|
||||
# boot_success — 0 or 1 (set to 1 by health check post-boot)
|
||||
|
||||
set default=0
|
||||
set timeout=3
|
||||
|
||||
# Load saved environment
|
||||
load_env
|
||||
|
||||
# --- A/B Rollback Logic ---
|
||||
# On every boot, check if the last boot was successful.
|
||||
# If not, decrement the counter. If counter hits 0, swap slots.
|
||||
|
||||
if [ "${boot_success}" != "1" ]; then
|
||||
# Last boot failed — check counter
|
||||
if [ "${boot_counter}" = "0" ]; then
|
||||
# Counter exhausted — rollback to other slot
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set active_slot=B
|
||||
else
|
||||
set active_slot=A
|
||||
fi
|
||||
save_env active_slot
|
||||
set boot_counter=3
|
||||
save_env boot_counter
|
||||
else
|
||||
# Decrement counter (GRUB doesn't have arithmetic)
|
||||
if [ "${boot_counter}" = "3" ]; then
|
||||
set boot_counter=2
|
||||
elif [ "${boot_counter}" = "2" ]; then
|
||||
set boot_counter=1
|
||||
elif [ "${boot_counter}" = "1" ]; then
|
||||
set boot_counter=0
|
||||
fi
|
||||
save_env boot_counter
|
||||
fi
|
||||
fi
|
||||
|
||||
# Reset boot_success for this boot attempt — health check must set it to 1
|
||||
set boot_success=0
|
||||
save_env boot_success
|
||||
|
||||
# --- Resolve boot partition ---
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set root='(hd0,gpt2)'
|
||||
set slot_label="System A"
|
||||
else
|
||||
set root='(hd0,gpt3)'
|
||||
set slot_label="System B"
|
||||
fi
|
||||
|
||||
# --- Menu Entries ---
|
||||
|
||||
menuentry "KubeSolo OS (${slot_label})" {
|
||||
echo "Booting KubeSolo OS from ${slot_label}..."
|
||||
echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA quiet
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
|
||||
echo "Booting KubeSolo OS (debug) from ${slot_label}..."
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS — Emergency Shell" {
|
||||
echo "Booting to emergency shell..."
|
||||
linux /vmlinuz kubesolo.shell console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS — Boot Other Slot" {
|
||||
# Manually boot the passive slot (for testing)
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set root='(hd0,gpt3)'
|
||||
echo "Booting from System B (passive)..."
|
||||
else
|
||||
set root='(hd0,gpt2)'
|
||||
echo "Booting from System A (passive)..."
|
||||
fi
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
29
build/scripts/build-update-agent.sh
Executable file
29
build/scripts/build-update-agent.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
# build-update-agent.sh — Compile the KubeSolo OS update agent
|
||||
#
|
||||
# Builds a static Linux binary for the update agent.
|
||||
# Output: build/cache/kubesolo-update
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
UPDATE_DIR="$PROJECT_ROOT/update"
|
||||
CACHE_DIR="$PROJECT_ROOT/build/cache"
|
||||
OUTPUT="$CACHE_DIR/kubesolo-update"
|
||||
|
||||
echo "=== Building KubeSolo Update Agent ==="
|
||||
|
||||
# Ensure output dir exists
|
||||
mkdir -p "$CACHE_DIR"
|
||||
|
||||
# Run tests first
|
||||
echo "--- Running tests ---"
|
||||
(cd "$UPDATE_DIR" && go test ./... -count=1)
|
||||
|
||||
# Build static binary
|
||||
echo "--- Compiling static binary ---"
|
||||
(cd "$UPDATE_DIR" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -ldflags='-s -w' -o "$OUTPUT" .)
|
||||
|
||||
SIZE=$(ls -lh "$OUTPUT" | awk '{print $5}')
|
||||
echo "--- Update agent built: $OUTPUT ($SIZE) ---"
|
||||
@@ -1,6 +1,11 @@
|
||||
#!/bin/bash
|
||||
# create-disk-image.sh — Create a raw disk image with boot + data partitions
|
||||
# Phase 1: simple layout (boot + data). Phase 3 adds A/B system partitions.
|
||||
# create-disk-image.sh — Create a raw disk image with A/B system partitions
|
||||
#
|
||||
# Partition layout (GPT):
|
||||
# Part 1: EFI/Boot (256 MB, FAT32) — GRUB + grubenv + A/B boot logic
|
||||
# Part 2: System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz (active)
|
||||
# Part 3: System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz (passive)
|
||||
# Part 4: Data (remaining, ext4) — persistent K8s state
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
@@ -11,93 +16,165 @@ VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||
OS_NAME="kubesolo-os"
|
||||
|
||||
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.img"
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-2048}" # 2 GB default
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-4096}" # 4 GB default (larger for A/B)
|
||||
|
||||
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
||||
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
||||
GRUB_CFG="$PROJECT_ROOT/build/grub/grub.cfg"
|
||||
GRUB_ENV_DEFAULTS="$PROJECT_ROOT/build/grub/grub-env-defaults"
|
||||
|
||||
for f in "$VMLINUZ" "$INITRAMFS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f — run 'make initramfs'"; exit 1; }
|
||||
for f in "$VMLINUZ" "$INITRAMFS" "$GRUB_CFG" "$GRUB_ENV_DEFAULTS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f"; exit 1; }
|
||||
done
|
||||
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image..."
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image with A/B partitions..."
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
# Create sparse image
|
||||
dd if=/dev/zero of="$IMG_OUTPUT" bs=1M count=0 seek="$IMG_SIZE_MB" 2>/dev/null
|
||||
|
||||
# Partition: 256MB boot (ext4) + rest data (ext4)
|
||||
# Using sfdisk for scriptability
|
||||
# Partition (GPT):
|
||||
# Part 1: 256 MB EFI System Partition (FAT32)
|
||||
# Part 2: 512 MB System A (Linux filesystem)
|
||||
# Part 3: 512 MB System B (Linux filesystem)
|
||||
# Part 4: Remaining — Data (Linux filesystem)
|
||||
sfdisk "$IMG_OUTPUT" << EOF
|
||||
label: dos
|
||||
unit: sectors
|
||||
label: gpt
|
||||
|
||||
# Boot partition: 256 MB, bootable
|
||||
start=2048, size=524288, type=83, bootable
|
||||
# Data partition: remaining space
|
||||
start=526336, type=83
|
||||
# EFI/Boot partition: 256 MB
|
||||
start=2048, size=524288, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, name="EFI"
|
||||
# System A partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemA"
|
||||
# System B partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemB"
|
||||
# Data partition: remaining
|
||||
type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="Data"
|
||||
EOF
|
||||
|
||||
# Set up loop device
|
||||
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
||||
echo "==> Loop device: $LOOP"
|
||||
|
||||
MNT_EFI=$(mktemp -d)
|
||||
MNT_SYSA=$(mktemp -d)
|
||||
MNT_SYSB=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
|
||||
cleanup() {
|
||||
umount "${LOOP}p1" 2>/dev/null || true
|
||||
umount "${LOOP}p2" 2>/dev/null || true
|
||||
umount "$MNT_EFI" 2>/dev/null || true
|
||||
umount "$MNT_SYSA" 2>/dev/null || true
|
||||
umount "$MNT_SYSB" 2>/dev/null || true
|
||||
umount "$MNT_DATA" 2>/dev/null || true
|
||||
losetup -d "$LOOP" 2>/dev/null || true
|
||||
rm -rf "$MNT_BOOT" "$MNT_DATA" 2>/dev/null || true
|
||||
rm -rf "$MNT_EFI" "$MNT_SYSA" "$MNT_SYSB" "$MNT_DATA" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Format partitions
|
||||
mkfs.ext4 -q -L KSOLOBOOT "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p2"
|
||||
mkfs.vfat -F 32 -n KSOLOEFI "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLOA "${LOOP}p2"
|
||||
mkfs.ext4 -q -L KSOLOB "${LOOP}p3"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p4"
|
||||
|
||||
# Mount and populate boot partition
|
||||
MNT_BOOT=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
# Mount all partitions
|
||||
mount "${LOOP}p1" "$MNT_EFI"
|
||||
mount "${LOOP}p2" "$MNT_SYSA"
|
||||
mount "${LOOP}p3" "$MNT_SYSB"
|
||||
mount "${LOOP}p4" "$MNT_DATA"
|
||||
|
||||
mount "${LOOP}p1" "$MNT_BOOT"
|
||||
mount "${LOOP}p2" "$MNT_DATA"
|
||||
# --- EFI/Boot Partition ---
|
||||
echo " Installing GRUB..."
|
||||
mkdir -p "$MNT_EFI/EFI/BOOT"
|
||||
mkdir -p "$MNT_EFI/boot/grub"
|
||||
|
||||
# Install syslinux + kernel + initramfs to boot partition
|
||||
mkdir -p "$MNT_BOOT/boot/syslinux"
|
||||
cp "$VMLINUZ" "$MNT_BOOT/boot/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_BOOT/boot/kubesolo-os.gz"
|
||||
# Copy GRUB config
|
||||
cp "$GRUB_CFG" "$MNT_EFI/boot/grub/grub.cfg"
|
||||
|
||||
# Syslinux config for disk boot (extlinux)
|
||||
cat > "$MNT_BOOT/boot/syslinux/syslinux.cfg" << 'EOF'
|
||||
DEFAULT kubesolo
|
||||
TIMEOUT 30
|
||||
PROMPT 0
|
||||
# Create GRUB environment file from defaults
|
||||
if command -v grub-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub-editenv
|
||||
elif command -v grub2-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub2-editenv
|
||||
else
|
||||
GRUB_EDITENV=""
|
||||
fi
|
||||
|
||||
LABEL kubesolo
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND quiet kubesolo.data=LABEL=KSOLODATA
|
||||
GRUBENV_FILE="$MNT_EFI/boot/grub/grubenv"
|
||||
|
||||
LABEL kubesolo-debug
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
if [ -n "$GRUB_EDITENV" ]; then
|
||||
# Create grubenv with defaults
|
||||
"$GRUB_EDITENV" "$GRUBENV_FILE" create
|
||||
while IFS='=' read -r key value; do
|
||||
# Skip comments and empty lines
|
||||
case "$key" in
|
||||
'#'*|'') continue ;;
|
||||
esac
|
||||
"$GRUB_EDITENV" "$GRUBENV_FILE" set "$key=$value"
|
||||
done < "$GRUB_ENV_DEFAULTS"
|
||||
echo " GRUB environment created with grub-editenv"
|
||||
else
|
||||
# Fallback: write grubenv file manually (1024 bytes, padded with '#')
|
||||
echo " WARN: grub-editenv not found — writing grubenv manually"
|
||||
{
|
||||
echo "# GRUB Environment Block"
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
'#'*|'') continue ;;
|
||||
esac
|
||||
echo "$key=$value"
|
||||
done < "$GRUB_ENV_DEFAULTS"
|
||||
} > "$GRUBENV_FILE.tmp"
|
||||
# Pad to 1024 bytes (GRUB requirement)
|
||||
truncate -s 1024 "$GRUBENV_FILE.tmp"
|
||||
mv "$GRUBENV_FILE.tmp" "$GRUBENV_FILE"
|
||||
fi
|
||||
|
||||
LABEL kubesolo-shell
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.shell console=ttyS0,115200n8
|
||||
EOF
|
||||
# Install GRUB EFI binary if available
|
||||
if command -v grub-mkimage >/dev/null 2>&1; then
|
||||
grub-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
||||
-p /boot/grub \
|
||||
part_gpt ext2 fat normal linux echo all_video test search \
|
||||
search_fs_uuid search_label configfile loadenv \
|
||||
2>/dev/null || echo " WARN: grub-mkimage failed — use QEMU -bios flag"
|
||||
elif command -v grub2-mkimage >/dev/null 2>&1; then
|
||||
grub2-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
||||
-p /boot/grub \
|
||||
part_gpt ext2 fat normal linux echo all_video test search \
|
||||
search_fs_uuid search_label configfile loadenv \
|
||||
2>/dev/null || echo " WARN: grub2-mkimage failed — use QEMU -bios flag"
|
||||
else
|
||||
echo " WARN: grub-mkimage not found — EFI boot image not created"
|
||||
echo " Install grub2-tools or use QEMU -kernel/-initrd flags"
|
||||
fi
|
||||
|
||||
# Install extlinux bootloader
|
||||
if command -v extlinux >/dev/null 2>&1; then
|
||||
extlinux --install "$MNT_BOOT/boot/syslinux" 2>/dev/null || {
|
||||
echo "WARN: extlinux install failed — image may not be directly bootable"
|
||||
echo " Use with QEMU -kernel/-initrd flags instead"
|
||||
# For BIOS boot: install GRUB i386-pc modules if available
|
||||
if command -v grub-install >/dev/null 2>&1; then
|
||||
grub-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||
--no-floppy "$LOOP" 2>/dev/null || {
|
||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||
}
|
||||
elif command -v grub2-install >/dev/null 2>&1; then
|
||||
grub2-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||
--no-floppy "$LOOP" 2>/dev/null || {
|
||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||
}
|
||||
fi
|
||||
|
||||
# Prepare data partition structure
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network; do
|
||||
# --- System A Partition (active) ---
|
||||
echo " Populating System A (active)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSA/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSA/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSA/version"
|
||||
|
||||
# --- System B Partition (passive, initially same as A) ---
|
||||
echo " Populating System B (passive)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSB/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSB/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSB/version"
|
||||
|
||||
# --- Data Partition ---
|
||||
echo " Preparing data partition..."
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network images; do
|
||||
mkdir -p "$MNT_DATA/$dir"
|
||||
done
|
||||
|
||||
@@ -106,5 +183,8 @@ sync
|
||||
echo ""
|
||||
echo "==> Disk image created: $IMG_OUTPUT"
|
||||
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
||||
echo " Boot partition (KSOLOBOOT): kernel + initramfs"
|
||||
echo " Data partition (KSOLODATA): persistent K8s state"
|
||||
echo " Part 1 (KSOLOEFI): GRUB + A/B boot config"
|
||||
echo " Part 2 (KSOLOA): System A — kernel + initramfs (active)"
|
||||
echo " Part 3 (KSOLOB): System B — kernel + initramfs (passive)"
|
||||
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
||||
echo ""
|
||||
|
||||
@@ -73,6 +73,16 @@ else
|
||||
echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' to build)"
|
||||
fi
|
||||
|
||||
# Update agent binary (Go, built separately)
|
||||
UPDATE_BIN="$CACHE_DIR/kubesolo-update"
|
||||
if [ -f "$UPDATE_BIN" ]; then
|
||||
cp "$UPDATE_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||
echo " Installed update agent ($(du -h "$UPDATE_BIN" | cut -f1))"
|
||||
else
|
||||
echo " WARN: Update agent not found (run 'make build-update-agent' to build)"
|
||||
fi
|
||||
|
||||
# --- 3. Kernel modules list ---
|
||||
cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
||||
|
||||
|
||||
261
docs/update-flow.md
Normal file
261
docs/update-flow.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# KubeSolo OS — Atomic Update Flow
|
||||
|
||||
This document describes the A/B partition update mechanism used by KubeSolo OS for safe, atomic OS updates with automatic rollback.
|
||||
|
||||
## Partition Layout
|
||||
|
||||
KubeSolo OS uses a 4-partition GPT layout:
|
||||
|
||||
```
|
||||
Disk (minimum 4 GB):
|
||||
Part 1: EFI/Boot (256 MB, FAT32, label: KSOLOEFI) — GRUB + boot config
|
||||
Part 2: System A (512 MB, ext4, label: KSOLOA) — vmlinuz + kubesolo-os.gz
|
||||
Part 3: System B (512 MB, ext4, label: KSOLOB) — vmlinuz + kubesolo-os.gz
|
||||
Part 4: Data (remaining, ext4, label: KSOLODATA) — persistent K8s state
|
||||
```
|
||||
|
||||
Only one system partition is active at a time. The other is the "passive" slot used for staging updates.
|
||||
|
||||
## GRUB Environment Variables
|
||||
|
||||
The A/B boot logic is controlled by three GRUB environment variables stored in `/boot/grub/grubenv`:
|
||||
|
||||
| Variable | Values | Description |
|
||||
|---|---|---|
|
||||
| `active_slot` | `A` or `B` | Which system partition to boot |
|
||||
| `boot_counter` | `3` → `0` | Attempts remaining before rollback |
|
||||
| `boot_success` | `0` or `1` | Whether the current boot has been verified healthy |
|
||||
|
||||
## Boot Flow
|
||||
|
||||
```
|
||||
┌──────────────┐
|
||||
│ GRUB starts │
|
||||
└──────┬───────┘
|
||||
│
|
||||
┌──────▼───────┐
|
||||
│ Load grubenv │
|
||||
└──────┬───────┘
|
||||
│
|
||||
┌─────────▼─────────┐
|
||||
│ boot_success == 1? │
|
||||
└────┬──────────┬───┘
|
||||
yes│ │no
|
||||
│ ┌─────▼──────────┐
|
||||
│ │ boot_counter=0? │
|
||||
│ └──┬──────────┬──┘
|
||||
│ no │ │ yes
|
||||
│ │ ┌─────▼──────────┐
|
||||
│ │ │ SWAP active_slot│
|
||||
│ │ │ Reset counter=3 │
|
||||
│ │ └─────┬───────────┘
|
||||
│ │ │
|
||||
┌────▼───────▼──────────▼────┐
|
||||
│ Set boot_success=0 │
|
||||
│ Decrement boot_counter │
|
||||
│ Boot active_slot partition │
|
||||
└────────────┬───────────────┘
|
||||
│
|
||||
┌─────────▼─────────┐
|
||||
│ System boots... │
|
||||
└─────────┬─────────┘
|
||||
│
|
||||
┌─────────▼─────────────┐
|
||||
│ Health check runs │
|
||||
│ (containerd, API, │
|
||||
│ node Ready) │
|
||||
└─────┬──────────┬──────┘
|
||||
pass│ │fail
|
||||
┌─────▼─────┐ │
|
||||
│ Mark boot │ │ boot_success stays 0
|
||||
│ success=1 │ │ counter decremented
|
||||
│ counter=3 │ │ on next reboot
|
||||
└───────────┘ └──────────────────────
|
||||
```
|
||||
|
||||
### Rollback Behavior
|
||||
|
||||
The boot counter starts at 3 and decrements on each boot where `boot_success` remains 0:
|
||||
|
||||
1. **Boot 1**: counter 3 → 2 (health check fails → reboot)
|
||||
2. **Boot 2**: counter 2 → 1 (health check fails → reboot)
|
||||
3. **Boot 3**: counter 1 → 0 (health check fails → reboot)
|
||||
4. **Boot 4**: counter = 0, GRUB swaps `active_slot` and resets counter to 3
|
||||
|
||||
This provides **3 chances** for the new version to pass health checks before automatic rollback to the previous version.
|
||||
|
||||
## Update Agent Commands
|
||||
|
||||
The `kubesolo-update` binary provides 6 subcommands:
|
||||
|
||||
### `check` — Check for Updates
|
||||
|
||||
Queries the update server and compares against the current running version.
|
||||
|
||||
```bash
|
||||
kubesolo-update check --server https://updates.example.com
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
Current version: 1.0.0 (slot A)
|
||||
Latest version: 1.1.0
|
||||
Status: update available
|
||||
```
|
||||
|
||||
### `apply` — Download and Write Update
|
||||
|
||||
Downloads the new OS image (vmlinuz + initramfs) from the update server, verifies SHA256 checksums, and writes to the passive partition.
|
||||
|
||||
```bash
|
||||
kubesolo-update apply --server https://updates.example.com
|
||||
```
|
||||
|
||||
This does NOT activate the new partition or trigger a reboot.
|
||||
|
||||
### `activate` — Set Next Boot Target
|
||||
|
||||
Switches the GRUB boot target to the passive partition (the one with the new image) and sets `boot_counter=3`.
|
||||
|
||||
```bash
|
||||
kubesolo-update activate
|
||||
```
|
||||
|
||||
After activation, reboot to boot into the new version:
|
||||
```bash
|
||||
reboot
|
||||
```
|
||||
|
||||
### `rollback` — Force Rollback
|
||||
|
||||
Manually switches to the other partition, regardless of health check status.
|
||||
|
||||
```bash
|
||||
kubesolo-update rollback
|
||||
reboot
|
||||
```
|
||||
|
||||
### `healthcheck` — Post-Boot Health Verification
|
||||
|
||||
Runs after every boot to verify the system is healthy. If all checks pass, marks `boot_success=1` in GRUB to prevent rollback.
|
||||
|
||||
Checks performed:
|
||||
1. **containerd**: Socket exists and `ctr version` responds
|
||||
2. **API server**: TCP connection to 127.0.0.1:6443 and `/healthz` endpoint
|
||||
3. **Node Ready**: `kubectl get nodes` shows Ready status
|
||||
|
||||
```bash
|
||||
kubesolo-update healthcheck --timeout 120
|
||||
```
|
||||
|
||||
### `status` — Show A/B Slot Status
|
||||
|
||||
Displays the current partition state:
|
||||
|
||||
```bash
|
||||
kubesolo-update status
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
KubeSolo OS — A/B Partition Status
|
||||
───────────────────────────────────
|
||||
Active slot: A
|
||||
Passive slot: B
|
||||
Boot counter: 3
|
||||
Boot success: 1
|
||||
|
||||
✓ System is healthy (boot confirmed)
|
||||
```
|
||||
|
||||
## Update Server Protocol
|
||||
|
||||
The update server is a simple HTTP(S) file server that serves:
|
||||
|
||||
```
|
||||
/latest.json — Update metadata
|
||||
/vmlinuz-<version> — Linux kernel
|
||||
/kubesolo-os-<version>.gz — Initramfs
|
||||
```
|
||||
|
||||
### `latest.json` Format
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "1.1.0",
|
||||
"vmlinuz_url": "https://updates.example.com/vmlinuz-1.1.0",
|
||||
"vmlinuz_sha256": "abc123...",
|
||||
"initramfs_url": "https://updates.example.com/kubesolo-os-1.1.0.gz",
|
||||
"initramfs_sha256": "def456...",
|
||||
"release_notes": "Bug fixes and performance improvements",
|
||||
"release_date": "2025-01-15"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Any static file server (nginx, S3, GitHub Releases) can serve as an update server.
|
||||
|
||||
## Automated Updates via CronJob
|
||||
|
||||
KubeSolo OS includes a Kubernetes CronJob for automatic update checking:
|
||||
|
||||
```bash
|
||||
# Deploy the update CronJob
|
||||
kubectl apply -f /usr/lib/kubesolo-os/update-cronjob.yaml
|
||||
|
||||
# Configure the update server URL
|
||||
kubectl -n kube-system create configmap kubesolo-update-config \
|
||||
--from-literal=server-url=https://updates.example.com
|
||||
|
||||
# Manually trigger an update check
|
||||
kubectl create job --from=cronjob/kubesolo-update kubesolo-update-manual -n kube-system
|
||||
```
|
||||
|
||||
The CronJob runs every 6 hours and performs `apply` (download + write). It does NOT reboot — the administrator controls when to reboot.
|
||||
|
||||
## Complete Update Cycle
|
||||
|
||||
A full update cycle looks like:
|
||||
|
||||
```bash
|
||||
# 1. Check if update is available
|
||||
kubesolo-update check --server https://updates.example.com
|
||||
|
||||
# 2. Download and write to passive partition
|
||||
kubesolo-update apply --server https://updates.example.com
|
||||
|
||||
# 3. Activate the new partition
|
||||
kubesolo-update activate
|
||||
|
||||
# 4. Reboot into the new version
|
||||
reboot
|
||||
|
||||
# 5. (Automatic) Health check runs, marks boot successful
|
||||
# kubesolo-update healthcheck is run by init system
|
||||
|
||||
# 6. Verify status
|
||||
kubesolo-update status
|
||||
```
|
||||
|
||||
If the health check fails 3 times, GRUB automatically rolls back to the previous version on the next reboot.
|
||||
|
||||
## Command-Line Options
|
||||
|
||||
All subcommands accept these options:
|
||||
|
||||
| Option | Default | Description |
|
||||
|---|---|---|
|
||||
| `--server URL` | (none) | Update server URL |
|
||||
| `--grubenv PATH` | `/boot/grub/grubenv` | Path to GRUB environment file |
|
||||
| `--timeout SECS` | `120` | Health check timeout in seconds |
|
||||
|
||||
## File Locations
|
||||
|
||||
| File | Description |
|
||||
|---|---|
|
||||
| `/usr/lib/kubesolo-os/kubesolo-update` | Update agent binary |
|
||||
| `/boot/grub/grubenv` | GRUB environment (on EFI partition) |
|
||||
| `/boot/grub/grub.cfg` | GRUB boot config with A/B logic |
|
||||
| `<system-partition>/vmlinuz` | Linux kernel |
|
||||
| `<system-partition>/kubesolo-os.gz` | Initramfs |
|
||||
| `<system-partition>/version` | Version string |
|
||||
40
update/cmd/activate.go
Normal file
40
update/cmd/activate.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Activate switches the boot target to the passive partition.
|
||||
// After activation, the next reboot will boot from the new partition
|
||||
// with boot_counter=3. If health checks fail 3 times, GRUB auto-rolls back.
|
||||
func Activate(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Get passive slot (the one we want to boot into)
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("activating slot", "from", activeSlot, "to", passiveSlot)
|
||||
|
||||
// Set the passive slot as active with fresh boot counter
|
||||
if err := env.ActivateSlot(passiveSlot); err != nil {
|
||||
return fmt.Errorf("activating slot %s: %w", passiveSlot, err)
|
||||
}
|
||||
|
||||
fmt.Printf("Slot %s activated (was %s)\n", passiveSlot, activeSlot)
|
||||
fmt.Println("Boot counter set to 3. Reboot to start the new version.")
|
||||
fmt.Println("The system will automatically roll back if health checks fail 3 times.")
|
||||
|
||||
return nil
|
||||
}
|
||||
70
update/cmd/apply.go
Normal file
70
update/cmd/apply.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||
)
|
||||
|
||||
// Apply downloads a new OS image and writes it to the passive partition.
|
||||
// It does NOT activate the new partition — use 'activate' for that.
|
||||
func Apply(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
|
||||
if opts.ServerURL == "" {
|
||||
return fmt.Errorf("--server is required")
|
||||
}
|
||||
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Determine passive slot
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("applying update", "target_slot", passiveSlot)
|
||||
|
||||
// Check for update
|
||||
stageDir := "/tmp/kubesolo-update-stage"
|
||||
client := image.NewClient(opts.ServerURL, stageDir)
|
||||
defer client.Cleanup()
|
||||
|
||||
meta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking for update: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("update available", "version", meta.Version)
|
||||
|
||||
// Download and verify
|
||||
staged, err := client.Download(meta)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading update: %w", err)
|
||||
}
|
||||
|
||||
// Mount passive partition
|
||||
partInfo, err := partition.GetSlotPartition(passiveSlot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding passive partition: %w", err)
|
||||
}
|
||||
|
||||
mountPoint := "/tmp/kubesolo-passive-" + passiveSlot
|
||||
if err := partition.MountReadWrite(partInfo.Device, mountPoint); err != nil {
|
||||
return fmt.Errorf("mounting passive partition: %w", err)
|
||||
}
|
||||
defer partition.Unmount(mountPoint)
|
||||
|
||||
// Write image to passive partition
|
||||
if err := partition.WriteSystemImage(mountPoint, staged.VmlinuzPath, staged.InitramfsPath, staged.Version); err != nil {
|
||||
return fmt.Errorf("writing system image: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Update v%s written to slot %s (%s)\n", staged.Version, passiveSlot, partInfo.Device)
|
||||
fmt.Println("Run 'kubesolo-update activate' to boot into the new version")
|
||||
|
||||
return nil
|
||||
}
|
||||
65
update/cmd/check.go
Normal file
65
update/cmd/check.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||
)
|
||||
|
||||
// Check queries the update server for available updates and compares
|
||||
// against the currently running version.
|
||||
func Check(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
|
||||
if opts.ServerURL == "" {
|
||||
return fmt.Errorf("--server is required (no default update server configured)")
|
||||
}
|
||||
|
||||
// Get current version from active partition
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
partInfo, err := partition.GetSlotPartition(activeSlot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding active partition: %w", err)
|
||||
}
|
||||
|
||||
mountPoint := "/tmp/kubesolo-check-" + activeSlot
|
||||
if err := partition.MountReadOnly(partInfo.Device, mountPoint); err != nil {
|
||||
return fmt.Errorf("mounting active partition: %w", err)
|
||||
}
|
||||
defer partition.Unmount(mountPoint)
|
||||
|
||||
currentVersion, err := partition.ReadVersion(mountPoint)
|
||||
if err != nil {
|
||||
slog.Warn("could not read current version", "error", err)
|
||||
currentVersion = "unknown"
|
||||
}
|
||||
|
||||
// Check update server
|
||||
client := image.NewClient(opts.ServerURL, "")
|
||||
meta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking for update: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Current version: %s (slot %s)\n", currentVersion, activeSlot)
|
||||
fmt.Printf("Latest version: %s\n", meta.Version)
|
||||
|
||||
if meta.Version == currentVersion {
|
||||
fmt.Println("Status: up to date")
|
||||
} else {
|
||||
fmt.Println("Status: update available")
|
||||
if meta.ReleaseNotes != "" {
|
||||
fmt.Printf("Release notes: %s\n", meta.ReleaseNotes)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
56
update/cmd/healthcheck.go
Normal file
56
update/cmd/healthcheck.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/health"
|
||||
)
|
||||
|
||||
// Healthcheck performs post-boot health verification.
|
||||
// If all checks pass, it marks the boot as successful in GRUB.
|
||||
// This should be run after every boot (typically via a systemd unit or
|
||||
// init script) to confirm the system is healthy.
|
||||
func Healthcheck(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Check if already marked successful
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
slog.Warn("could not read boot_success", "error", err)
|
||||
}
|
||||
if success {
|
||||
fmt.Println("Boot already marked successful")
|
||||
return nil
|
||||
}
|
||||
|
||||
timeout := time.Duration(opts.TimeoutSecs) * time.Second
|
||||
checker := health.NewChecker("", "", timeout)
|
||||
|
||||
slog.Info("running post-boot health checks", "timeout", timeout)
|
||||
|
||||
status, err := checker.WaitForHealthy()
|
||||
if err != nil {
|
||||
fmt.Printf("Health check FAILED: %s\n", status.Message)
|
||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
||||
fmt.Println("\nBoot NOT marked successful — system may roll back on next reboot")
|
||||
return err
|
||||
}
|
||||
|
||||
// Mark boot as successful
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
return fmt.Errorf("marking boot success: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("Health check PASSED — boot marked successful")
|
||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
||||
|
||||
return nil
|
||||
}
|
||||
47
update/cmd/opts.go
Normal file
47
update/cmd/opts.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package cmd
|
||||
|
||||
// opts holds shared command-line options for all subcommands.
type opts struct {
	ServerURL   string // --server: update server base URL
	GrubenvPath string // --grubenv: path to the GRUB environment file
	TimeoutSecs int    // --timeout: health check timeout in seconds
}

// parseOpts extracts command-line flags from args.
// Simple parser — no external dependencies.
//
// Unknown flags are ignored. A malformed --timeout value (anything that
// is not a positive decimal integer) is ignored and the default is kept.
// The previous implementation silently stripped non-digit characters,
// so "--timeout 1x2" was accepted as 12; that is now rejected.
func parseOpts(args []string) opts {
	o := opts{
		GrubenvPath: "/boot/grub/grubenv",
		TimeoutSecs: 120,
	}

	for i := 0; i < len(args); i++ {
		switch args[i] {
		case "--server":
			if i+1 < len(args) {
				o.ServerURL = args[i+1]
				i++
			}
		case "--grubenv":
			if i+1 < len(args) {
				o.GrubenvPath = args[i+1]
				i++
			}
		case "--timeout":
			if i+1 < len(args) {
				if secs, ok := parseSeconds(args[i+1]); ok {
					o.TimeoutSecs = secs
				}
				i++
			}
		}
	}

	return o
}

// parseSeconds parses a strictly-decimal positive integer.
// ok is false for an empty string, any non-digit character, or zero.
func parseSeconds(s string) (int, bool) {
	if s == "" {
		return 0, false
	}
	val := 0
	for _, c := range s {
		if c < '0' || c > '9' {
			return 0, false
		}
		val = val*10 + int(c-'0')
	}
	if val <= 0 {
		return 0, false
	}
	return val, true
}
|
||||
36
update/cmd/rollback.go
Normal file
36
update/cmd/rollback.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Rollback forces an immediate switch to the other partition.
|
||||
// Use this to manually revert to the previous version.
|
||||
func Rollback(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("forcing rollback", "from", activeSlot, "to", passiveSlot)
|
||||
|
||||
if err := env.ForceRollback(); err != nil {
|
||||
return fmt.Errorf("rollback failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Rolled back: %s → %s\n", activeSlot, passiveSlot)
|
||||
fmt.Println("Reboot to complete rollback.")
|
||||
|
||||
return nil
|
||||
}
|
||||
44
update/cmd/status.go
Normal file
44
update/cmd/status.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Status displays the current A/B slot configuration and boot state.
|
||||
func Status(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading GRUB environment: %w", err)
|
||||
}
|
||||
|
||||
activeSlot := vars["active_slot"]
|
||||
bootCounter := vars["boot_counter"]
|
||||
bootSuccess := vars["boot_success"]
|
||||
|
||||
passiveSlot := "B"
|
||||
if activeSlot == "B" {
|
||||
passiveSlot = "A"
|
||||
}
|
||||
|
||||
fmt.Println("KubeSolo OS — A/B Partition Status")
|
||||
fmt.Println("───────────────────────────────────")
|
||||
fmt.Printf(" Active slot: %s\n", activeSlot)
|
||||
fmt.Printf(" Passive slot: %s\n", passiveSlot)
|
||||
fmt.Printf(" Boot counter: %s\n", bootCounter)
|
||||
fmt.Printf(" Boot success: %s\n", bootSuccess)
|
||||
|
||||
if bootSuccess == "1" {
|
||||
fmt.Println("\n ✓ System is healthy (boot confirmed)")
|
||||
} else if bootCounter == "0" {
|
||||
fmt.Println("\n ✗ Boot counter exhausted — rollback will occur on next reboot")
|
||||
} else {
|
||||
fmt.Printf("\n ⚠ Boot pending verification (%s attempts remaining)\n", bootCounter)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
150
update/deploy/update-cronjob.yaml
Normal file
150
update/deploy/update-cronjob.yaml
Normal file
@@ -0,0 +1,150 @@
|
||||
# KubeSolo OS — Automatic Update CronJob
|
||||
#
|
||||
# This CronJob checks for OS updates every 6 hours, downloads them,
|
||||
# and writes them to the passive partition. It does NOT reboot —
|
||||
# the administrator must trigger a reboot to apply the update.
|
||||
#
|
||||
# The update agent runs as a privileged container with host access
|
||||
# because it needs to:
|
||||
# 1. Read/write GRUB environment (on boot partition)
|
||||
# 2. Mount and write to system partitions
|
||||
# 3. Access block devices via blkid
|
||||
#
|
||||
# Deploy: kubectl apply -f update-cronjob.yaml
|
||||
# Manual trigger: kubectl create job --from=cronjob/kubesolo-update kubesolo-update-manual
|
||||
#
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: kubesolo-update
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
app.kubernetes.io/component: update-agent
|
||||
app.kubernetes.io/part-of: kubesolo-os
|
||||
spec:
|
||||
schedule: "0 */6 * * *" # Every 6 hours
|
||||
concurrencyPolicy: Forbid
|
||||
successfulJobsHistoryLimit: 3
|
||||
failedJobsHistoryLimit: 5
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 1
|
||||
activeDeadlineSeconds: 600 # 10 min max
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
hostPID: false
|
||||
hostNetwork: false
|
||||
containers:
|
||||
- name: update
|
||||
image: busybox:latest # Only used for the shell; the binary is host-mounted
|
||||
command:
|
||||
- /host/usr/lib/kubesolo-os/kubesolo-update
|
||||
args:
|
||||
- apply
|
||||
- --server
|
||||
- "$(UPDATE_SERVER_URL)"
|
||||
env:
|
||||
- name: UPDATE_SERVER_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: kubesolo-update-config
|
||||
key: server-url
|
||||
optional: true
|
||||
securityContext:
|
||||
privileged: true # Required for mount/blkid access
|
||||
volumeMounts:
|
||||
- name: host-root
|
||||
mountPath: /host
|
||||
readOnly: false
|
||||
- name: dev
|
||||
mountPath: /dev
|
||||
- name: boot
|
||||
mountPath: /boot
|
||||
volumes:
|
||||
- name: host-root
|
||||
hostPath:
|
||||
path: /
|
||||
type: Directory
|
||||
- name: dev
|
||||
hostPath:
|
||||
path: /dev
|
||||
type: Directory
|
||||
- name: boot
|
||||
hostPath:
|
||||
path: /boot
|
||||
type: Directory
|
||||
tolerations:
|
||||
- operator: Exists # Run on any node (there's only one)
|
||||
---
|
||||
# ConfigMap for update server URL.
|
||||
# Create/update this to point to your update server:
|
||||
# kubectl -n kube-system create configmap kubesolo-update-config \
|
||||
# --from-literal=server-url=https://updates.example.com
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: kubesolo-update-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
app.kubernetes.io/component: update-agent
|
||||
data:
|
||||
server-url: "" # Set to your update server URL
|
||||
---
|
||||
# Post-boot health check — runs once at boot as a Job.
|
||||
# On KubeSolo OS, this is triggered by the init system (init stage or
|
||||
# systemd-equivalent), but it can also be deployed as a K8s Job for
|
||||
# environments where the init system doesn't run the health check.
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: kubesolo-healthcheck
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-healthcheck
|
||||
app.kubernetes.io/component: health-check
|
||||
app.kubernetes.io/part-of: kubesolo-os
|
||||
spec:
|
||||
backoffLimit: 3
|
||||
activeDeadlineSeconds: 300 # 5 min max
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-healthcheck
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
hostPID: false
|
||||
hostNetwork: true # Needed to reach API server at 127.0.0.1:6443
|
||||
containers:
|
||||
- name: healthcheck
|
||||
image: busybox:latest
|
||||
command:
|
||||
- /host/usr/lib/kubesolo-os/kubesolo-update
|
||||
args:
|
||||
- healthcheck
|
||||
- --timeout
|
||||
- "120"
|
||||
securityContext:
|
||||
privileged: true # Required for grubenv write
|
||||
volumeMounts:
|
||||
- name: host-root
|
||||
mountPath: /host
|
||||
readOnly: false
|
||||
- name: boot
|
||||
mountPath: /boot
|
||||
volumes:
|
||||
- name: host-root
|
||||
hostPath:
|
||||
path: /
|
||||
type: Directory
|
||||
- name: boot
|
||||
hostPath:
|
||||
path: /boot
|
||||
type: Directory
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
3
update/go.mod
Normal file
3
update/go.mod
Normal file
@@ -0,0 +1,3 @@
|
||||
module github.com/portainer/kubesolo-os/update
|
||||
|
||||
go 1.25.5
|
||||
79
update/main.go
Normal file
79
update/main.go
Normal file
@@ -0,0 +1,79 @@
|
||||
// kubesolo-update is the atomic update agent for KubeSolo OS.
|
||||
//
|
||||
// It manages A/B partition updates with automatic rollback:
|
||||
//
|
||||
// kubesolo-update check Check for available updates
|
||||
// kubesolo-update apply Download + write update to passive partition
|
||||
// kubesolo-update activate Set passive partition as next boot target
|
||||
// kubesolo-update rollback Force rollback to other partition
|
||||
// kubesolo-update healthcheck Post-boot health verification
|
||||
// kubesolo-update status Show current A/B slot and boot status
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
})))
|
||||
|
||||
if len(os.Args) < 2 {
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var err error
|
||||
switch os.Args[1] {
|
||||
case "check":
|
||||
err = cmd.Check(os.Args[2:])
|
||||
case "apply":
|
||||
err = cmd.Apply(os.Args[2:])
|
||||
case "activate":
|
||||
err = cmd.Activate(os.Args[2:])
|
||||
case "rollback":
|
||||
err = cmd.Rollback(os.Args[2:])
|
||||
case "healthcheck":
|
||||
err = cmd.Healthcheck(os.Args[2:])
|
||||
case "status":
|
||||
err = cmd.Status(os.Args[2:])
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown command: %s\n\n", os.Args[1])
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
slog.Error("command failed", "command", os.Args[1], "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// usage prints the command synopsis and options to stderr.
func usage() {
	const text = `Usage: kubesolo-update <command> [options]

Commands:
  check         Check for available updates
  apply         Download and write update to passive partition
  activate      Set passive partition as next boot target
  rollback      Force rollback to other partition
  healthcheck   Post-boot health verification (marks boot successful)
  status        Show current A/B slot and boot status

Options:
  --server URL    Update server URL (default: from /etc/kubesolo/update.conf)
  --grubenv PATH  Path to grubenv file (default: /boot/grub/grubenv)
  --timeout SECS  Health check timeout in seconds (default: 120)

Examples:
  kubesolo-update check --server https://updates.example.com
  kubesolo-update apply --server https://updates.example.com
  kubesolo-update healthcheck
  kubesolo-update status
`
	// No format verbs in the text, so plain Fprint suffices.
	fmt.Fprint(os.Stderr, text)
}
|
||||
239
update/pkg/grubenv/grubenv.go
Normal file
239
update/pkg/grubenv/grubenv.go
Normal file
@@ -0,0 +1,239 @@
|
||||
// Package grubenv provides read/write access to GRUB environment variables.
|
||||
//
|
||||
// GRUB stores its environment in a 1024-byte file (grubenv) located at
|
||||
// /boot/grub/grubenv on the EFI partition. This package manipulates
|
||||
// those variables for A/B boot slot management.
|
||||
//
|
||||
// Key variables:
|
||||
// - active_slot: "A" or "B"
|
||||
// - boot_counter: "3" (fresh) down to "0" (triggers rollback)
|
||||
// - boot_success: "0" (pending) or "1" (healthy boot confirmed)
|
||||
package grubenv
|
||||
|
||||
import (
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"sort"
	"strings"
)
|
||||
|
||||
const (
	// DefaultGrubenvPath is the standard location for the GRUB environment file.
	DefaultGrubenvPath = "/boot/grub/grubenv"

	// SlotA represents system partition A.
	SlotA = "A"
	// SlotB represents system partition B.
	SlotB = "B"
)

// Env provides access to GRUB environment variables.
type Env struct {
	path string // location of the grubenv file on disk
}

// New creates a new Env for the given grubenv file path.
// An empty path selects DefaultGrubenvPath.
func New(path string) *Env {
	e := &Env{path: path}
	if e.path == "" {
		e.path = DefaultGrubenvPath
	}
	return e
}
|
||||
|
||||
// Get reads a variable from the GRUB environment.
|
||||
func (e *Env) Get(key string) (string, error) {
|
||||
vars, err := e.ReadAll()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
val, ok := vars[key]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("grubenv: key %q not found", key)
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// Set writes a variable to the GRUB environment.
|
||||
func (e *Env) Set(key, value string) error {
|
||||
editenv, err := findEditenv()
|
||||
if err != nil {
|
||||
return e.setManual(key, value)
|
||||
}
|
||||
|
||||
cmd := exec.Command(editenv, e.path, "set", key+"="+value)
|
||||
if output, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("grub-editenv set %s=%s: %w\n%s", key, value, err, output)
|
||||
}
|
||||
|
||||
slog.Debug("grubenv set", "key", key, "value", value)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadAll reads all variables from the GRUB environment.
|
||||
func (e *Env) ReadAll() (map[string]string, error) {
|
||||
editenv, err := findEditenv()
|
||||
if err != nil {
|
||||
return e.readManual()
|
||||
}
|
||||
|
||||
cmd := exec.Command(editenv, e.path, "list")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("grub-editenv list: %w", err)
|
||||
}
|
||||
|
||||
return parseEnvOutput(string(output)), nil
|
||||
}
|
||||
|
||||
// ActiveSlot returns the currently active boot slot ("A" or "B").
|
||||
func (e *Env) ActiveSlot() (string, error) {
|
||||
return e.Get("active_slot")
|
||||
}
|
||||
|
||||
// PassiveSlot returns the currently passive boot slot.
|
||||
func (e *Env) PassiveSlot() (string, error) {
|
||||
active, err := e.ActiveSlot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if active == SlotA {
|
||||
return SlotB, nil
|
||||
}
|
||||
return SlotA, nil
|
||||
}
|
||||
|
||||
// BootCounter returns the current boot counter value.
|
||||
func (e *Env) BootCounter() (int, error) {
|
||||
val, err := e.Get("boot_counter")
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
switch val {
|
||||
case "0":
|
||||
return 0, nil
|
||||
case "1":
|
||||
return 1, nil
|
||||
case "2":
|
||||
return 2, nil
|
||||
case "3":
|
||||
return 3, nil
|
||||
default:
|
||||
return -1, fmt.Errorf("grubenv: invalid boot_counter: %q", val)
|
||||
}
|
||||
}
|
||||
|
||||
// BootSuccess returns whether the last boot was marked successful.
|
||||
func (e *Env) BootSuccess() (bool, error) {
|
||||
val, err := e.Get("boot_success")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return val == "1", nil
|
||||
}
|
||||
|
||||
// MarkBootSuccess sets boot_success=1 and boot_counter=3.
|
||||
// Called by the health check after a successful boot.
|
||||
func (e *Env) MarkBootSuccess() error {
|
||||
if err := e.Set("boot_success", "1"); err != nil {
|
||||
return fmt.Errorf("setting boot_success: %w", err)
|
||||
}
|
||||
if err := e.Set("boot_counter", "3"); err != nil {
|
||||
return fmt.Errorf("setting boot_counter: %w", err)
|
||||
}
|
||||
slog.Info("boot marked successful")
|
||||
return nil
|
||||
}
|
||||
|
||||
// ActivateSlot switches the active slot and resets the boot counter.
|
||||
// Used after writing a new image to the passive partition.
|
||||
func (e *Env) ActivateSlot(slot string) error {
|
||||
if slot != SlotA && slot != SlotB {
|
||||
return fmt.Errorf("invalid slot: %q (must be A or B)", slot)
|
||||
}
|
||||
if err := e.Set("active_slot", slot); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Set("boot_counter", "3"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Set("boot_success", "0"); err != nil {
|
||||
return err
|
||||
}
|
||||
slog.Info("activated slot", "slot", slot)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ForceRollback switches to the other slot immediately.
|
||||
func (e *Env) ForceRollback() error {
|
||||
passive, err := e.PassiveSlot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return e.ActivateSlot(passive)
|
||||
}
|
||||
|
||||
// findEditenv locates the GRUB environment editor on PATH, trying both
// the Debian-style (grub-editenv) and Fedora-style (grub2-editenv) names.
func findEditenv() (string, error) {
	for _, name := range []string{"grub-editenv", "grub2-editenv"} {
		if path, err := exec.LookPath(name); err == nil {
			return path, nil
		}
	}
	return "", fmt.Errorf("grub-editenv not found")
}
|
||||
|
||||
// parseEnvOutput converts grub-editenv "list" output (or raw grubenv
// file content) into a key/value map. Blank lines and '#' comment lines
// (including GRUB's '#' padding) are skipped; lines without '=' are ignored.
func parseEnvOutput(output string) map[string]string {
	vars := make(map[string]string)
	for _, raw := range strings.Split(output, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Split at the first '='; the value may itself contain '='.
		if key, value, ok := strings.Cut(line, "="); ok {
			vars[key] = value
		}
	}
	return vars
}
|
||||
|
||||
// setManual writes to grubenv without grub-editenv (fallback).
|
||||
func (e *Env) setManual(key, value string) error {
|
||||
vars, err := e.readManual()
|
||||
if err != nil {
|
||||
vars = make(map[string]string)
|
||||
}
|
||||
vars[key] = value
|
||||
return e.writeManual(vars)
|
||||
}
|
||||
|
||||
// readManual reads grubenv without grub-editenv.
|
||||
func (e *Env) readManual() (map[string]string, error) {
|
||||
data, err := os.ReadFile(e.path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading grubenv: %w", err)
|
||||
}
|
||||
return parseEnvOutput(string(data)), nil
|
||||
}
|
||||
|
||||
// writeManual writes grubenv without grub-editenv.
|
||||
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
|
||||
func (e *Env) writeManual(vars map[string]string) error {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("# GRUB Environment Block\n")
|
||||
for k, v := range vars {
|
||||
sb.WriteString(k + "=" + v + "\n")
|
||||
}
|
||||
|
||||
content := sb.String()
|
||||
if len(content) > 1024 {
|
||||
return fmt.Errorf("grubenv content exceeds 1024 bytes")
|
||||
}
|
||||
|
||||
// Pad to 1024 bytes with '#'
|
||||
padding := 1024 - len(content)
|
||||
content += strings.Repeat("#", padding)
|
||||
|
||||
return os.WriteFile(e.path, []byte(content), 0o644)
|
||||
}
|
||||
423
update/pkg/grubenv/grubenv_test.go
Normal file
423
update/pkg/grubenv/grubenv_test.go
Normal file
@@ -0,0 +1,423 @@
|
||||
package grubenv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// createTestGrubenv writes a properly formatted grubenv file for testing.
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
func createTestGrubenv(t *testing.T, dir string, vars map[string]string) string {
	t.Helper()

	var b strings.Builder
	b.WriteString("# GRUB Environment Block\n")
	for k, v := range vars {
		b.WriteString(k)
		b.WriteString("=")
		b.WriteString(v)
		b.WriteString("\n")
	}

	content := b.String()
	if pad := 1024 - len(content); pad > 0 {
		content += strings.Repeat("#", pad)
	}

	path := filepath.Join(dir, "grubenv")
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}
	return path
}
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
env := New("")
|
||||
if env.path != DefaultGrubenvPath {
|
||||
t.Errorf("expected default path %s, got %s", DefaultGrubenvPath, env.path)
|
||||
}
|
||||
|
||||
env = New("/custom/path/grubenv")
|
||||
if env.path != "/custom/path/grubenv" {
|
||||
t.Errorf("expected custom path, got %s", env.path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadAll(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot: expected A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter: expected 3, got %s", vars["boot_counter"])
|
||||
}
|
||||
if vars["boot_success"] != "1" {
|
||||
t.Errorf("boot_success: expected 1, got %s", vars["boot_success"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "B",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
val, err := env.Get("active_slot")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if val != "B" {
|
||||
t.Errorf("expected B, got %s", val)
|
||||
}
|
||||
|
||||
_, err = env.Get("nonexistent")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nonexistent key")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSet(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
if err := env.Set("boot_counter", "2"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
val, err := env.Get("boot_counter")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if val != "2" {
|
||||
t.Errorf("expected 2 after set, got %s", val)
|
||||
}
|
||||
|
||||
// Verify file is still 1024 bytes
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(data) != 1024 {
|
||||
t.Errorf("grubenv should be 1024 bytes, got %d", len(data))
|
||||
}
|
||||
}
|
||||
|
||||
func TestActiveSlot(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
slot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if slot != "A" {
|
||||
t.Errorf("expected A, got %s", slot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPassiveSlot(t *testing.T) {
|
||||
tests := []struct {
|
||||
active string
|
||||
passive string
|
||||
}{
|
||||
{"A", "B"},
|
||||
{"B", "A"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("active_"+tt.active, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": tt.active,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
passive, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if passive != tt.passive {
|
||||
t.Errorf("expected passive %s, got %s", tt.passive, passive)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootCounter(t *testing.T) {
|
||||
tests := []struct {
|
||||
value string
|
||||
expect int
|
||||
wantErr bool
|
||||
}{
|
||||
{"0", 0, false},
|
||||
{"1", 1, false},
|
||||
{"2", 2, false},
|
||||
{"3", 3, false},
|
||||
{"invalid", -1, true},
|
||||
{"99", -1, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("counter_"+tt.value, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"boot_counter": tt.value,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
counter, err := env.BootCounter()
|
||||
if tt.wantErr {
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if counter != tt.expect {
|
||||
t.Errorf("expected %d, got %d", tt.expect, counter)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootSuccess(t *testing.T) {
|
||||
tests := []struct {
|
||||
value string
|
||||
expect bool
|
||||
}{
|
||||
{"0", false},
|
||||
{"1", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("success_"+tt.value, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"boot_success": tt.value,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if success != tt.expect {
|
||||
t.Errorf("expected %v, got %v", tt.expect, success)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarkBootSuccess(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "B",
|
||||
"boot_counter": "1",
|
||||
"boot_success": "0",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !success {
|
||||
t.Error("expected boot_success=1 after MarkBootSuccess")
|
||||
}
|
||||
|
||||
counter, err := env.BootCounter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if counter != 3 {
|
||||
t.Errorf("expected boot_counter=3 after MarkBootSuccess, got %d", counter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivateSlot(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.ActivateSlot("B"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
slot, _ := env.ActiveSlot()
|
||||
if slot != "B" {
|
||||
t.Errorf("expected active_slot=B, got %s", slot)
|
||||
}
|
||||
|
||||
counter, _ := env.BootCounter()
|
||||
if counter != 3 {
|
||||
t.Errorf("expected boot_counter=3, got %d", counter)
|
||||
}
|
||||
|
||||
success, _ := env.BootSuccess()
|
||||
if success {
|
||||
t.Error("expected boot_success=0 after ActivateSlot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivateSlotInvalid(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
err := env.ActivateSlot("C")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid slot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestForceRollback(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.ForceRollback(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
slot, _ := env.ActiveSlot()
|
||||
if slot != "B" {
|
||||
t.Errorf("expected active_slot=B after rollback from A, got %s", slot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEnvOutput(t *testing.T) {
|
||||
input := `# GRUB Environment Block
|
||||
active_slot=A
|
||||
boot_counter=3
|
||||
boot_success=1
|
||||
|
||||
`
|
||||
vars := parseEnvOutput(input)
|
||||
|
||||
if len(vars) != 3 {
|
||||
t.Errorf("expected 3 variables, got %d", len(vars))
|
||||
}
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot: expected A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter: expected 3, got %s", vars["boot_counter"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteManualFormat(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "grubenv")
|
||||
|
||||
env := New(path)
|
||||
// Use setManual directly since grub-editenv may not be available
|
||||
err := env.setManual("test_key", "test_value")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(data) != 1024 {
|
||||
t.Errorf("grubenv should be exactly 1024 bytes, got %d", len(data))
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(string(data), "# GRUB Environment Block\n") {
|
||||
t.Error("grubenv should start with '# GRUB Environment Block'")
|
||||
}
|
||||
|
||||
if !strings.Contains(string(data), "test_key=test_value\n") {
|
||||
t.Error("grubenv should contain test_key=test_value")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadNonexistentFile(t *testing.T) {
|
||||
env := New("/nonexistent/path/grubenv")
|
||||
_, err := env.ReadAll()
|
||||
if err == nil {
|
||||
t.Fatal("expected error reading nonexistent file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultipleSetOperations(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
// Simulate a boot cycle: decrement counter, then mark success
|
||||
if err := env.Set("boot_counter", "2"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := env.Set("boot_success", "0"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Now mark boot success
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify final state
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot should still be A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter should be 3 after mark success, got %s", vars["boot_counter"])
|
||||
}
|
||||
if vars["boot_success"] != "1" {
|
||||
t.Errorf("boot_success should be 1, got %s", vars["boot_success"])
|
||||
}
|
||||
}
|
||||
198
update/pkg/health/health.go
Normal file
198
update/pkg/health/health.go
Normal file
@@ -0,0 +1,198 @@
|
||||
// Package health implements post-boot health checks for KubeSolo OS.
|
||||
//
|
||||
// After booting a new system partition, the health check verifies that:
|
||||
// - containerd is running and responsive
|
||||
// - KubeSolo API server is reachable
|
||||
// - The Kubernetes node reaches Ready state
|
||||
//
|
||||
// If all checks pass, the GRUB environment is updated to mark the boot
|
||||
// as successful (boot_success=1). If any check fails, boot_success
|
||||
// remains 0 and GRUB will eventually roll back.
|
||||
package health
|
||||
|
||||
import (
	"context"
	"crypto/tls"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"time"
)
|
||||
|
||||
// Status represents the result of a health check. Each boolean records
// one subsystem check; Message carries a human-readable summary.
type Status struct {
	Containerd bool
	APIServer  bool
	NodeReady  bool
	Message    string
}

// IsHealthy returns true if all checks passed.
func (s *Status) IsHealthy() bool {
	for _, passed := range []bool{s.Containerd, s.APIServer, s.NodeReady} {
		if !passed {
			return false
		}
	}
	return true
}
|
||||
|
||||
// Checker performs health checks against the local KubeSolo instance.
|
||||
type Checker struct {
|
||||
kubeconfigPath string
|
||||
apiServerAddr string
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewChecker creates a health checker.
|
||||
func NewChecker(kubeconfigPath, apiServerAddr string, timeout time.Duration) *Checker {
|
||||
if kubeconfigPath == "" {
|
||||
kubeconfigPath = "/var/lib/kubesolo/pki/admin/admin.kubeconfig"
|
||||
}
|
||||
if apiServerAddr == "" {
|
||||
apiServerAddr = "127.0.0.1:6443"
|
||||
}
|
||||
if timeout == 0 {
|
||||
timeout = 120 * time.Second
|
||||
}
|
||||
return &Checker{
|
||||
kubeconfigPath: kubeconfigPath,
|
||||
apiServerAddr: apiServerAddr,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckContainerd verifies that containerd is running.
|
||||
func (c *Checker) CheckContainerd() bool {
|
||||
// Check if containerd socket exists
|
||||
if _, err := os.Stat("/run/containerd/containerd.sock"); err != nil {
|
||||
slog.Warn("containerd socket not found")
|
||||
return false
|
||||
}
|
||||
|
||||
// Try ctr version (bundled with KubeSolo)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "ctr", "--address", "/run/containerd/containerd.sock", "version")
|
||||
if err := cmd.Run(); err != nil {
|
||||
slog.Warn("containerd not responsive", "error", err)
|
||||
return false
|
||||
}
|
||||
|
||||
slog.Debug("containerd healthy")
|
||||
return true
|
||||
}
|
||||
|
||||
// CheckAPIServer verifies the Kubernetes API server is reachable.
|
||||
func (c *Checker) CheckAPIServer() bool {
|
||||
// TCP connect to API server port
|
||||
conn, err := net.DialTimeout("tcp", c.apiServerAddr, 5*time.Second)
|
||||
if err != nil {
|
||||
slog.Warn("API server not reachable", "addr", c.apiServerAddr, "error", err)
|
||||
return false
|
||||
}
|
||||
conn.Close()
|
||||
|
||||
// Try HTTPS health endpoint (skip TLS verify for localhost)
|
||||
client := &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSHandshakeTimeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := client.Get("https://" + c.apiServerAddr + "/healthz")
|
||||
if err != nil {
|
||||
// TLS error is expected without proper CA, but TCP connect succeeded
|
||||
slog.Debug("API server TCP reachable but HTTPS check skipped", "error", err)
|
||||
return true
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
slog.Debug("API server healthy", "status", resp.StatusCode)
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("API server unhealthy", "status", resp.StatusCode)
|
||||
return false
|
||||
}
|
||||
|
||||
// CheckNodeReady uses kubectl to verify the node is in Ready state.
|
||||
func (c *Checker) CheckNodeReady() bool {
|
||||
if _, err := os.Stat(c.kubeconfigPath); err != nil {
|
||||
slog.Warn("kubeconfig not found", "path", c.kubeconfigPath)
|
||||
return false
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "kubectl",
|
||||
"--kubeconfig", c.kubeconfigPath,
|
||||
"get", "nodes",
|
||||
"-o", "jsonpath={.items[0].status.conditions[?(@.type==\"Ready\")].status}",
|
||||
)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
slog.Warn("kubectl get nodes failed", "error", err)
|
||||
return false
|
||||
}
|
||||
|
||||
status := strings.TrimSpace(string(output))
|
||||
if status == "True" {
|
||||
slog.Debug("node is Ready")
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("node not Ready", "status", status)
|
||||
return false
|
||||
}
|
||||
|
||||
// RunAll performs all health checks and returns the combined status.
|
||||
func (c *Checker) RunAll() *Status {
|
||||
return &Status{
|
||||
Containerd: c.CheckContainerd(),
|
||||
APIServer: c.CheckAPIServer(),
|
||||
NodeReady: c.CheckNodeReady(),
|
||||
}
|
||||
}
|
||||
|
||||
// WaitForHealthy polls health checks until all pass or timeout expires.
|
||||
func (c *Checker) WaitForHealthy() (*Status, error) {
|
||||
deadline := time.Now().Add(c.timeout)
|
||||
interval := 5 * time.Second
|
||||
|
||||
slog.Info("waiting for system health", "timeout", c.timeout)
|
||||
|
||||
for time.Now().Before(deadline) {
|
||||
status := c.RunAll()
|
||||
if status.IsHealthy() {
|
||||
status.Message = "all checks passed"
|
||||
slog.Info("system healthy",
|
||||
"containerd", status.Containerd,
|
||||
"apiserver", status.APIServer,
|
||||
"node_ready", status.NodeReady,
|
||||
)
|
||||
return status, nil
|
||||
}
|
||||
|
||||
slog.Debug("health check pending",
|
||||
"containerd", status.Containerd,
|
||||
"apiserver", status.APIServer,
|
||||
"node_ready", status.NodeReady,
|
||||
"remaining", time.Until(deadline).Round(time.Second),
|
||||
)
|
||||
|
||||
time.Sleep(interval)
|
||||
}
|
||||
|
||||
// Final check
|
||||
status := c.RunAll()
|
||||
if status.IsHealthy() {
|
||||
status.Message = "all checks passed"
|
||||
return status, nil
|
||||
}
|
||||
|
||||
status.Message = "health check timeout"
|
||||
return status, fmt.Errorf("health check timed out after %s", c.timeout)
|
||||
}
|
||||
86
update/pkg/health/health_test.go
Normal file
86
update/pkg/health/health_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestStatusIsHealthy(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
status Status
|
||||
wantHealth bool
|
||||
}{
|
||||
{
|
||||
name: "all healthy",
|
||||
status: Status{Containerd: true, APIServer: true, NodeReady: true},
|
||||
wantHealth: true,
|
||||
},
|
||||
{
|
||||
name: "containerd down",
|
||||
status: Status{Containerd: false, APIServer: true, NodeReady: true},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "apiserver down",
|
||||
status: Status{Containerd: true, APIServer: false, NodeReady: true},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "node not ready",
|
||||
status: Status{Containerd: true, APIServer: true, NodeReady: false},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "all down",
|
||||
status: Status{Containerd: false, APIServer: false, NodeReady: false},
|
||||
wantHealth: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := tt.status.IsHealthy(); got != tt.wantHealth {
|
||||
t.Errorf("IsHealthy() = %v, want %v", got, tt.wantHealth)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChecker(t *testing.T) {
|
||||
// Test defaults
|
||||
c := NewChecker("", "", 0)
|
||||
if c.kubeconfigPath != "/var/lib/kubesolo/pki/admin/admin.kubeconfig" {
|
||||
t.Errorf("unexpected default kubeconfig: %s", c.kubeconfigPath)
|
||||
}
|
||||
if c.apiServerAddr != "127.0.0.1:6443" {
|
||||
t.Errorf("unexpected default apiserver addr: %s", c.apiServerAddr)
|
||||
}
|
||||
if c.timeout != 120*time.Second {
|
||||
t.Errorf("unexpected default timeout: %v", c.timeout)
|
||||
}
|
||||
|
||||
// Test custom values
|
||||
c = NewChecker("/custom/kubeconfig", "10.0.0.1:6443", 30*time.Second)
|
||||
if c.kubeconfigPath != "/custom/kubeconfig" {
|
||||
t.Errorf("expected custom kubeconfig, got %s", c.kubeconfigPath)
|
||||
}
|
||||
if c.apiServerAddr != "10.0.0.1:6443" {
|
||||
t.Errorf("expected custom addr, got %s", c.apiServerAddr)
|
||||
}
|
||||
if c.timeout != 30*time.Second {
|
||||
t.Errorf("expected 30s timeout, got %v", c.timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusMessage(t *testing.T) {
|
||||
s := &Status{
|
||||
Containerd: true,
|
||||
APIServer: true,
|
||||
NodeReady: true,
|
||||
Message: "all checks passed",
|
||||
}
|
||||
if s.Message != "all checks passed" {
|
||||
t.Errorf("unexpected message: %s", s.Message)
|
||||
}
|
||||
}
|
||||
180
update/pkg/image/image.go
Normal file
180
update/pkg/image/image.go
Normal file
@@ -0,0 +1,180 @@
|
||||
// Package image handles downloading, verifying, and staging OS update images.
|
||||
//
|
||||
// Update images are distributed as pairs of files:
|
||||
// - vmlinuz (kernel)
|
||||
// - kubesolo-os.gz (initramfs)
|
||||
//
|
||||
// These are fetched from an HTTP(S) server that provides a metadata file
|
||||
// (latest.json) describing available updates.
|
||||
package image
|
||||
|
||||
import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"
)
|
||||
|
||||
// UpdateMetadata describes an available update from the update server.
// It mirrors the JSON schema of the server's latest.json document.
type UpdateMetadata struct {
	Version         string `json:"version"`                 // release version; required, rejected if empty
	VmlinuzURL      string `json:"vmlinuz_url"`             // where to fetch the kernel
	VmlinuzSHA256   string `json:"vmlinuz_sha256"`          // expected kernel digest (lowercase hex)
	InitramfsURL    string `json:"initramfs_url"`           // where to fetch the initramfs
	InitramfsSHA256 string `json:"initramfs_sha256"`        // expected initramfs digest (lowercase hex)
	ReleaseNotes    string `json:"release_notes,omitempty"` // optional human-readable notes
	ReleaseDate     string `json:"release_date,omitempty"`  // optional release date string
}

// StagedImage represents downloaded and verified update files.
type StagedImage struct {
	VmlinuzPath   string // staged kernel on local disk
	InitramfsPath string // staged initramfs on local disk
	Version       string // version the staged files belong to
}

// Client handles communication with the update server.
type Client struct {
	serverURL  string       // base URL of the update server
	httpClient *http.Client // shared client with an overall request timeout
	stageDir   string       // directory downloads are staged into (and removed from on Cleanup)
}
|
||||
|
||||
// NewClient creates a new update image client.
|
||||
func NewClient(serverURL, stageDir string) *Client {
|
||||
return &Client{
|
||||
serverURL: serverURL,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 5 * time.Minute,
|
||||
},
|
||||
stageDir: stageDir,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckForUpdate fetches the latest update metadata from the server.
|
||||
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
|
||||
url := c.serverURL + "/latest.json"
|
||||
slog.Info("checking for update", "url", url)
|
||||
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetching update metadata: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var meta UpdateMetadata
|
||||
if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
|
||||
return nil, fmt.Errorf("parsing update metadata: %w", err)
|
||||
}
|
||||
|
||||
if meta.Version == "" {
|
||||
return nil, fmt.Errorf("update metadata missing version")
|
||||
}
|
||||
|
||||
return &meta, nil
|
||||
}
|
||||
|
||||
// Download fetches the update files and verifies their checksums.
|
||||
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
|
||||
if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("creating stage dir: %w", err)
|
||||
}
|
||||
|
||||
vmlinuzPath := filepath.Join(c.stageDir, "vmlinuz")
|
||||
initramfsPath := filepath.Join(c.stageDir, "kubesolo-os.gz")
|
||||
|
||||
slog.Info("downloading vmlinuz", "url", meta.VmlinuzURL)
|
||||
if err := c.downloadAndVerify(meta.VmlinuzURL, vmlinuzPath, meta.VmlinuzSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading vmlinuz: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("downloading initramfs", "url", meta.InitramfsURL)
|
||||
if err := c.downloadAndVerify(meta.InitramfsURL, initramfsPath, meta.InitramfsSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading initramfs: %w", err)
|
||||
}
|
||||
|
||||
return &StagedImage{
|
||||
VmlinuzPath: vmlinuzPath,
|
||||
InitramfsPath: initramfsPath,
|
||||
Version: meta.Version,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Cleanup removes staged update files.
// It deletes the entire stage directory; a missing directory is not an
// error (os.RemoveAll returns nil in that case).
func (c *Client) Cleanup() error {
	return os.RemoveAll(c.stageDir)
}
|
||||
|
||||
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
|
||||
}
|
||||
|
||||
f, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating %s: %w", dest, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
hasher := sha256.New()
|
||||
writer := io.MultiWriter(f, hasher)
|
||||
|
||||
written, err := io.Copy(writer, resp.Body)
|
||||
if err != nil {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("writing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return fmt.Errorf("closing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
// Verify checksum
|
||||
if expectedSHA256 != "" {
|
||||
actual := hex.EncodeToString(hasher.Sum(nil))
|
||||
if actual != expectedSHA256 {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
|
||||
}
|
||||
slog.Debug("checksum verified", "file", dest, "sha256", actual)
|
||||
}
|
||||
|
||||
slog.Info("downloaded", "file", dest, "size", written)
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyFile checks the SHA256 checksum of an existing file.
// It returns nil when the file's digest equals expectedSHA256 (lowercase
// hex) and a descriptive error otherwise.
func VerifyFile(path, expectedSHA256 string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}

	if got := hex.EncodeToString(h.Sum(nil)); got != expectedSHA256 {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, got)
	}
	return nil
}
|
||||
241
update/pkg/image/image_test.go
Normal file
241
update/pkg/image/image_test.go
Normal file
@@ -0,0 +1,241 @@
|
||||
package image
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCheckForUpdate(t *testing.T) {
|
||||
meta := UpdateMetadata{
|
||||
Version: "1.2.0",
|
||||
VmlinuzURL: "/vmlinuz",
|
||||
VmlinuzSHA256: "abc123",
|
||||
InitramfsURL: "/kubesolo-os.gz",
|
||||
InitramfsSHA256: "def456",
|
||||
ReleaseNotes: "Bug fixes",
|
||||
ReleaseDate: "2025-01-15",
|
||||
}
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/latest.json" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
json.NewEncoder(w).Encode(meta)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
got, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if got.Version != "1.2.0" {
|
||||
t.Errorf("expected version 1.2.0, got %s", got.Version)
|
||||
}
|
||||
if got.VmlinuzSHA256 != "abc123" {
|
||||
t.Errorf("expected vmlinuz sha abc123, got %s", got.VmlinuzSHA256)
|
||||
}
|
||||
if got.ReleaseNotes != "Bug fixes" {
|
||||
t.Errorf("expected release notes, got %s", got.ReleaseNotes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckForUpdateMissingVersion(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
json.NewEncoder(w).Encode(UpdateMetadata{})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
_, err := client.CheckForUpdate()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing version")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckForUpdateServerError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
_, err := client.CheckForUpdate()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for server error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadAndVerify(t *testing.T) {
|
||||
// Create test content
|
||||
vmlinuzContent := []byte("fake vmlinuz content for testing")
|
||||
initramfsContent := []byte("fake initramfs content for testing")
|
||||
|
||||
vmlinuzHash := sha256.Sum256(vmlinuzContent)
|
||||
initramfsHash := sha256.Sum256(initramfsContent)
|
||||
|
||||
meta := UpdateMetadata{
|
||||
Version: "2.0.0",
|
||||
VmlinuzSHA256: hex.EncodeToString(vmlinuzHash[:]),
|
||||
InitramfsSHA256: hex.EncodeToString(initramfsHash[:]),
|
||||
}
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/latest.json":
|
||||
m := meta
|
||||
m.VmlinuzURL = "http://" + r.Host + "/vmlinuz"
|
||||
m.InitramfsURL = "http://" + r.Host + "/kubesolo-os.gz"
|
||||
json.NewEncoder(w).Encode(m)
|
||||
case "/vmlinuz":
|
||||
w.Write(vmlinuzContent)
|
||||
case "/kubesolo-os.gz":
|
||||
w.Write(initramfsContent)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
client := NewClient(server.URL, stageDir)
|
||||
defer client.Cleanup()
|
||||
|
||||
// First get metadata
|
||||
gotMeta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Download
|
||||
staged, err := client.Download(gotMeta)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if staged.Version != "2.0.0" {
|
||||
t.Errorf("expected version 2.0.0, got %s", staged.Version)
|
||||
}
|
||||
|
||||
// Verify files exist
|
||||
if _, err := os.Stat(staged.VmlinuzPath); err != nil {
|
||||
t.Errorf("vmlinuz not found: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(staged.InitramfsPath); err != nil {
|
||||
t.Errorf("initramfs not found: %v", err)
|
||||
}
|
||||
|
||||
// Verify content
|
||||
data, _ := os.ReadFile(staged.VmlinuzPath)
|
||||
if string(data) != string(vmlinuzContent) {
|
||||
t.Error("vmlinuz content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadChecksumMismatch(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/vmlinuz":
|
||||
w.Write([]byte("actual content"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
client := NewClient(server.URL, stageDir)
|
||||
|
||||
meta := &UpdateMetadata{
|
||||
Version: "1.0.0",
|
||||
VmlinuzURL: server.URL + "/vmlinuz",
|
||||
VmlinuzSHA256: "wrong_checksum_value",
|
||||
InitramfsURL: server.URL + "/initramfs",
|
||||
}
|
||||
|
||||
_, err := client.Download(meta)
|
||||
if err == nil {
|
||||
t.Fatal("expected checksum mismatch error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyFile(t *testing.T) {
|
||||
content := []byte("test file content for verification")
|
||||
hash := sha256.Sum256(content)
|
||||
expected := hex.EncodeToString(hash[:])
|
||||
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "testfile")
|
||||
if err := os.WriteFile(path, content, 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Should pass with correct hash
|
||||
if err := VerifyFile(path, expected); err != nil {
|
||||
t.Errorf("expected verification to pass: %v", err)
|
||||
}
|
||||
|
||||
// Should fail with wrong hash
|
||||
if err := VerifyFile(path, "deadbeef"); err == nil {
|
||||
t.Error("expected verification to fail with wrong hash")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyFileNotFound(t *testing.T) {
|
||||
err := VerifyFile("/nonexistent/file", "abc123")
|
||||
if err == nil {
|
||||
t.Error("expected error for nonexistent file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanup(t *testing.T) {
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
os.MkdirAll(stageDir, 0o755)
|
||||
os.WriteFile(filepath.Join(stageDir, "test"), []byte("data"), 0o644)
|
||||
|
||||
client := NewClient("http://unused", stageDir)
|
||||
if err := client.Cleanup(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(stageDir); !os.IsNotExist(err) {
|
||||
t.Error("stage dir should be removed after cleanup")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateMetadataJSON(t *testing.T) {
|
||||
meta := UpdateMetadata{
|
||||
Version: "1.0.0",
|
||||
VmlinuzURL: "https://example.com/vmlinuz",
|
||||
VmlinuzSHA256: "abc",
|
||||
InitramfsURL: "https://example.com/kubesolo-os.gz",
|
||||
InitramfsSHA256: "def",
|
||||
ReleaseNotes: "Initial release",
|
||||
ReleaseDate: "2025-01-01",
|
||||
}
|
||||
|
||||
data, err := json.Marshal(meta)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var decoded UpdateMetadata
|
||||
if err := json.Unmarshal(data, &decoded); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if decoded.Version != meta.Version {
|
||||
t.Errorf("version mismatch: %s != %s", decoded.Version, meta.Version)
|
||||
}
|
||||
if decoded.ReleaseDate != meta.ReleaseDate {
|
||||
t.Errorf("release date mismatch: %s != %s", decoded.ReleaseDate, meta.ReleaseDate)
|
||||
}
|
||||
}
|
||||
139
update/pkg/partition/partition.go
Normal file
139
update/pkg/partition/partition.go
Normal file
@@ -0,0 +1,139 @@
|
||||
// Package partition detects and manages A/B system partitions.
|
||||
//
|
||||
// It identifies System A and System B partitions by label (KSOLOA, KSOLOB)
|
||||
// and provides mount/write operations for the update process.
|
||||
package partition
|
||||
|
||||
import (
	"fmt"
	"io"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)
|
||||
|
||||
// Filesystem labels assigned at image-creation time. The update agent
// locates partitions exclusively by these labels (via blkid), never by
// device path, so the layout survives device renaming.
const (
	LabelSystemA = "KSOLOA"    // system slot A (kernel + initramfs)
	LabelSystemB = "KSOLOB"    // system slot B (kernel + initramfs)
	LabelData    = "KSOLODATA" // persistent data partition
	LabelEFI     = "KSOLOEFI"  // EFI system partition holding GRUB
)

// Info contains information about a partition.
type Info struct {
	Device     string // e.g. /dev/sda2
	Label      string // e.g. KSOLOA
	MountPoint string // current mount point, empty if not mounted
	Slot       string // "A" or "B"
}
|
||||
|
||||
// FindByLabel locates a block device by its filesystem label using
// `blkid -L`. It returns the device path (e.g. /dev/sda2) or an error
// when no partition carries the label.
func FindByLabel(label string) (string, error) {
	out, err := exec.Command("blkid", "-L", label).Output()
	if err != nil {
		return "", fmt.Errorf("partition with label %q not found: %w", label, err)
	}
	return strings.TrimSpace(string(out)), nil
}
|
||||
|
||||
// GetSlotPartition returns the partition info for the given slot ("A" or "B").
|
||||
func GetSlotPartition(slot string) (*Info, error) {
|
||||
var label string
|
||||
switch slot {
|
||||
case "A":
|
||||
label = LabelSystemA
|
||||
case "B":
|
||||
label = LabelSystemB
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid slot: %q", slot)
|
||||
}
|
||||
|
||||
dev, err := FindByLabel(label)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Info{
|
||||
Device: dev,
|
||||
Label: label,
|
||||
Slot: slot,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// mountDevice creates mountPoint and mounts dev there, passing extraArgs
// (e.g. "-o ro") through to mount(8). mode is only used for logging.
// Shared by MountReadOnly and MountReadWrite to keep the two in lockstep.
func mountDevice(dev, mountPoint, mode string, extraArgs []string) error {
	if err := os.MkdirAll(mountPoint, 0o755); err != nil {
		return fmt.Errorf("creating mount point: %w", err)
	}
	cmd := exec.Command("mount", append(extraArgs, dev, mountPoint)...)
	if output, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("mounting %s at %s: %w\n%s", dev, mountPoint, err, output)
	}
	slog.Debug("mounted", "device", dev, "mountpoint", mountPoint, "mode", mode)
	return nil
}

// MountReadOnly mounts a partition read-only at the given mount point.
func MountReadOnly(dev, mountPoint string) error {
	return mountDevice(dev, mountPoint, "ro", []string{"-o", "ro"})
}

// MountReadWrite mounts a partition read-write at the given mount point.
func MountReadWrite(dev, mountPoint string) error {
	return mountDevice(dev, mountPoint, "rw", nil)
}
|
||||
|
||||
// Unmount detaches the filesystem mounted at mountPoint via umount(8).
func Unmount(mountPoint string) error {
	out, err := exec.Command("umount", mountPoint).CombinedOutput()
	if err != nil {
		return fmt.Errorf("unmounting %s: %w\n%s", mountPoint, err, out)
	}
	return nil
}

// ReadVersion reads the version file from a mounted system partition,
// returning its contents with surrounding whitespace trimmed.
func ReadVersion(mountPoint string) (string, error) {
	raw, err := os.ReadFile(filepath.Join(mountPoint, "version"))
	if err != nil {
		return "", fmt.Errorf("reading version: %w", err)
	}
	return strings.TrimSpace(string(raw)), nil
}
|
||||
|
||||
// WriteSystemImage copies vmlinuz and initramfs to a mounted partition.
|
||||
func WriteSystemImage(mountPoint, vmlinuzPath, initramfsPath, version string) error {
|
||||
// Copy vmlinuz
|
||||
if err := copyFile(vmlinuzPath, filepath.Join(mountPoint, "vmlinuz")); err != nil {
|
||||
return fmt.Errorf("writing vmlinuz: %w", err)
|
||||
}
|
||||
|
||||
// Copy initramfs
|
||||
if err := copyFile(initramfsPath, filepath.Join(mountPoint, "kubesolo-os.gz")); err != nil {
|
||||
return fmt.Errorf("writing initramfs: %w", err)
|
||||
}
|
||||
|
||||
// Write version
|
||||
if err := os.WriteFile(filepath.Join(mountPoint, "version"), []byte(version+"\n"), 0o644); err != nil {
|
||||
return fmt.Errorf("writing version: %w", err)
|
||||
}
|
||||
|
||||
// Sync to ensure data is flushed to disk
|
||||
exec.Command("sync").Run()
|
||||
|
||||
slog.Info("system image written", "mountpoint", mountPoint, "version", version)
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyFile(src, dst string) error {
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(dst, data, 0o644)
|
||||
}
|
||||
129
update/pkg/partition/partition_test.go
Normal file
129
update/pkg/partition/partition_test.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package partition
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadVersion(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
versionFile := filepath.Join(dir, "version")
|
||||
if err := os.WriteFile(versionFile, []byte("1.2.3\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
version, err := ReadVersion(dir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if version != "1.2.3" {
|
||||
t.Errorf("expected 1.2.3, got %s", version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadVersionMissing(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
_, err := ReadVersion(dir)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing version file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteSystemImage(t *testing.T) {
|
||||
mountPoint := t.TempDir()
|
||||
srcDir := t.TempDir()
|
||||
|
||||
// Create source files
|
||||
vmlinuzPath := filepath.Join(srcDir, "vmlinuz")
|
||||
initramfsPath := filepath.Join(srcDir, "kubesolo-os.gz")
|
||||
|
||||
if err := os.WriteFile(vmlinuzPath, []byte("kernel data"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(initramfsPath, []byte("initramfs data"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := WriteSystemImage(mountPoint, vmlinuzPath, initramfsPath, "2.0.0"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify files were copied
|
||||
data, err := os.ReadFile(filepath.Join(mountPoint, "vmlinuz"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "kernel data" {
|
||||
t.Errorf("vmlinuz content mismatch")
|
||||
}
|
||||
|
||||
data, err = os.ReadFile(filepath.Join(mountPoint, "kubesolo-os.gz"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "initramfs data" {
|
||||
t.Errorf("initramfs content mismatch")
|
||||
}
|
||||
|
||||
// Verify version file
|
||||
version, err := ReadVersion(mountPoint)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if version != "2.0.0" {
|
||||
t.Errorf("expected version 2.0.0, got %s", version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
src := filepath.Join(dir, "src")
|
||||
dst := filepath.Join(dir, "dst")
|
||||
|
||||
if err := os.WriteFile(src, []byte("test content"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := copyFile(src, dst); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(dst)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "test content" {
|
||||
t.Errorf("copy content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyFileNotFound(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
err := copyFile("/nonexistent", filepath.Join(dir, "dst"))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nonexistent source")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSlotPartitionInvalid(t *testing.T) {
|
||||
_, err := GetSlotPartition("C")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid slot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConstants(t *testing.T) {
|
||||
if LabelSystemA != "KSOLOA" {
|
||||
t.Errorf("unexpected LabelSystemA: %s", LabelSystemA)
|
||||
}
|
||||
if LabelSystemB != "KSOLOB" {
|
||||
t.Errorf("unexpected LabelSystemB: %s", LabelSystemB)
|
||||
}
|
||||
if LabelData != "KSOLODATA" {
|
||||
t.Errorf("unexpected LabelData: %s", LabelData)
|
||||
}
|
||||
if LabelEFI != "KSOLOEFI" {
|
||||
t.Errorf("unexpected LabelEFI: %s", LabelEFI)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user