feat: add A/B partition updates with GRUB and Go update agent (Phase 3)
Implement atomic OS updates via A/B partition scheme with automatic rollback. GRUB bootloader manages slot selection with a 3-attempt boot counter that auto-rolls back on repeated health check failures. GRUB boot config: - A/B slot selection with boot_counter/boot_success env vars - Automatic rollback when counter reaches 0 (3 failed boots) - Debug, emergency shell, and manual slot-switch menu entries Disk image (refactored): - 4-partition GPT layout: EFI + System A + System B + Data - GRUB EFI/BIOS installation with graceful fallbacks - Both system partitions populated during image creation Update agent (Go, zero external deps): - pkg/grubenv: read/write GRUB env vars (grub-editenv + manual fallback) - pkg/partition: find/mount/write system partitions by label - pkg/image: HTTP download with SHA256 verification - pkg/health: post-boot checks (containerd, API server, node Ready) - 6 CLI commands: check, apply, activate, rollback, healthcheck, status - 37 unit tests across all 4 packages Deployment: - K8s CronJob for automatic update checks (every 6 hours) - ConfigMap for update server URL - Health check Job for post-boot verification Build pipeline: - build-update-agent.sh compiles static Linux binary (~5.9 MB) - inject-kubesolo.sh includes update agent in initramfs - Makefile: build-update-agent, test-update-agent, test-update targets Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
60
Makefile
60
Makefile
@@ -1,6 +1,6 @@
|
||||
.PHONY: all fetch build-cloudinit rootfs initramfs iso disk-image \
|
||||
.PHONY: all fetch build-cloudinit build-update-agent rootfs initramfs iso disk-image \
|
||||
test-boot test-k8s test-persistence test-deploy test-storage test-all \
|
||||
test-cloudinit \
|
||||
test-cloudinit test-update-agent \
|
||||
dev-vm dev-vm-shell quick docker-build shellcheck \
|
||||
kernel-audit clean distclean help
|
||||
|
||||
@@ -32,7 +32,11 @@ build-cloudinit:
|
||||
@echo "==> Building cloud-init binary..."
|
||||
$(BUILD_DIR)/scripts/build-cloudinit.sh
|
||||
|
||||
rootfs: fetch build-cloudinit
|
||||
build-update-agent:
|
||||
@echo "==> Building update agent..."
|
||||
$(BUILD_DIR)/scripts/build-update-agent.sh
|
||||
|
||||
rootfs: fetch build-cloudinit build-update-agent
|
||||
@echo "==> Preparing rootfs..."
|
||||
$(BUILD_DIR)/scripts/extract-core.sh
|
||||
$(BUILD_DIR)/scripts/inject-kubesolo.sh
|
||||
@@ -88,6 +92,20 @@ test-cloudinit:
|
||||
@echo "==> Testing cloud-init parser..."
|
||||
cd cloud-init && go test ./... -v -count=1
|
||||
|
||||
# Update agent Go tests
|
||||
test-update-agent:
|
||||
@echo "==> Testing update agent..."
|
||||
cd update && go test ./... -v -count=1
|
||||
|
||||
# A/B update integration tests
|
||||
test-update: disk-image
|
||||
@echo "==> Testing A/B update cycle..."
|
||||
test/qemu/test-update.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).img
|
||||
|
||||
test-rollback: disk-image
|
||||
@echo "==> Testing rollback..."
|
||||
test/qemu/test-rollback.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).img
|
||||
|
||||
# Full integration test suite (requires more time)
|
||||
test-integration: test-k8s test-deploy test-storage
|
||||
|
||||
@@ -157,24 +175,28 @@ help:
|
||||
@echo "KubeSolo OS Build System (v$(VERSION))"
|
||||
@echo ""
|
||||
@echo "Build targets:"
|
||||
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
||||
@echo " make build-cloudinit Build cloud-init Go binary"
|
||||
@echo " make rootfs Extract + prepare rootfs with KubeSolo"
|
||||
@echo " make initramfs Repack rootfs into kubesolo-os.gz"
|
||||
@echo " make iso Create bootable ISO (default target)"
|
||||
@echo " make disk-image Create raw disk image with boot + data partitions"
|
||||
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
||||
@echo " make docker-build Reproducible build inside Docker"
|
||||
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
||||
@echo " make build-cloudinit Build cloud-init Go binary"
|
||||
@echo " make build-update-agent Build update agent Go binary"
|
||||
@echo " make rootfs Extract + prepare rootfs with KubeSolo"
|
||||
@echo " make initramfs Repack rootfs into kubesolo-os.gz"
|
||||
@echo " make iso Create bootable ISO (default target)"
|
||||
@echo " make disk-image Create raw disk image with A/B partitions + GRUB"
|
||||
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
||||
@echo " make docker-build Reproducible build inside Docker"
|
||||
@echo ""
|
||||
@echo "Test targets:"
|
||||
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
||||
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
||||
@echo " make test-persist Reboot disk image, verify state persists"
|
||||
@echo " make test-deploy Deploy nginx pod, verify Running"
|
||||
@echo " make test-storage Test PVC with local-path provisioner"
|
||||
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
||||
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
||||
@echo " make test-integ Run full integration suite"
|
||||
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
||||
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
||||
@echo " make test-persist Reboot disk image, verify state persists"
|
||||
@echo " make test-deploy Deploy nginx pod, verify Running"
|
||||
@echo " make test-storage Test PVC with local-path provisioner"
|
||||
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
||||
@echo " make test-update-agent Run update agent Go unit tests"
|
||||
@echo " make test-update A/B update cycle integration test"
|
||||
@echo " make test-rollback Forced rollback integration test"
|
||||
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
||||
@echo " make test-integ Run full integration suite"
|
||||
@echo ""
|
||||
@echo "Dev targets:"
|
||||
@echo " make dev-vm Launch interactive QEMU VM"
|
||||
|
||||
11
build/grub/grub-env-defaults
Normal file
11
build/grub/grub-env-defaults
Normal file
@@ -0,0 +1,11 @@
|
||||
# KubeSolo OS — Default GRUB Environment Variables
|
||||
# These are written to grubenv on first install.
|
||||
# Format: key=value (one per line, grub-editenv compatible)
|
||||
#
|
||||
# active_slot: Which system partition to boot (A or B)
|
||||
# boot_counter: Attempts remaining before rollback (3 = fresh, 0 = rollback)
|
||||
# boot_success: Set to 1 by health check after successful boot
|
||||
|
||||
active_slot=A
|
||||
boot_counter=3
|
||||
boot_success=1
|
||||
95
build/grub/grub.cfg
Normal file
95
build/grub/grub.cfg
Normal file
@@ -0,0 +1,95 @@
|
||||
# KubeSolo OS — GRUB Configuration
|
||||
# A/B partition boot with automatic rollback
|
||||
#
|
||||
# Partition layout:
|
||||
# (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
|
||||
# (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||
# (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
|
||||
#
|
||||
# Environment variables (in grubenv):
|
||||
# active_slot — "A" or "B" (which partition to boot)
|
||||
# boot_counter — 3→2→1→0 (decremented on each failed boot)
|
||||
# boot_success — 0 or 1 (set to 1 by health check post-boot)
|
||||
|
||||
set default=0
|
||||
set timeout=3
|
||||
|
||||
# Load saved environment
|
||||
load_env
|
||||
|
||||
# --- A/B Rollback Logic ---
|
||||
# On every boot, check if the last boot was successful.
|
||||
# If not, decrement the counter. If counter hits 0, swap slots.
|
||||
|
||||
if [ "${boot_success}" != "1" ]; then
|
||||
# Last boot failed — check counter
|
||||
if [ "${boot_counter}" = "0" ]; then
|
||||
# Counter exhausted — rollback to other slot
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set active_slot=B
|
||||
else
|
||||
set active_slot=A
|
||||
fi
|
||||
save_env active_slot
|
||||
set boot_counter=3
|
||||
save_env boot_counter
|
||||
else
|
||||
# Decrement counter (GRUB doesn't have arithmetic)
|
||||
if [ "${boot_counter}" = "3" ]; then
|
||||
set boot_counter=2
|
||||
elif [ "${boot_counter}" = "2" ]; then
|
||||
set boot_counter=1
|
||||
elif [ "${boot_counter}" = "1" ]; then
|
||||
set boot_counter=0
|
||||
fi
|
||||
save_env boot_counter
|
||||
fi
|
||||
fi
|
||||
|
||||
# Reset boot_success for this boot attempt — health check must set it to 1
|
||||
set boot_success=0
|
||||
save_env boot_success
|
||||
|
||||
# --- Resolve boot partition ---
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set root='(hd0,gpt2)'
|
||||
set slot_label="System A"
|
||||
else
|
||||
set root='(hd0,gpt3)'
|
||||
set slot_label="System B"
|
||||
fi
|
||||
|
||||
# --- Menu Entries ---
|
||||
|
||||
menuentry "KubeSolo OS (${slot_label})" {
|
||||
echo "Booting KubeSolo OS from ${slot_label}..."
|
||||
echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA quiet
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
|
||||
echo "Booting KubeSolo OS (debug) from ${slot_label}..."
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS — Emergency Shell" {
|
||||
echo "Booting to emergency shell..."
|
||||
linux /vmlinuz kubesolo.shell console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
|
||||
menuentry "KubeSolo OS — Boot Other Slot" {
|
||||
# Manually boot the passive slot (for testing)
|
||||
if [ "${active_slot}" = "A" ]; then
|
||||
set root='(hd0,gpt3)'
|
||||
echo "Booting from System B (passive)..."
|
||||
else
|
||||
set root='(hd0,gpt2)'
|
||||
echo "Booting from System A (passive)..."
|
||||
fi
|
||||
linux /vmlinuz kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
initrd /kubesolo-os.gz
|
||||
}
|
||||
29
build/scripts/build-update-agent.sh
Executable file
29
build/scripts/build-update-agent.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
# build-update-agent.sh — Compile the KubeSolo OS update agent
|
||||
#
|
||||
# Builds a static Linux binary for the update agent.
|
||||
# Output: build/cache/kubesolo-update
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
UPDATE_DIR="$PROJECT_ROOT/update"
|
||||
CACHE_DIR="$PROJECT_ROOT/build/cache"
|
||||
OUTPUT="$CACHE_DIR/kubesolo-update"
|
||||
|
||||
echo "=== Building KubeSolo Update Agent ==="
|
||||
|
||||
# Ensure output dir exists
|
||||
mkdir -p "$CACHE_DIR"
|
||||
|
||||
# Run tests first
|
||||
echo "--- Running tests ---"
|
||||
(cd "$UPDATE_DIR" && go test ./... -count=1)
|
||||
|
||||
# Build static binary
|
||||
echo "--- Compiling static binary ---"
|
||||
(cd "$UPDATE_DIR" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -ldflags='-s -w' -o "$OUTPUT" .)
|
||||
|
||||
SIZE=$(ls -lh "$OUTPUT" | awk '{print $5}')
|
||||
echo "--- Update agent built: $OUTPUT ($SIZE) ---"
|
||||
@@ -1,6 +1,11 @@
|
||||
#!/bin/bash
|
||||
# create-disk-image.sh — Create a raw disk image with boot + data partitions
|
||||
# Phase 1: simple layout (boot + data). Phase 3 adds A/B system partitions.
|
||||
# create-disk-image.sh — Create a raw disk image with A/B system partitions
|
||||
#
|
||||
# Partition layout (GPT):
|
||||
# Part 1: EFI/Boot (256 MB, FAT32) — GRUB + grubenv + A/B boot logic
|
||||
# Part 2: System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz (active)
|
||||
# Part 3: System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz (passive)
|
||||
# Part 4: Data (remaining, ext4) — persistent K8s state
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
@@ -11,93 +16,165 @@ VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||
OS_NAME="kubesolo-os"
|
||||
|
||||
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.img"
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-2048}" # 2 GB default
|
||||
IMG_SIZE_MB="${IMG_SIZE_MB:-4096}" # 4 GB default (larger for A/B)
|
||||
|
||||
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
||||
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
||||
GRUB_CFG="$PROJECT_ROOT/build/grub/grub.cfg"
|
||||
GRUB_ENV_DEFAULTS="$PROJECT_ROOT/build/grub/grub-env-defaults"
|
||||
|
||||
for f in "$VMLINUZ" "$INITRAMFS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f — run 'make initramfs'"; exit 1; }
|
||||
for f in "$VMLINUZ" "$INITRAMFS" "$GRUB_CFG" "$GRUB_ENV_DEFAULTS"; do
|
||||
[ -f "$f" ] || { echo "ERROR: Missing $f"; exit 1; }
|
||||
done
|
||||
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image..."
|
||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image with A/B partitions..."
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
# Create sparse image
|
||||
dd if=/dev/zero of="$IMG_OUTPUT" bs=1M count=0 seek="$IMG_SIZE_MB" 2>/dev/null
|
||||
|
||||
# Partition: 256MB boot (ext4) + rest data (ext4)
|
||||
# Using sfdisk for scriptability
|
||||
# Partition (GPT):
|
||||
# Part 1: 256 MB EFI System Partition (FAT32)
|
||||
# Part 2: 512 MB System A (Linux filesystem)
|
||||
# Part 3: 512 MB System B (Linux filesystem)
|
||||
# Part 4: Remaining — Data (Linux filesystem)
|
||||
sfdisk "$IMG_OUTPUT" << EOF
|
||||
label: dos
|
||||
unit: sectors
|
||||
label: gpt
|
||||
|
||||
# Boot partition: 256 MB, bootable
|
||||
start=2048, size=524288, type=83, bootable
|
||||
# Data partition: remaining space
|
||||
start=526336, type=83
|
||||
# EFI/Boot partition: 256 MB
|
||||
start=2048, size=524288, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B, name="EFI"
|
||||
# System A partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemA"
|
||||
# System B partition: 512 MB
|
||||
size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemB"
|
||||
# Data partition: remaining
|
||||
type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="Data"
|
||||
EOF
|
||||
|
||||
# Set up loop device
|
||||
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
||||
echo "==> Loop device: $LOOP"
|
||||
|
||||
MNT_EFI=$(mktemp -d)
|
||||
MNT_SYSA=$(mktemp -d)
|
||||
MNT_SYSB=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
|
||||
cleanup() {
|
||||
umount "${LOOP}p1" 2>/dev/null || true
|
||||
umount "${LOOP}p2" 2>/dev/null || true
|
||||
umount "$MNT_EFI" 2>/dev/null || true
|
||||
umount "$MNT_SYSA" 2>/dev/null || true
|
||||
umount "$MNT_SYSB" 2>/dev/null || true
|
||||
umount "$MNT_DATA" 2>/dev/null || true
|
||||
losetup -d "$LOOP" 2>/dev/null || true
|
||||
rm -rf "$MNT_BOOT" "$MNT_DATA" 2>/dev/null || true
|
||||
rm -rf "$MNT_EFI" "$MNT_SYSA" "$MNT_SYSB" "$MNT_DATA" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Format partitions
|
||||
mkfs.ext4 -q -L KSOLOBOOT "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p2"
|
||||
mkfs.vfat -F 32 -n KSOLOEFI "${LOOP}p1"
|
||||
mkfs.ext4 -q -L KSOLOA "${LOOP}p2"
|
||||
mkfs.ext4 -q -L KSOLOB "${LOOP}p3"
|
||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p4"
|
||||
|
||||
# Mount and populate boot partition
|
||||
MNT_BOOT=$(mktemp -d)
|
||||
MNT_DATA=$(mktemp -d)
|
||||
# Mount all partitions
|
||||
mount "${LOOP}p1" "$MNT_EFI"
|
||||
mount "${LOOP}p2" "$MNT_SYSA"
|
||||
mount "${LOOP}p3" "$MNT_SYSB"
|
||||
mount "${LOOP}p4" "$MNT_DATA"
|
||||
|
||||
mount "${LOOP}p1" "$MNT_BOOT"
|
||||
mount "${LOOP}p2" "$MNT_DATA"
|
||||
# --- EFI/Boot Partition ---
|
||||
echo " Installing GRUB..."
|
||||
mkdir -p "$MNT_EFI/EFI/BOOT"
|
||||
mkdir -p "$MNT_EFI/boot/grub"
|
||||
|
||||
# Install syslinux + kernel + initramfs to boot partition
|
||||
mkdir -p "$MNT_BOOT/boot/syslinux"
|
||||
cp "$VMLINUZ" "$MNT_BOOT/boot/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_BOOT/boot/kubesolo-os.gz"
|
||||
# Copy GRUB config
|
||||
cp "$GRUB_CFG" "$MNT_EFI/boot/grub/grub.cfg"
|
||||
|
||||
# Syslinux config for disk boot (extlinux)
|
||||
cat > "$MNT_BOOT/boot/syslinux/syslinux.cfg" << 'EOF'
|
||||
DEFAULT kubesolo
|
||||
TIMEOUT 30
|
||||
PROMPT 0
|
||||
# Create GRUB environment file from defaults
|
||||
if command -v grub-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub-editenv
|
||||
elif command -v grub2-editenv >/dev/null 2>&1; then
|
||||
GRUB_EDITENV=grub2-editenv
|
||||
else
|
||||
GRUB_EDITENV=""
|
||||
fi
|
||||
|
||||
LABEL kubesolo
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND quiet kubesolo.data=LABEL=KSOLODATA
|
||||
GRUBENV_FILE="$MNT_EFI/boot/grub/grubenv"
|
||||
|
||||
LABEL kubesolo-debug
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.data=LABEL=KSOLODATA kubesolo.debug console=ttyS0,115200n8
|
||||
if [ -n "$GRUB_EDITENV" ]; then
|
||||
# Create grubenv with defaults
|
||||
"$GRUB_EDITENV" "$GRUBENV_FILE" create
|
||||
while IFS='=' read -r key value; do
|
||||
# Skip comments and empty lines
|
||||
case "$key" in
|
||||
'#'*|'') continue ;;
|
||||
esac
|
||||
"$GRUB_EDITENV" "$GRUBENV_FILE" set "$key=$value"
|
||||
done < "$GRUB_ENV_DEFAULTS"
|
||||
echo " GRUB environment created with grub-editenv"
|
||||
else
|
||||
# Fallback: write grubenv file manually (1024 bytes, padded with '#')
|
||||
echo " WARN: grub-editenv not found — writing grubenv manually"
|
||||
{
|
||||
echo "# GRUB Environment Block"
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
'#'*|'') continue ;;
|
||||
esac
|
||||
echo "$key=$value"
|
||||
done < "$GRUB_ENV_DEFAULTS"
|
||||
} > "$GRUBENV_FILE.tmp"
|
||||
# Pad to 1024 bytes (GRUB requirement)
|
||||
truncate -s 1024 "$GRUBENV_FILE.tmp"
|
||||
mv "$GRUBENV_FILE.tmp" "$GRUBENV_FILE"
|
||||
fi
|
||||
|
||||
LABEL kubesolo-shell
|
||||
KERNEL /boot/vmlinuz
|
||||
INITRD /boot/kubesolo-os.gz
|
||||
APPEND kubesolo.shell console=ttyS0,115200n8
|
||||
EOF
|
||||
# Install GRUB EFI binary if available
|
||||
if command -v grub-mkimage >/dev/null 2>&1; then
|
||||
grub-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
||||
-p /boot/grub \
|
||||
part_gpt ext2 fat normal linux echo all_video test search \
|
||||
search_fs_uuid search_label configfile loadenv \
|
||||
2>/dev/null || echo " WARN: grub-mkimage failed — use QEMU -bios flag"
|
||||
elif command -v grub2-mkimage >/dev/null 2>&1; then
|
||||
grub2-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
||||
-p /boot/grub \
|
||||
part_gpt ext2 fat normal linux echo all_video test search \
|
||||
search_fs_uuid search_label configfile loadenv \
|
||||
2>/dev/null || echo " WARN: grub2-mkimage failed — use QEMU -bios flag"
|
||||
else
|
||||
echo " WARN: grub-mkimage not found — EFI boot image not created"
|
||||
echo " Install grub2-tools or use QEMU -kernel/-initrd flags"
|
||||
fi
|
||||
|
||||
# Install extlinux bootloader
|
||||
if command -v extlinux >/dev/null 2>&1; then
|
||||
extlinux --install "$MNT_BOOT/boot/syslinux" 2>/dev/null || {
|
||||
echo "WARN: extlinux install failed — image may not be directly bootable"
|
||||
echo " Use with QEMU -kernel/-initrd flags instead"
|
||||
# For BIOS boot: install GRUB i386-pc modules if available
|
||||
if command -v grub-install >/dev/null 2>&1; then
|
||||
grub-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||
--no-floppy "$LOOP" 2>/dev/null || {
|
||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||
}
|
||||
elif command -v grub2-install >/dev/null 2>&1; then
|
||||
grub2-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||
--no-floppy "$LOOP" 2>/dev/null || {
|
||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||
}
|
||||
fi
|
||||
|
||||
# Prepare data partition structure
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network; do
|
||||
# --- System A Partition (active) ---
|
||||
echo " Populating System A (active)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSA/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSA/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSA/version"
|
||||
|
||||
# --- System B Partition (passive, initially same as A) ---
|
||||
echo " Populating System B (passive)..."
|
||||
cp "$VMLINUZ" "$MNT_SYSB/vmlinuz"
|
||||
cp "$INITRAMFS" "$MNT_SYSB/kubesolo-os.gz"
|
||||
echo "$VERSION" > "$MNT_SYSB/version"
|
||||
|
||||
# --- Data Partition ---
|
||||
echo " Preparing data partition..."
|
||||
for dir in kubesolo containerd etc-kubesolo log usr-local network images; do
|
||||
mkdir -p "$MNT_DATA/$dir"
|
||||
done
|
||||
|
||||
@@ -106,5 +183,8 @@ sync
|
||||
echo ""
|
||||
echo "==> Disk image created: $IMG_OUTPUT"
|
||||
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
||||
echo " Boot partition (KSOLOBOOT): kernel + initramfs"
|
||||
echo " Data partition (KSOLODATA): persistent K8s state"
|
||||
echo " Part 1 (KSOLOEFI): GRUB + A/B boot config"
|
||||
echo " Part 2 (KSOLOA): System A — kernel + initramfs (active)"
|
||||
echo " Part 3 (KSOLOB): System B — kernel + initramfs (passive)"
|
||||
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
||||
echo ""
|
||||
|
||||
@@ -73,6 +73,16 @@ else
|
||||
echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' to build)"
|
||||
fi
|
||||
|
||||
# Update agent binary (Go, built separately)
|
||||
UPDATE_BIN="$CACHE_DIR/kubesolo-update"
|
||||
if [ -f "$UPDATE_BIN" ]; then
|
||||
cp "$UPDATE_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||
echo " Installed update agent ($(du -h "$UPDATE_BIN" | cut -f1))"
|
||||
else
|
||||
echo " WARN: Update agent not found (run 'make build-update-agent' to build)"
|
||||
fi
|
||||
|
||||
# --- 3. Kernel modules list ---
|
||||
cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
||||
|
||||
|
||||
261
docs/update-flow.md
Normal file
261
docs/update-flow.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# KubeSolo OS — Atomic Update Flow
|
||||
|
||||
This document describes the A/B partition update mechanism used by KubeSolo OS for safe, atomic OS updates with automatic rollback.
|
||||
|
||||
## Partition Layout
|
||||
|
||||
KubeSolo OS uses a 4-partition GPT layout:
|
||||
|
||||
```
|
||||
Disk (minimum 4 GB):
|
||||
Part 1: EFI/Boot (256 MB, FAT32, label: KSOLOEFI) — GRUB + boot config
|
||||
Part 2: System A (512 MB, ext4, label: KSOLOA) — vmlinuz + kubesolo-os.gz
|
||||
Part 3: System B (512 MB, ext4, label: KSOLOB) — vmlinuz + kubesolo-os.gz
|
||||
Part 4: Data (remaining, ext4, label: KSOLODATA) — persistent K8s state
|
||||
```
|
||||
|
||||
Only one system partition is active at a time. The other is the "passive" slot used for staging updates.
|
||||
|
||||
## GRUB Environment Variables
|
||||
|
||||
The A/B boot logic is controlled by three GRUB environment variables stored in `/boot/grub/grubenv`:
|
||||
|
||||
| Variable | Values | Description |
|
||||
|---|---|---|
|
||||
| `active_slot` | `A` or `B` | Which system partition to boot |
|
||||
| `boot_counter` | `3` → `0` | Attempts remaining before rollback |
|
||||
| `boot_success` | `0` or `1` | Whether the current boot has been verified healthy |
|
||||
|
||||
## Boot Flow
|
||||
|
||||
```
|
||||
┌──────────────┐
|
||||
│ GRUB starts │
|
||||
└──────┬───────┘
|
||||
│
|
||||
┌──────▼───────┐
|
||||
│ Load grubenv │
|
||||
└──────┬───────┘
|
||||
│
|
||||
┌─────────▼─────────┐
|
||||
│ boot_success == 1? │
|
||||
└────┬──────────┬───┘
|
||||
yes│ │no
|
||||
│ ┌─────▼──────────┐
|
||||
│ │ boot_counter=0? │
|
||||
│ └──┬──────────┬──┘
|
||||
│ no │ │ yes
|
||||
│ │ ┌─────▼──────────┐
|
||||
│ │ │ SWAP active_slot│
|
||||
│ │ │ Reset counter=3 │
|
||||
│ │ └─────┬───────────┘
|
||||
│ │ │
|
||||
┌────▼───────▼──────────▼────┐
|
||||
│ Set boot_success=0 │
|
||||
│ Decrement boot_counter │
|
||||
│ Boot active_slot partition │
|
||||
└────────────┬───────────────┘
|
||||
│
|
||||
┌─────────▼─────────┐
|
||||
│ System boots... │
|
||||
└─────────┬─────────┘
|
||||
│
|
||||
┌─────────▼─────────────┐
|
||||
│ Health check runs │
|
||||
│ (containerd, API, │
|
||||
│ node Ready) │
|
||||
└─────┬──────────┬──────┘
|
||||
pass│ │fail
|
||||
┌─────▼─────┐ │
|
||||
│ Mark boot │ │ boot_success stays 0
|
||||
│ success=1 │ │ counter decremented
|
||||
│ counter=3 │ │ on next reboot
|
||||
└───────────┘ └──────────────────────
|
||||
```
|
||||
|
||||
### Rollback Behavior
|
||||
|
||||
The boot counter starts at 3 and decrements on each boot where `boot_success` remains 0:
|
||||
|
||||
1. **Boot 1**: counter 3 → 2 (health check fails → reboot)
|
||||
2. **Boot 2**: counter 2 → 1 (health check fails → reboot)
|
||||
3. **Boot 3**: counter 1 → 0 (health check fails → reboot)
|
||||
4. **Boot 4**: counter = 0, GRUB swaps `active_slot` and resets counter to 3
|
||||
|
||||
This provides **3 chances** for the new version to pass health checks before automatic rollback to the previous version.
|
||||
|
||||
## Update Agent Commands
|
||||
|
||||
The `kubesolo-update` binary provides 6 subcommands:
|
||||
|
||||
### `check` — Check for Updates
|
||||
|
||||
Queries the update server and compares against the current running version.
|
||||
|
||||
```bash
|
||||
kubesolo-update check --server https://updates.example.com
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
Current version: 1.0.0 (slot A)
|
||||
Latest version: 1.1.0
|
||||
Status: update available
|
||||
```
|
||||
|
||||
### `apply` — Download and Write Update
|
||||
|
||||
Downloads the new OS image (vmlinuz + initramfs) from the update server, verifies SHA256 checksums, and writes to the passive partition.
|
||||
|
||||
```bash
|
||||
kubesolo-update apply --server https://updates.example.com
|
||||
```
|
||||
|
||||
This does NOT activate the new partition or trigger a reboot.
|
||||
|
||||
### `activate` — Set Next Boot Target
|
||||
|
||||
Switches the GRUB boot target to the passive partition (the one with the new image) and sets `boot_counter=3`.
|
||||
|
||||
```bash
|
||||
kubesolo-update activate
|
||||
```
|
||||
|
||||
After activation, reboot to boot into the new version:
|
||||
```bash
|
||||
reboot
|
||||
```
|
||||
|
||||
### `rollback` — Force Rollback
|
||||
|
||||
Manually switches to the other partition, regardless of health check status.
|
||||
|
||||
```bash
|
||||
kubesolo-update rollback
|
||||
reboot
|
||||
```
|
||||
|
||||
### `healthcheck` — Post-Boot Health Verification
|
||||
|
||||
Runs after every boot to verify the system is healthy. If all checks pass, marks `boot_success=1` in GRUB to prevent rollback.
|
||||
|
||||
Checks performed:
|
||||
1. **containerd**: Socket exists and `ctr version` responds
|
||||
2. **API server**: TCP connection to 127.0.0.1:6443 and `/healthz` endpoint
|
||||
3. **Node Ready**: `kubectl get nodes` shows Ready status
|
||||
|
||||
```bash
|
||||
kubesolo-update healthcheck --timeout 120
|
||||
```
|
||||
|
||||
### `status` — Show A/B Slot Status
|
||||
|
||||
Displays the current partition state:
|
||||
|
||||
```bash
|
||||
kubesolo-update status
|
||||
```
|
||||
|
||||
Output:
|
||||
```
|
||||
KubeSolo OS — A/B Partition Status
|
||||
───────────────────────────────────
|
||||
Active slot: A
|
||||
Passive slot: B
|
||||
Boot counter: 3
|
||||
Boot success: 1
|
||||
|
||||
✓ System is healthy (boot confirmed)
|
||||
```
|
||||
|
||||
## Update Server Protocol
|
||||
|
||||
The update server is a simple HTTP(S) file server that serves:
|
||||
|
||||
```
|
||||
/latest.json — Update metadata
|
||||
/vmlinuz-<version> — Linux kernel
|
||||
/kubesolo-os-<version>.gz — Initramfs
|
||||
```
|
||||
|
||||
### `latest.json` Format
|
||||
|
||||
```json
|
||||
{
|
||||
"version": "1.1.0",
|
||||
"vmlinuz_url": "https://updates.example.com/vmlinuz-1.1.0",
|
||||
"vmlinuz_sha256": "abc123...",
|
||||
"initramfs_url": "https://updates.example.com/kubesolo-os-1.1.0.gz",
|
||||
"initramfs_sha256": "def456...",
|
||||
"release_notes": "Bug fixes and performance improvements",
|
||||
"release_date": "2025-01-15"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Any static file server (nginx, S3, GitHub Releases) can serve as an update server.
|
||||
|
||||
## Automated Updates via CronJob
|
||||
|
||||
KubeSolo OS includes a Kubernetes CronJob for automatic update checking:
|
||||
|
||||
```bash
|
||||
# Deploy the update CronJob
|
||||
kubectl apply -f /usr/lib/kubesolo-os/update-cronjob.yaml
|
||||
|
||||
# Configure the update server URL
|
||||
kubectl -n kube-system create configmap kubesolo-update-config \
|
||||
--from-literal=server-url=https://updates.example.com
|
||||
|
||||
# Manually trigger an update check
|
||||
kubectl create job --from=cronjob/kubesolo-update kubesolo-update-manual -n kube-system
|
||||
```
|
||||
|
||||
The CronJob runs every 6 hours and performs `apply` (download + write). It does NOT reboot — the administrator controls when to reboot.
|
||||
|
||||
## Complete Update Cycle
|
||||
|
||||
A full update cycle looks like:
|
||||
|
||||
```bash
|
||||
# 1. Check if update is available
|
||||
kubesolo-update check --server https://updates.example.com
|
||||
|
||||
# 2. Download and write to passive partition
|
||||
kubesolo-update apply --server https://updates.example.com
|
||||
|
||||
# 3. Activate the new partition
|
||||
kubesolo-update activate
|
||||
|
||||
# 4. Reboot into the new version
|
||||
reboot
|
||||
|
||||
# 5. (Automatic) Health check runs, marks boot successful
|
||||
# kubesolo-update healthcheck is run by init system
|
||||
|
||||
# 6. Verify status
|
||||
kubesolo-update status
|
||||
```
|
||||
|
||||
If the health check fails 3 times, GRUB automatically rolls back to the previous version on the next reboot.
|
||||
|
||||
## Command-Line Options
|
||||
|
||||
All subcommands accept these options:
|
||||
|
||||
| Option | Default | Description |
|
||||
|---|---|---|
|
||||
| `--server URL` | (none) | Update server URL |
|
||||
| `--grubenv PATH` | `/boot/grub/grubenv` | Path to GRUB environment file |
|
||||
| `--timeout SECS` | `120` | Health check timeout in seconds |
|
||||
|
||||
## File Locations
|
||||
|
||||
| File | Description |
|
||||
|---|---|
|
||||
| `/usr/lib/kubesolo-os/kubesolo-update` | Update agent binary |
|
||||
| `/boot/grub/grubenv` | GRUB environment (on EFI partition) |
|
||||
| `/boot/grub/grub.cfg` | GRUB boot config with A/B logic |
|
||||
| `<system-partition>/vmlinuz` | Linux kernel |
|
||||
| `<system-partition>/kubesolo-os.gz` | Initramfs |
|
||||
| `<system-partition>/version` | Version string |
|
||||
40
update/cmd/activate.go
Normal file
40
update/cmd/activate.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Activate switches the boot target to the passive partition.
|
||||
// After activation, the next reboot will boot from the new partition
|
||||
// with boot_counter=3. If health checks fail 3 times, GRUB auto-rolls back.
|
||||
func Activate(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Get passive slot (the one we want to boot into)
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("activating slot", "from", activeSlot, "to", passiveSlot)
|
||||
|
||||
// Set the passive slot as active with fresh boot counter
|
||||
if err := env.ActivateSlot(passiveSlot); err != nil {
|
||||
return fmt.Errorf("activating slot %s: %w", passiveSlot, err)
|
||||
}
|
||||
|
||||
fmt.Printf("Slot %s activated (was %s)\n", passiveSlot, activeSlot)
|
||||
fmt.Println("Boot counter set to 3. Reboot to start the new version.")
|
||||
fmt.Println("The system will automatically roll back if health checks fail 3 times.")
|
||||
|
||||
return nil
|
||||
}
|
||||
70
update/cmd/apply.go
Normal file
70
update/cmd/apply.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||
)
|
||||
|
||||
// Apply downloads a new OS image and writes it to the passive partition.
|
||||
// It does NOT activate the new partition — use 'activate' for that.
|
||||
func Apply(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
|
||||
if opts.ServerURL == "" {
|
||||
return fmt.Errorf("--server is required")
|
||||
}
|
||||
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Determine passive slot
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("applying update", "target_slot", passiveSlot)
|
||||
|
||||
// Check for update
|
||||
stageDir := "/tmp/kubesolo-update-stage"
|
||||
client := image.NewClient(opts.ServerURL, stageDir)
|
||||
defer client.Cleanup()
|
||||
|
||||
meta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking for update: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("update available", "version", meta.Version)
|
||||
|
||||
// Download and verify
|
||||
staged, err := client.Download(meta)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading update: %w", err)
|
||||
}
|
||||
|
||||
// Mount passive partition
|
||||
partInfo, err := partition.GetSlotPartition(passiveSlot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding passive partition: %w", err)
|
||||
}
|
||||
|
||||
mountPoint := "/tmp/kubesolo-passive-" + passiveSlot
|
||||
if err := partition.MountReadWrite(partInfo.Device, mountPoint); err != nil {
|
||||
return fmt.Errorf("mounting passive partition: %w", err)
|
||||
}
|
||||
defer partition.Unmount(mountPoint)
|
||||
|
||||
// Write image to passive partition
|
||||
if err := partition.WriteSystemImage(mountPoint, staged.VmlinuzPath, staged.InitramfsPath, staged.Version); err != nil {
|
||||
return fmt.Errorf("writing system image: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Update v%s written to slot %s (%s)\n", staged.Version, passiveSlot, partInfo.Device)
|
||||
fmt.Println("Run 'kubesolo-update activate' to boot into the new version")
|
||||
|
||||
return nil
|
||||
}
|
||||
65
update/cmd/check.go
Normal file
65
update/cmd/check.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||
)
|
||||
|
||||
// Check queries the update server for available updates and compares
|
||||
// against the currently running version.
|
||||
func Check(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
|
||||
if opts.ServerURL == "" {
|
||||
return fmt.Errorf("--server is required (no default update server configured)")
|
||||
}
|
||||
|
||||
// Get current version from active partition
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
partInfo, err := partition.GetSlotPartition(activeSlot)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding active partition: %w", err)
|
||||
}
|
||||
|
||||
mountPoint := "/tmp/kubesolo-check-" + activeSlot
|
||||
if err := partition.MountReadOnly(partInfo.Device, mountPoint); err != nil {
|
||||
return fmt.Errorf("mounting active partition: %w", err)
|
||||
}
|
||||
defer partition.Unmount(mountPoint)
|
||||
|
||||
currentVersion, err := partition.ReadVersion(mountPoint)
|
||||
if err != nil {
|
||||
slog.Warn("could not read current version", "error", err)
|
||||
currentVersion = "unknown"
|
||||
}
|
||||
|
||||
// Check update server
|
||||
client := image.NewClient(opts.ServerURL, "")
|
||||
meta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking for update: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Current version: %s (slot %s)\n", currentVersion, activeSlot)
|
||||
fmt.Printf("Latest version: %s\n", meta.Version)
|
||||
|
||||
if meta.Version == currentVersion {
|
||||
fmt.Println("Status: up to date")
|
||||
} else {
|
||||
fmt.Println("Status: update available")
|
||||
if meta.ReleaseNotes != "" {
|
||||
fmt.Printf("Release notes: %s\n", meta.ReleaseNotes)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
56
update/cmd/healthcheck.go
Normal file
56
update/cmd/healthcheck.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
"github.com/portainer/kubesolo-os/update/pkg/health"
|
||||
)
|
||||
|
||||
// Healthcheck performs post-boot health verification.
|
||||
// If all checks pass, it marks the boot as successful in GRUB.
|
||||
// This should be run after every boot (typically via a systemd unit or
|
||||
// init script) to confirm the system is healthy.
|
||||
func Healthcheck(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
// Check if already marked successful
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
slog.Warn("could not read boot_success", "error", err)
|
||||
}
|
||||
if success {
|
||||
fmt.Println("Boot already marked successful")
|
||||
return nil
|
||||
}
|
||||
|
||||
timeout := time.Duration(opts.TimeoutSecs) * time.Second
|
||||
checker := health.NewChecker("", "", timeout)
|
||||
|
||||
slog.Info("running post-boot health checks", "timeout", timeout)
|
||||
|
||||
status, err := checker.WaitForHealthy()
|
||||
if err != nil {
|
||||
fmt.Printf("Health check FAILED: %s\n", status.Message)
|
||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
||||
fmt.Println("\nBoot NOT marked successful — system may roll back on next reboot")
|
||||
return err
|
||||
}
|
||||
|
||||
// Mark boot as successful
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
return fmt.Errorf("marking boot success: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("Health check PASSED — boot marked successful")
|
||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
||||
|
||||
return nil
|
||||
}
|
||||
47
update/cmd/opts.go
Normal file
47
update/cmd/opts.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package cmd
|
||||
|
||||
// opts holds shared command-line options for all subcommands.
type opts struct {
	ServerURL   string // --server: update server base URL
	GrubenvPath string // --grubenv: path to the GRUB environment file
	TimeoutSecs int    // --timeout: health check timeout in seconds
}

// parseOpts extracts command-line flags from args.
// Simple parser — no external dependencies.
//
// Unknown flags are ignored. A malformed --timeout value (anything that
// is not a positive decimal integer) is ignored and the default is kept.
// The previous implementation silently stripped non-digit characters,
// so "--timeout 1x2" was accepted as 12; that is now rejected.
func parseOpts(args []string) opts {
	o := opts{
		GrubenvPath: "/boot/grub/grubenv",
		TimeoutSecs: 120,
	}

	for i := 0; i < len(args); i++ {
		switch args[i] {
		case "--server":
			if i+1 < len(args) {
				o.ServerURL = args[i+1]
				i++
			}
		case "--grubenv":
			if i+1 < len(args) {
				o.GrubenvPath = args[i+1]
				i++
			}
		case "--timeout":
			if i+1 < len(args) {
				if secs, ok := parseSeconds(args[i+1]); ok {
					o.TimeoutSecs = secs
				}
				i++
			}
		}
	}

	return o
}

// parseSeconds parses a strictly-decimal positive integer.
// ok is false for an empty string, any non-digit character, or zero.
func parseSeconds(s string) (int, bool) {
	if s == "" {
		return 0, false
	}
	val := 0
	for _, c := range s {
		if c < '0' || c > '9' {
			return 0, false
		}
		val = val*10 + int(c-'0')
	}
	if val <= 0 {
		return 0, false
	}
	return val, true
}
|
||||
36
update/cmd/rollback.go
Normal file
36
update/cmd/rollback.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Rollback forces an immediate switch to the other partition.
|
||||
// Use this to manually revert to the previous version.
|
||||
func Rollback(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
activeSlot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading active slot: %w", err)
|
||||
}
|
||||
|
||||
passiveSlot, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading passive slot: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("forcing rollback", "from", activeSlot, "to", passiveSlot)
|
||||
|
||||
if err := env.ForceRollback(); err != nil {
|
||||
return fmt.Errorf("rollback failed: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Rolled back: %s → %s\n", activeSlot, passiveSlot)
|
||||
fmt.Println("Reboot to complete rollback.")
|
||||
|
||||
return nil
|
||||
}
|
||||
44
update/cmd/status.go
Normal file
44
update/cmd/status.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||
)
|
||||
|
||||
// Status displays the current A/B slot configuration and boot state.
|
||||
func Status(args []string) error {
|
||||
opts := parseOpts(args)
|
||||
env := grubenv.New(opts.GrubenvPath)
|
||||
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading GRUB environment: %w", err)
|
||||
}
|
||||
|
||||
activeSlot := vars["active_slot"]
|
||||
bootCounter := vars["boot_counter"]
|
||||
bootSuccess := vars["boot_success"]
|
||||
|
||||
passiveSlot := "B"
|
||||
if activeSlot == "B" {
|
||||
passiveSlot = "A"
|
||||
}
|
||||
|
||||
fmt.Println("KubeSolo OS — A/B Partition Status")
|
||||
fmt.Println("───────────────────────────────────")
|
||||
fmt.Printf(" Active slot: %s\n", activeSlot)
|
||||
fmt.Printf(" Passive slot: %s\n", passiveSlot)
|
||||
fmt.Printf(" Boot counter: %s\n", bootCounter)
|
||||
fmt.Printf(" Boot success: %s\n", bootSuccess)
|
||||
|
||||
if bootSuccess == "1" {
|
||||
fmt.Println("\n ✓ System is healthy (boot confirmed)")
|
||||
} else if bootCounter == "0" {
|
||||
fmt.Println("\n ✗ Boot counter exhausted — rollback will occur on next reboot")
|
||||
} else {
|
||||
fmt.Printf("\n ⚠ Boot pending verification (%s attempts remaining)\n", bootCounter)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
150
update/deploy/update-cronjob.yaml
Normal file
150
update/deploy/update-cronjob.yaml
Normal file
@@ -0,0 +1,150 @@
|
||||
# KubeSolo OS — Automatic Update CronJob
|
||||
#
|
||||
# This CronJob checks for OS updates every 6 hours, downloads them,
|
||||
# and writes them to the passive partition. It does NOT reboot —
|
||||
# the administrator must trigger a reboot to apply the update.
|
||||
#
|
||||
# The update agent runs as a privileged container with host access
|
||||
# because it needs to:
|
||||
# 1. Read/write GRUB environment (on boot partition)
|
||||
# 2. Mount and write to system partitions
|
||||
# 3. Access block devices via blkid
|
||||
#
|
||||
# Deploy: kubectl apply -f update-cronjob.yaml
|
||||
# Manual trigger: kubectl create job --from=cronjob/kubesolo-update kubesolo-update-manual
|
||||
#
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: kubesolo-update
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
app.kubernetes.io/component: update-agent
|
||||
app.kubernetes.io/part-of: kubesolo-os
|
||||
spec:
|
||||
schedule: "0 */6 * * *" # Every 6 hours
|
||||
concurrencyPolicy: Forbid
|
||||
successfulJobsHistoryLimit: 3
|
||||
failedJobsHistoryLimit: 5
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 1
|
||||
activeDeadlineSeconds: 600 # 10 min max
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
hostPID: false
|
||||
hostNetwork: false
|
||||
containers:
|
||||
- name: update
|
||||
image: busybox:latest # Only used for the shell; the binary is host-mounted
|
||||
command:
|
||||
- /host/usr/lib/kubesolo-os/kubesolo-update
|
||||
args:
|
||||
- apply
|
||||
- --server
|
||||
- "$(UPDATE_SERVER_URL)"
|
||||
env:
|
||||
- name: UPDATE_SERVER_URL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: kubesolo-update-config
|
||||
key: server-url
|
||||
optional: true
|
||||
securityContext:
|
||||
privileged: true # Required for mount/blkid access
|
||||
volumeMounts:
|
||||
- name: host-root
|
||||
mountPath: /host
|
||||
readOnly: false
|
||||
- name: dev
|
||||
mountPath: /dev
|
||||
- name: boot
|
||||
mountPath: /boot
|
||||
volumes:
|
||||
- name: host-root
|
||||
hostPath:
|
||||
path: /
|
||||
type: Directory
|
||||
- name: dev
|
||||
hostPath:
|
||||
path: /dev
|
||||
type: Directory
|
||||
- name: boot
|
||||
hostPath:
|
||||
path: /boot
|
||||
type: Directory
|
||||
tolerations:
|
||||
- operator: Exists # Run on any node (there's only one)
|
||||
---
|
||||
# ConfigMap for update server URL.
|
||||
# Create/update this to point to your update server:
|
||||
# kubectl -n kube-system create configmap kubesolo-update-config \
|
||||
# --from-literal=server-url=https://updates.example.com
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: kubesolo-update-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-update
|
||||
app.kubernetes.io/component: update-agent
|
||||
data:
|
||||
server-url: "" # Set to your update server URL
|
||||
---
|
||||
# Post-boot health check — runs once at boot as a Job.
|
||||
# On KubeSolo OS, this is triggered by the init system (init stage or
|
||||
# systemd-equivalent), but it can also be deployed as a K8s Job for
|
||||
# environments where the init system doesn't run the health check.
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: kubesolo-healthcheck
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-healthcheck
|
||||
app.kubernetes.io/component: health-check
|
||||
app.kubernetes.io/part-of: kubesolo-os
|
||||
spec:
|
||||
backoffLimit: 3
|
||||
activeDeadlineSeconds: 300 # 5 min max
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: kubesolo-healthcheck
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
hostPID: false
|
||||
hostNetwork: true # Needed to reach API server at 127.0.0.1:6443
|
||||
containers:
|
||||
- name: healthcheck
|
||||
image: busybox:latest
|
||||
command:
|
||||
- /host/usr/lib/kubesolo-os/kubesolo-update
|
||||
args:
|
||||
- healthcheck
|
||||
- --timeout
|
||||
- "120"
|
||||
securityContext:
|
||||
privileged: true # Required for grubenv write
|
||||
volumeMounts:
|
||||
- name: host-root
|
||||
mountPath: /host
|
||||
readOnly: false
|
||||
- name: boot
|
||||
mountPath: /boot
|
||||
volumes:
|
||||
- name: host-root
|
||||
hostPath:
|
||||
path: /
|
||||
type: Directory
|
||||
- name: boot
|
||||
hostPath:
|
||||
path: /boot
|
||||
type: Directory
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
3
update/go.mod
Normal file
3
update/go.mod
Normal file
@@ -0,0 +1,3 @@
|
||||
module github.com/portainer/kubesolo-os/update
|
||||
|
||||
go 1.25.5
|
||||
79
update/main.go
Normal file
79
update/main.go
Normal file
@@ -0,0 +1,79 @@
|
||||
// kubesolo-update is the atomic update agent for KubeSolo OS.
|
||||
//
|
||||
// It manages A/B partition updates with automatic rollback:
|
||||
//
|
||||
// kubesolo-update check Check for available updates
|
||||
// kubesolo-update apply Download + write update to passive partition
|
||||
// kubesolo-update activate Set passive partition as next boot target
|
||||
// kubesolo-update rollback Force rollback to other partition
|
||||
// kubesolo-update healthcheck Post-boot health verification
|
||||
// kubesolo-update status Show current A/B slot and boot status
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/portainer/kubesolo-os/update/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
})))
|
||||
|
||||
if len(os.Args) < 2 {
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var err error
|
||||
switch os.Args[1] {
|
||||
case "check":
|
||||
err = cmd.Check(os.Args[2:])
|
||||
case "apply":
|
||||
err = cmd.Apply(os.Args[2:])
|
||||
case "activate":
|
||||
err = cmd.Activate(os.Args[2:])
|
||||
case "rollback":
|
||||
err = cmd.Rollback(os.Args[2:])
|
||||
case "healthcheck":
|
||||
err = cmd.Healthcheck(os.Args[2:])
|
||||
case "status":
|
||||
err = cmd.Status(os.Args[2:])
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown command: %s\n\n", os.Args[1])
|
||||
usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
slog.Error("command failed", "command", os.Args[1], "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// usage prints the command synopsis and options to stderr.
func usage() {
	const text = `Usage: kubesolo-update <command> [options]

Commands:
  check         Check for available updates
  apply         Download and write update to passive partition
  activate      Set passive partition as next boot target
  rollback      Force rollback to other partition
  healthcheck   Post-boot health verification (marks boot successful)
  status        Show current A/B slot and boot status

Options:
  --server URL    Update server URL (default: from /etc/kubesolo/update.conf)
  --grubenv PATH  Path to grubenv file (default: /boot/grub/grubenv)
  --timeout SECS  Health check timeout in seconds (default: 120)

Examples:
  kubesolo-update check --server https://updates.example.com
  kubesolo-update apply --server https://updates.example.com
  kubesolo-update healthcheck
  kubesolo-update status
`
	// No format verbs in the text, so plain Fprint suffices.
	fmt.Fprint(os.Stderr, text)
}
|
||||
239
update/pkg/grubenv/grubenv.go
Normal file
239
update/pkg/grubenv/grubenv.go
Normal file
@@ -0,0 +1,239 @@
|
||||
// Package grubenv provides read/write access to GRUB environment variables.
|
||||
//
|
||||
// GRUB stores its environment in a 1024-byte file (grubenv) located at
|
||||
// /boot/grub/grubenv on the EFI partition. This package manipulates
|
||||
// those variables for A/B boot slot management.
|
||||
//
|
||||
// Key variables:
|
||||
// - active_slot: "A" or "B"
|
||||
// - boot_counter: "3" (fresh) down to "0" (triggers rollback)
|
||||
// - boot_success: "0" (pending) or "1" (healthy boot confirmed)
|
||||
package grubenv
|
||||
|
||||
import (
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"sort"
	"strings"
)
|
||||
|
||||
const (
	// DefaultGrubenvPath is the standard location for the GRUB environment file.
	DefaultGrubenvPath = "/boot/grub/grubenv"

	// SlotA represents system partition A.
	SlotA = "A"
	// SlotB represents system partition B.
	SlotB = "B"
)

// Env provides access to GRUB environment variables.
type Env struct {
	path string // location of the grubenv file on disk
}

// New creates a new Env for the given grubenv file path.
// An empty path selects DefaultGrubenvPath.
func New(path string) *Env {
	e := &Env{path: path}
	if e.path == "" {
		e.path = DefaultGrubenvPath
	}
	return e
}
|
||||
|
||||
// Get reads a variable from the GRUB environment.
|
||||
func (e *Env) Get(key string) (string, error) {
|
||||
vars, err := e.ReadAll()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
val, ok := vars[key]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("grubenv: key %q not found", key)
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// Set writes a variable to the GRUB environment.
|
||||
func (e *Env) Set(key, value string) error {
|
||||
editenv, err := findEditenv()
|
||||
if err != nil {
|
||||
return e.setManual(key, value)
|
||||
}
|
||||
|
||||
cmd := exec.Command(editenv, e.path, "set", key+"="+value)
|
||||
if output, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("grub-editenv set %s=%s: %w\n%s", key, value, err, output)
|
||||
}
|
||||
|
||||
slog.Debug("grubenv set", "key", key, "value", value)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadAll reads all variables from the GRUB environment.
|
||||
func (e *Env) ReadAll() (map[string]string, error) {
|
||||
editenv, err := findEditenv()
|
||||
if err != nil {
|
||||
return e.readManual()
|
||||
}
|
||||
|
||||
cmd := exec.Command(editenv, e.path, "list")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("grub-editenv list: %w", err)
|
||||
}
|
||||
|
||||
return parseEnvOutput(string(output)), nil
|
||||
}
|
||||
|
||||
// ActiveSlot returns the currently active boot slot ("A" or "B").
|
||||
func (e *Env) ActiveSlot() (string, error) {
|
||||
return e.Get("active_slot")
|
||||
}
|
||||
|
||||
// PassiveSlot returns the currently passive boot slot.
|
||||
func (e *Env) PassiveSlot() (string, error) {
|
||||
active, err := e.ActiveSlot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if active == SlotA {
|
||||
return SlotB, nil
|
||||
}
|
||||
return SlotA, nil
|
||||
}
|
||||
|
||||
// BootCounter returns the current boot counter value.
|
||||
func (e *Env) BootCounter() (int, error) {
|
||||
val, err := e.Get("boot_counter")
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
switch val {
|
||||
case "0":
|
||||
return 0, nil
|
||||
case "1":
|
||||
return 1, nil
|
||||
case "2":
|
||||
return 2, nil
|
||||
case "3":
|
||||
return 3, nil
|
||||
default:
|
||||
return -1, fmt.Errorf("grubenv: invalid boot_counter: %q", val)
|
||||
}
|
||||
}
|
||||
|
||||
// BootSuccess returns whether the last boot was marked successful.
|
||||
func (e *Env) BootSuccess() (bool, error) {
|
||||
val, err := e.Get("boot_success")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return val == "1", nil
|
||||
}
|
||||
|
||||
// MarkBootSuccess sets boot_success=1 and boot_counter=3.
|
||||
// Called by the health check after a successful boot.
|
||||
func (e *Env) MarkBootSuccess() error {
|
||||
if err := e.Set("boot_success", "1"); err != nil {
|
||||
return fmt.Errorf("setting boot_success: %w", err)
|
||||
}
|
||||
if err := e.Set("boot_counter", "3"); err != nil {
|
||||
return fmt.Errorf("setting boot_counter: %w", err)
|
||||
}
|
||||
slog.Info("boot marked successful")
|
||||
return nil
|
||||
}
|
||||
|
||||
// ActivateSlot switches the active slot and resets the boot counter.
|
||||
// Used after writing a new image to the passive partition.
|
||||
func (e *Env) ActivateSlot(slot string) error {
|
||||
if slot != SlotA && slot != SlotB {
|
||||
return fmt.Errorf("invalid slot: %q (must be A or B)", slot)
|
||||
}
|
||||
if err := e.Set("active_slot", slot); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Set("boot_counter", "3"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := e.Set("boot_success", "0"); err != nil {
|
||||
return err
|
||||
}
|
||||
slog.Info("activated slot", "slot", slot)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ForceRollback switches to the other slot immediately.
|
||||
func (e *Env) ForceRollback() error {
|
||||
passive, err := e.PassiveSlot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return e.ActivateSlot(passive)
|
||||
}
|
||||
|
||||
// findEditenv locates the GRUB environment editor on PATH, trying both
// the Debian-style (grub-editenv) and Fedora-style (grub2-editenv) names.
func findEditenv() (string, error) {
	for _, name := range []string{"grub-editenv", "grub2-editenv"} {
		if path, err := exec.LookPath(name); err == nil {
			return path, nil
		}
	}
	return "", fmt.Errorf("grub-editenv not found")
}
|
||||
|
||||
// parseEnvOutput converts grub-editenv "list" output (or raw grubenv
// file content) into a key/value map. Blank lines and '#' comment lines
// (including GRUB's '#' padding) are skipped; lines without '=' are ignored.
func parseEnvOutput(output string) map[string]string {
	vars := make(map[string]string)
	for _, raw := range strings.Split(output, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Split at the first '='; the value may itself contain '='.
		if key, value, ok := strings.Cut(line, "="); ok {
			vars[key] = value
		}
	}
	return vars
}
|
||||
|
||||
// setManual writes to grubenv without grub-editenv (fallback).
|
||||
func (e *Env) setManual(key, value string) error {
|
||||
vars, err := e.readManual()
|
||||
if err != nil {
|
||||
vars = make(map[string]string)
|
||||
}
|
||||
vars[key] = value
|
||||
return e.writeManual(vars)
|
||||
}
|
||||
|
||||
// readManual reads grubenv without grub-editenv.
|
||||
func (e *Env) readManual() (map[string]string, error) {
|
||||
data, err := os.ReadFile(e.path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading grubenv: %w", err)
|
||||
}
|
||||
return parseEnvOutput(string(data)), nil
|
||||
}
|
||||
|
||||
// writeManual writes grubenv without grub-editenv.
|
||||
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
|
||||
func (e *Env) writeManual(vars map[string]string) error {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("# GRUB Environment Block\n")
|
||||
for k, v := range vars {
|
||||
sb.WriteString(k + "=" + v + "\n")
|
||||
}
|
||||
|
||||
content := sb.String()
|
||||
if len(content) > 1024 {
|
||||
return fmt.Errorf("grubenv content exceeds 1024 bytes")
|
||||
}
|
||||
|
||||
// Pad to 1024 bytes with '#'
|
||||
padding := 1024 - len(content)
|
||||
content += strings.Repeat("#", padding)
|
||||
|
||||
return os.WriteFile(e.path, []byte(content), 0o644)
|
||||
}
|
||||
423
update/pkg/grubenv/grubenv_test.go
Normal file
423
update/pkg/grubenv/grubenv_test.go
Normal file
@@ -0,0 +1,423 @@
|
||||
package grubenv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// createTestGrubenv writes a properly formatted grubenv file for testing.
// GRUB requires the file to be exactly 1024 bytes, padded with '#'.
func createTestGrubenv(t *testing.T, dir string, vars map[string]string) string {
	t.Helper()

	var b strings.Builder
	b.WriteString("# GRUB Environment Block\n")
	for k, v := range vars {
		b.WriteString(k)
		b.WriteString("=")
		b.WriteString(v)
		b.WriteString("\n")
	}

	content := b.String()
	if pad := 1024 - len(content); pad > 0 {
		content += strings.Repeat("#", pad)
	}

	path := filepath.Join(dir, "grubenv")
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}
	return path
}
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
env := New("")
|
||||
if env.path != DefaultGrubenvPath {
|
||||
t.Errorf("expected default path %s, got %s", DefaultGrubenvPath, env.path)
|
||||
}
|
||||
|
||||
env = New("/custom/path/grubenv")
|
||||
if env.path != "/custom/path/grubenv" {
|
||||
t.Errorf("expected custom path, got %s", env.path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadAll(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot: expected A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter: expected 3, got %s", vars["boot_counter"])
|
||||
}
|
||||
if vars["boot_success"] != "1" {
|
||||
t.Errorf("boot_success: expected 1, got %s", vars["boot_success"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "B",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
val, err := env.Get("active_slot")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if val != "B" {
|
||||
t.Errorf("expected B, got %s", val)
|
||||
}
|
||||
|
||||
_, err = env.Get("nonexistent")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nonexistent key")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSet(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
if err := env.Set("boot_counter", "2"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
val, err := env.Get("boot_counter")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if val != "2" {
|
||||
t.Errorf("expected 2 after set, got %s", val)
|
||||
}
|
||||
|
||||
// Verify file is still 1024 bytes
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(data) != 1024 {
|
||||
t.Errorf("grubenv should be 1024 bytes, got %d", len(data))
|
||||
}
|
||||
}
|
||||
|
||||
func TestActiveSlot(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
slot, err := env.ActiveSlot()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if slot != "A" {
|
||||
t.Errorf("expected A, got %s", slot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPassiveSlot(t *testing.T) {
|
||||
tests := []struct {
|
||||
active string
|
||||
passive string
|
||||
}{
|
||||
{"A", "B"},
|
||||
{"B", "A"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("active_"+tt.active, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": tt.active,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
passive, err := env.PassiveSlot()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if passive != tt.passive {
|
||||
t.Errorf("expected passive %s, got %s", tt.passive, passive)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootCounter(t *testing.T) {
|
||||
tests := []struct {
|
||||
value string
|
||||
expect int
|
||||
wantErr bool
|
||||
}{
|
||||
{"0", 0, false},
|
||||
{"1", 1, false},
|
||||
{"2", 2, false},
|
||||
{"3", 3, false},
|
||||
{"invalid", -1, true},
|
||||
{"99", -1, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("counter_"+tt.value, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"boot_counter": tt.value,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
counter, err := env.BootCounter()
|
||||
if tt.wantErr {
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if counter != tt.expect {
|
||||
t.Errorf("expected %d, got %d", tt.expect, counter)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootSuccess(t *testing.T) {
|
||||
tests := []struct {
|
||||
value string
|
||||
expect bool
|
||||
}{
|
||||
{"0", false},
|
||||
{"1", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run("success_"+tt.value, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"boot_success": tt.value,
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if success != tt.expect {
|
||||
t.Errorf("expected %v, got %v", tt.expect, success)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarkBootSuccess(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "B",
|
||||
"boot_counter": "1",
|
||||
"boot_success": "0",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
success, err := env.BootSuccess()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !success {
|
||||
t.Error("expected boot_success=1 after MarkBootSuccess")
|
||||
}
|
||||
|
||||
counter, err := env.BootCounter()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if counter != 3 {
|
||||
t.Errorf("expected boot_counter=3 after MarkBootSuccess, got %d", counter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivateSlot(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.ActivateSlot("B"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
slot, _ := env.ActiveSlot()
|
||||
if slot != "B" {
|
||||
t.Errorf("expected active_slot=B, got %s", slot)
|
||||
}
|
||||
|
||||
counter, _ := env.BootCounter()
|
||||
if counter != 3 {
|
||||
t.Errorf("expected boot_counter=3, got %d", counter)
|
||||
}
|
||||
|
||||
success, _ := env.BootSuccess()
|
||||
if success {
|
||||
t.Error("expected boot_success=0 after ActivateSlot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivateSlotInvalid(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
err := env.ActivateSlot("C")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid slot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestForceRollback(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
if err := env.ForceRollback(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
slot, _ := env.ActiveSlot()
|
||||
if slot != "B" {
|
||||
t.Errorf("expected active_slot=B after rollback from A, got %s", slot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEnvOutput(t *testing.T) {
|
||||
input := `# GRUB Environment Block
|
||||
active_slot=A
|
||||
boot_counter=3
|
||||
boot_success=1
|
||||
|
||||
`
|
||||
vars := parseEnvOutput(input)
|
||||
|
||||
if len(vars) != 3 {
|
||||
t.Errorf("expected 3 variables, got %d", len(vars))
|
||||
}
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot: expected A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter: expected 3, got %s", vars["boot_counter"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteManualFormat(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "grubenv")
|
||||
|
||||
env := New(path)
|
||||
// Use setManual directly since grub-editenv may not be available
|
||||
err := env.setManual("test_key", "test_value")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(data) != 1024 {
|
||||
t.Errorf("grubenv should be exactly 1024 bytes, got %d", len(data))
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(string(data), "# GRUB Environment Block\n") {
|
||||
t.Error("grubenv should start with '# GRUB Environment Block'")
|
||||
}
|
||||
|
||||
if !strings.Contains(string(data), "test_key=test_value\n") {
|
||||
t.Error("grubenv should contain test_key=test_value")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadNonexistentFile(t *testing.T) {
|
||||
env := New("/nonexistent/path/grubenv")
|
||||
_, err := env.ReadAll()
|
||||
if err == nil {
|
||||
t.Fatal("expected error reading nonexistent file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultipleSetOperations(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := createTestGrubenv(t, dir, map[string]string{
|
||||
"active_slot": "A",
|
||||
"boot_counter": "3",
|
||||
"boot_success": "1",
|
||||
})
|
||||
|
||||
env := New(path)
|
||||
|
||||
// Simulate a boot cycle: decrement counter, then mark success
|
||||
if err := env.Set("boot_counter", "2"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := env.Set("boot_success", "0"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Now mark boot success
|
||||
if err := env.MarkBootSuccess(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify final state
|
||||
vars, err := env.ReadAll()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if vars["active_slot"] != "A" {
|
||||
t.Errorf("active_slot should still be A, got %s", vars["active_slot"])
|
||||
}
|
||||
if vars["boot_counter"] != "3" {
|
||||
t.Errorf("boot_counter should be 3 after mark success, got %s", vars["boot_counter"])
|
||||
}
|
||||
if vars["boot_success"] != "1" {
|
||||
t.Errorf("boot_success should be 1, got %s", vars["boot_success"])
|
||||
}
|
||||
}
|
||||
198
update/pkg/health/health.go
Normal file
198
update/pkg/health/health.go
Normal file
@@ -0,0 +1,198 @@
|
||||
// Package health implements post-boot health checks for KubeSolo OS.
|
||||
//
|
||||
// After booting a new system partition, the health check verifies that:
|
||||
// - containerd is running and responsive
|
||||
// - KubeSolo API server is reachable
|
||||
// - The Kubernetes node reaches Ready state
|
||||
//
|
||||
// If all checks pass, the GRUB environment is updated to mark the boot
|
||||
// as successful (boot_success=1). If any check fails, boot_success
|
||||
// remains 0 and GRUB will eventually roll back.
|
||||
package health
|
||||
|
||||
import (
	"context"
	"crypto/tls"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"os"
	"os/exec"
	"strings"
	"time"
)
|
||||
|
||||
// Status represents the result of a health check. Each boolean records
// one subsystem check; Message carries a human-readable summary.
type Status struct {
	Containerd bool
	APIServer  bool
	NodeReady  bool
	Message    string
}

// IsHealthy returns true if all checks passed.
func (s *Status) IsHealthy() bool {
	for _, passed := range []bool{s.Containerd, s.APIServer, s.NodeReady} {
		if !passed {
			return false
		}
	}
	return true
}
|
||||
|
||||
// Checker performs health checks against the local KubeSolo instance.
|
||||
type Checker struct {
|
||||
kubeconfigPath string
|
||||
apiServerAddr string
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewChecker creates a health checker.
|
||||
func NewChecker(kubeconfigPath, apiServerAddr string, timeout time.Duration) *Checker {
|
||||
if kubeconfigPath == "" {
|
||||
kubeconfigPath = "/var/lib/kubesolo/pki/admin/admin.kubeconfig"
|
||||
}
|
||||
if apiServerAddr == "" {
|
||||
apiServerAddr = "127.0.0.1:6443"
|
||||
}
|
||||
if timeout == 0 {
|
||||
timeout = 120 * time.Second
|
||||
}
|
||||
return &Checker{
|
||||
kubeconfigPath: kubeconfigPath,
|
||||
apiServerAddr: apiServerAddr,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckContainerd verifies that containerd is running.
|
||||
func (c *Checker) CheckContainerd() bool {
|
||||
// Check if containerd socket exists
|
||||
if _, err := os.Stat("/run/containerd/containerd.sock"); err != nil {
|
||||
slog.Warn("containerd socket not found")
|
||||
return false
|
||||
}
|
||||
|
||||
// Try ctr version (bundled with KubeSolo)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "ctr", "--address", "/run/containerd/containerd.sock", "version")
|
||||
if err := cmd.Run(); err != nil {
|
||||
slog.Warn("containerd not responsive", "error", err)
|
||||
return false
|
||||
}
|
||||
|
||||
slog.Debug("containerd healthy")
|
||||
return true
|
||||
}
|
||||
|
||||
// CheckAPIServer verifies the Kubernetes API server is reachable.
|
||||
func (c *Checker) CheckAPIServer() bool {
|
||||
// TCP connect to API server port
|
||||
conn, err := net.DialTimeout("tcp", c.apiServerAddr, 5*time.Second)
|
||||
if err != nil {
|
||||
slog.Warn("API server not reachable", "addr", c.apiServerAddr, "error", err)
|
||||
return false
|
||||
}
|
||||
conn.Close()
|
||||
|
||||
// Try HTTPS health endpoint (skip TLS verify for localhost)
|
||||
client := &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSHandshakeTimeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := client.Get("https://" + c.apiServerAddr + "/healthz")
|
||||
if err != nil {
|
||||
// TLS error is expected without proper CA, but TCP connect succeeded
|
||||
slog.Debug("API server TCP reachable but HTTPS check skipped", "error", err)
|
||||
return true
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
slog.Debug("API server healthy", "status", resp.StatusCode)
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("API server unhealthy", "status", resp.StatusCode)
|
||||
return false
|
||||
}
|
||||
|
||||
// CheckNodeReady uses kubectl to verify the node is in Ready state.
|
||||
func (c *Checker) CheckNodeReady() bool {
|
||||
if _, err := os.Stat(c.kubeconfigPath); err != nil {
|
||||
slog.Warn("kubeconfig not found", "path", c.kubeconfigPath)
|
||||
return false
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "kubectl",
|
||||
"--kubeconfig", c.kubeconfigPath,
|
||||
"get", "nodes",
|
||||
"-o", "jsonpath={.items[0].status.conditions[?(@.type==\"Ready\")].status}",
|
||||
)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
slog.Warn("kubectl get nodes failed", "error", err)
|
||||
return false
|
||||
}
|
||||
|
||||
status := strings.TrimSpace(string(output))
|
||||
if status == "True" {
|
||||
slog.Debug("node is Ready")
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("node not Ready", "status", status)
|
||||
return false
|
||||
}
|
||||
|
||||
// RunAll performs all health checks and returns the combined status.
|
||||
func (c *Checker) RunAll() *Status {
|
||||
return &Status{
|
||||
Containerd: c.CheckContainerd(),
|
||||
APIServer: c.CheckAPIServer(),
|
||||
NodeReady: c.CheckNodeReady(),
|
||||
}
|
||||
}
|
||||
|
||||
// WaitForHealthy polls health checks until all pass or timeout expires.
|
||||
func (c *Checker) WaitForHealthy() (*Status, error) {
|
||||
deadline := time.Now().Add(c.timeout)
|
||||
interval := 5 * time.Second
|
||||
|
||||
slog.Info("waiting for system health", "timeout", c.timeout)
|
||||
|
||||
for time.Now().Before(deadline) {
|
||||
status := c.RunAll()
|
||||
if status.IsHealthy() {
|
||||
status.Message = "all checks passed"
|
||||
slog.Info("system healthy",
|
||||
"containerd", status.Containerd,
|
||||
"apiserver", status.APIServer,
|
||||
"node_ready", status.NodeReady,
|
||||
)
|
||||
return status, nil
|
||||
}
|
||||
|
||||
slog.Debug("health check pending",
|
||||
"containerd", status.Containerd,
|
||||
"apiserver", status.APIServer,
|
||||
"node_ready", status.NodeReady,
|
||||
"remaining", time.Until(deadline).Round(time.Second),
|
||||
)
|
||||
|
||||
time.Sleep(interval)
|
||||
}
|
||||
|
||||
// Final check
|
||||
status := c.RunAll()
|
||||
if status.IsHealthy() {
|
||||
status.Message = "all checks passed"
|
||||
return status, nil
|
||||
}
|
||||
|
||||
status.Message = "health check timeout"
|
||||
return status, fmt.Errorf("health check timed out after %s", c.timeout)
|
||||
}
|
||||
86
update/pkg/health/health_test.go
Normal file
86
update/pkg/health/health_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestStatusIsHealthy(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
status Status
|
||||
wantHealth bool
|
||||
}{
|
||||
{
|
||||
name: "all healthy",
|
||||
status: Status{Containerd: true, APIServer: true, NodeReady: true},
|
||||
wantHealth: true,
|
||||
},
|
||||
{
|
||||
name: "containerd down",
|
||||
status: Status{Containerd: false, APIServer: true, NodeReady: true},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "apiserver down",
|
||||
status: Status{Containerd: true, APIServer: false, NodeReady: true},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "node not ready",
|
||||
status: Status{Containerd: true, APIServer: true, NodeReady: false},
|
||||
wantHealth: false,
|
||||
},
|
||||
{
|
||||
name: "all down",
|
||||
status: Status{Containerd: false, APIServer: false, NodeReady: false},
|
||||
wantHealth: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := tt.status.IsHealthy(); got != tt.wantHealth {
|
||||
t.Errorf("IsHealthy() = %v, want %v", got, tt.wantHealth)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChecker(t *testing.T) {
|
||||
// Test defaults
|
||||
c := NewChecker("", "", 0)
|
||||
if c.kubeconfigPath != "/var/lib/kubesolo/pki/admin/admin.kubeconfig" {
|
||||
t.Errorf("unexpected default kubeconfig: %s", c.kubeconfigPath)
|
||||
}
|
||||
if c.apiServerAddr != "127.0.0.1:6443" {
|
||||
t.Errorf("unexpected default apiserver addr: %s", c.apiServerAddr)
|
||||
}
|
||||
if c.timeout != 120*time.Second {
|
||||
t.Errorf("unexpected default timeout: %v", c.timeout)
|
||||
}
|
||||
|
||||
// Test custom values
|
||||
c = NewChecker("/custom/kubeconfig", "10.0.0.1:6443", 30*time.Second)
|
||||
if c.kubeconfigPath != "/custom/kubeconfig" {
|
||||
t.Errorf("expected custom kubeconfig, got %s", c.kubeconfigPath)
|
||||
}
|
||||
if c.apiServerAddr != "10.0.0.1:6443" {
|
||||
t.Errorf("expected custom addr, got %s", c.apiServerAddr)
|
||||
}
|
||||
if c.timeout != 30*time.Second {
|
||||
t.Errorf("expected 30s timeout, got %v", c.timeout)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusMessage(t *testing.T) {
|
||||
s := &Status{
|
||||
Containerd: true,
|
||||
APIServer: true,
|
||||
NodeReady: true,
|
||||
Message: "all checks passed",
|
||||
}
|
||||
if s.Message != "all checks passed" {
|
||||
t.Errorf("unexpected message: %s", s.Message)
|
||||
}
|
||||
}
|
||||
180
update/pkg/image/image.go
Normal file
180
update/pkg/image/image.go
Normal file
@@ -0,0 +1,180 @@
|
||||
// Package image handles downloading, verifying, and staging OS update images.
|
||||
//
|
||||
// Update images are distributed as pairs of files:
|
||||
// - vmlinuz (kernel)
|
||||
// - kubesolo-os.gz (initramfs)
|
||||
//
|
||||
// These are fetched from an HTTP(S) server that provides a metadata file
|
||||
// (latest.json) describing available updates.
|
||||
package image
|
||||
|
||||
import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"
)
|
||||
|
||||
// UpdateMetadata describes an available update from the update server.
// It mirrors the JSON schema of the server's latest.json document.
type UpdateMetadata struct {
	Version         string `json:"version"`                 // release version; required, rejected if empty
	VmlinuzURL      string `json:"vmlinuz_url"`             // where to fetch the kernel
	VmlinuzSHA256   string `json:"vmlinuz_sha256"`          // expected kernel digest (lowercase hex)
	InitramfsURL    string `json:"initramfs_url"`           // where to fetch the initramfs
	InitramfsSHA256 string `json:"initramfs_sha256"`        // expected initramfs digest (lowercase hex)
	ReleaseNotes    string `json:"release_notes,omitempty"` // optional human-readable notes
	ReleaseDate     string `json:"release_date,omitempty"`  // optional release date string
}

// StagedImage represents downloaded and verified update files.
type StagedImage struct {
	VmlinuzPath   string // staged kernel on local disk
	InitramfsPath string // staged initramfs on local disk
	Version       string // version the staged files belong to
}

// Client handles communication with the update server.
type Client struct {
	serverURL  string       // base URL of the update server
	httpClient *http.Client // shared client with an overall request timeout
	stageDir   string       // directory downloads are staged into (and removed from on Cleanup)
}
|
||||
|
||||
// NewClient creates a new update image client.
|
||||
func NewClient(serverURL, stageDir string) *Client {
|
||||
return &Client{
|
||||
serverURL: serverURL,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 5 * time.Minute,
|
||||
},
|
||||
stageDir: stageDir,
|
||||
}
|
||||
}
|
||||
|
||||
// CheckForUpdate fetches the latest update metadata from the server.
|
||||
func (c *Client) CheckForUpdate() (*UpdateMetadata, error) {
|
||||
url := c.serverURL + "/latest.json"
|
||||
slog.Info("checking for update", "url", url)
|
||||
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetching update metadata: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("update server returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var meta UpdateMetadata
|
||||
if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
|
||||
return nil, fmt.Errorf("parsing update metadata: %w", err)
|
||||
}
|
||||
|
||||
if meta.Version == "" {
|
||||
return nil, fmt.Errorf("update metadata missing version")
|
||||
}
|
||||
|
||||
return &meta, nil
|
||||
}
|
||||
|
||||
// Download fetches the update files and verifies their checksums.
|
||||
func (c *Client) Download(meta *UpdateMetadata) (*StagedImage, error) {
|
||||
if err := os.MkdirAll(c.stageDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("creating stage dir: %w", err)
|
||||
}
|
||||
|
||||
vmlinuzPath := filepath.Join(c.stageDir, "vmlinuz")
|
||||
initramfsPath := filepath.Join(c.stageDir, "kubesolo-os.gz")
|
||||
|
||||
slog.Info("downloading vmlinuz", "url", meta.VmlinuzURL)
|
||||
if err := c.downloadAndVerify(meta.VmlinuzURL, vmlinuzPath, meta.VmlinuzSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading vmlinuz: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("downloading initramfs", "url", meta.InitramfsURL)
|
||||
if err := c.downloadAndVerify(meta.InitramfsURL, initramfsPath, meta.InitramfsSHA256); err != nil {
|
||||
return nil, fmt.Errorf("downloading initramfs: %w", err)
|
||||
}
|
||||
|
||||
return &StagedImage{
|
||||
VmlinuzPath: vmlinuzPath,
|
||||
InitramfsPath: initramfsPath,
|
||||
Version: meta.Version,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Cleanup removes staged update files.
// It deletes the entire stage directory; a missing directory is not an
// error (os.RemoveAll returns nil in that case).
func (c *Client) Cleanup() error {
	return os.RemoveAll(c.stageDir)
}
|
||||
|
||||
func (c *Client) downloadAndVerify(url, dest, expectedSHA256 string) error {
|
||||
resp, err := c.httpClient.Get(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %s: %w", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("server returned %d for %s", resp.StatusCode, url)
|
||||
}
|
||||
|
||||
f, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating %s: %w", dest, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
hasher := sha256.New()
|
||||
writer := io.MultiWriter(f, hasher)
|
||||
|
||||
written, err := io.Copy(writer, resp.Body)
|
||||
if err != nil {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("writing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
if err := f.Close(); err != nil {
|
||||
return fmt.Errorf("closing %s: %w", dest, err)
|
||||
}
|
||||
|
||||
// Verify checksum
|
||||
if expectedSHA256 != "" {
|
||||
actual := hex.EncodeToString(hasher.Sum(nil))
|
||||
if actual != expectedSHA256 {
|
||||
os.Remove(dest)
|
||||
return fmt.Errorf("checksum mismatch for %s: expected %s, got %s", dest, expectedSHA256, actual)
|
||||
}
|
||||
slog.Debug("checksum verified", "file", dest, "sha256", actual)
|
||||
}
|
||||
|
||||
slog.Info("downloaded", "file", dest, "size", written)
|
||||
return nil
|
||||
}
|
||||
|
||||
// VerifyFile checks the SHA256 checksum of an existing file.
// It returns nil when the file's digest equals expectedSHA256 (lowercase
// hex) and a descriptive error otherwise.
func VerifyFile(path, expectedSHA256 string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}

	if got := hex.EncodeToString(h.Sum(nil)); got != expectedSHA256 {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedSHA256, got)
	}
	return nil
}
|
||||
241
update/pkg/image/image_test.go
Normal file
241
update/pkg/image/image_test.go
Normal file
@@ -0,0 +1,241 @@
|
||||
package image
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCheckForUpdate(t *testing.T) {
|
||||
meta := UpdateMetadata{
|
||||
Version: "1.2.0",
|
||||
VmlinuzURL: "/vmlinuz",
|
||||
VmlinuzSHA256: "abc123",
|
||||
InitramfsURL: "/kubesolo-os.gz",
|
||||
InitramfsSHA256: "def456",
|
||||
ReleaseNotes: "Bug fixes",
|
||||
ReleaseDate: "2025-01-15",
|
||||
}
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/latest.json" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
json.NewEncoder(w).Encode(meta)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
got, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if got.Version != "1.2.0" {
|
||||
t.Errorf("expected version 1.2.0, got %s", got.Version)
|
||||
}
|
||||
if got.VmlinuzSHA256 != "abc123" {
|
||||
t.Errorf("expected vmlinuz sha abc123, got %s", got.VmlinuzSHA256)
|
||||
}
|
||||
if got.ReleaseNotes != "Bug fixes" {
|
||||
t.Errorf("expected release notes, got %s", got.ReleaseNotes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckForUpdateMissingVersion(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
json.NewEncoder(w).Encode(UpdateMetadata{})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
_, err := client.CheckForUpdate()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing version")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckForUpdateServerError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := NewClient(server.URL, "")
|
||||
_, err := client.CheckForUpdate()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for server error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadAndVerify(t *testing.T) {
|
||||
// Create test content
|
||||
vmlinuzContent := []byte("fake vmlinuz content for testing")
|
||||
initramfsContent := []byte("fake initramfs content for testing")
|
||||
|
||||
vmlinuzHash := sha256.Sum256(vmlinuzContent)
|
||||
initramfsHash := sha256.Sum256(initramfsContent)
|
||||
|
||||
meta := UpdateMetadata{
|
||||
Version: "2.0.0",
|
||||
VmlinuzSHA256: hex.EncodeToString(vmlinuzHash[:]),
|
||||
InitramfsSHA256: hex.EncodeToString(initramfsHash[:]),
|
||||
}
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/latest.json":
|
||||
m := meta
|
||||
m.VmlinuzURL = "http://" + r.Host + "/vmlinuz"
|
||||
m.InitramfsURL = "http://" + r.Host + "/kubesolo-os.gz"
|
||||
json.NewEncoder(w).Encode(m)
|
||||
case "/vmlinuz":
|
||||
w.Write(vmlinuzContent)
|
||||
case "/kubesolo-os.gz":
|
||||
w.Write(initramfsContent)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
client := NewClient(server.URL, stageDir)
|
||||
defer client.Cleanup()
|
||||
|
||||
// First get metadata
|
||||
gotMeta, err := client.CheckForUpdate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Download
|
||||
staged, err := client.Download(gotMeta)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if staged.Version != "2.0.0" {
|
||||
t.Errorf("expected version 2.0.0, got %s", staged.Version)
|
||||
}
|
||||
|
||||
// Verify files exist
|
||||
if _, err := os.Stat(staged.VmlinuzPath); err != nil {
|
||||
t.Errorf("vmlinuz not found: %v", err)
|
||||
}
|
||||
if _, err := os.Stat(staged.InitramfsPath); err != nil {
|
||||
t.Errorf("initramfs not found: %v", err)
|
||||
}
|
||||
|
||||
// Verify content
|
||||
data, _ := os.ReadFile(staged.VmlinuzPath)
|
||||
if string(data) != string(vmlinuzContent) {
|
||||
t.Error("vmlinuz content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDownloadChecksumMismatch(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/vmlinuz":
|
||||
w.Write([]byte("actual content"))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
client := NewClient(server.URL, stageDir)
|
||||
|
||||
meta := &UpdateMetadata{
|
||||
Version: "1.0.0",
|
||||
VmlinuzURL: server.URL + "/vmlinuz",
|
||||
VmlinuzSHA256: "wrong_checksum_value",
|
||||
InitramfsURL: server.URL + "/initramfs",
|
||||
}
|
||||
|
||||
_, err := client.Download(meta)
|
||||
if err == nil {
|
||||
t.Fatal("expected checksum mismatch error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyFile(t *testing.T) {
|
||||
content := []byte("test file content for verification")
|
||||
hash := sha256.Sum256(content)
|
||||
expected := hex.EncodeToString(hash[:])
|
||||
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "testfile")
|
||||
if err := os.WriteFile(path, content, 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Should pass with correct hash
|
||||
if err := VerifyFile(path, expected); err != nil {
|
||||
t.Errorf("expected verification to pass: %v", err)
|
||||
}
|
||||
|
||||
// Should fail with wrong hash
|
||||
if err := VerifyFile(path, "deadbeef"); err == nil {
|
||||
t.Error("expected verification to fail with wrong hash")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyFileNotFound(t *testing.T) {
|
||||
err := VerifyFile("/nonexistent/file", "abc123")
|
||||
if err == nil {
|
||||
t.Error("expected error for nonexistent file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanup(t *testing.T) {
|
||||
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||
os.MkdirAll(stageDir, 0o755)
|
||||
os.WriteFile(filepath.Join(stageDir, "test"), []byte("data"), 0o644)
|
||||
|
||||
client := NewClient("http://unused", stageDir)
|
||||
if err := client.Cleanup(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(stageDir); !os.IsNotExist(err) {
|
||||
t.Error("stage dir should be removed after cleanup")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateMetadataJSON(t *testing.T) {
|
||||
meta := UpdateMetadata{
|
||||
Version: "1.0.0",
|
||||
VmlinuzURL: "https://example.com/vmlinuz",
|
||||
VmlinuzSHA256: "abc",
|
||||
InitramfsURL: "https://example.com/kubesolo-os.gz",
|
||||
InitramfsSHA256: "def",
|
||||
ReleaseNotes: "Initial release",
|
||||
ReleaseDate: "2025-01-01",
|
||||
}
|
||||
|
||||
data, err := json.Marshal(meta)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var decoded UpdateMetadata
|
||||
if err := json.Unmarshal(data, &decoded); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if decoded.Version != meta.Version {
|
||||
t.Errorf("version mismatch: %s != %s", decoded.Version, meta.Version)
|
||||
}
|
||||
if decoded.ReleaseDate != meta.ReleaseDate {
|
||||
t.Errorf("release date mismatch: %s != %s", decoded.ReleaseDate, meta.ReleaseDate)
|
||||
}
|
||||
}
|
||||
139
update/pkg/partition/partition.go
Normal file
139
update/pkg/partition/partition.go
Normal file
@@ -0,0 +1,139 @@
|
||||
// Package partition detects and manages A/B system partitions.
|
||||
//
|
||||
// It identifies System A and System B partitions by label (KSOLOA, KSOLOB)
|
||||
// and provides mount/write operations for the update process.
|
||||
package partition
|
||||
|
||||
import (
	"fmt"
	"io"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)
|
||||
|
||||
// Filesystem labels assigned at image-creation time. The update agent
// locates partitions exclusively by these labels (via blkid), never by
// device path, so the layout survives device renaming.
const (
	LabelSystemA = "KSOLOA"    // system slot A (kernel + initramfs)
	LabelSystemB = "KSOLOB"    // system slot B (kernel + initramfs)
	LabelData    = "KSOLODATA" // persistent data partition
	LabelEFI     = "KSOLOEFI"  // EFI system partition holding GRUB
)

// Info contains information about a partition.
type Info struct {
	Device     string // e.g. /dev/sda2
	Label      string // e.g. KSOLOA
	MountPoint string // current mount point, empty if not mounted
	Slot       string // "A" or "B"
}
|
||||
|
||||
// FindByLabel locates a block device by its filesystem label using
// `blkid -L`. It returns the device path (e.g. /dev/sda2) or an error
// when no partition carries the label.
func FindByLabel(label string) (string, error) {
	out, err := exec.Command("blkid", "-L", label).Output()
	if err != nil {
		return "", fmt.Errorf("partition with label %q not found: %w", label, err)
	}
	return strings.TrimSpace(string(out)), nil
}
|
||||
|
||||
// GetSlotPartition returns the partition info for the given slot ("A" or "B").
|
||||
func GetSlotPartition(slot string) (*Info, error) {
|
||||
var label string
|
||||
switch slot {
|
||||
case "A":
|
||||
label = LabelSystemA
|
||||
case "B":
|
||||
label = LabelSystemB
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid slot: %q", slot)
|
||||
}
|
||||
|
||||
dev, err := FindByLabel(label)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Info{
|
||||
Device: dev,
|
||||
Label: label,
|
||||
Slot: slot,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// mountDevice creates mountPoint and mounts dev there, passing extraArgs
// (e.g. "-o ro") through to mount(8). mode is only used for logging.
// Shared by MountReadOnly and MountReadWrite to keep the two in lockstep.
func mountDevice(dev, mountPoint, mode string, extraArgs []string) error {
	if err := os.MkdirAll(mountPoint, 0o755); err != nil {
		return fmt.Errorf("creating mount point: %w", err)
	}
	cmd := exec.Command("mount", append(extraArgs, dev, mountPoint)...)
	if output, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("mounting %s at %s: %w\n%s", dev, mountPoint, err, output)
	}
	slog.Debug("mounted", "device", dev, "mountpoint", mountPoint, "mode", mode)
	return nil
}

// MountReadOnly mounts a partition read-only at the given mount point.
func MountReadOnly(dev, mountPoint string) error {
	return mountDevice(dev, mountPoint, "ro", []string{"-o", "ro"})
}

// MountReadWrite mounts a partition read-write at the given mount point.
func MountReadWrite(dev, mountPoint string) error {
	return mountDevice(dev, mountPoint, "rw", nil)
}
|
||||
|
||||
// Unmount detaches the filesystem mounted at mountPoint via umount(8).
func Unmount(mountPoint string) error {
	out, err := exec.Command("umount", mountPoint).CombinedOutput()
	if err != nil {
		return fmt.Errorf("unmounting %s: %w\n%s", mountPoint, err, out)
	}
	return nil
}

// ReadVersion reads the version file from a mounted system partition,
// returning its contents with surrounding whitespace trimmed.
func ReadVersion(mountPoint string) (string, error) {
	raw, err := os.ReadFile(filepath.Join(mountPoint, "version"))
	if err != nil {
		return "", fmt.Errorf("reading version: %w", err)
	}
	return strings.TrimSpace(string(raw)), nil
}
|
||||
|
||||
// WriteSystemImage copies vmlinuz and initramfs to a mounted partition.
|
||||
func WriteSystemImage(mountPoint, vmlinuzPath, initramfsPath, version string) error {
|
||||
// Copy vmlinuz
|
||||
if err := copyFile(vmlinuzPath, filepath.Join(mountPoint, "vmlinuz")); err != nil {
|
||||
return fmt.Errorf("writing vmlinuz: %w", err)
|
||||
}
|
||||
|
||||
// Copy initramfs
|
||||
if err := copyFile(initramfsPath, filepath.Join(mountPoint, "kubesolo-os.gz")); err != nil {
|
||||
return fmt.Errorf("writing initramfs: %w", err)
|
||||
}
|
||||
|
||||
// Write version
|
||||
if err := os.WriteFile(filepath.Join(mountPoint, "version"), []byte(version+"\n"), 0o644); err != nil {
|
||||
return fmt.Errorf("writing version: %w", err)
|
||||
}
|
||||
|
||||
// Sync to ensure data is flushed to disk
|
||||
exec.Command("sync").Run()
|
||||
|
||||
slog.Info("system image written", "mountpoint", mountPoint, "version", version)
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyFile(src, dst string) error {
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(dst, data, 0o644)
|
||||
}
|
||||
129
update/pkg/partition/partition_test.go
Normal file
129
update/pkg/partition/partition_test.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package partition
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadVersion(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
versionFile := filepath.Join(dir, "version")
|
||||
if err := os.WriteFile(versionFile, []byte("1.2.3\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
version, err := ReadVersion(dir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if version != "1.2.3" {
|
||||
t.Errorf("expected 1.2.3, got %s", version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadVersionMissing(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
_, err := ReadVersion(dir)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing version file")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteSystemImage(t *testing.T) {
|
||||
mountPoint := t.TempDir()
|
||||
srcDir := t.TempDir()
|
||||
|
||||
// Create source files
|
||||
vmlinuzPath := filepath.Join(srcDir, "vmlinuz")
|
||||
initramfsPath := filepath.Join(srcDir, "kubesolo-os.gz")
|
||||
|
||||
if err := os.WriteFile(vmlinuzPath, []byte("kernel data"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(initramfsPath, []byte("initramfs data"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := WriteSystemImage(mountPoint, vmlinuzPath, initramfsPath, "2.0.0"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify files were copied
|
||||
data, err := os.ReadFile(filepath.Join(mountPoint, "vmlinuz"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "kernel data" {
|
||||
t.Errorf("vmlinuz content mismatch")
|
||||
}
|
||||
|
||||
data, err = os.ReadFile(filepath.Join(mountPoint, "kubesolo-os.gz"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "initramfs data" {
|
||||
t.Errorf("initramfs content mismatch")
|
||||
}
|
||||
|
||||
// Verify version file
|
||||
version, err := ReadVersion(mountPoint)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if version != "2.0.0" {
|
||||
t.Errorf("expected version 2.0.0, got %s", version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
src := filepath.Join(dir, "src")
|
||||
dst := filepath.Join(dir, "dst")
|
||||
|
||||
if err := os.WriteFile(src, []byte("test content"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := copyFile(src, dst); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(dst)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(data) != "test content" {
|
||||
t.Errorf("copy content mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCopyFileNotFound(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
err := copyFile("/nonexistent", filepath.Join(dir, "dst"))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nonexistent source")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSlotPartitionInvalid(t *testing.T) {
|
||||
_, err := GetSlotPartition("C")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid slot")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConstants(t *testing.T) {
|
||||
if LabelSystemA != "KSOLOA" {
|
||||
t.Errorf("unexpected LabelSystemA: %s", LabelSystemA)
|
||||
}
|
||||
if LabelSystemB != "KSOLOB" {
|
||||
t.Errorf("unexpected LabelSystemB: %s", LabelSystemB)
|
||||
}
|
||||
if LabelData != "KSOLODATA" {
|
||||
t.Errorf("unexpected LabelData: %s", LabelData)
|
||||
}
|
||||
if LabelEFI != "KSOLOEFI" {
|
||||
t.Errorf("unexpected LabelEFI: %s", LabelEFI)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user