feat: add distribution and fleet management — CI/CD, OCI, metrics, ARM64 (Phase 5)
Some checks failed
CI / Go Tests (push) Has been cancelled
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Has been cancelled
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Has been cancelled
CI / Shellcheck (push) Has been cancelled

- Gitea Actions CI pipeline: Go tests, build, shellcheck on push/PR
- Gitea Actions release pipeline: full build + artifact upload on version tags
- OCI container image builder for registry-based OS distribution
- Zero-dependency Prometheus metrics endpoint (kubesolo_os_info, boot,
  memory, update status) with 10 tests
- USB provisioning tool for air-gapped deployments with cloud-init injection
- ARM64 cross-compilation support (TARGET_ARCH env var + build-cross.sh)
- Updated build scripts to accept TARGET_ARCH for both amd64 and arm64
- New Makefile targets: oci-image, build-cross

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 11:36:53 -06:00
parent 49a37e30e8
commit 456aa8eb5b
12 changed files with 1206 additions and 7 deletions

91
.gitea/workflows/ci.yaml Normal file
View File

@@ -0,0 +1,91 @@
# CI workflow: runs Go tests and vet, cross-builds the Go binaries for
# linux/amd64 and linux/arm64, and lints the shell scripts on every push and
# pull request targeting main.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test-go:
    name: Go Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      - name: Test cloud-init
        working-directory: cloud-init
        run: go test ./... -v -count=1
      - name: Test update agent
        working-directory: update
        run: go test ./... -v -count=1
      - name: Vet cloud-init
        working-directory: cloud-init
        run: go vet ./...
      - name: Vet update agent
        working-directory: update
        run: go vet ./...

  build-binaries:
    name: Build Go Binaries
    runs-on: ubuntu-latest
    # Only build once the test job has passed.
    needs: test-go
    strategy:
      matrix:
        include:
          - goos: linux
            goarch: amd64
            suffix: linux-amd64
          - goos: linux
            goarch: arm64
            suffix: linux-arm64
    # Static, cross-compiled builds: the target comes from the matrix entry.
    env:
      CGO_ENABLED: '0'
      GOOS: ${{ matrix.goos }}
      GOARCH: ${{ matrix.goarch }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      - name: Build cloud-init (${{ matrix.suffix }})
        working-directory: cloud-init
        run: go build -ldflags='-s -w' -o kubesolo-cloudinit-${{ matrix.suffix }} ./cmd/
      - name: Build update agent (${{ matrix.suffix }})
        working-directory: update
        run: go build -ldflags='-s -w' -o kubesolo-update-${{ matrix.suffix }} .
      - name: Upload binaries
        uses: actions/upload-artifact@v4
        with:
          name: binaries-${{ matrix.suffix }}
          path: |
            cloud-init/kubesolo-cloudinit-${{ matrix.suffix }}
            update/kubesolo-update-${{ matrix.suffix }}

  shellcheck:
    name: Shellcheck
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install shellcheck
        run: sudo apt-get update && sudo apt-get install -y shellcheck
      # Init and build scripts are linted strictly: any finding fails the job.
      - name: Lint init scripts (POSIX sh)
        run: shellcheck -s sh init/init.sh init/lib/*.sh init/emergency-shell.sh
      - name: Lint build scripts (bash)
        run: shellcheck -s bash build/scripts/*.sh build/config/kernel-audit.sh
      # Test and hack scripts are advisory only: "|| true" keeps findings from
      # failing the job.
      - name: Lint test scripts (bash)
        run: shellcheck -s bash test/qemu/*.sh test/integration/*.sh test/kernel/*.sh || true
      - name: Lint hack scripts (bash)
        run: shellcheck -s bash hack/*.sh || true

View File

@@ -0,0 +1,165 @@
# Release workflow: on a v* tag, run the Go tests, build the versioned
# binaries (amd64 + arm64), build the amd64 ISO and disk image, then publish
# everything together with a SHA256SUMS file as a release.
name: Release

on:
  push:
    tags:
      - 'v*'

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      - name: Test cloud-init
        working-directory: cloud-init
        run: go test ./... -count=1
      - name: Test update agent
        working-directory: update
        run: go test ./... -count=1

  build-binaries:
    name: Build Binaries
    runs-on: ubuntu-latest
    needs: test
    strategy:
      matrix:
        include:
          - goos: linux
            goarch: amd64
            suffix: linux-amd64
          - goos: linux
            goarch: arm64
            suffix: linux-arm64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      # Version stamped into the binaries is the tag name without the "v".
      - name: Get version
        id: version
        run: echo "version=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
      - name: Build cloud-init
        working-directory: cloud-init
        run: |
          CGO_ENABLED=0 GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} \
            go build -ldflags="-s -w -X main.version=${{ steps.version.outputs.version }}" \
            -o kubesolo-cloudinit-${{ matrix.suffix }} ./cmd/
      - name: Build update agent
        working-directory: update
        run: |
          CGO_ENABLED=0 GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} \
            go build -ldflags="-s -w -X main.version=${{ steps.version.outputs.version }}" \
            -o kubesolo-update-${{ matrix.suffix }} .
      - name: Upload binaries
        uses: actions/upload-artifact@v4
        with:
          name: binaries-${{ matrix.suffix }}
          path: |
            cloud-init/kubesolo-cloudinit-${{ matrix.suffix }}
            update/kubesolo-update-${{ matrix.suffix }}

  build-iso:
    name: Build ISO (amd64)
    runs-on: ubuntu-latest
    needs: build-binaries
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      - name: Install build deps
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            cpio gzip genisoimage isolinux syslinux syslinux-common \
            syslinux-utils xorriso xz-utils wget squashfs-tools \
            dosfstools e2fsprogs fdisk parted bsdtar
      - name: Build ISO
        run: make iso
      - name: Build disk image
        run: make disk-image
      - name: Get version
        id: version
        run: echo "version=$(cat VERSION)" >> "$GITHUB_OUTPUT"
      - name: Upload ISO
        uses: actions/upload-artifact@v4
        with:
          name: iso-amd64
          path: output/*.iso
      - name: Upload disk image
        uses: actions/upload-artifact@v4
        with:
          name: disk-image-amd64
          path: output/*.img

  release:
    name: Create Release
    runs-on: ubuntu-latest
    needs: [build-binaries, build-iso]
    steps:
      - uses: actions/checkout@v4
      # The release title and the ISO name in the notes come from the VERSION
      # file, while the binaries are stamped from the tag name.
      # NOTE(review): confirm VERSION always matches the pushed tag.
      - name: Get version
        id: version
        run: echo "version=$(cat VERSION)" >> "$GITHUB_OUTPUT"
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts
      # Release assets are attached flat (no directories), so each file is
      # hashed from inside its own artifact directory. That makes SHA256SUMS
      # list bare file names, which is what "sha256sum -c SHA256SUMS" needs
      # when run next to the downloaded assets. Entries are ordered by name.
      - name: Compute checksums
        run: |
          cd artifacts
          find . -type f \( -name "*.iso" -o -name "*.img" -o -name "kubesolo-*" \) -print \
            | sort \
            | while IFS= read -r file; do
                (cd "$(dirname "$file")" && sha256sum "$(basename "$file")")
              done > ../SHA256SUMS
          cd ..
      # NOTE(review): this action is written for the GitHub releases API;
      # confirm it works against the Gitea instance running this workflow.
      - name: Create release
        uses: softprops/action-gh-release@v2
        with:
          name: KubeSolo OS v${{ steps.version.outputs.version }}
          body: |
            ## KubeSolo OS v${{ steps.version.outputs.version }}
            ### Downloads
            - **ISO** — Boot from CD/USB, ideal for testing
            - **Disk Image** — Raw disk with A/B partitions + GRUB
            - **Binaries** — Standalone cloud-init and update agent
            ### Verify
            ```
            sha256sum -c SHA256SUMS
            ```
            ### Quick Start
            ```bash
            # Boot in QEMU
            qemu-system-x86_64 -m 1024 -smp 2 -enable-kvm \
              -cdrom kubesolo-os-${{ steps.version.outputs.version }}.iso \
              -nographic
            ```
          files: |
            artifacts/**/*.iso
            artifacts/**/*.img
            artifacts/**/kubesolo-*
            SHA256SUMS
          draft: false
          prerelease: false

View File

@@ -1,4 +1,5 @@
.PHONY: all fetch build-cloudinit build-update-agent rootfs initramfs iso disk-image \
.PHONY: all fetch build-cloudinit build-update-agent build-cross rootfs initramfs \
iso disk-image oci-image \
test-boot test-k8s test-persistence test-deploy test-storage test-all \
test-cloudinit test-update-agent \
bench-boot bench-resources \
@@ -56,6 +57,16 @@ disk-image: initramfs
$(BUILD_DIR)/scripts/create-disk-image.sh
@echo "==> Built: $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).img"
oci-image: initramfs
@echo "==> Creating OCI container image..."
$(BUILD_DIR)/scripts/create-oci-image.sh
@echo "==> OCI image built"
# Cross-compile Go binaries for amd64 + arm64
build-cross:
@echo "==> Cross-compiling for amd64 + arm64..."
$(BUILD_DIR)/scripts/build-cross.sh
# =============================================================================
# Kernel validation
# =============================================================================
@@ -192,6 +203,8 @@ help:
@echo " make initramfs Repack rootfs into kubesolo-os.gz"
@echo " make iso Create bootable ISO (default target)"
@echo " make disk-image Create raw disk image with A/B partitions + GRUB"
@echo " make oci-image Create OCI container image for registry distribution"
@echo " make build-cross Cross-compile Go binaries for amd64 + arm64"
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
@echo " make docker-build Reproducible build inside Docker"
@echo ""

View File

@@ -1,15 +1,19 @@
#!/bin/bash
# build-cloudinit.sh — Compile the cloud-init binary as a static Linux binary
#
# Environment:
# TARGET_ARCH Target architecture (default: amd64, also supports: arm64)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
CLOUDINIT_SRC="$PROJECT_ROOT/cloud-init"
TARGET_ARCH="${TARGET_ARCH:-amd64}"
OUTPUT="$CACHE_DIR/kubesolo-cloudinit"
echo "==> Building cloud-init binary..."
echo "==> Building cloud-init binary (linux/$TARGET_ARCH)..."
if ! command -v go >/dev/null 2>&1; then
echo "ERROR: Go is not installed. Install Go 1.22+ to build cloud-init."
@@ -28,9 +32,9 @@ go test ./... -count=1 || {
exit 1
}
# Build static binary for Linux amd64
echo " Compiling (CGO_ENABLED=0 GOOS=linux GOARCH=amd64)..."
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
# Build static binary
echo " Compiling (CGO_ENABLED=0 GOOS=linux GOARCH=$TARGET_ARCH)..."
CGO_ENABLED=0 GOOS=linux GOARCH="$TARGET_ARCH" go build \
-ldflags='-s -w' \
-o "$OUTPUT" \
./cmd/

103
build/scripts/build-cross.sh Executable file
View File

@@ -0,0 +1,103 @@
#!/bin/bash
# build-cross.sh — Cross-compile KubeSolo OS Go binaries for multiple architectures
#
# Builds static binaries for amd64 and arm64 (or a single target).
# This is used by CI/CD and for ARM64 device support.
#
# Usage:
#   build/scripts/build-cross.sh                  # Build both amd64 + arm64
#   build/scripts/build-cross.sh --arch amd64     # Build amd64 only
#   build/scripts/build-cross.sh --arch arm64     # Build arm64 only
#   build/scripts/build-cross.sh --skip-tests     # Skip Go tests (for CI where tests run separately)
#
# Environment:
#   CACHE_DIR   Output directory (default: <project root>/build/cache)
#
# Output:
#   build/cache/kubesolo-update-linux-{amd64,arm64}
#   build/cache/kubesolo-cloudinit-linux-{amd64,arm64}
#   build/cache/kubesolo-{cloudinit,update}   symlinks to the amd64 binaries
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"

# Defaults
ARCHES="amd64 arm64"
SKIP_TESTS=false

# Parse args
while [ $# -gt 0 ]; do
    case "$1" in
        --arch)
            ARCHES="${2:?--arch requires a value (amd64 or arm64)}"
            shift 2
            ;;
        --skip-tests)
            SKIP_TESTS=true
            shift
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# Fail early with a clear message instead of a "command not found" mid-build.
if ! command -v go >/dev/null 2>&1; then
    echo "ERROR: Go is not installed. Install Go 1.22+ to cross-compile." >&2
    exit 1
fi

# build_binary LABEL SRC_DIR PACKAGE ARCH OUT
# Compiles PACKAGE (relative to SRC_DIR) as a static linux/ARCH binary at OUT
# and prints its size. The subshell keeps the caller's working directory.
build_binary() {
    local label="$1" src_dir="$2" package="$3" arch="$4" out="$5"

    echo "--- $label → $out ---"
    (cd "$src_dir" && \
        CGO_ENABLED=0 GOOS=linux GOARCH="$arch" \
        go build -ldflags='-s -w' -o "$out" "$package")
    # du instead of parsing ls output (ShellCheck SC2012).
    echo "    Size: $(du -h "$out" | cut -f1)"
}

mkdir -p "$CACHE_DIR"

echo "=== KubeSolo OS Cross-Compilation ==="
echo "    Architectures: $ARCHES"
echo ""

# Run tests once (not per-arch, since Go tests are arch-independent)
if [ "$SKIP_TESTS" = false ]; then
    echo "--- Running cloud-init tests ---"
    (cd "$PROJECT_ROOT/cloud-init" && go test ./... -count=1) || {
        echo "ERROR: Cloud-init tests failed" >&2
        exit 1
    }
    echo "--- Running update agent tests ---"
    (cd "$PROJECT_ROOT/update" && go test ./... -count=1) || {
        echo "ERROR: Update agent tests failed" >&2
        exit 1
    }
    echo ""
fi

# Build for each architecture
for ARCH in $ARCHES; do
    echo "=== Building for linux/$ARCH ==="

    build_binary "cloud-init" "$PROJECT_ROOT/cloud-init" ./cmd/ "$ARCH" \
        "$CACHE_DIR/kubesolo-cloudinit-linux-$ARCH"
    build_binary "update agent" "$PROJECT_ROOT/update" . "$ARCH" \
        "$CACHE_DIR/kubesolo-update-linux-$ARCH"

    # Create symlink for default arch (amd64). The targets are relative, so
    # the links stay valid if the cache directory is moved.
    if [ "$ARCH" = "amd64" ]; then
        ln -sf "kubesolo-cloudinit-linux-$ARCH" "$CACHE_DIR/kubesolo-cloudinit"
        ln -sf "kubesolo-update-linux-$ARCH" "$CACHE_DIR/kubesolo-update"
    fi
    echo ""
done

echo "=== Cross-compilation complete ==="
echo ""
echo "Binaries:"
for ARCH in $ARCHES; do
    echo "  linux/$ARCH:"
    echo "    $CACHE_DIR/kubesolo-cloudinit-linux-$ARCH"
    echo "    $CACHE_DIR/kubesolo-update-linux-$ARCH"
done
echo ""

View File

@@ -3,15 +3,19 @@
#
# Builds a static Linux binary for the update agent.
# Output: build/cache/kubesolo-update
#
# Environment:
# TARGET_ARCH Target architecture (default: amd64, also supports: arm64)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
UPDATE_DIR="$PROJECT_ROOT/update"
CACHE_DIR="$PROJECT_ROOT/build/cache"
TARGET_ARCH="${TARGET_ARCH:-amd64}"
OUTPUT="$CACHE_DIR/kubesolo-update"
echo "=== Building KubeSolo Update Agent ==="
echo "=== Building KubeSolo Update Agent (linux/$TARGET_ARCH) ==="
# Ensure output dir exists
mkdir -p "$CACHE_DIR"
@@ -22,7 +26,7 @@ echo "--- Running tests ---"
# Build static binary
echo "--- Compiling static binary ---"
(cd "$UPDATE_DIR" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
(cd "$UPDATE_DIR" && CGO_ENABLED=0 GOOS=linux GOARCH="$TARGET_ARCH" \
go build -ldflags='-s -w' -o "$OUTPUT" .)
SIZE=$(ls -lh "$OUTPUT" | awk '{print $5}')

155
build/scripts/create-oci-image.sh Executable file
View File

@@ -0,0 +1,155 @@
#!/bin/bash
# create-oci-image.sh — Package KubeSolo OS as an OCI container image
#
# Creates an OCI image containing the kernel and initramfs, suitable for
# distribution via container registries (Docker Hub, GHCR, Quay, etc.).
#
# The OCI image is a minimal scratch-based image containing:
#   /vmlinuz          — kernel
#   /kubesolo-os.gz   — initramfs
#   /version          — version string
#   /metadata.json    — build metadata
#
# Usage:
#   build/scripts/create-oci-image.sh [--registry REGISTRY] [--arch ARCH] [--push]
#
# Environment:
#   ARCH          Image architecture (default: TARGET_ARCH, then amd64)
#   TARGET_ARCH   Fallback used when ARCH is unset, matching the other build scripts
#
# Examples:
#   build/scripts/create-oci-image.sh
#   build/scripts/create-oci-image.sh --registry ghcr.io/portainer --push
#   build/scripts/create-oci-image.sh --registry docker.io/portainer --push
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
OUTPUT_DIR="$PROJECT_ROOT/output"

# Defaults
REGISTRY=""
IMAGE_NAME="kubesolo-os"
PUSH=false
ARCH="${ARCH:-${TARGET_ARCH:-amd64}}"

# Parse args
while [ $# -gt 0 ]; do
    case "$1" in
        --registry) REGISTRY="${2:?--registry requires a value}"; shift 2 ;;
        --push) PUSH=true; shift ;;
        --arch) ARCH="${2:?--arch requires a value (amd64 or arm64)}"; shift 2 ;;
        *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
done

# Build full image tag
if [ -n "$REGISTRY" ]; then
    FULL_IMAGE="${REGISTRY}/${IMAGE_NAME}:${VERSION}"
    LATEST_TAG="${REGISTRY}/${IMAGE_NAME}:latest"
else
    FULL_IMAGE="${IMAGE_NAME}:${VERSION}"
    LATEST_TAG="${IMAGE_NAME}:latest"
fi

echo "==> Building OCI image: $FULL_IMAGE"

# Scratch locations, removed on every exit path (success or failure).
EXTRACT_DIR=""
OCI_BUILD="$OUTPUT_DIR/oci-build"
cleanup() {
    if [ -n "$EXTRACT_DIR" ]; then
        rm -rf "$EXTRACT_DIR"
    fi
    rm -rf "$OCI_BUILD"
}
trap cleanup EXIT

# Check for required files
VMLINUZ="$OUTPUT_DIR/vmlinuz"
INITRAMFS="$OUTPUT_DIR/kubesolo-os.gz"

# If individual files don't exist, try to extract from ISO
if [ ! -f "$VMLINUZ" ] || [ ! -f "$INITRAMFS" ]; then
    ISO="$OUTPUT_DIR/kubesolo-os-${VERSION}.iso"
    if [ -f "$ISO" ]; then
        echo "    Extracting from ISO..."
        EXTRACT_DIR=$(mktemp -d)

        # Extract BOTH the kernel and the initramfs. xorriso is tried first;
        # bsdtar is the fallback when xorriso is missing or fails.
        xorriso -osirrox on -indev "$ISO" \
            -extract /boot/vmlinuz "$EXTRACT_DIR/vmlinuz" \
            -extract /boot/kubesolo-os.gz "$EXTRACT_DIR/kubesolo-os.gz" 2>/dev/null || \
            bsdtar -xf "$ISO" -C "$EXTRACT_DIR" boot/vmlinuz boot/kubesolo-os.gz 2>/dev/null || true

        # bsdtar keeps the boot/ prefix, xorriso writes to the given path.
        for kpath in "$EXTRACT_DIR/boot/vmlinuz" "$EXTRACT_DIR/vmlinuz"; do
            if [ -f "$kpath" ]; then
                VMLINUZ="$kpath"
                break
            fi
        done
        for ipath in "$EXTRACT_DIR/boot/kubesolo-os.gz" "$EXTRACT_DIR/kubesolo-os.gz"; do
            if [ -f "$ipath" ]; then
                INITRAMFS="$ipath"
                break
            fi
        done
    fi
fi

if [ ! -f "$VMLINUZ" ] || [ ! -f "$INITRAMFS" ]; then
    {
        echo "ERROR: Required files not found:"
        echo "  vmlinuz:         $VMLINUZ"
        echo "  kubesolo-os.gz:  $INITRAMFS"
        echo ""
        echo "Run 'make iso' or 'make initramfs' first."
    } >&2
    exit 1
fi

# Create build context (start from an empty directory)
rm -rf "$OCI_BUILD"
mkdir -p "$OCI_BUILD"
cp "$VMLINUZ" "$OCI_BUILD/vmlinuz"
cp "$INITRAMFS" "$OCI_BUILD/kubesolo-os.gz"
echo "$VERSION" > "$OCI_BUILD/version"

# Create metadata (unquoted heredoc delimiter: variables and $(...) expand)
cat > "$OCI_BUILD/metadata.json" << EOF
{
  "name": "KubeSolo OS",
  "version": "$VERSION",
  "arch": "$ARCH",
  "build_date": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "vmlinuz_sha256": "$(sha256sum "$OCI_BUILD/vmlinuz" | cut -d' ' -f1)",
  "initramfs_sha256": "$(sha256sum "$OCI_BUILD/kubesolo-os.gz" | cut -d' ' -f1)"
}
EOF

# Create Dockerfile (quoted delimiter: written literally, nothing expands)
cat > "$OCI_BUILD/Dockerfile" << 'DOCKERFILE'
FROM scratch
LABEL org.opencontainers.image.title="KubeSolo OS"
LABEL org.opencontainers.image.description="Immutable Kubernetes OS for edge/IoT"
LABEL org.opencontainers.image.vendor="Portainer"
LABEL org.opencontainers.image.source="https://github.com/portainer/kubesolo-os"
COPY vmlinuz /vmlinuz
COPY kubesolo-os.gz /kubesolo-os.gz
COPY version /version
COPY metadata.json /metadata.json
DOCKERFILE

# Build OCI image
echo "    Building..."
docker build \
    --platform "linux/${ARCH}" \
    -t "$FULL_IMAGE" \
    -t "$LATEST_TAG" \
    -f "$OCI_BUILD/Dockerfile" \
    "$OCI_BUILD"

echo "    Built: $FULL_IMAGE"
echo "    Size: $(docker image inspect "$FULL_IMAGE" --format='{{.Size}}' | awk '{printf "%.1f MB", $1/1024/1024}')"

# Push if requested
if [ "$PUSH" = true ]; then
    echo "    Pushing to registry..."
    docker push "$FULL_IMAGE"
    docker push "$LATEST_TAG"
    echo "    Pushed: $FULL_IMAGE"
    echo "    Pushed: $LATEST_TAG"
fi

# The build context and any extraction directory are removed by the EXIT trap.

echo ""
echo "==> OCI image ready: $FULL_IMAGE"
echo ""
echo "Usage:"
echo "  # Pull and extract on target machine:"
echo "  docker create --name kubesolo-extract $FULL_IMAGE"
echo "  docker cp kubesolo-extract:/vmlinuz ./vmlinuz"
echo "  docker cp kubesolo-extract:/kubesolo-os.gz ./kubesolo-os.gz"
echo "  docker rm kubesolo-extract"

184
hack/usb-provision.sh Executable file
View File

@@ -0,0 +1,184 @@
#!/bin/bash
# usb-provision.sh — Write KubeSolo OS disk image to USB drive for air-gapped deployments
#
# This tool writes a complete KubeSolo OS disk image to a USB drive,
# creating a bootable device with A/B partitions and data partition.
# Optionally bundles a cloud-init config for first-boot provisioning.
#
# Usage:
#   sudo ./hack/usb-provision.sh <disk-image> <device> [--cloud-init <config.yaml>]
#
# Example:
#   sudo ./hack/usb-provision.sh output/kubesolo-os-0.3.0.img /dev/sdb
#   sudo ./hack/usb-provision.sh output/kubesolo-os-0.3.0.img /dev/sdb --cloud-init my-config.yaml
#
# WARNING: This will DESTROY all data on the target device!
set -euo pipefail

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# All status output goes to stderr; die additionally exits with status 1.
die() { echo -e "${RED}ERROR: $*${NC}" >&2; exit 1; }
warn() { echo -e "${YELLOW}WARNING: $*${NC}" >&2; }
info() { echo -e "${GREEN}==> $*${NC}" >&2; }

# Parse arguments
IMAGE="${1:?Usage: usb-provision.sh <disk-image> <device> [--cloud-init <config.yaml>]}"
DEVICE="${2:?Usage: usb-provision.sh <disk-image> <device> [--cloud-init <config.yaml>]}"
CLOUD_INIT=""
# Set to true only once the config has actually been copied to the data
# partition, so the final summary never reports a skipped injection as done.
CLOUD_INIT_INJECTED=false
shift 2
while [ $# -gt 0 ]; do
    case "$1" in
        --cloud-init)
            CLOUD_INIT="${2:?--cloud-init requires a config file path}"
            shift 2
            ;;
        *)
            die "Unknown option: $1"
            ;;
    esac
done

# Validation
[ -f "$IMAGE" ] || die "Disk image not found: $IMAGE"
[ -b "$DEVICE" ] || die "Device not found or not a block device: $DEVICE"
if [ -n "$CLOUD_INIT" ] && [ ! -f "$CLOUD_INIT" ]; then
    die "Cloud-init config not found: $CLOUD_INIT"
fi

# Writing to a raw block device requires root
if [ "$(id -u)" -ne 0 ]; then
    die "This script must be run as root (use sudo)"
fi

# Safety: refuse to write to a device that has mounted partitions
MOUNTED=$(mount | grep "^${DEVICE}" || true)
if [ -n "$MOUNTED" ]; then
    die "Device ${DEVICE} has mounted partitions. Unmount first:\n${MOUNTED}"
fi

# Warn (but do not abort) when the target matches a common system disk name;
# the interactive confirmation below is the actual gate.
case "$DEVICE" in
    /dev/sda|/dev/nvme0n1|/dev/vda|/dev/xvda)
        warn "Device $DEVICE looks like a system disk!"
        warn "Make absolutely sure this is the correct USB device."
        ;;
esac

# Show device info
echo "" >&2
info "KubeSolo OS USB Provisioning Tool"
echo "" >&2

# Get device info (left empty when lsblk is not installed)
DEV_SIZE=""
DEV_MODEL=""
if command -v lsblk >/dev/null 2>&1; then
    DEV_SIZE=$(lsblk -dno SIZE "$DEVICE" 2>/dev/null || echo "unknown")
    DEV_MODEL=$(lsblk -dno MODEL "$DEVICE" 2>/dev/null || echo "unknown")
fi
IMAGE_SIZE=$(du -h "$IMAGE" | cut -f1)
IMAGE_NAME=$(basename "$IMAGE")

echo "  Source image:  $IMAGE_NAME ($IMAGE_SIZE)" >&2
echo "  Target device: $DEVICE" >&2
if [ -n "$DEV_SIZE" ]; then
    echo "  Device size:   $DEV_SIZE" >&2
fi
if [ -n "$DEV_MODEL" ]; then
    echo "  Device model:  $DEV_MODEL" >&2
fi
if [ -n "$CLOUD_INIT" ]; then
    echo "  Cloud-init:    $CLOUD_INIT" >&2
fi
echo "" >&2

# Confirmation
echo -e "${RED}WARNING: ALL DATA ON ${DEVICE} WILL BE DESTROYED!${NC}" >&2
echo "" >&2
read -rp "Type 'yes' to continue: " CONFIRM
if [ "$CONFIRM" != "yes" ]; then
    echo "Aborted." >&2
    exit 1
fi
echo "" >&2

# Step 1: Write disk image
info "Writing disk image to ${DEVICE}..."
dd if="$IMAGE" of="$DEVICE" bs=4M status=progress conv=fsync 2>&1

# Step 2: Ensure partition table is re-read
info "Re-reading partition table..."
sync
partprobe "$DEVICE" 2>/dev/null || true
sleep 2

# Step 3: Determine partition naming
# /dev/sdb     → /dev/sdb1, /dev/sdb2, etc.
# /dev/nvme0n1 → /dev/nvme0n1p1, /dev/nvme0n1p2, etc.
if [[ "$DEVICE" =~ nvme|loop|mmcblk ]]; then
    PART_PREFIX="${DEVICE}p"
else
    PART_PREFIX="${DEVICE}"
fi
DATA_PART="${PART_PREFIX}4"

# Step 4: Expand data partition to fill remaining space (best effort)
info "Expanding data partition to fill USB drive..."
if command -v growpart >/dev/null 2>&1; then
    growpart "$DEVICE" 4 2>/dev/null || true
elif command -v parted >/dev/null 2>&1; then
    # Use parted to resize partition 4 to use remaining space
    parted -s "$DEVICE" resizepart 4 100% 2>/dev/null || true
else
    warn "Neither growpart nor parted found — data partition not expanded."
    warn "Install cloud-guest-utils (growpart) or parted to auto-expand."
fi

# Resize the filesystem if the partition was expanded (best effort)
if [ -b "$DATA_PART" ]; then
    e2fsck -f -y "$DATA_PART" 2>/dev/null || true
    resize2fs "$DATA_PART" 2>/dev/null || true
fi

# Step 5: Inject cloud-init config (optional)
MOUNT_DIR=""
# Unmounts and removes the temporary mount point if the script exits while
# the data partition may still be mounted.
cleanup_mount() {
    if [ -n "$MOUNT_DIR" ]; then
        umount "$MOUNT_DIR" 2>/dev/null || true
        rmdir "$MOUNT_DIR" 2>/dev/null || true
    fi
}
if [ -n "$CLOUD_INIT" ]; then
    info "Injecting cloud-init configuration..."
    MOUNT_DIR=$(mktemp -d /tmp/kubesolo-usb-XXXXXX)
    trap cleanup_mount EXIT
    if [ -b "$DATA_PART" ]; then
        mount "$DATA_PART" "$MOUNT_DIR"
        mkdir -p "$MOUNT_DIR/etc-kubesolo"
        cp "$CLOUD_INIT" "$MOUNT_DIR/etc-kubesolo/cloud-init.yaml"
        sync
        umount "$MOUNT_DIR"
        CLOUD_INIT_INJECTED=true
        info "Cloud-init config written to data partition"
    else
        warn "Data partition $DATA_PART not found — cloud-init not injected"
        warn "You can manually copy the config after first boot"
    fi
    rmdir "$MOUNT_DIR" 2>/dev/null || true
    trap - EXIT
fi

# Step 6: Final sync
sync

echo "" >&2
info "USB provisioning complete!"
echo "" >&2
echo "  Device:     $DEVICE" >&2
echo "  Image:      $IMAGE_NAME" >&2
if [ "$CLOUD_INIT_INJECTED" = true ]; then
    echo "  Cloud-init: injected" >&2
elif [ -n "$CLOUD_INIT" ]; then
    echo "  Cloud-init: NOT injected (data partition $DATA_PART not found)" >&2
fi
echo "" >&2
echo "  Next steps:" >&2
echo "    1. Remove USB drive safely: sudo eject $DEVICE" >&2
echo "    2. Insert into target device and boot from USB" >&2
echo "    3. KubeSolo OS will start automatically" >&2
if [ "$CLOUD_INIT_INJECTED" = true ]; then
    echo "    4. Cloud-init config will apply on first boot" >&2
fi
echo "" >&2

21
update/cmd/metrics.go Normal file
View File

@@ -0,0 +1,21 @@
package cmd
import (
"flag"
"fmt"
"github.com/portainer/kubesolo-os/update/pkg/metrics"
)
// Metrics implements the "metrics" subcommand. It parses the -listen and
// -grubenv flags from args, builds a metrics server from them, and blocks in
// ListenAndServe, returning whatever error that call returns.
func Metrics(args []string) error {
	flags := flag.NewFlagSet("metrics", flag.ExitOnError)
	var (
		addr    = flags.String("listen", ":9100", "Metrics HTTP listen address")
		grubenv = flags.String("grubenv", "/boot/grub/grubenv", "Path to grubenv file")
	)

	err := flags.Parse(args)
	if err != nil {
		return fmt.Errorf("parse flags: %w", err)
	}

	return metrics.NewServer(*addr, *grubenv).ListenAndServe()
}

View File

@@ -10,6 +10,7 @@
// kubesolo-update status Show current A/B slot and boot status
// kubesolo-update sign Sign update artifacts with Ed25519 key
// kubesolo-update genkey Generate new Ed25519 signing key pair
// kubesolo-update metrics Start Prometheus-compatible metrics server
package main
import (
@@ -48,6 +49,8 @@ func main() {
err = cmd.Sign(os.Args[2:])
case "genkey":
err = cmd.GenKey(os.Args[2:])
case "metrics":
err = cmd.Metrics(os.Args[2:])
default:
fmt.Fprintf(os.Stderr, "unknown command: %s\n\n", os.Args[1])
usage()
@@ -72,6 +75,7 @@ Commands:
status Show current A/B slot and boot status
sign Sign artifacts with Ed25519 private key (build system)
genkey Generate new Ed25519 signing key pair
metrics Start Prometheus-compatible metrics HTTP server
Options:
--server URL Update server URL (default: from /etc/kubesolo/update.conf)

View File

@@ -0,0 +1,187 @@
// Package metrics exposes a lightweight Prometheus-compatible metrics endpoint
// for KubeSolo OS system and update status.
//
// Metrics exposed:
//
// kubesolo_os_info{version, active_slot} 1 (gauge, labels identify the OS)
// kubesolo_os_boot_success 1 or 0 (gauge)
// kubesolo_os_boot_counter 0-3 (gauge)
// kubesolo_os_uptime_seconds seconds since the metrics server started (gauge)
// kubesolo_os_update_available 1 or 0 (gauge)
// kubesolo_os_update_last_check_timestamp_seconds unix timestamp (gauge)
// kubesolo_os_memory_total_bytes total RAM (gauge)
// kubesolo_os_memory_available_bytes available RAM (gauge)
//
// This is a zero-dependency implementation — no Prometheus client library needed.
// It serves metrics in the Prometheus text exposition format.
package metrics
import (
"fmt"
"log/slog"
"net/http"
"os"
"strconv"
"strings"
"sync"
"time"
)
// Server is a lightweight Prometheus metrics HTTP server.
//
// It serves /metrics and /healthz on listenAddr. The update fields are set
// through SetUpdateAvailable and read on each /metrics request.
type Server struct {
	// grubenvPath is the grubenv file read for active_slot, boot_success
	// and boot_counter.
	grubenvPath string
	// listenAddr is the address passed to the HTTP listener.
	listenAddr string
	// startTime is when NewServer was called; the uptime metric is measured
	// from it.
	startTime time.Time
	// mu guards updateAvailable and lastCheckTime.
	mu sync.Mutex
	// updateAvailable is 1 when an update is available, otherwise 0.
	updateAvailable int
	// lastCheckTime is the Unix timestamp (seconds) of the most recent
	// SetUpdateAvailable call; 0 until the first call.
	lastCheckTime float64
}
// NewServer returns a metrics server that will listen on listenAddr and read
// boot state from the grubenv file at grubenvPath. The start time used for
// the uptime metric is captured here.
func NewServer(listenAddr, grubenvPath string) *Server {
	srv := new(Server)
	srv.listenAddr = listenAddr
	srv.grubenvPath = grubenvPath
	srv.startTime = time.Now()
	return srv
}
// SetUpdateAvailable stores the result of an update check (1 when available,
// 0 otherwise) and stamps the check time with the current Unix time. Both
// fields are written under the server mutex.
func (s *Server) SetUpdateAvailable(available bool) {
	flag := 0
	if available {
		flag = 1
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	s.updateAvailable = flag
	s.lastCheckTime = float64(time.Now().Unix())
}
// ListenAndServe starts the metrics HTTP server on the configured listen
// address and blocks until the server stops, returning the resulting error.
//
// Routes:
//
//	/metrics  Prometheus text exposition of the OS and update metrics
//	/healthz  liveness probe, always answers "ok"
//
// An explicit http.Server is used instead of http.ListenAndServe so that a
// header read timeout can be set; without one, a client that opens a
// connection and never finishes its request headers holds it open forever.
func (s *Server) ListenAndServe() error {
	mux := http.NewServeMux()
	mux.HandleFunc("/metrics", s.handleMetrics)
	mux.HandleFunc("/healthz", s.handleHealthz)

	srv := &http.Server{
		Addr:              s.listenAddr,
		Handler:           mux,
		ReadHeaderTimeout: 10 * time.Second,
	}

	slog.Info("starting metrics server", "addr", s.listenAddr)
	return srv.ListenAndServe()
}
// handleHealthz answers every request with 200 OK and the body "ok\n".
func (s *Server) handleHealthz(w http.ResponseWriter, _ *http.Request) {
	w.WriteHeader(http.StatusOK)
	_, _ = w.Write([]byte("ok\n"))
}
// handleMetrics renders all metrics in the Prometheus text exposition format.
// Values are collected first (version file, grubenv, update state,
// /proc/meminfo) and then written out in a fixed order.
func (s *Server) handleMetrics(w http.ResponseWriter, _ *http.Request) {
	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")

	// Collect. Grubenv values that are missing or not integers render as 0.
	osVersion := readFileString("/etc/kubesolo-os-version")
	slot := s.readGrubenvVar("active_slot")
	bootOK := safeInt(s.readGrubenvVar("boot_success"), "0")
	bootsLeft := safeInt(s.readGrubenvVar("boot_counter"), "0")
	uptimeSec := time.Since(s.startTime).Seconds()

	// Copy the update state under the lock so it is not held while rendering.
	s.mu.Lock()
	updateFlag, checkedAt := s.updateAvailable, s.lastCheckTime
	s.mu.Unlock()

	totalBytes, availBytes := readMemInfo()

	// Render.
	var out strings.Builder

	// OS info
	out.WriteString("# HELP kubesolo_os_info KubeSolo OS version and slot info.\n")
	out.WriteString("# TYPE kubesolo_os_info gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_info{version=%q,active_slot=%q} 1\n", osVersion, slot)

	// Boot status
	out.WriteString("# HELP kubesolo_os_boot_success Whether the current boot was marked successful.\n")
	out.WriteString("# TYPE kubesolo_os_boot_success gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_boot_success %s\n", bootOK)
	out.WriteString("# HELP kubesolo_os_boot_counter Remaining boot attempts before rollback.\n")
	out.WriteString("# TYPE kubesolo_os_boot_counter gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_boot_counter %s\n", bootsLeft)

	// Uptime (of this server process, measured from NewServer)
	out.WriteString("# HELP kubesolo_os_uptime_seconds Time since the metrics server started.\n")
	out.WriteString("# TYPE kubesolo_os_uptime_seconds gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_uptime_seconds %.1f\n", uptimeSec)

	// Update status
	out.WriteString("# HELP kubesolo_os_update_available Whether an OS update is available.\n")
	out.WriteString("# TYPE kubesolo_os_update_available gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_update_available %d\n", updateFlag)
	out.WriteString("# HELP kubesolo_os_update_last_check_timestamp_seconds Unix timestamp of last update check.\n")
	out.WriteString("# TYPE kubesolo_os_update_last_check_timestamp_seconds gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_update_last_check_timestamp_seconds %.0f\n", checkedAt)

	// Memory
	out.WriteString("# HELP kubesolo_os_memory_total_bytes Total system memory in bytes.\n")
	out.WriteString("# TYPE kubesolo_os_memory_total_bytes gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_memory_total_bytes %d\n", totalBytes)
	out.WriteString("# HELP kubesolo_os_memory_available_bytes Available system memory in bytes.\n")
	out.WriteString("# TYPE kubesolo_os_memory_available_bytes gauge\n")
	fmt.Fprintf(&out, "kubesolo_os_memory_available_bytes %d\n", availBytes)

	_, _ = w.Write([]byte(out.String()))
}
// readGrubenvVar looks up key in the grubenv file by scanning it line by line
// for "key=value". It returns the trimmed value of the first matching line,
// or "" when the file cannot be read or no line matches.
func (s *Server) readGrubenvVar(key string) string {
	raw, err := os.ReadFile(s.grubenvPath)
	if err != nil {
		return ""
	}

	for _, entry := range strings.Split(string(raw), "\n") {
		// Only lines containing "=" are candidates; the split happens at
		// the first "=", so values may contain further "=" characters.
		name, value, found := strings.Cut(entry, "=")
		if !found || strings.TrimSpace(name) != key {
			continue
		}
		return strings.TrimSpace(value)
	}
	return ""
}
// readFileString reads a file and returns trimmed content.
func readFileString(path string) string {
data, err := os.ReadFile(path)
if err != nil {
return "unknown"
}
return strings.TrimSpace(string(data))
}
// readMemInfo reads /proc/meminfo and returns the MemTotal and MemAvailable
// values converted from kB to bytes. A field that is absent or unparsable is
// reported as 0; if the file cannot be read both results are 0.
func readMemInfo() (total, available int64) {
	raw, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		return 0, 0
	}

	// /proc/meminfo values are in kB
	const bytesPerKB = 1024

	for _, row := range strings.Split(string(raw), "\n") {
		cols := strings.Fields(row)
		if len(cols) < 2 {
			continue
		}

		// Select which result this row feeds; every other row is ignored.
		var dst *int64
		switch cols[0] {
		case "MemTotal:":
			dst = &total
		case "MemAvailable:":
			dst = &available
		default:
			continue
		}

		if kb, perr := strconv.ParseInt(cols[1], 10, 64); perr == nil {
			*dst = kb * bytesPerKB
		}
	}
	return total, available
}
// safeInt returns s unchanged when strconv.Atoi accepts it as an integer,
// and def otherwise (including when s is empty).
func safeInt(s, def string) string {
	_, err := strconv.Atoi(s)
	if err == nil {
		return s
	}
	return def
}

View File

@@ -0,0 +1,268 @@
package metrics
import (
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
)
// TestNewServer checks that NewServer stores both arguments and records a
// start time.
func TestNewServer(t *testing.T) {
	const (
		wantAddr    = ":9100"
		wantGrubenv = "/boot/grub/grubenv"
	)

	srv := NewServer(wantAddr, wantGrubenv)
	if srv == nil {
		t.Fatal("NewServer returned nil")
	}

	if got := srv.listenAddr; got != wantAddr {
		t.Errorf("listenAddr = %q, want %q", got, wantAddr)
	}
	if got := srv.grubenvPath; got != wantGrubenv {
		t.Errorf("grubenvPath = %q, want %q", got, wantGrubenv)
	}
	if srv.startTime.IsZero() {
		t.Error("startTime not set")
	}
}
// TestSetUpdateAvailable checks that the flag toggles between 1 and 0 and
// that the first call stamps the last-check time.
func TestSetUpdateAvailable(t *testing.T) {
	srv := NewServer(":9100", "/tmp/nonexistent")

	// snapshot reads both guarded fields under the server mutex.
	snapshot := func() (int, float64) {
		srv.mu.Lock()
		defer srv.mu.Unlock()
		return srv.updateAvailable, srv.lastCheckTime
	}

	srv.SetUpdateAvailable(true)
	flag, checkedAt := snapshot()
	if flag != 1 {
		t.Errorf("updateAvailable = %d, want 1", flag)
	}
	if checkedAt == 0 {
		t.Error("lastCheckTime not updated")
	}

	srv.SetUpdateAvailable(false)
	if flag, _ = snapshot(); flag != 0 {
		t.Errorf("updateAvailable = %d, want 0", flag)
	}
}
// TestHandleHealthz checks that the health endpoint answers 200 with the
// body "ok\n".
func TestHandleHealthz(t *testing.T) {
	srv := NewServer(":9100", "/tmp/nonexistent")

	rec := httptest.NewRecorder()
	srv.handleHealthz(rec, httptest.NewRequest(http.MethodGet, "/healthz", nil))

	res := rec.Result()
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusOK {
		t.Errorf("status = %d, want %d", got, http.StatusOK)
	}

	raw, _ := io.ReadAll(res.Body)
	if got := string(raw); got != "ok\n" {
		t.Errorf("body = %q, want %q", got, "ok\n")
	}
}
// TestHandleMetrics renders /metrics against a temporary grubenv and checks
// the status, content type, metric lines and HELP/TYPE comments.
func TestHandleMetrics(t *testing.T) {
	// Create a temp grubenv
	grubenv := filepath.Join(t.TempDir(), "grubenv")
	const grubenvBody = "active_slot=A\nboot_success=1\nboot_counter=3\n"
	if err := os.WriteFile(grubenv, []byte(grubenvBody), 0644); err != nil {
		t.Fatal(err)
	}

	// No version file is created here, so the version label value is not
	// asserted; only the presence of the info metric is checked.
	srv := NewServer(":9100", grubenv)
	srv.SetUpdateAvailable(true)

	rec := httptest.NewRecorder()
	srv.handleMetrics(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	res := rec.Result()
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusOK {
		t.Errorf("status = %d, want %d", got, http.StatusOK)
	}
	if ct := res.Header.Get("Content-Type"); !strings.Contains(ct, "text/plain") {
		t.Errorf("Content-Type = %q, want text/plain", ct)
	}

	raw, _ := io.ReadAll(res.Body)
	output := string(raw)

	// Check expected metrics are present
	for _, want := range []string{
		"kubesolo_os_info{",
		"active_slot=\"A\"",
		"kubesolo_os_boot_success 1",
		"kubesolo_os_boot_counter 3",
		"kubesolo_os_uptime_seconds",
		"kubesolo_os_update_available 1",
		"kubesolo_os_update_last_check_timestamp_seconds",
		"kubesolo_os_memory_total_bytes",
		"kubesolo_os_memory_available_bytes",
	} {
		if !strings.Contains(output, want) {
			t.Errorf("metrics output missing %q\nfull output:\n%s", want, output)
		}
	}

	// Check HELP and TYPE comments
	for _, want := range []string{
		"# HELP kubesolo_os_info",
		"# TYPE kubesolo_os_info gauge",
		"# HELP kubesolo_os_boot_success",
		"# HELP kubesolo_os_uptime_seconds",
		"# HELP kubesolo_os_update_available",
		"# HELP kubesolo_os_memory_total_bytes",
	} {
		if !strings.Contains(output, want) {
			t.Errorf("metrics output missing %q", want)
		}
	}
}
// TestHandleMetricsMissingGrubenv verifies that handleMetrics still renders
// output when the grubenv file does not exist: boot_success and boot_counter
// are reported as 0 and the active_slot label is empty.
func TestHandleMetricsMissingGrubenv(t *testing.T) {
	// Point at a file inside a fresh per-test directory that is never
	// created, so the test does not depend on what happens to exist under
	// /tmp on the machine running it.
	missing := filepath.Join(t.TempDir(), "nonexistent-grubenv-file")
	s := NewServer(":9100", missing)

	req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
	w := httptest.NewRecorder()
	s.handleMetrics(w, req)

	resp := w.Result()
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("reading response body: %v", err)
	}
	output := string(body)

	// Should still render with defaults.
	if !strings.Contains(output, "kubesolo_os_boot_success 0") {
		t.Errorf("expected boot_success=0 with missing grubenv, got:\n%s", output)
	}
	if !strings.Contains(output, "kubesolo_os_boot_counter 0") {
		t.Errorf("expected boot_counter=0 with missing grubenv, got:\n%s", output)
	}
	// active_slot should be empty.
	if !strings.Contains(output, `active_slot=""`) {
		t.Errorf("expected empty active_slot with missing grubenv, got:\n%s", output)
	}
}
// TestHandleMetricsUpdateNotAvailable checks the update metrics on a server
// where SetUpdateAvailable has never been called: both the availability flag
// and the last-check timestamp are expected to render as 0.
func TestHandleMetricsUpdateNotAvailable(t *testing.T) {
	srv := NewServer(":9100", "/tmp/nonexistent")

	// SetUpdateAvailable is intentionally not invoked here.
	rec := httptest.NewRecorder()
	srv.handleMetrics(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	res := rec.Result()
	defer res.Body.Close()

	raw, _ := io.ReadAll(res.Body)
	output := string(raw)

	hasFlag := strings.Contains(output, "kubesolo_os_update_available 0")
	if !hasFlag {
		t.Errorf("expected update_available=0 by default, got:\n%s", output)
	}

	hasTimestamp := strings.Contains(output, "kubesolo_os_update_last_check_timestamp_seconds 0")
	if !hasTimestamp {
		t.Errorf("expected last_check=0 by default, got:\n%s", output)
	}
}
// TestReadGrubenvVar writes a small key=value grubenv fixture and checks
// that readGrubenvVar returns the stored value for each key, and an empty
// string for a key that is not in the file.
func TestReadGrubenvVar(t *testing.T) {
	envFile := filepath.Join(t.TempDir(), "grubenv")
	fixture := []byte("active_slot=B\nboot_success=0\nboot_counter=2\nsome_other=value\n")
	if err := os.WriteFile(envFile, fixture, 0644); err != nil {
		t.Fatal(err)
	}

	srv := NewServer(":9100", envFile)

	type lookup struct {
		key  string
		want string
	}
	cases := []lookup{
		{key: "active_slot", want: "B"},
		{key: "boot_success", want: "0"},
		{key: "boot_counter", want: "2"},
		{key: "some_other", want: "value"},
		{key: "nonexistent", want: ""},
	}

	for i := range cases {
		c := cases[i]
		if got := srv.readGrubenvVar(c.key); got != c.want {
			t.Errorf("readGrubenvVar(%q) = %q, want %q", c.key, got, c.want)
		}
	}
}
// TestReadGrubenvVarMissingFile verifies that readGrubenvVar returns an
// empty string when the grubenv file does not exist.
func TestReadGrubenvVarMissingFile(t *testing.T) {
	// Use a never-created file inside a fresh per-test directory rather than
	// a fixed /tmp path, so a stray file on the host cannot change the result.
	missing := filepath.Join(t.TempDir(), "nonexistent-grubenv")
	s := NewServer(":9100", missing)

	got := s.readGrubenvVar("active_slot")
	if got != "" {
		t.Errorf("readGrubenvVar with missing file = %q, want empty", got)
	}
}
// TestSafeInt runs safeInt over a table of inputs: strings that parse as
// integers must come back unchanged, everything else must come back as the
// supplied default.
func TestSafeInt(t *testing.T) {
	type row struct {
		input string
		def   string
		want  string
	}
	table := []row{
		// Valid integers are returned as-is.
		{input: "42", def: "0", want: "42"},
		{input: "0", def: "0", want: "0"},
		{input: "3", def: "0", want: "3"},
		// Empty, non-numeric and fractional inputs fall back to the default.
		{input: "", def: "0", want: "0"},
		{input: "abc", def: "0", want: "0"},
		{input: "1.5", def: "0", want: "0"},
		// Negative integers are still integers.
		{input: "-1", def: "0", want: "-1"},
	}

	for i := range table {
		r := table[i]
		if got := safeInt(r.input, r.def); got != r.want {
			t.Errorf("safeInt(%q, %q) = %q, want %q", r.input, r.def, got, r.want)
		}
	}
}
// TestReadFileString verifies that readFileString returns a file's content
// with surrounding whitespace removed, and returns "unknown" when the file
// does not exist.
func TestReadFileString(t *testing.T) {
	dir := t.TempDir()

	// Existing file: leading/trailing spaces and the newline must be trimmed.
	path := filepath.Join(dir, "version")
	if err := os.WriteFile(path, []byte("  1.2.3\n  "), 0644); err != nil {
		t.Fatal(err)
	}
	got := readFileString(path)
	if got != "1.2.3" {
		t.Errorf("readFileString = %q, want %q", got, "1.2.3")
	}

	// Missing file: use a never-created name inside the per-test directory
	// instead of a fixed /tmp path, so the outcome does not depend on the
	// host's filesystem contents.
	got = readFileString(filepath.Join(dir, "nonexistent-file"))
	if got != "unknown" {
		t.Errorf("readFileString missing file = %q, want %q", got, "unknown")
	}
}