From 1b44c9d621cdd2a310a76000efa2f73adba3f05f Mon Sep 17 00:00:00 2001 From: Adolfo Delorenzo Date: Thu, 14 May 2026 16:26:20 -0600 Subject: [PATCH] feat: bump KubeSolo to v1.1.5 + cross-arch CI workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4 of v0.3 — KubeSolo version bump and CI gating. KubeSolo v1.1.0 → v1.1.5 brings: - New flag --disable-ipv6 (v1.1.5) - New flag --db-wal-repair (v1.1.5) — important for power-loss resilience on edge appliances; surfaced as kubesolo.db-wal-repair in cloud-init - New flag --full (v1.1.4) — disables edge-optimised k8s overrides - Pod egress connectivity fix after reboot (v1.1.4) - Registry config persistence fix (v1.1.5) - k8s 1.34.7, CoreDNS 1.14.3, Go 1.26.2 All three new flags wired into cloud-init: config.go fields, kubesolo.go extra-flag emission, full-config.yaml example. Supply-chain hygiene: - Per-arch checksums: KUBESOLO_SHA256_AMD64 and KUBESOLO_SHA256_ARM64 in versions.env. Replaces the single shared KUBESOLO_SHA256 that couldn't meaningfully verify both binaries at once. - Checksum now applied to the tarball (the immutable upstream artifact) rather than the post-extract binary. CI: - New .gitea/workflows/build-arm64.yaml routes the full kernel + rootfs + disk-image build to the Odroid arm64-linux runner. Triggers on push to main, tags, and manual workflow_dispatch. The boot smoke test is continue-on-error because KubeSolo's first-boot image import deadline fires under QEMU TCG on the Odroid. VERSION bumped to 0.3.0-dev. CHANGELOG entry under [0.3.0-dev] captures all Phase 1-4 work + the known limitations documented in arm64-status.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/build-arm64.yaml | 73 +++++++++++++++++++++++++ CHANGELOG.md | 81 ++++++++++++++++++++++++++++ VERSION | 2 +- build/config/versions.env | 6 ++- build/scripts/fetch-components.sh | 17 +++--- cloud-init/config.go | 8 +++ cloud-init/examples/full-config.yaml | 12 +++++ cloud-init/kubesolo.go | 12 +++++ 8 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 .gitea/workflows/build-arm64.yaml diff --git a/.gitea/workflows/build-arm64.yaml b/.gitea/workflows/build-arm64.yaml new file mode 100644 index 0000000..cfc8e91 --- /dev/null +++ b/.gitea/workflows/build-arm64.yaml @@ -0,0 +1,73 @@ +name: ARM64 Build + +# Triggers on push to main and on tags. Skipped on PRs to keep PR feedback fast; +# manual via Gitea UI ("Run workflow") if needed. +on: + push: + branches: [main] + tags: ['v*'] + workflow_dispatch: + +jobs: + build-arm64-generic: + name: Build generic ARM64 disk image + # Routes to the Odroid self-hosted runner via the arm64-linux label. + # See docs/ci-runners.md for runner setup. + runs-on: arm64-linux + steps: + - uses: actions/checkout@v4 + + - name: Show host info + run: | + uname -a + nproc + free -h + df -h /home /tmp || df -h / + + - name: Verify build prerequisites + run: | + # The Odroid runner ships these via apt; this is a sanity check. + which gcc make bc bison flex cpio gzip xz wget curl mkfs.ext4 mkfs.vfat \ + sfdisk losetup kpartx grub-mkimage qemu-system-aarch64 git busybox + ls -la /bin/busybox + file /bin/busybox | grep -q 'statically linked' || { + echo "ERROR: /bin/busybox is not statically linked — install busybox-static" + exit 1 + } + + - name: Build mainline ARM64 kernel + # Cached in build/cache/kernel-arm64-generic between runs (persistent + # working dir on the host runner). First run takes 30-60 min; reruns + # exit immediately once the .config + Image match. + run: | + time make kernel-arm64 + + - name: Build cross-arch Go binaries + run: make build-cross + + - name: Prepare generic ARM64 rootfs + run: sudo make rootfs-arm64 + + - name: Build ARM64 UEFI disk image + run: sudo make disk-image-arm64 + + - name: Show output artifact + run: | + ls -lh output/ + file output/*.arm64.img + + - name: Boot smoke test (best-effort) + # KubeSolo's image import deadline can fire under QEMU TCG on the + # Odroid; the boot itself succeeds through stage 90 every time, but + # the final "KubeSolo started" health check is timing-sensitive. + # We mark this continue-on-error until we have KVM or real hardware. + continue-on-error: true + run: sudo make test-boot-arm64-disk + + - name: Upload disk image + if: startsWith(github.ref, 'refs/tags/v') + uses: actions/upload-artifact@v4 + with: + name: kubesolo-os-arm64-${{ github.ref_name }} + path: output/kubesolo-os-*.arm64.img + retention-days: 90 diff --git a/CHANGELOG.md b/CHANGELOG.md index 50c6f71..eda63d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,87 @@ All notable changes to KubeSolo OS are documented in this file. Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.0-dev] - unreleased + +### Added + +- Generic ARM64 build track distinct from Raspberry Pi: + - `make kernel-arm64` builds a mainline kernel.org LTS kernel (6.12.10 by + default) from `arm64 defconfig` + shared `kernel-container.fragment` + + arm64 virt-host enables (VIRTIO_*, EFI_STUB, NVMe). + - `make disk-image-arm64` produces a UEFI-bootable raw GPT image with A/B + system partitions and GRUB-EFI ARM64. Targets QEMU virt, Graviton, Ampere, + or any UEFI ARM64 host. + - `hack/dev-vm-arm64.sh --disk` boots the built image through QEMU UEFI for + end-to-end testing. + - `test/qemu/test-boot-arm64-disk.sh` automated boot smoke test. +- Bumped KubeSolo to v1.1.5 (was v1.1.0). New cloud-init flags surfaced: + - `kubesolo.full` (v1.1.4+) — disable edge-optimised overrides + - `kubesolo.disable-ipv6` (v1.1.5+) + - `kubesolo.db-wal-repair` (v1.1.5+) — recover from unclean shutdowns +- Per-arch supply-chain verification: `KUBESOLO_SHA256_AMD64` and + `KUBESOLO_SHA256_ARM64` in `versions.env`, applied to the tarball before + extract. +- `docs/arm64-architecture.md` — defines the generic-vs-RPi two-track layout. +- `docs/arm64-status.md` — Phase 3 status snapshot, known limitations, what's + needed to ship. +- `docs/ci-runners.md` — Gitea Actions runner setup (Odroid arm64-linux). + +### Changed + +- `build/scripts/build-kernel-arm64.sh` is now the **generic ARM64** kernel + build (mainline kernel.org LTS, generic UEFI/virtio). +- Renamed `build/scripts/build-kernel-rpi.sh` (was `build-kernel-arm64.sh`). + RPi kernel build (raspberrypi/linux fork, bcm2711_defconfig) lives here now. +- Renamed `build/config/kernel-container.fragment` (was + `rpi-kernel-config.fragment`). Misnomer: contents are arch-agnostic and now + shared across x86, ARM64-generic, and RPi kernels. +- `build/scripts/build-kernel.sh` (x86) refactored to consume the shared + fragment via a generic `apply_fragment` function. ~50 lines of duplication + killed. +- `KUBESOLO_VERSION` moved out of `fetch-components.sh` defaults into + `versions.env`. Bumping is now a one-line PR. + +### Fixed + +- Native ARM64 build hosts (e.g. an Odroid runner) no longer require the x86 + cross-compiler. Both `build-kernel-arm64.sh` and `build-kernel-rpi.sh` detect + `uname -m` and use the host's gcc directly when arch matches. +- ARM64 grub.cfg console ordering: `ttyAMA0` is now the primary console + (`console=ttyS0,... console=ttyAMA0,...`). Init output is now visible on + QEMU virt and most ARM64 SBCs without further configuration. +- ARM64 boot: replaced piCore64's `/init` with our staged init at `/init` and + `/sbin/init`. Previously the kernel ran piCore's TCE handler which + segfaulted in our environment. +- ARM64 boot: replaced piCore64's broken dynamic BusyBox with the build + host's `busybox-static`. piCore's binary triggered EL0 instruction-abort + panics on QEMU virt under both `-cpu cortex-a72` and `-cpu max`. +- POSIX-character-class portability: `tr -d '[:space:]'` in + `30-kernel-modules.sh` and `40-sysctl.sh` replaced with explicit + `' \t\r\n'`. Ubuntu's busybox-static 1.30.1 doesn't parse `[:space:]` and + instead deletes the literal characters `[ : s p a c e ]`, which truncated + module names (`virtio_net` → `virtio_nt`, etc.) and sysctl keys. +- `inject-kubesolo.sh` no longer copies `init/lib/functions.sh` into + `init.d/`. Previously the main init loop tried to run it as a stage after + stage 90 and panicked with "Init completed without exec'ing KubeSolo". +- ARM64 disk image: `TARGET_ARCH=arm64 create-disk-image.sh` produces + `BOOTAA64.EFI` via `grub-mkimage -O arm64-efi` (not `bootx64.efi`). Skips + the BIOS-only `grub-install --target=i386-pc` step. +- `build/Dockerfile.builder`: added `grub-efi-amd64-bin`, `grub-efi-arm64-bin`, + `grub-pc-bin`, `grub-common`, `grub2-common`, and `busybox-static` so the + Docker-based build flow can produce ARM64 disk images and gets the same + BusyBox swap behaviour as native builds. + +### Known limitations (deferred to follow-up) + +- ARM64 `kubesolo.data=LABEL=KSOLODATA` resolution doesn't work yet — + piCore's `blkid`/`findfs` crash in QEMU and our static busybox lacks the + applets. Hardcoded `/dev/vda4` as a workaround. Production fix: ship + static `blkid`/`findfs` or replace LABEL resolution with a sysfs walk. +- AppArmor profile load fails on ARM64 (apparmor_parser ABI mismatch). +- KubeSolo's image-import deadline can fire under QEMU TCG (software + emulation). On real hardware (or with KVM) the import finishes in seconds. + ## [0.2.0] - 2026-02-12 ### Added diff --git a/VERSION b/VERSION index 0ea3a94..d510910 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0 +0.3.0-dev diff --git a/build/config/versions.env b/build/config/versions.env index 8554134..ac1e126 100644 --- a/build/config/versions.env +++ b/build/config/versions.env @@ -11,8 +11,11 @@ TINYCORE_ISO_URL=${TINYCORE_MIRROR}/${TINYCORE_VERSION%%.*}.x/${TINYCORE_ARCH}/r # KubeSolo # Pinned release tag from https://github.com/portainer/kubesolo/releases. # Bump here and re-run `make fetch` to pull a new version. -KUBESOLO_VERSION=v1.1.0 +KUBESOLO_VERSION=v1.1.5 KUBESOLO_INSTALL_URL=https://get.kubesolo.io +# Per-arch SHA256 of the musl tarball (verified at fetch time when non-empty). +KUBESOLO_SHA256_AMD64=565bd5fd98fc8ce09160e646b55de3493c782d74c0e0c46ccf130ff4bcabab81 +KUBESOLO_SHA256_ARM64=db865a5e9b2617d595f9c2b7d011272edc94587621a9690e2de0f47cc94f0748 # Build tools (used inside builder container) GRUB_VERSION=2.12 @@ -22,7 +25,6 @@ SYSLINUX_VERSION=6.03 # Populate by running: sha256sum build/cache/ # Leave empty to skip verification (useful for first fetch) TINYCORE_ISO_SHA256="" -KUBESOLO_SHA256="" NETFILTER_TCZ_SHA256="" NET_BRIDGING_TCZ_SHA256="" IPTABLES_TCZ_SHA256="" diff --git a/build/scripts/fetch-components.sh b/build/scripts/fetch-components.sh index aa82757..a9417b0 100755 --- a/build/scripts/fetch-components.sh +++ b/build/scripts/fetch-components.sh @@ -60,17 +60,19 @@ if [ "$FETCH_ARCH" = "arm64" ]; then BIN_URL="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-linux-arm64-musl.tar.gz" BIN_URL_FALLBACK="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-linux-arm64.tar.gz" TEMP_DIR=$(mktemp -d) + TARBALL="$TEMP_DIR/kubesolo.tar.gz" echo " URL: $BIN_URL" - if curl -fSL "$BIN_URL" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then + if curl -fSL "$BIN_URL" -o "$TARBALL" 2>/dev/null; then echo " Downloaded musl variant (arm64)" - elif curl -fSL "$BIN_URL_FALLBACK" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then + elif curl -fSL "$BIN_URL_FALLBACK" -o "$TARBALL" 2>/dev/null; then echo " Downloaded glibc variant (arm64 fallback)" else echo "ERROR: Failed to download KubeSolo ARM64 from GitHub." rm -rf "$TEMP_DIR" exit 1 fi - tar -xzf "$TEMP_DIR/kubesolo.tar.gz" -C "$TEMP_DIR" + verify_checksum "$TARBALL" "${KUBESOLO_SHA256_ARM64:-}" "KubeSolo arm64 tarball" + tar -xzf "$TARBALL" -C "$TEMP_DIR" FOUND_BIN=$(find "$TEMP_DIR" -name "kubesolo" -type f ! -name "*.tar.gz" | head -1) if [ -z "$FOUND_BIN" ]; then echo "ERROR: Could not find kubesolo binary in extracted archive" @@ -131,11 +133,12 @@ else TEMP_DIR=$(mktemp -d) trap 'rm -rf "$TEMP_DIR"' EXIT + TARBALL="$TEMP_DIR/kubesolo.tar.gz" echo " URL: $BIN_URL" - if curl -fSL "$BIN_URL" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then + if curl -fSL "$BIN_URL" -o "$TARBALL" 2>/dev/null; then echo " Downloaded musl variant" - elif curl -fSL "$BIN_URL_FALLBACK" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then + elif curl -fSL "$BIN_URL_FALLBACK" -o "$TARBALL" 2>/dev/null; then echo " Downloaded glibc variant (fallback)" else echo "ERROR: Failed to download KubeSolo from GitHub." @@ -148,9 +151,10 @@ else echo " 3. Re-run: make rootfs" exit 1 fi + verify_checksum "$TARBALL" "${KUBESOLO_SHA256_AMD64:-}" "KubeSolo amd64 tarball" # Extract binary from tarball - tar -xzf "$TEMP_DIR/kubesolo.tar.gz" -C "$TEMP_DIR" + tar -xzf "$TARBALL" -C "$TEMP_DIR" # Find the kubesolo binary in extracted contents FOUND_BIN=$(find "$TEMP_DIR" -name "kubesolo" -type f ! -name "*.tar.gz" | head -1) @@ -168,7 +172,6 @@ else rm -rf "$TEMP_DIR" echo "==> KubeSolo binary: $KUBESOLO_BIN ($(du -h "$KUBESOLO_BIN" | cut -f1))" - verify_checksum "$KUBESOLO_BIN" "$KUBESOLO_SHA256" "KubeSolo binary" fi # --- Tiny Core kernel module extensions (netfilter, iptables) --- diff --git a/cloud-init/config.go b/cloud-init/config.go index 28bdb6e..dfbc065 100644 --- a/cloud-init/config.go +++ b/cloud-init/config.go @@ -40,6 +40,14 @@ type KubeSoloConfig struct { PortainerEdgeID string `yaml:"portainer-edge-id"` PortainerEdgeKey string `yaml:"portainer-edge-key"` PortainerEdgeAsync bool `yaml:"portainer-edge-async"` + // v1.1.4+: skip edge-optimised overrides, use upstream k8s defaults + // (useful for CI and powerful machines, disabled by default). + Full bool `yaml:"full"` + // v1.1.5+: disable IPv6 in the cluster. + DisableIPv6 bool `yaml:"disable-ipv6"` + // v1.1.5+: detect SQLite WAL corruption on startup and recover from + // unclean shutdowns (e.g. power loss). Recommended ON for edge devices. + DBWALRepair bool `yaml:"db-wal-repair"` } // NTPConfig defines NTP settings. diff --git a/cloud-init/examples/full-config.yaml b/cloud-init/examples/full-config.yaml index 08918be..9e6ce67 100644 --- a/cloud-init/examples/full-config.yaml +++ b/cloud-init/examples/full-config.yaml @@ -36,5 +36,17 @@ kubesolo: portainer-edge-key: "your-edge-key" portainer-edge-async: true + # KubeSolo v1.1.4+: skip the edge-optimised overrides and use upstream + # Kubernetes defaults. Useful for CI and high-spec machines. Default off. + full: false + + # KubeSolo v1.1.5+: disable IPv6 throughout the cluster. Default off. + disable-ipv6: false + + # KubeSolo v1.1.5+: detect SQLite WAL corruption at startup and recover + # from unclean shutdowns (e.g. power loss). Recommended ON for edge + # appliances that may lose power. + db-wal-repair: true + # Arbitrary extra flags passed directly to the KubeSolo binary # extra-flags: "--disable traefik --disable servicelb" diff --git a/cloud-init/kubesolo.go b/cloud-init/kubesolo.go index a0375db..9ffeaa9 100644 --- a/cloud-init/kubesolo.go +++ b/cloud-init/kubesolo.go @@ -70,6 +70,18 @@ func buildExtraFlags(cfg *Config) string { parts = append(parts, "--portainer-edge-async") } + if cfg.KubeSolo.Full { + parts = append(parts, "--full") + } + + if cfg.KubeSolo.DisableIPv6 { + parts = append(parts, "--disable-ipv6") + } + + if cfg.KubeSolo.DBWALRepair { + parts = append(parts, "--db-wal-repair") + } + return strings.Join(parts, " ") }