15 Commits
v0.3.0 ... main

Author SHA1 Message Date
53268a1564 docs: roll README + CHANGELOG forward past v0.3.1
All checks were successful
CI / Go Tests (push) Successful in 1m53s
CI / Shellcheck (push) Successful in 1m1s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m28s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m23s
README:
- Status line bumped from v0.3.0 to v0.3.1 with the actually-validated
  framing (K8s Ready under QEMU virt+HVF, CoreDNS + local-path +
  nginx all Running) and a link to CHANGELOG.md for full notes.
- Roadmap: Phase 7 (generic ARM64) flipped to "Complete (v0.3.1, K8s
  Ready under QEMU virt+HVF)". OCI cosign verification, LABEL=KSOLODATA
  on ARM64, and real-hardware ARM64 validation move from "Planned for
  v0.3.1" to "Planned for v0.3.2" — they didn't make this release.

CHANGELOG:
- New "[Unreleased]" section covering the four post-v0.3.1 CI / repo
  housekeeping commits: drop tag trigger on build-arm64.yaml (04a5cd2),
  gitignore .env/credentials (48267e1), fix gated x86 job staying
  "queued" instead of "skipped" (fb24e64), and paths-ignore on
  build-arm64.yaml so workflow/docs-only commits skip the 60-minute
  kernel rebuild (e1b8a69).

No runtime changes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 22:46:12 -06:00
e1b8a69294 ci(arm64): skip kernel rebuild on workflow/docs-only changes
All checks were successful
CI / Go Tests (push) Successful in 1m52s
CI / Shellcheck (push) Successful in 1m2s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m31s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m32s
`build-arm64.yaml` reruns the 60-minute mainline kernel build on every push
to main. That's the right behavior when kernel fragments / init scripts /
build scripts change — it's pure burn when only workflows or docs do.

Add `paths-ignore` for `.gitea/workflows/**`, `.github/workflows/**`,
`docs/**`, top-level `*.md`, `CHANGELOG.md`, `README.md`, `.gitignore`.

Any change that affects what we build (kernel fragment, module list, init,
build/) still triggers a fresh run.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 19:41:54 -06:00
fb24e641ce ci: fix gated x86 job staying 'queued' instead of 'skipped'
Some checks failed
CI / Go Tests (push) Has started running
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Has been cancelled
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Has been cancelled
CI / Shellcheck (push) Has been cancelled
ARM64 Build / Build generic ARM64 disk image (push) Failing after 14m12s
After v0.3.1 published successfully, run 524 stayed in 'queued' status
overall even though all 5 jobs that actually ran completed successfully.
Cause: the gated build-iso-amd64 job is `if: false` with
`runs-on: amd64-linux`. No runner matches `amd64-linux`, so Gitea
queued the job indefinitely waiting for one. The `if:` expression
is only evaluated when a runner actually picks up the job, so the
skip never fires.

Switch the runs-on to `ubuntu-latest` (which our Odroid claims). The
runner picks the job up, evaluates `if: false`, marks it `skipped`,
and the run as a whole concludes properly.

Comment block updated to flag the two lines to flip when a real
amd64-linux runner is registered.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 19:38:15 -06:00
48267e1cbc chore: gitignore .env / credentials files
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Has been cancelled
CI / Go Tests (push) Failing after 11s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Has been skipped
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Has been skipped
CI / Shellcheck (push) Successful in 1m9s
A .env file at the repo root was used to plumb a Gitea PAT to the
release workflow's API calls. It wasn't gitignored — risk of an
accidental `git add -A` shipping the secret to the public-ish remote.

Add .env / .env.* / *.token / *.pat to .gitignore so secrets stay
local. No content changes to .env itself; that file remains untracked.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 18:55:59 -06:00
04a5cd2cd3 ci: drop tag trigger from build-arm64.yaml to avoid duplicate work
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Has been cancelled
CI / Go Tests (push) Has been cancelled
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Has been cancelled
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Has been cancelled
CI / Shellcheck (push) Has been cancelled
The v0.3.1 retag triggered BOTH .gitea/workflows/build-arm64.yaml AND
.gitea/workflows/release.yaml. Both build the ARM64 disk image from
scratch on the Odroid runner — each kernel build takes ~60 min. The
build-arm64 run finished first (uploaded as a workflow artifact, scoped
to that run), then release.yaml started another from-scratch build to
get the same artifact for the actual Gitea release. That's a wasted hour
on a constrained runner.

Limit build-arm64.yaml to push-to-main (for early breakage detection)
and manual workflow_dispatch. Tag-driven release pipelines are
release.yaml's job alone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 18:47:11 -06:00
eb39787cf3 ci: gate x86 build until amd64 runner exists; ARM64 release self-sufficient
Some checks failed
CI / Go Tests (push) Successful in 2m30s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m37s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 2m0s
CI / Shellcheck (push) Failing after 10m50s
Release / Build x86_64 ISO + disk image (push) Blocked by required conditions
ARM64 Build / Build generic ARM64 disk image (push) Failing after 1h6m52s
Release / Test (push) Successful in 1m59s
Release / Build Binaries (linux-amd64) (push) Successful in 1m33s
Release / Build Binaries (linux-arm64) (push) Successful in 1m40s
Release / Build ARM64 disk image (push) Successful in 1h11m43s
Release / Publish Gitea Release (push) Successful in 3m1s
v0.3.1's first release.yaml run exposed two issues:

1. The `ubuntu-latest` label resolved to the Odroid (only runner registered
   with that label), which is arm64. apt-get install grub-efi-amd64-bin
   then failed because ports.ubuntu.com only ships arm64 packages — the
   amd64 grub binaries don't exist in the arm64 repo. Building x86 ISOs
   on an arm64 host requires either a native amd64 runner or
   qemu-user-static emulation; neither is set up.

2. The `arm64-linux:host` runner runs jobs directly on the Odroid host
   (no Docker), and actions/checkout@v4 is a JS action needing Node 20+
   in $PATH. The Odroid had no Node installed at all, so checkout failed.

Fixes:

- `build-iso-amd64` gated `if: false` and `runs-on: amd64-linux`. The job
  stays in the workflow as a placeholder for when an amd64 runner is
  eventually registered. Flip the `if: false` line at that time and it
  starts working.

- `release` job no longer depends on build-iso-amd64, so the workflow
  completes with just ARM64 + Go binaries. `if: always() && needs.X ==
  'success'` for the jobs we actually require.

- Release body no longer promises x86 artifacts that aren't there.
  Replaced with a clear note about how to build x86 from source at the
  release tag.

Operator action required for the Odroid runner:
  curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
  sudo apt install -y nodejs

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:48:58 -06:00
81b29fd237 release: v0.3.1
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 3s
CI / Go Tests (push) Successful in 1m53s
CI / Shellcheck (push) Successful in 1m2s
Release / Test (push) Successful in 1m37s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m33s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m34s
Release / Build Binaries (linux-amd64) (push) Successful in 1m26s
Release / Build Binaries (linux-arm64) (push) Successful in 1m37s
Release / Build ARM64 disk image (push) Failing after 3s
Release / Build x86_64 ISO + disk image (push) Failing after 44s
Release / Publish Gitea Release (push) Has been skipped
VERSION 0.3.0 -> 0.3.1. Append CHANGELOG entry covering the eight fix
commits since v0.3.0 (dual-glibc, nft binary, NF_TABLES_IPV4 family,
NFT_NUMGEN expressions, modules.list parser, banner+motd, port 8080
hostfwd, and the release.yaml workflow rewrite).

End-to-end validated on Apple Silicon Mac under QEMU virt + HVF:
  - kubectl get nodes -> kubesolo-XXXXXX  Ready
  - kube-system/coredns                   1/1 Running
  - local-path-storage/local-path-prov    1/1 Running
  - default/nginx-test (user workload)    1/1 Running (pulled+started 11s)

Tagging this release is also the first real exercise of the rewritten
release.yaml workflow. If it works as designed, the v0.3.1 release page
should populate automatically with: x86 ISO + .img.xz, ARM64 .arm64.img.xz,
Go binaries (cloudinit + update, amd64 + arm64), and SHA256SUMS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:29:06 -06:00
fbe2d0bfdb fix(dev-vm): forward port 8080 to expose kubeconfig HTTP from QEMU
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 5s
CI / Go Tests (push) Successful in 2m7s
CI / Shellcheck (push) Successful in 1m1s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m35s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m48s
90-kubesolo.sh starts an nc-based HTTP server on port 8080 inside the
VM to serve the admin kubeconfig (serial console truncates the
base64-encoded cert lines, so HTTP is the reliable retrieval path).
hack/dev-vm-arm64.sh only forwarded ports 6443 (kube-apiserver) and
2222 (ssh), so `curl http://localhost:8080` from the Mac returned
empty — the connect attempt landed on a closed Mac-side port.

Add the third hostfwd. Now `curl http://localhost:8080` from the host
machine reaches the in-VM HTTP server and returns the kubeconfig.
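
For reference, a sketch of the full user-net line with all three forwards in
place (the image path and the flags besides the hostfwd entries are
illustrative; hack/dev-vm-arm64.sh itself isn't reproduced in this log):

  qemu-system-aarch64 -M virt -accel hvf -cpu host -m 2048 -smp 2 \
    -nographic \
    -drive file=output/kubesolo-os.arm64.img,format=raw,if=virtio \
    -net nic,model=virtio \
    -net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080

  # then, from the host:
  curl -s http://localhost:8080 > ~/.kube/kubesolo-config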

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 16:20:33 -06:00
bc3300e7e7 fix(modules): strip inline comments in modules.list parser
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 5s
CI / Go Tests (push) Successful in 2m35s
CI / Shellcheck (push) Successful in 1m23s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m53s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m47s
3bcf2e1 added nft_numgen / nft_hash / nft_limit / nft_log to both module
lists but in a format the inject parser doesn't handle:

  nft_numgen     # numgen random/inc mod N vmap — Service endpoint LB

The parser's only comment skip is `case "$mod" in \#*|"") continue ;;`
which matches lines STARTING with #, not lines with inline #-comments.
So each new line was passed to modprobe verbatim as a single (invalid)
module name, modprobe returned nonzero, and the .ko never made it into
the initramfs. Listing the rebuilt rootfs confirmed it:

  ls .../lib/modules/*/kernel/net/netfilter/ | grep nft_numgen
  <empty>

Two changes:

1. Strip inline comments from the new entries in modules.list and
   modules-arm64.list. Each module name on its own line, matching the
   convention the rest of the file uses.

2. Harden the parser in inject-kubesolo.sh to handle "name # comment"
   regardless. Single-line tweak: `mod="${mod%%#*}"` before the
   continue check. Prevents a future contributor's inline doc from
   silently dropping a module the same way.
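
   A condensed sketch of the hardened loop (simplified; the real script also
   branches on MODPROBE_WORKS and handles modules shipped without modprobe):

     while IFS= read -r mod; do
         mod="${mod%%#*}"             # drop any inline "# comment" tail
         mod=$(echo "$mod" | xargs)   # trim whitespace
         [ -z "$mod" ] && continue    # also skips blank / comment-only lines
         modprobe "$mod"
     done < /usr/lib/kubesolo-os/modules.list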

After rebuilding the rootfs on the Odroid (no kernel rebuild needed —
this is a rootfs-only change), the four .ko files should appear at
build/rootfs-work/rootfs/lib/modules/*/kernel/net/netfilter/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 15:10:09 -06:00
3bcf2e115f fix(modules): ship and load nft_numgen/hash/limit/log at boot
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 6s
CI / Go Tests (push) Successful in 2m12s
CI / Shellcheck (push) Successful in 55s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m48s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m35s
After 31eee77 added CONFIG_NFT_NUMGEN=m and friends to the kernel
fragment, the rebuilt kernel does include nft_numgen.ko on disk in
build/cache/kernel-arm64-generic/modules/. But the runtime kernel
doesn't load it, and kube-proxy keeps failing with the same
"No such file or directory" pointing at `numgen` as before the
kernel rebuild.

Root cause is the boot-stage-vs-lockdown ordering combined with
inject-kubesolo.sh's selective module copy:

  1. inject-kubesolo.sh ships modules listed in modules.list /
     modules-arm64.list plus their transitive deps. nft_numgen wasn't
     in either list, so its .ko is in the kernel build cache but
     never makes it into the initramfs.
  2. Stage 30 (kernel-modules) only modprobes from the same list, so
     it wouldn't load nft_numgen even if the .ko were present.
  3. Stage 85 (security-lockdown) writes 1 to
     /proc/sys/kernel/modules_disabled, blocking any further module
     loads — including the lazy request_module() that nftables would
     otherwise do when kube-proxy first uses the `numgen` expression.

The kernel-side fix (=m in the fragment) is necessary but not
sufficient: we have to ship + load these in stage 30, before lockdown.
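
In init terms the ordering constraint is roughly this (stage numbers as
above; the actual stage scripts aren't reproduced here):

  # stage 30 (kernel-modules): load everything kube-proxy will need later
  for mod in nft_numgen nft_hash nft_limit nft_log; do
      modprobe "$mod"
  done

  # stage 85 (security-lockdown): after this, no further module loads,
  # including nftables' lazy request_module() for numgen
  echo 1 > /proc/sys/kernel/modules_disabled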

Add nft_numgen, nft_hash, nft_limit, nft_log to BOTH modules.list
(x86) and modules-arm64.list. Same justification on x86 — KubeSolo's
nftables kube-proxy backend uses numgen regardless of arch, we just
haven't exercised it on x86 since v0.2 deployments stuck with the
older iptables-restore backend.

After this lands on the Odroid:

  sudo make rootfs-arm64 disk-image-arm64   # kernel cached, rootfs only
  # no kernel rebuild needed; this is a rootfs-only change

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 14:25:11 -06:00
31eee77397 fix(kernel): enable nftables NUMGEN + HASH + helper expressions
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 5s
CI / Go Tests (push) Successful in 3m51s
CI / Shellcheck (push) Successful in 1m5s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 2m48s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 2m50s
Fourth round of the v0.3 nftables-on-arm64 debug saga. After the
NF_TABLES_IPV4 family fix from 7e46f8f, KubeSolo + containerd + a
CoreDNS pod all reach Running state, but kube-proxy fails to install
Service rules:

  add rule ip kube-proxy service-2QRHZV4L-default/kubernetes/tcp/https
    numgen random mod 1 vmap { 0 : goto ... }
    ^^^^^^^^^^^^^^^^^^^
  Error: Could not process rule: No such file or directory

The caret points at `numgen random mod 1`. That's the nftables
NUMGEN expression — kube-proxy's nftables backend uses it for random
endpoint load-balancing across Service endpoints. Without
CONFIG_NFT_NUMGEN compiled into the kernel, every Service sync fails
and kube-dns / any ClusterIP is unreachable.

Cascade: kube-proxy sync fail -> kube-dns Service has no DNAT ->
CoreDNS readiness probe never goes Ready -> KubeSolo's coredns
deploy step times out after 15 attempts -> FTL -> kernel panic.

Fix: add NFT_NUMGEN to kernel-container.fragment, plus the small
family of expression modules kube-proxy and CNI plugins commonly use
so we don't repeat this debug loop for the next missing one:

  CONFIG_NFT_NUMGEN=m   random / inc LB
  CONFIG_NFT_HASH=m     consistent-hash LB (sessionAffinity=ClientIP)
  CONFIG_NFT_OBJREF=m   named objects (counters, quotas) refs in rules
  CONFIG_NFT_LIMIT=m    rate-limit expression
  CONFIG_NFT_LOG=m      log expression (used by some CNI debug rules)

All =m so init's stage-30 loads them from modules.list / modules-arm64.list
alongside the existing nft_nat / nft_masq / nft_compat.

This needs another kernel rebuild (rm -rf build/cache/kernel-arm64-generic,
sudo make kernel-arm64) on the Odroid. After that we should have a fully
working KubeSolo OS v0.3 on ARM64 generic — at which point the only thing
left is to tag v0.3.1 and verify the rewritten release.yaml workflow
publishes both arches automatically.

Note on runc-PATH log noise: containerd-shim-runc-v2 -info probes for
runc in $PATH and fails because KubeSolo's runc lives at
/var/lib/kubesolo/containerd/runc. This is cosmetic — actual container
creation uses an absolute path from the containerd config and works
fine (CoreDNS container did start successfully). Will polish in v0.3.2.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 11:48:43 -06:00
7e46f8fdc2 fix(kernel): enable nftables address-family handlers
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 6s
CI / Go Tests (push) Successful in 2m40s
CI / Shellcheck (push) Successful in 1m39s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Failing after 10s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Failing after 7s
Third KubeSolo crash from the QEMU validation loop:

  nft add table ip kubesolo-masq: exit status 1
    Error: Could not process rule: Operation not supported

That's EOPNOTSUPP from netlink. nf_tables core is loaded (the binary
even runs cleanly now after the previous dual-glibc fix), but no address
families are registered with it — so any `nft add table ip ...`,
`add table inet ...`, etc. is rejected.

In modern Linux (5.x / 6.x) the nftables address families are gated by
separate BOOL Kconfigs:

  CONFIG_NF_TABLES_IPV4    "ip" family
  CONFIG_NF_TABLES_IPV6    "ip6" family
  CONFIG_NF_TABLES_INET    "inet" family (both)
  CONFIG_NF_TABLES_NETDEV  "netdev" family

These are bool (not tristate) — they must be built into the kernel; no
module to load at runtime. Our shared kernel-container.fragment had
CONFIG_NF_TABLES=m (the core) but none of the family Kconfigs, and the
arm64 defconfig leaves them off.

Fix: enable all four families as =y in kernel-container.fragment.
Also pin the NFT expression modules KubeSolo v1.1.4+'s masquerade
ruleset depends on (NFT_NAT, NFT_MASQ, NFT_CT, NFT_REDIR, NFT_REJECT,
NFT_REJECT_INET, NFT_COMPAT, NFT_FIB + FIB_IPV4/6) as =m — they're
already in modules-arm64.list / modules.list and get modprobed at boot;
pinning them just makes sure olddefconfig doesn't strip them when the
fragment is applied on top of a minimal defconfig.

NF_NAT_MASQUERADE pinned =y because NFT_MASQ select-depends on it; on
some kernels it would get auto-selected, on others it gets dropped by
olddefconfig if not pinned.

This change requires a kernel rebuild — the configs are bool / module
defs, not runtime knobs. On the Odroid:

  rm -rf build/cache/kernel-arm64-generic
  sudo make kernel-arm64       # ~30-60 min from scratch
  sudo make rootfs-arm64 disk-image-arm64
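
A quick way to sanity-check the rebuilt config (the .config location inside
the kernel cache dir is an assumption):

  grep -E 'CONFIG_NF_TABLES_(IPV4|IPV6|INET|NETDEV)=' \
    build/cache/kernel-arm64-generic/.config
  # expect all four as =y; missing or =m means olddefconfig dropped them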

x86 needs the same treatment when we cut v0.3.1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 08:55:41 -06:00
76ed2ffc14 fix(arm64): resolve dual-glibc loading that triggers stack-canary aborts
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 5s
CI / Go Tests (push) Successful in 1m49s
CI / Shellcheck (push) Successful in 56s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m43s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m54s
Second nft crash report from QEMU virt:

  failed to set up pod masquerade
    nft add table ip kubesolo-masq:
      signal: aborted (output: *** stack smashing detected ***: terminated)

Root cause: two glibcs are visible to dynamically-linked binaries in the
rootfs. piCore64 ships glibc at /lib/libc.so.6; we copy the build host's
glibc (for the iptables-nft / nft / xtables-modules family) to
/lib/$LIB_ARCH/libc.so.6. The dynamic linker can resolve one binary's
NEEDED libc.so.6 to piCore's and another (via transitive load through
e.g. libnftables.so.1) to ours. Each libc has its own __stack_chk_guard
global; stack frames whose canary was written by code from libc-A and
checked by code from libc-B trip "stack smashing detected" → SIGABRT.
This didn't fire before nft was added because no host-installed
dynamically linked binary actually got invoked before kubesolo crashed at
first-boot preflight.

Three layered fixes in inject-kubesolo.sh:

1. Bundle the full glibc family (was just libc.so.6 + ld). Now also
   libpthread, libdl, libm, libresolv, librt, libanl, libgcc_s. Without
   these, transitively-loaded host libs could pull them in from piCore's
   /lib and re-introduce the split.

2. After bundling, delete piCore's duplicates from /lib/ where our copy
   exists in /lib/$LIB_ARCH/. The dynamic linker's search now has
   exactly one match per soname.

3. Write /etc/ld.so.conf giving /lib/$LIB_ARCH precedence over /lib, and
   run `ldconfig -r "$ROOTFS"` to bake an explicit /etc/ld.so.cache.
   The runtime linker uses the cache (when present) instead of falling
   back to compiled-in default paths, making lookup order deterministic.
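
Condensed, fixes 2 and 3 amount to something like this ($ROOTFS and
$LIB_ARCH as used elsewhere in the script; the full change, including the
complete library list, is in the inject-kubesolo.sh diff further down this
page):

  # one soname, one file: drop piCore's copy wherever ours exists
  for lib in libc.so.6 libpthread.so.0 libm.so.6; do
    [ -e "$ROOTFS/lib/$LIB_ARCH/$lib" ] && rm -f "$ROOTFS/lib/$lib"
  done

  # make the search order explicit, then bake it into /etc/ld.so.cache
  printf '/lib/%s\n/usr/lib/%s\n/lib\n/usr/lib\n' "$LIB_ARCH" "$LIB_ARCH" \
    > "$ROOTFS/etc/ld.so.conf"
  ldconfig -r "$ROOTFS"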

Also done (followups from previous commit):

- build/Dockerfile.builder gains nftables so docker-build picks up nft.
- .gitea/workflows/release.yaml's amd64 build job installs iptables +
  nftables (previously only listed iptables-related libs but not the
  CLIs themselves).

Verified by shellcheck. End-to-end QEMU verification on the Odroid next.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 07:56:49 -06:00
51c1f78aea fix(arm64): bundle nft binary + always show access banner
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 5s
CI / Go Tests (push) Successful in 1m55s
CI / Shellcheck (push) Successful in 53s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Failing after 1m0s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 2m18s
Two real v0.3.0 bugs that surface on first-boot:

1. KubeSolo v1.1.4+ owns its pod-masquerade rules directly via
     nft add table ip kubesolo-masq
   instead of going through kube-proxy/CNI. Without the standalone nft
   CLI in PATH, KubeSolo FATALs at startup with:
     "nft": executable file not found in $PATH
   then the init exits and the kernel panics on PID 1 death.

   inject-kubesolo.sh now also copies /usr/sbin/nft and the shared
   libraries it needs that aren't already bundled (libnftables, libedit,
   libjansson, libgmp, libtinfo, libbsd,
   libmd). The iptables-nft block above already covered libmnl, libnftnl,
   libxtables, libc, ld.

2. The host-access banner ("From your host machine, run: curl -s
   http://localhost:8080 ...") was gated on the kubeconfig appearing
   within 120s. When KubeSolo crashed early (bug 1 above) or simply took
   longer than the wait window, the user never saw the connection
   instructions.

   90-kubesolo.sh now:
     - writes the banner to /etc/motd so it shows on any later shell
       (SSH ext, emergency shell, console login)
     - prints the banner to console unconditionally, after the wait
       loop, regardless of whether the kubeconfig was found

Both fixes are pure rootfs changes — no kernel rebuild required.
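
A minimal sketch of the 90-kubesolo.sh shape after the change (banner text
abbreviated; KUBECONFIG_PATH stands in for whatever variable the script
already uses):

  BANNER="From your host machine, run: curl -s http://localhost:8080 > kubeconfig"

  # persist for any later shell (SSH, emergency shell, console login)
  echo "$BANNER" > /etc/motd

  # ... existing 120s kubeconfig wait loop ...

  # print unconditionally, whether or not the kubeconfig showed up
  echo "$BANNER"
  [ -f "$KUBECONFIG_PATH" ] || echo "NOTE: kubeconfig not written yet; KubeSolo may still be starting"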

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 07:16:12 -06:00
f8c308d9b7 ci: fix release.yaml so v0.3.1+ auto-publishes a complete release
Some checks failed
ARM64 Build / Build generic ARM64 disk image (push) Failing after 3s
CI / Go Tests (push) Successful in 1m40s
CI / Shellcheck (push) Successful in 55s
CI / Build Go Binaries (amd64, linux, linux-amd64) (push) Successful in 1m16s
CI / Build Go Binaries (arm64, linux, linux-arm64) (push) Successful in 1m21s
Three changes that should have happened pre-v0.3.0:

1. Add a build-disk-arm64 job that runs on the arm64-linux runner (Odroid),
   building kernel + rootfs + disk-image then xz-compressing the .arm64.img.
   The previous release.yaml shipped x86_64 only.

2. Replace softprops/action-gh-release@v2 with a direct curl against Gitea's
   /api/v1/repos/<owner>/<repo>/releases endpoint. The softprops action
   hard-codes api.github.com instead of honouring ${{ github.api_url }},
   so on Gitea's act_runner it succeeds silently without creating a
   release. The curl path uses the auto-populated ${{ secrets.GITHUB_TOKEN }}
   for auth; a doc note in ci-runners.md covers the GITEA_TOKEN fallback
   (a trimmed curl sketch follows this list).

3. Downgrade actions/upload-artifact and actions/download-artifact from
   @v4 to @v3 to match Gitea act_runner v1.0.x's compatibility — same fix
   we applied to ci.yaml in 0c6e200.
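
A trimmed sketch of the direct-API call from item 2. TAG and TOKEN are
assumed to be set from the tag ref and secrets.GITHUB_TOKEN, and
GITHUB_API_URL / GITHUB_REPOSITORY are assumed as the env-var forms of the
${{ github.* }} expressions the workflow uses; the committed step
additionally renders a markdown body and loops over every artifact:

  API="${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}"
  # create the release; the response carries the numeric id needed for uploads
  ID=$(curl -fsSL -X POST \
        -H "Authorization: token $TOKEN" \
        -H "Content-Type: application/json" \
        -d "{\"tag_name\":\"$TAG\",\"name\":\"KubeSolo OS $TAG\"}" \
        "$API/releases" | jq -r '.id')
  # attach one asset (multipart field must be named "attachment")
  curl -fsSL -X POST -H "Authorization: token $TOKEN" \
    -F "attachment=@SHA256SUMS" \
    "$API/releases/$ID/assets?name=SHA256SUMS"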

Also compress the x86 disk image with xz before uploading (parity with
the arm64 path, saves ~95% on bandwidth), and emit SHA256SUMS over all
attached artifacts.

docs/ci-runners.md gains a "Workflows in this repo" table, a per-job
breakdown of the release pipeline, the rationale for direct-curl over
the marketplace action, and a "manually re-running a release" section
warning against force-updating published tags.

This commit fixes the workflow but does not retroactively rebuild v0.3.0.
v0.3.0's release page already has the manually-uploaded arm64 image and
SHA256SUMS; x86 users who want the v0.3.0 artifacts can build them from source
(documented in the release body). v0.3.1 will be the first tag that
exercises the fixed workflow end-to-end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 20:18:41 -06:00
14 changed files with 593 additions and 94 deletions

View File

@@ -1,11 +1,26 @@
 name: ARM64 Build
-# Triggers on push to main and on tags. Skipped on PRs to keep PR feedback fast;
-# manual via Gitea UI ("Run workflow") if needed.
+# Smoke-test workflow for main-branch ARM64 builds. Triggers on push to main
+# (so we catch breakages early) and on manual dispatch.
+#
+# Tag pushes are intentionally NOT a trigger — release.yaml handles tags and
+# also produces the disk image. Triggering both on the same tag wastes an
+# hour of Odroid time on a duplicate kernel build.
+#
+# `paths-ignore` keeps workflow-file and docs-only commits from kicking off
+# a 60-minute Odroid rebuild. If you change a kernel fragment, init script,
+# or build/script, this WILL fire — that's by design.
 on:
   push:
     branches: [main]
-    tags: ['v*']
+    paths-ignore:
+      - '.gitea/workflows/**'
+      - '.github/workflows/**'
+      - 'docs/**'
+      - '*.md'
+      - 'CHANGELOG.md'
+      - 'README.md'
+      - '.gitignore'
   workflow_dispatch:
 jobs:

View File

@@ -1,5 +1,19 @@
 name: Release
+# Triggered by `git push origin vX.Y.Z`. Builds Go binaries (amd64+arm64),
+# x86_64 ISO + disk image, ARM64 disk image, computes SHA256SUMS over all
+# artifacts, and posts a Gitea release with everything attached via the
+# Gitea API.
+#
+# Notes for future-you:
+# - upload-artifact / download-artifact are pinned to @v3 because Gitea's
+#   act_runner v1.0.x doesn't fully implement v4 yet.
+# - The release step uses curl against Gitea's own /api/v1/repos/.../releases
+#   instead of a third-party action (softprops/action-gh-release et al);
+#   act_runner doesn't reliably proxy GitHub.com-targeted actions.
+# - The arm64 disk-image build runs on the Odroid self-hosted runner via
+#   the `arm64-linux` label. Docs in docs/ci-runners.md.
 on:
   push:
     tags:
@@ -11,19 +25,16 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
           go-version: '1.22'
       - name: Test cloud-init
         run: cd cloud-init && go test ./... -count=1
       - name: Test update agent
         run: cd update && go test ./... -count=1
   build-binaries:
-    name: Build Binaries
+    name: Build Binaries (${{ matrix.suffix }})
     runs-on: ubuntu-latest
     needs: test
     strategy:
@@ -37,129 +48,248 @@ jobs:
           suffix: linux-arm64
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
           go-version: '1.22'
       - name: Get version
         id: version
         run: echo "version=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
       - name: Build cloud-init
         run: |
           CGO_ENABLED=0 GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} \
             go build -ldflags="-s -w -X main.version=${{ steps.version.outputs.version }}" \
             -o kubesolo-cloudinit-${{ matrix.suffix }} ./cmd/
         working-directory: cloud-init
       - name: Build update agent
         run: |
           CGO_ENABLED=0 GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} \
             go build -ldflags="-s -w -X main.version=${{ steps.version.outputs.version }}" \
             -o kubesolo-update-${{ matrix.suffix }} .
         working-directory: update
       - name: Upload binaries
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v3
         with:
           name: binaries-${{ matrix.suffix }}
           path: |
             cloud-init/kubesolo-cloudinit-${{ matrix.suffix }}
             update/kubesolo-update-${{ matrix.suffix }}
-  build-iso:
-    name: Build ISO (amd64)
+  build-iso-amd64:
+    name: Build x86_64 ISO + disk image
+    # Gated until an amd64-linux runner is registered. We use `runs-on:
+    # ubuntu-latest` (which the Odroid claims) so SOME runner picks the job
+    # up and evaluates `if: false`, marking it `skipped` instead of leaving
+    # it `queued` forever — the latter holds the overall run in `queued`
+    # state even when every load-bearing job is complete. When we get an
+    # amd64 runner, flip `if: false` to `false` -> `true` (and flip the
+    # `runs-on:` back to `amd64-linux`).
+    if: false
     runs-on: ubuntu-latest
     needs: build-binaries
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
           go-version: '1.22'
       - name: Install build deps
         run: |
           sudo apt-get update
           sudo apt-get install -y --no-install-recommends \
             cpio gzip genisoimage isolinux syslinux syslinux-common \
             syslinux-utils xorriso xz-utils wget squashfs-tools \
-            dosfstools e2fsprogs fdisk parted bsdtar
-      - name: Build ISO
-        run: make iso
-      - name: Build disk image
-        run: make disk-image
-      - name: Get version
-        id: version
-        run: echo "version=$(cat VERSION)" >> $GITHUB_OUTPUT
-      - name: Upload ISO
-        uses: actions/upload-artifact@v4
+            dosfstools e2fsprogs fdisk parted libarchive-tools \
+            grub-common grub-efi-amd64-bin grub-pc-bin kpartx \
+            busybox-static iptables nftables
+      - name: Build kernel + ISO + disk-image
+        run: |
+          make kernel
+          make build-cloudinit build-update-agent
+          make rootfs initramfs
+          make iso
+          make disk-image
+      - name: Compress disk image
+        # The raw .img is 4 GB sparse; xz takes it to ~50-300 MB depending
+        # on dictionary level. Use -6 (default) for memory safety on the
+        # GitHub-Actions-style runner.
+        run: |
+          xz -k -T0 --memlimit-compress=1500MiB -6 output/*.img
+          ls -lh output/
+      - name: Upload x86_64 artifacts
+        uses: actions/upload-artifact@v3
         with:
-          name: iso-amd64
-          path: output/*.iso
-      - name: Upload disk image
-        uses: actions/upload-artifact@v4
+          name: image-amd64
+          path: |
+            output/*.iso
+            output/*.img.xz
+  build-disk-arm64:
+    name: Build ARM64 disk image
+    runs-on: arm64-linux
+    needs: test
+    steps:
+      - uses: actions/checkout@v4
+      - name: Show host info
+        run: |
+          uname -a
+          nproc
+          free -h
+          df -h /
+      - name: Build kernel + rootfs + disk-image
+        # Runner runs as root via systemd; explicit sudo is harmless but
+        # documented as such in docs/ci-runners.md.
+        run: |
+          make kernel-arm64
+          make build-cross
+          make rootfs-arm64
+          make disk-image-arm64
+      - name: Compress disk image
+        run: |
+          xz -k -T0 --memlimit-compress=1500MiB -6 output/*.arm64.img
+          ls -lh output/
+      - name: Upload ARM64 artifacts
+        uses: actions/upload-artifact@v3
         with:
-          name: disk-image-amd64
-          path: output/*.img
+          name: image-arm64
+          path: output/*.arm64.img.xz
   release:
-    name: Create Release
+    name: Publish Gitea Release
     runs-on: ubuntu-latest
-    needs: [build-binaries, build-iso]
+    # build-iso-amd64 is gated `if: false` in v0.3.x (no amd64 runner yet);
+    # don't block the release on it. build-disk-arm64 is required — that's
+    # the headline artifact for v0.3.x. build-binaries is required since
+    # the Go binaries are core to every release.
+    needs: [build-binaries, build-disk-arm64]
+    # `if: always()` so the release publishes even if the gated x86 job
+    # somehow ran-and-failed instead of being skipped. The downstream
+    # `find` in the Flatten step ignores missing files gracefully.
+    if: always() && needs.build-binaries.result == 'success' && needs.build-disk-arm64.result == 'success'
     steps:
       - uses: actions/checkout@v4
       - name: Get version
         id: version
-        run: echo "version=$(cat VERSION)" >> $GITHUB_OUTPUT
+        # `cat VERSION` would be stale on tag pushes (VERSION already bumped
+        # for the tag, but using ref_name is unambiguous).
+        run: echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
       - name: Download all artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v3
         with:
           path: artifacts
-      - name: Compute checksums
+      - name: Flatten artifacts + compute checksums
         run: |
-          cd artifacts
-          find . -type f \( -name "*.iso" -o -name "*.img" -o -name "kubesolo-*" \) \
-            -exec sha256sum {} \; | sort > ../SHA256SUMS
-          cd ..
+          mkdir -p release
+          # Each upload-artifact wrote into artifacts/<name>/...
+          find artifacts -type f \( \
+            -name "*.iso" -o \
+            -name "*.img.xz" -o \
+            -name "kubesolo-*" \
+          \) -exec cp {} release/ \;
+          (cd release && sha256sum * | sort > SHA256SUMS)
+          ls -lh release/
+          cat release/SHA256SUMS
-      - name: Create release
-        uses: softprops/action-gh-release@v2
-        with:
-          name: KubeSolo OS v${{ steps.version.outputs.version }}
-          body: |
-            ## KubeSolo OS v${{ steps.version.outputs.version }}
-            ### Downloads
-            - **ISO** — Boot from CD/USB, ideal for testing
-            - **Disk Image** — Raw disk with A/B partitions + GRUB
-            - **Binaries** — Standalone cloud-init and update agent
-            ### Verify
-            ```
-            sha256sum -c SHA256SUMS
-            ```
-            ### Quick Start
-            ```bash
-            # Boot in QEMU
-            qemu-system-x86_64 -m 1024 -smp 2 -enable-kvm \
-              -cdrom kubesolo-os-${{ steps.version.outputs.version }}.iso \
-              -nographic
-            ```
-          files: |
-            artifacts/**/*.iso
-            artifacts/**/*.img
-            artifacts/**/kubesolo-*
-            SHA256SUMS
-          draft: false
-          prerelease: false
+      - name: Install release tooling
+        run: sudo apt-get update && sudo apt-get install -y jq curl
+      - name: Render release body
+        id: body
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          # Strip the leading 'v' for cosmetic display in the body.
+          DISPLAY="${VERSION#v}"
+          cat > release-body.md <<EOF
+          See [docs/release-notes-${DISPLAY}.md](./docs/release-notes-${DISPLAY}.md)
+          and [CHANGELOG.md](./CHANGELOG.md) for the full release notes.
+
+          ### Downloads
+
+          - \`kubesolo-os-${DISPLAY}.arm64.img.xz\` — ARM64 raw disk image (A/B GPT, UEFI)
+          - \`kubesolo-cloudinit-linux-{amd64,arm64}\` — standalone cloud-init parser
+          - \`kubesolo-update-linux-{amd64,arm64}\` — standalone update agent
+          - \`SHA256SUMS\` — checksums for every artifact above
+
+          > **x86_64 ISO + disk image**: not built automatically yet. The
+          > release workflow's amd64 build job needs an amd64-linux runner,
+          > which this Gitea instance doesn't have yet. To produce them
+          > yourself, clone the repo at this tag and run \`make iso disk-image\`
+          > on any Linux amd64 host.
+
+          ### Verify
+
+          \`\`\`
+          sha256sum -c SHA256SUMS
+          \`\`\`
+
+          ### Quick start (ARM64)
+
+          \`\`\`
+          # On Graviton/Ampere/any UEFI ARM64 host:
+          xz -d kubesolo-os-${DISPLAY}.arm64.img.xz
+          sudo dd if=kubesolo-os-${DISPLAY}.arm64.img of=/dev/sdX bs=4M status=progress
+
+          # Under qemu-system-aarch64 (Apple Silicon w/ HVF):
+          UEFI_FW=\$(brew --prefix qemu)/share/qemu/edk2-aarch64-code.fd
+          qemu-system-aarch64 -M virt -accel hvf -cpu host -m 2048 -smp 2 \\
+            -nographic -bios "\$UEFI_FW" \\
+            -drive file=kubesolo-os-${DISPLAY}.arm64.img,format=raw,if=virtio,media=disk \\
+            -device virtio-rng-pci \\
+            -net nic,model=virtio \\
+            -net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::8080-:8080
+          \`\`\`
+
+          Then from the host: \`curl http://localhost:8080 > ~/.kube/kubesolo-config\`
+          and \`kubectl --kubeconfig ~/.kube/kubesolo-config get nodes\`.
+          EOF
+          cat release-body.md
+      - name: Create release via Gitea API
+        env:
+          # Gitea's act_runner auto-populates this with repo-write scope.
+          # If not, set a personal access token as a secret named GITEA_TOKEN
+          # on the org and swap the var name below.
+          TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          TAG="${{ steps.version.outputs.version }}"
+          REPO_API="${{ github.api_url }}/repos/${{ github.repository }}"
+          # 1. Create the release. The API is GitHub-compatible at the
+          #    request shape; the response includes the numeric release id we
+          #    need for asset uploads.
+          PAYLOAD=$(jq -n \
+            --arg tag "$TAG" \
+            --arg name "KubeSolo OS $TAG" \
+            --rawfile body release-body.md \
+            '{tag_name: $tag, name: $name, body: $body, draft: false, prerelease: false}')
+          echo "==> Creating release for $TAG against $REPO_API"
+          CREATE_RESP=$(curl -fsSL -X POST \
+            -H "Authorization: token $TOKEN" \
+            -H "Content-Type: application/json" \
+            -d "$PAYLOAD" \
+            "$REPO_API/releases")
+          RELEASE_ID=$(echo "$CREATE_RESP" | jq -r '.id')
+          if [ -z "$RELEASE_ID" ] || [ "$RELEASE_ID" = "null" ]; then
+            echo "ERROR: Could not extract release id from response:"
+            echo "$CREATE_RESP" | jq . || echo "$CREATE_RESP"
+            exit 1
+          fi
+          echo "==> Release id: $RELEASE_ID"
+          # 2. Upload each asset. asset?name= names the attachment; we use
+          #    the basename so users see the same filename the build produced.
+          for f in release/*; do
+            [ -f "$f" ] || continue
+            name=$(basename "$f")
+            echo "==> Uploading $name ($(du -h "$f" | cut -f1))"
+            curl -fsSL -X POST \
+              -H "Authorization: token $TOKEN" \
+              -F "attachment=@$f" \
+              "$REPO_API/releases/$RELEASE_ID/assets?name=$name" >/dev/null
+          done
+          echo "==> Release published: $REPO_API/../releases/tag/$TAG"

.gitignore vendored
View File

@@ -16,6 +16,12 @@ build/rootfs-work/
*.swo
*~
# Secrets — never commit
.env
.env.*
*.token
*.pat
# OS
.DS_Store
._*

View File

@@ -5,6 +5,125 @@ All notable changes to KubeSolo OS are documented in this file.
Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
Pure CI / repository housekeeping; no runtime changes since v0.3.1. All
items below shake out workflow-loop bugs exposed by the v0.3.1 release
flow on Gitea Actions.
### Fixed (CI)
- `build-arm64.yaml` no longer triggers on tag pushes. `release.yaml`
already produces the ARM64 disk image as part of the release flow, so
triggering both on the same tag wasted an hour of Odroid runner time
on a duplicate kernel build. (`04a5cd2`)
- The gated `build-iso-amd64` job in `release.yaml` (`if: false` until an
amd64-linux runner exists) used to advertise `runs-on: amd64-linux`.
With no matching runner, Gitea left the job queued forever and the
overall workflow run never transitioned to `success` — even though
every load-bearing job had finished and the release was already
published. Now uses `runs-on: ubuntu-latest` so any runner picks the
job up just long enough to evaluate `if: false` and mark it `skipped`.
(`fb24e64`)
- `build-arm64.yaml` now ignores workflow-file, docs, and `*.md` changes
via `paths-ignore` (`.gitea/workflows/**`, `.github/workflows/**`,
`docs/**`, top-level `*.md`, `.gitignore`). Workflow- / docs-only
commits no longer kick off a 60-minute kernel rebuild on the Odroid.
Any change to a kernel fragment, init script, or build script still
triggers the full build, as intended. (`e1b8a69`)
### Changed
- `.gitignore` now excludes `.env`, `.env.*`, `*.token`, `*.pat` to keep
Gitea PATs and other credentials used during release ops from being
accidentally committed. (`48267e1`)
## [0.3.1] - 2026-05-15
First fully-functional generic ARM64 release. v0.3.0 shipped the build
scaffold; v0.3.1 makes it actually boot a Kubernetes cluster end-to-end
on QEMU virt under HVF acceleration. Validated by deploying CoreDNS,
local-path-provisioner, and an `nginx:alpine` workload — all reach
Running, `kubectl get nodes` reports `Ready`.
### Fixed
- **Dual-glibc loading on ARM64** — piCore64's `/lib/libc.so.6` and the
build host's `/lib/$LIB_ARCH/libc.so.6` could both be resolved into the
same process by the dynamic linker, triggering
`*** stack smashing detected ***` aborts when stack frames crossed
between functions linked against different libcs. Fix: bundle the full
glibc family (libc + libpthread + libdl + libm + libresolv + librt +
libanl + libgcc_s + ld.so), delete piCore's duplicates in `/lib/`,
and write `/etc/ld.so.conf` + `ldconfig -r` so the runtime linker has
a deterministic search order. (`76ed2ff`)
- **`nft` binary not bundled** — KubeSolo v1.1.4+ runs `nft add table ip
kubesolo-masq` for pod-masquerade setup, but `inject-kubesolo.sh` only
bundled `xtables-nft-multi`. Without standalone `nft` in `$PATH`,
KubeSolo FATAL'd at startup. Fix: copy `/usr/sbin/nft` + its
non-shared libs (libnftables, libedit, libjansson, libgmp, libtinfo,
libbsd, libmd) into the rootfs. (`51c1f78`)
- **nftables address-family handlers** — `nf_tables` core was loaded but
no address families were registered, so `nft add table ip ...`
returned `EOPNOTSUPP`. The bool Kconfigs `CONFIG_NF_TABLES_IPV4`,
`CONFIG_NF_TABLES_IPV6`, `CONFIG_NF_TABLES_INET`,
`CONFIG_NF_TABLES_NETDEV` are required and weren't in the
fragment. Fix: add to `kernel-container.fragment` as `=y`. (`7e46f8f`)
- **kube-proxy nftables-backend expression modules** — Kubernetes 1.34's
kube-proxy nft backend uses `numgen`, `hash`, `limit`, `log`
expressions. The corresponding kernel modules (`CONFIG_NFT_NUMGEN`,
etc.) were missing from the fragment AND the runtime module list, so
even after a kernel rebuild stage 30 didn't load them and stage 85's
`kernel.modules_disabled=1` lockdown prevented on-demand loads. Fix:
add to both `kernel-container.fragment` (as `=m`) and
`modules.list` / `modules-arm64.list`. (`31eee77`, `3bcf2e1`)
- **`modules.list` inline-comment parser bug** — the inject script's
comment-strip only matched lines starting with `#`, not lines with
inline `# comment` tails. So `nft_numgen # foo` was passed
verbatim to modprobe, resolved to nothing, and the .ko never made it
into the initramfs. Fix: parse with `mod="${mod%%#*}"` to strip
inline tails. (`bc3300e`)
- **Banner only printed on kubeconfig success** —
`90-kubesolo.sh` gated the host-access banner behind `if [ -f
$KUBECONFIG_PATH ]`. When KubeSolo crashed early (bug #2 above) or
the wait loop timed out, the user never saw the connection
instructions. Fix: write the banner to `/etc/motd` AND print it
unconditionally after the wait loop. (`51c1f78`)
- **`dev-vm-arm64.sh` missing port-8080 hostfwd** — the in-VM HTTP
server that serves the kubeconfig listens on port 8080, but the
QEMU `-net user` line only forwarded 6443 and 2222, so
`curl http://localhost:8080` from the host machine connected to
nothing. Fix: add the third hostfwd. (`fbe2d0b`)
### Fixed (CI)
- **`release.yaml` workflow** rewritten so v0.3.1+ tag pushes
auto-publish a complete release page on Gitea: `actions/upload-artifact`
pinned to `@v3` for act_runner compatibility, the
`softprops/action-gh-release@v2` step replaced with a direct `curl`
against `/api/v1/repos/.../releases` (`softprops` hard-codes
`api.github.com` so it silently no-ops on Gitea), added a
`build-disk-arm64` job that builds on the `arm64-linux` runner.
v0.3.0's manual-upload-only release was the canary that exposed all
three bugs. (`f8c308d`)
### Known issues carried forward to v0.3.2
These don't block normal operation but are tracked:
- `xt_comment` userspace extension load fails on the iptables-nft path,
causing kubelet's KUBE-FIREWALL rule install to skip. Reported as
`Couldn't load match 'comment'` in the boot log. kubelet continues
without the localhost-drop rule.
- `containerd-shim-runc-v2 -info` probe reports `runc: executable file
not found in $PATH`. Cosmetic — containerd uses the absolute path
from its config when actually launching containers.
- `kube-proxy conntrack cleanup` logs `Failed to list conntrack entries:
invalid argument` every cleanup cycle. Probably needs
`CONFIG_NF_CONNTRACK_PROCFS` or netlink-glue tweaks.
- Several pods restart 12 times on first boot due to a PLEG /
runtime-probe race in the kubelet startup path. Pods stabilise.
## [0.3.0] - 2026-05-14

The main themes: generic ARM64 (not just Raspberry Pi), an honest update

View File

@@ -2,7 +2,7 @@
 An immutable, bootable Linux distribution purpose-built for [KubeSolo](https://github.com/portainer/kubesolo) — Portainer's ultra-lightweight single-node Kubernetes.
-> **Status (v0.3.0):** x86_64 and generic ARM64 (UEFI / virtio / mainline kernel) both build and boot end-to-end. Update agent has an explicit state machine, OCI registry distribution alongside HTTP, channel + maintenance-window + version-stepping-stone gates, and auto-rollback. ARM64 Raspberry Pi support remains paused pending physical hardware. See [docs/release-notes-0.3.0.md](docs/release-notes-0.3.0.md) for the full v0.3.0 changelog.
+> **Status (v0.3.1):** First fully-validated generic ARM64 release. x86_64 and ARM64 (UEFI / virtio / mainline kernel) both build and boot end-to-end; v0.3.1 closes the dual-glibc, nftables address-family, and kube-proxy expression-module gaps that kept v0.3.0 from reaching a Ready node on ARM64. Validated end-to-end under QEMU virt + HVF on Apple Silicon: `kubectl get nodes` reports `Ready`, CoreDNS, local-path-provisioner, and an nginx test workload all `Running`. The update agent has an explicit state machine, OCI registry distribution alongside HTTP, channel + maintenance-window + version-stepping-stone gates, and auto-rollback. ARM64 Raspberry Pi support remains paused pending physical hardware. See [CHANGELOG.md](CHANGELOG.md) for the full v0.3.1 changelog and [docs/release-notes-0.3.0.md](docs/release-notes-0.3.0.md) for the v0.3.0 milestone summary.
 ## What is this?
@@ -245,12 +245,12 @@ Metrics include: `kubesolo_os_info`, `boot_success`, `boot_counter`, `uptime_sec
 | 5 | CI/CD, OCI distribution, Prometheus metrics, ARM64 cross-compile | Complete |
 | 6 | Security hardening, AppArmor | Complete |
 | - | Custom kernel build for container runtime fixes | Complete (x86_64) |
-| 7 | ARM64 generic (mainline kernel, UEFI, virtio) | Complete (v0.3.0, QEMU validated) |
+| 7 | ARM64 generic (mainline kernel, UEFI, virtio) | Complete (v0.3.1, K8s Ready under QEMU virt+HVF) |
 | 8 | Update engine v2 (state machine, channels, OCI, pre-flight gates) | Complete (v0.3.0) |
 | - | ARM64 Raspberry Pi (custom kernel, firmware, SD card image) | Paused — needs hardware |
-| - | OCI cosign signature verification | Planned for v0.3.1 |
-| - | LABEL=KSOLODATA on ARM64 (replace blkid/findfs path) | Planned for v0.3.1 |
-| - | Real-hardware ARM64 validation (Graviton / Ampere) | Planned for v0.3.1 |
+| - | OCI cosign signature verification | Planned for v0.3.2 |
+| - | LABEL=KSOLODATA on ARM64 (replace blkid/findfs path) | Planned for v0.3.2 |
+| - | Real-hardware ARM64 validation (Graviton / Ampere) | Planned for v0.3.2 |
 ## License

View File

@@ -1 +1 @@
-0.3.0
+0.3.1

View File

@@ -30,6 +30,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libarchive-tools \
libelf-dev \
libssl-dev \
nftables \
make \
parted \
squashfs-tools \

View File

@@ -53,6 +53,46 @@ CONFIG_NF_TABLES=m
CONFIG_VETH=m
CONFIG_VXLAN=m
# nftables address-family handlers. These are BOOL Kconfigs (not tristate)
# so they have to be built into the kernel — there's no module to modprobe
# at runtime. Without them, `nft add table ip ...` returns EOPNOTSUPP and
# KubeSolo v1.1.4+'s pod-masquerade setup fails at boot.
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_NETDEV=y
# nftables expression modules used by KubeSolo's masquerade ruleset, the
# kube-proxy nft backend (Kubernetes 1.34+), and the xtables compat path.
# Listed in modules.list / modules-arm64.list so init loads them at boot.
CONFIG_NFT_NAT=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_CT=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_REJECT_INET=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_FIB=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NFT_FIB_IPV6=m
# numgen drives kube-proxy's random / round-robin endpoint LB:
# `numgen random mod N vmap { ... }` in service rules.
# Without it kube-proxy's nft sync fails with ENOENT on every service.
CONFIG_NFT_NUMGEN=m
# hash drives consistent-hash LB (sessionAffinity=ClientIP, etc.).
CONFIG_NFT_HASH=m
# objref / limit / log are used by various policy expressions kube-proxy and
# CNI plugins emit. Including them pre-empts a future "could not process
# rule" debug loop.
CONFIG_NFT_OBJREF=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_LOG=m
# IPv4 NAT bits NFT_MASQ depends on. Auto-selected on most kernels but we
# pin them explicitly so olddefconfig doesn't strip them when the fragment
# is applied on top of a minimal defconfig.
CONFIG_NF_NAT_MASQUERADE=y
# Security: AppArmor + Audit
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y

View File

@@ -56,6 +56,17 @@ nft_fib
nft_fib_ipv4
nft_fib_ipv6
# nft expressions used by the Kubernetes 1.34+ nftables kube-proxy backend.
# Loading these at boot (stage 30) is mandatory because stage 85 sets
# kernel.modules_disabled=1, which would otherwise block kube-proxy from
# auto-loading them on first rule install.
# (Note: list parser only honours full-line "#"-prefixed comments, NOT
# inline "module # comment". Keep module names on their own line.)
nft_numgen
nft_hash
nft_limit
nft_log
# Reject targets (used by kube-proxy iptables-restore rules)
nf_reject_ipv4
nf_reject_ipv6

View File

@@ -54,6 +54,14 @@ nft_fib
nft_fib_ipv4
nft_fib_ipv6
# nft expressions used by the Kubernetes 1.34+ nftables kube-proxy backend.
# Must be loaded at stage 30 because stage 85 sets modules_disabled=1.
# (Parser ignores full-line "#" comments only — keep module names alone.)
nft_numgen
nft_hash
nft_limit
nft_log
# Reject targets (used by kube-proxy iptables-restore rules)
nf_reject_ipv4
nf_reject_ipv6

View File

@@ -224,9 +224,14 @@ if [ -f "$CUSTOM_VMLINUZ" ] && [ -d "$CUSTOM_MODULES/lib/modules/$KVER" ]; then
 fi
 while IFS= read -r mod; do
-    # Skip comments and blank lines
-    case "$mod" in \#*|"") continue ;; esac
-    mod=$(echo "$mod" | xargs)  # trim whitespace
+    # Strip any inline "# comment" tail before further processing —
+    # several entries in the upstream lists started carrying inline
+    # docs and silently broke module loading because modprobe got
+    # passed "name # comment" as the module name.
+    mod="${mod%%#*}"
+    # Skip blank-or-comment-only lines
+    case "$mod" in "") continue ;; esac
+    mod=$(echo "$mod" | xargs)  # trim whitespace + collapse internal
     [ -z "$mod" ] && continue
     if [ "$MODPROBE_WORKS" = true ]; then
@@ -397,7 +402,13 @@ if [ -f /usr/sbin/xtables-nft-multi ]; then
   ln -sf xtables-nft-multi "$ROOTFS/usr/sbin/$cmd"
 done
-# Copy required shared libraries (architecture-aware paths)
+# Copy required shared libraries (architecture-aware paths).
+# We deliberately bundle the *full* glibc family from the build host —
+# not just libc.so.6 — so dynamically-linked binaries we ship (nft,
+# xtables-nft-multi, etc.) load a consistent set of libraries. Mixing
+# glibc components across versions causes __stack_chk_guard mismatches
+# ("stack smashing detected" aborts) when stack frames cross between
+# functions linked against different libcs.
 mkdir -p "$ROOTFS/usr/lib/$LIB_ARCH" "$ROOTFS/lib/$LIB_ARCH"
 [ "$INJECT_ARCH" != "arm64" ] && mkdir -p "$ROOTFS/lib64"
 for lib in \
@@ -405,6 +416,13 @@ if [ -f /usr/sbin/xtables-nft-multi ]; then
"/lib/$LIB_ARCH/libmnl.so.0"* \ "/lib/$LIB_ARCH/libmnl.so.0"* \
"/lib/$LIB_ARCH/libnftnl.so.11"* \ "/lib/$LIB_ARCH/libnftnl.so.11"* \
"/lib/$LIB_ARCH/libc.so.6" \ "/lib/$LIB_ARCH/libc.so.6" \
"/lib/$LIB_ARCH/libpthread.so.0" \
"/lib/$LIB_ARCH/libdl.so.2" \
"/lib/$LIB_ARCH/libm.so.6" \
"/lib/$LIB_ARCH/libresolv.so.2" \
"/lib/$LIB_ARCH/librt.so.1" \
"/lib/$LIB_ARCH/libanl.so.1" \
"/lib/$LIB_ARCH/libgcc_s.so.1" \
"$LD_SO"; do "$LD_SO"; do
[ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true [ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true
done done
@@ -420,6 +438,30 @@ else
echo " WARN: xtables-nft-multi not found in builder (install iptables package)" echo " WARN: xtables-nft-multi not found in builder (install iptables package)"
fi fi
# Install nft (nftables CLI). KubeSolo v1.1.4+ uses `nft add table ip
# kubesolo-masq` to own pod masquerade rules directly instead of going
# through kube-proxy/CNI. Without nft in PATH, KubeSolo FATALs at startup
# with: nft: executable file not found in $PATH.
echo " Installing nft (nftables CLI) from builder..."
if [ -f /usr/sbin/nft ]; then
cp /usr/sbin/nft "$ROOTFS/usr/sbin/"
# nft pulls in libnftables + a few extras beyond what iptables-nft needed.
# libmnl, libnftnl, libxtables already copied by the iptables-nft block.
for lib in \
"/lib/$LIB_ARCH/libnftables.so.1"* \
"/lib/$LIB_ARCH/libedit.so.2"* \
"/lib/$LIB_ARCH/libjansson.so.4"* \
"/lib/$LIB_ARCH/libgmp.so.10"* \
"/lib/$LIB_ARCH/libtinfo.so.6"* \
"/lib/$LIB_ARCH/libbsd.so.0"* \
"/lib/$LIB_ARCH/libmd.so.0"*; do
[ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true
done
echo " Installed nft + shared libs"
else
echo " WARN: nft not found in builder (install nftables package) — KubeSolo v1.1.4+ pod masquerade will fail"
fi
# Kernel modules list (for init to load at boot)
if [ "$INJECT_ARCH" = "arm64" ]; then
cp "$PROJECT_ROOT/build/config/modules-arm64.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
@@ -517,6 +559,54 @@ nameserver 1.1.1.1
EOF
fi
# --- Resolve dual-glibc ambiguity (ARM64) ---
# piCore64's rootfs ships glibc at /lib/libc.so.6, and we've copied the
# build host's glibc to /lib/$LIB_ARCH/libc.so.6. Two libc.so.6 in the
# dynamic linker's search path can lead to a process loading both — one
# directly, one transitively — and "stack smashing detected" aborts when
# stack frames cross between them (each libc has its own
# __stack_chk_guard). Remove piCore's copies so resolution is unambiguous
# and write a proper /etc/ld.so.conf + cache pointing at our copies.
if [ "$INJECT_ARCH" = "arm64" ] && [ -d "$ROOTFS/lib/$LIB_ARCH" ]; then
echo " Pruning duplicate glibc components in $ROOTFS/lib/..."
for lib in \
libc.so.6 \
libpthread.so.0 \
libdl.so.2 \
libm.so.6 \
libresolv.so.2 \
librt.so.1 \
libanl.so.1 \
libgcc_s.so.1; do
# Only delete piCore's copy when our version exists; otherwise
# we'd leave the binary unable to find any libc at all.
if [ -e "$ROOTFS/lib/$lib" ] && [ -e "$ROOTFS/lib/$LIB_ARCH/$lib" ]; then
rm -f "$ROOTFS/lib/$lib"
fi
done
# ld.so.conf gives our $LIB_ARCH paths precedence over piCore's /lib
# (defaults vary by glibc version; this makes the order explicit).
cat > "$ROOTFS/etc/ld.so.conf" <<EOF
/lib/$LIB_ARCH
/usr/lib/$LIB_ARCH
/usr/local/lib
/lib
/usr/lib
EOF
# Generate /etc/ld.so.cache. ldconfig -r treats $ROOTFS as the system
# root, so it reads ld.so.conf from there and writes the cache there.
# Works even cross-arch (it only parses ELF headers, doesn't execute).
if command -v ldconfig >/dev/null 2>&1; then
ldconfig -r "$ROOTFS" 2>/dev/null && \
echo " Generated /etc/ld.so.cache via ldconfig" || \
echo " WARN: ldconfig failed; falling back to default search order"
else
echo " WARN: ldconfig not on builder; cache not generated"
fi
fi
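# (Sketch, not part of the original script.) After the prune, exactly one
# libc.so.6 should remain under the rootfs on ARM64; more than one means the
# dual-glibc ambiguity described above is still live.
if [ "$INJECT_ARCH" = "arm64" ]; then
LIBC_COUNT=$(find "$ROOTFS/lib" "$ROOTFS/usr/lib" -name 'libc.so.6' 2>/dev/null | wc -l)
[ "$LIBC_COUNT" -eq 1 ] || echo " WARN: found $LIBC_COUNT copies of libc.so.6 in rootfs (expected exactly 1)"
fi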
# --- Summary ---
echo ""
echo "==> Injection complete. Rootfs contents:"


@@ -26,6 +26,63 @@ Generic ubuntu jobs that don't care about arch fall through to whichever runner
them up first; on the Odroid they run in Docker via the `ubuntu-latest` /
`ubuntu-22.04` / `ubuntu-24.04` labels.
## Workflows in this repo
| Workflow file | Trigger | Where it runs | What it produces |
|---|---|---|---|
| `.gitea/workflows/ci.yaml` | push / PR to main | ubuntu-latest | Go tests, cross-arch binary build, shellcheck |
| `.gitea/workflows/build-arm64.yaml` | push to main, tags `v*`, manual | `arm64-linux` (Odroid) | ARM64 kernel + rootfs + disk image; uploads as workflow artifact only |
| `.gitea/workflows/release.yaml` | tags `v*` | mix: ubuntu-latest + `arm64-linux` | Full release: x86 ISO + disk, ARM64 disk, Go binaries, SHA256SUMS — posted to Gitea Releases via API |
### Release workflow specifics
`release.yaml` is what fires when you `git push origin vX.Y.Z`. The pipeline:
1. **test** — `go test` for the cloud-init + update modules (ubuntu-latest).
2. **build-binaries** — cross-compiles `kubesolo-cloudinit` and
`kubesolo-update` for linux-amd64 + linux-arm64 with the version baked
in via `-X main.version=…`.
3. **build-iso-amd64** — runs `make iso disk-image` on ubuntu-latest;
produces the x86_64 ISO and a `.img.xz` compressed disk image.
4. **build-disk-arm64** — runs the same flow on the Odroid (`arm64-linux`
label); produces `.arm64.img.xz`.
5. **release** — downloads everything, computes `SHA256SUMS`, calls
Gitea's `POST /api/v1/repos/<owner>/<repo>/releases` to create the
release, then `POST .../releases/<id>/assets?name=…` once per asset.
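
In practice, step 5 boils down to two `curl` calls against the Gitea API. A minimal sketch (the hostname, the `dist/*` asset glob, and the `jq` dependency are illustrative; `$TOKEN` is the secret discussed below, and `release.yaml` remains the source of truth):

```sh
# Create the release, then attach each built artifact as an asset.
API="https://gitea.example.com/api/v1/repos/OWNER/REPO"   # illustrative base URL
TAG="vX.Y.Z"

RELEASE_ID=$(curl -sf -X POST "$API/releases" \
  -H "Authorization: token $TOKEN" \
  -H "Content-Type: application/json" \
  -d "{\"tag_name\":\"$TAG\",\"name\":\"$TAG\",\"draft\":false}" | jq -r '.id')

for asset in dist/*; do
  curl -sf -X POST "$API/releases/$RELEASE_ID/assets?name=$(basename "$asset")" \
    -H "Authorization: token $TOKEN" \
    -F "attachment=@$asset"
done
```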
Authentication uses Gitea's built-in `${{ secrets.GITHUB_TOKEN }}` — the
runner auto-populates that secret with repo-write scope. If your runner
is configured without that automatic token (e.g. an older `act_runner`),
generate a personal access token with `repo:write` scope, add it as an
org secret named `GITEA_TOKEN`, and swap the `TOKEN: ${{ secrets.GITHUB_TOKEN }}`
line in `release.yaml` for `TOKEN: ${{ secrets.GITEA_TOKEN }}`.
### Why not the GitHub Marketplace release actions?
`release.yaml` used to call `softprops/action-gh-release@v2`. That action
hard-codes calls to `api.github.com` instead of using `${{ github.api_url }}`
(which Gitea sets to its own API). On Gitea's act_runner the action fails
silently — the job reports green but no release is created. We replaced
it with a direct `curl` so the behaviour is explicit and debuggable.
Similarly, `actions/upload-artifact@v4` and `actions/download-artifact@v4` are not
fully implemented by act_runner v1.0.x. Pin to `@v3` until upstream
support catches up.
### Manually re-running a release
Releases are immutable once published, but you can:
- **Delete and recreate the release** through the Gitea UI on the
`releases/tag/vX.Y.Z` page, then push the tag again (Gitea reuses the
existing tag), and re-trigger the workflow via the Actions UI.
- **Trigger the build-arm64 workflow manually** for a one-off arm64
artifact: Gitea UI → Actions → ARM64 Build → Run workflow.
Don't force-update a published tag — anyone who already fetched it (or
downloaded an asset) sees a checksum mismatch. Prefer cutting a new patch
release (vX.Y.Z+1) over rewriting a published one.
## Registering a new runner
### Prerequisites


@@ -133,7 +133,7 @@ if [ "$MODE" = "disk" ]; then
-bios "$UEFI_FW" \ -bios "$UEFI_FW" \
-drive "file=$DISK_IMAGE,format=raw,if=virtio,media=disk" \ -drive "file=$DISK_IMAGE,format=raw,if=virtio,media=disk" \
-net "nic,model=virtio" \ -net "nic,model=virtio" \
-net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22" -net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080"
exit 0 exit 0
fi fi
@@ -199,4 +199,4 @@ qemu-system-aarch64 \
-append "console=ttyAMA0 kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND" \ -append "console=ttyAMA0 kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND" \
-drive "file=$DATA_DISK,format=raw,if=virtio" \ -drive "file=$DATA_DISK,format=raw,if=virtio" \
-net "nic,model=virtio" \ -net "nic,model=virtio" \
-net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22" -net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080"


@@ -76,6 +76,29 @@ while [ ! -f "$KUBECONFIG_PATH" ] && [ $WAIT -lt 120 ]; do
fi
done
# Render the access banner. Written to /etc/motd so it's visible to anyone
# who later shells in (SSH extension, emergency shell, console login), and
# printed unconditionally to console below so the user sees it even when
# KubeSolo hasn't yet finished generating the kubeconfig.
ACCESS_BANNER="$(cat <<'BANNER'
============================================================
KubeSolo OS — host access
From your host machine, run:
curl -s http://localhost:8080 > ~/.kube/kubesolo-config
kubectl --kubeconfig ~/.kube/kubesolo-config get nodes
Notes:
- port 8080 serves the kubeconfig (admin) over HTTP
- port 6443 serves the Kubernetes API (HTTPS)
- Both ports are forwarded under QEMU's `-net user,hostfwd=…` config
============================================================
BANNER
)"
printf '%s\n' "$ACCESS_BANNER" > /etc/motd 2>/dev/null || true
if [ -f "$KUBECONFIG_PATH" ]; then if [ -f "$KUBECONFIG_PATH" ]; then
log_ok "KubeSolo is running (PID $KUBESOLO_PID)" log_ok "KubeSolo is running (PID $KUBESOLO_PID)"
@@ -95,18 +118,17 @@ if [ -f "$KUBECONFIG_PATH" ]; then
done) & done) &
log_ok "Kubeconfig available via HTTP on port 8080" log_ok "Kubeconfig available via HTTP on port 8080"
echo ""
echo "============================================================"
echo " From your host machine, run:"
echo ""
echo " curl -s http://localhost:8080 > ~/.kube/kubesolo-config"
echo " kubectl --kubeconfig ~/.kube/kubesolo-config get nodes"
echo "============================================================"
echo ""
else
log_warn "Kubeconfig not found after ${WAIT}s — KubeSolo may still be starting"
log_warn "Check manually: cat $KUBECONFIG_PATH"
fi
# Show the banner regardless of kubeconfig state: the HTTP server above only
# starts on success, but printing the instructions during the long first-boot
# wait is useful and harmless (user retries the curl until it 200s).
echo ""
printf '%s\n' "$ACCESS_BANNER"
echo ""
# Keep init alive — wait on KubeSolo process
wait $KUBESOLO_PID