Compare commits
41 Commits
36311ed4f4
...
v0.3.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b47e7af68 | |||
| 9fb894c5af | |||
| 28de656b97 | |||
| dfed6ddba8 | |||
| bce565e2f7 | |||
| 0c6e200585 | |||
| 1b44c9d621 | |||
| de10de0ef3 | |||
| 1de36289a5 | |||
| 31aac701db | |||
| 06e12a79bd | |||
| dc48caa959 | |||
| 65938d6d04 | |||
| 5cf81049f6 | |||
| 863f498cc2 | |||
| 05ab108de1 | |||
| c20f5a2e8c | |||
| 80aca5e372 | |||
| d51618badb | |||
| 19b99cf101 | |||
| 059ec7955f | |||
| a6c5d56ade | |||
| 6c6940afac | |||
| 4e3f1d6cf0 | |||
| 6ff77c4482 | |||
| a2764218fc | |||
| 2ba816bf6e | |||
| 65dcddb47e | |||
| ba4812f637 | |||
| 09dcea84ef | |||
| a4e719ba0e | |||
| 61bd28c692 | |||
| 4fc078f7a3 | |||
| 6c15ba7776 | |||
| 958524e6d8 | |||
| efc7f80b65 | |||
| 7abf0e0c04 | |||
| 60d0edaf84 | |||
| f3d86e4d8f | |||
| 04a5179533 | |||
| d9ac58418d |
73
.gitea/workflows/build-arm64.yaml
Normal file
73
.gitea/workflows/build-arm64.yaml
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
name: ARM64 Build
|
||||||
|
|
||||||
|
# Triggers on push to main and on tags. Skipped on PRs to keep PR feedback fast;
|
||||||
|
# manual via Gitea UI ("Run workflow") if needed.
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
tags: ['v*']
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-arm64-generic:
|
||||||
|
name: Build generic ARM64 disk image
|
||||||
|
# Routes to the Odroid self-hosted runner via the arm64-linux label.
|
||||||
|
# See docs/ci-runners.md for runner setup.
|
||||||
|
runs-on: arm64-linux
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Show host info
|
||||||
|
run: |
|
||||||
|
uname -a
|
||||||
|
nproc
|
||||||
|
free -h
|
||||||
|
df -h /home /tmp || df -h /
|
||||||
|
|
||||||
|
- name: Verify build prerequisites
|
||||||
|
run: |
|
||||||
|
# The Odroid runner ships these via apt; this is a sanity check.
|
||||||
|
which gcc make bc bison flex cpio gzip xz wget curl mkfs.ext4 mkfs.vfat \
|
||||||
|
sfdisk losetup kpartx grub-mkimage qemu-system-aarch64 git busybox
|
||||||
|
ls -la /bin/busybox
|
||||||
|
file /bin/busybox | grep -q 'statically linked' || {
|
||||||
|
echo "ERROR: /bin/busybox is not statically linked — install busybox-static"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Build mainline ARM64 kernel
|
||||||
|
# Cached in build/cache/kernel-arm64-generic between runs (persistent
|
||||||
|
# working dir on the host runner). First run takes 30-60 min; reruns
|
||||||
|
# exit immediately once the .config + Image match.
|
||||||
|
run: |
|
||||||
|
time make kernel-arm64
|
||||||
|
|
||||||
|
- name: Build cross-arch Go binaries
|
||||||
|
run: make build-cross
|
||||||
|
|
||||||
|
- name: Prepare generic ARM64 rootfs
|
||||||
|
run: sudo make rootfs-arm64
|
||||||
|
|
||||||
|
- name: Build ARM64 UEFI disk image
|
||||||
|
run: sudo make disk-image-arm64
|
||||||
|
|
||||||
|
- name: Show output artifact
|
||||||
|
run: |
|
||||||
|
ls -lh output/
|
||||||
|
file output/*.arm64.img
|
||||||
|
|
||||||
|
- name: Boot smoke test (best-effort)
|
||||||
|
# KubeSolo's image import deadline can fire under QEMU TCG on the
|
||||||
|
# Odroid; the boot itself succeeds through stage 90 every time, but
|
||||||
|
# the final "KubeSolo started" health check is timing-sensitive.
|
||||||
|
# We mark this continue-on-error until we have KVM or real hardware.
|
||||||
|
continue-on-error: true
|
||||||
|
run: sudo make test-boot-arm64-disk
|
||||||
|
|
||||||
|
- name: Upload disk image
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: kubesolo-os-arm64-${{ github.ref_name }}
|
||||||
|
path: output/kubesolo-os-*.arm64.img
|
||||||
|
retention-days: 90
|
||||||
@@ -62,7 +62,8 @@ jobs:
|
|||||||
working-directory: update
|
working-directory: update
|
||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
uses: actions/upload-artifact@v4
|
# @v4 not yet fully supported by Gitea Actions runner; @v3 works.
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: binaries-${{ matrix.suffix }}
|
name: binaries-${{ matrix.suffix }}
|
||||||
path: |
|
path: |
|
||||||
@@ -78,14 +79,39 @@ jobs:
|
|||||||
- name: Install shellcheck
|
- name: Install shellcheck
|
||||||
run: sudo apt-get update && sudo apt-get install -y shellcheck
|
run: sudo apt-get update && sudo apt-get install -y shellcheck
|
||||||
|
|
||||||
|
# --severity=error filters out style/info/warning findings. Several of
|
||||||
|
# those are unavoidable in init-style scripts that source other files
|
||||||
|
# dynamically (SC1090/SC1091/SC2034). Exclude them explicitly so they
|
||||||
|
# don't fire even at warning level if we lift severity later.
|
||||||
|
# Codes excluded:
|
||||||
|
# SC1090 — non-constant source path (we source by stage name)
|
||||||
|
# SC1091 — source target not specified as input (we reference relative paths)
|
||||||
|
# SC2034 — var "unused" (false positive: used via sourced scripts)
|
||||||
|
# SC2002 — useless cat (style only, very common pattern in our scripts)
|
||||||
|
# SC2015 — A && B || C (deliberate idiom)
|
||||||
|
# SC2012 — use find not ls (style only)
|
||||||
|
# SC2013 — read words not lines (style only, applies to /proc parsing)
|
||||||
|
|
||||||
- name: Lint init scripts (POSIX sh)
|
- name: Lint init scripts (POSIX sh)
|
||||||
run: shellcheck -s sh init/init.sh init/lib/*.sh init/emergency-shell.sh
|
run: |
|
||||||
|
shellcheck -s sh --severity=error \
|
||||||
|
-e SC1090,SC1091,SC2034,SC2002,SC2015,SC2012,SC2013 \
|
||||||
|
init/init.sh init/lib/*.sh init/emergency-shell.sh
|
||||||
|
|
||||||
- name: Lint build scripts (bash)
|
- name: Lint build scripts (bash)
|
||||||
run: shellcheck -s bash build/scripts/*.sh build/config/kernel-audit.sh
|
run: |
|
||||||
|
shellcheck -s bash --severity=error \
|
||||||
|
-e SC1090,SC1091,SC2034,SC2002,SC2015,SC2012,SC2013 \
|
||||||
|
build/scripts/*.sh build/config/kernel-audit.sh
|
||||||
|
|
||||||
- name: Lint test scripts (bash)
|
- name: Lint test scripts (bash)
|
||||||
run: shellcheck -s bash test/qemu/*.sh test/integration/*.sh test/kernel/*.sh || true
|
run: |
|
||||||
|
shellcheck -s bash --severity=error \
|
||||||
|
-e SC1090,SC1091,SC2034,SC2002,SC2015,SC2012,SC2013 \
|
||||||
|
test/qemu/*.sh test/integration/*.sh test/kernel/*.sh
|
||||||
|
|
||||||
- name: Lint hack scripts (bash)
|
- name: Lint hack scripts (bash)
|
||||||
run: shellcheck -s bash hack/*.sh || true
|
run: |
|
||||||
|
shellcheck -s bash --severity=error \
|
||||||
|
-e SC1090,SC1091,SC2034,SC2002,SC2015,SC2012,SC2013 \
|
||||||
|
hack/*.sh
|
||||||
|
|||||||
14
.gitignore
vendored
14
.gitignore
vendored
@@ -18,8 +18,22 @@ build/rootfs-work/
|
|||||||
|
|
||||||
# OS
|
# OS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
._*
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
|
# Photos / screenshots — keep documentation images under docs/ instead
|
||||||
|
*.PNG
|
||||||
|
*.png
|
||||||
|
*.JPG
|
||||||
|
*.jpg
|
||||||
|
*.JPEG
|
||||||
|
*.jpeg
|
||||||
|
*.HEIC
|
||||||
|
*.heic
|
||||||
|
|
||||||
# Go
|
# Go
|
||||||
update/update-agent
|
update/update-agent
|
||||||
cloud-init/cloud-init-parser
|
cloud-init/cloud-init-parser
|
||||||
|
|
||||||
|
# Local docs (not tracked)
|
||||||
|
TINYCORE-MODIFICATIONS.md
|
||||||
|
|||||||
178
CHANGELOG.md
178
CHANGELOG.md
@@ -5,6 +5,175 @@ All notable changes to KubeSolo OS are documented in this file.
|
|||||||
Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.3.0] - 2026-05-14
|
||||||
|
|
||||||
|
The main themes: generic ARM64 (not just Raspberry Pi), an honest update
|
||||||
|
lifecycle with state file + metrics, OCI multi-arch distribution via ghcr.io,
|
||||||
|
and policy gates (channels, maintenance windows, version stepping-stones,
|
||||||
|
pre-flight checks, auto-rollback).
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Generic ARM64 build track distinct from Raspberry Pi:
|
||||||
|
- `make kernel-arm64` builds a mainline kernel.org LTS kernel (6.12.10 by
|
||||||
|
default) from `arm64 defconfig` + shared `kernel-container.fragment` +
|
||||||
|
arm64 virt-host enables (VIRTIO_*, EFI_STUB, NVMe).
|
||||||
|
- `make disk-image-arm64` produces a UEFI-bootable raw GPT image with A/B
|
||||||
|
system partitions and GRUB-EFI ARM64. Targets QEMU virt, Graviton, Ampere,
|
||||||
|
or any UEFI ARM64 host.
|
||||||
|
- `hack/dev-vm-arm64.sh --disk` boots the built image through QEMU UEFI for
|
||||||
|
end-to-end testing.
|
||||||
|
- `test/qemu/test-boot-arm64-disk.sh` automated boot smoke test.
|
||||||
|
- Bumped KubeSolo to v1.1.5 (was v1.1.0). New cloud-init flags surfaced:
|
||||||
|
- `kubesolo.full` (v1.1.4+) — disable edge-optimised overrides
|
||||||
|
- `kubesolo.disable-ipv6` (v1.1.5+)
|
||||||
|
- `kubesolo.db-wal-repair` (v1.1.5+) — recover from unclean shutdowns
|
||||||
|
- Per-arch supply-chain verification: `KUBESOLO_SHA256_AMD64` and
|
||||||
|
`KUBESOLO_SHA256_ARM64` in `versions.env`, applied to the tarball before
|
||||||
|
extract.
|
||||||
|
- `docs/arm64-architecture.md` — defines the generic-vs-RPi two-track layout.
|
||||||
|
- `docs/arm64-status.md` — Phase 3 status snapshot, known limitations, what's
|
||||||
|
needed to ship.
|
||||||
|
- `docs/ci-runners.md` — Gitea Actions runner setup (Odroid arm64-linux).
|
||||||
|
- Update agent state machine and observability (`update/pkg/state`):
|
||||||
|
- Persistent on-disk `state.json` at `/var/lib/kubesolo/update/state.json`
|
||||||
|
(atomic write via tmp + rename). Records Phase (Idle / Checking /
|
||||||
|
Downloading / Staged / Activated / Verifying / Success / RolledBack /
|
||||||
|
Failed), FromVersion, ToVersion, StartedAt, UpdatedAt, LastError,
|
||||||
|
AttemptCount, HealthCheckFailures.
|
||||||
|
- `apply`, `activate`, `healthcheck`, `rollback` all transition state
|
||||||
|
explicitly on entry / exit / failure. Errors land in LastError so
|
||||||
|
`status` can show why.
|
||||||
|
- `kubesolo-update status --json` emits the full state for
|
||||||
|
orchestration tooling. Human-readable mode adds an "Update Lifecycle"
|
||||||
|
section when not idle.
|
||||||
|
- New Prometheus metrics: `kubesolo_update_phase{phase="..."}` (all 9
|
||||||
|
phase labels always emitted), `kubesolo_update_attempts_total`,
|
||||||
|
`kubesolo_update_last_attempt_timestamp_seconds`.
|
||||||
|
- Channels, maintenance windows, version policy (`update/pkg/config`):
|
||||||
|
- `/etc/kubesolo/update.conf` (key=value, comments, missing-OK) configures
|
||||||
|
server, channel, maintenance_window, pubkey, healthcheck_url,
|
||||||
|
auto_rollback_after.
|
||||||
|
- `cloud-init` top-level `updates:` block writes `update.conf` on first
|
||||||
|
boot. Empty block leaves any existing file alone.
|
||||||
|
- `apply` enforces four gates before download: maintenance window,
|
||||||
|
channel match, runtime architecture match, min_compatible_version
|
||||||
|
stepping-stone. All gate failures land in the state machine as Failed
|
||||||
|
with a clear LastError. `--force` bypasses window + node-block-label.
|
||||||
|
- `UpdateMetadata` JSON gains `channel`, `min_compatible_version`,
|
||||||
|
`architecture` (all optional, omitempty).
|
||||||
|
- OCI registry distribution (`update/pkg/oci`, ~280 LOC, 9 tests):
|
||||||
|
- `kubesolo-update apply --registry ghcr.io/<org>/kubesolo-os --tag stable`
|
||||||
|
pulls update artifacts from any OCI-compliant registry. Multi-arch
|
||||||
|
indexes resolve to the runtime.GOARCH-matching manifest automatically.
|
||||||
|
- Custom media types: `application/vnd.kubesolo.os.kernel.v1+octet-stream`
|
||||||
|
and `application/vnd.kubesolo.os.initramfs.v1+gzip`. Annotations:
|
||||||
|
`io.kubesolo.os.{version,channel,architecture,min_compatible_version,
|
||||||
|
release_notes,release_date}`.
|
||||||
|
- End-to-end digest verification from manifest to blobs via oras-go/v2.
|
||||||
|
- `build/scripts/push-oci-artifact.sh` publishes per-arch artifacts via
|
||||||
|
`oras`. Multi-arch index composition documented inline.
|
||||||
|
- Dependencies added (update module only): oras.land/oras-go/v2 and
|
||||||
|
transitive opencontainers/{go-digest,image-spec} + golang.org/x/sync.
|
||||||
|
- Pre-flight gates and deeper healthcheck (`update/pkg/health` extended,
|
||||||
|
`update/pkg/partition` extended):
|
||||||
|
- Free-space pre-flight on the passive partition (image + 10% headroom)
|
||||||
|
via `partition.FreeBytes` / `HasFreeSpaceFor`.
|
||||||
|
- Node-block-label pre-flight: refuses if the local K8s node carries
|
||||||
|
`updates.kubesolo.io/block=true`. Silently allowed when no kubeconfig
|
||||||
|
(air-gap). Skipped by `--force`.
|
||||||
|
- `CheckKubeSystemReady` waits until every kube-system pod has held
|
||||||
|
Running for ≥ N seconds (configurable via
|
||||||
|
`--kube-system-settle`).
|
||||||
|
- `CheckProbeURL` GETs an operator-supplied URL; 200 = pass. Configurable
|
||||||
|
via `--healthcheck-url` or `healthcheck_url=` in update.conf.
|
||||||
|
- `CheckDiskWritable` writes / fsyncs / reads / deletes a probe file
|
||||||
|
under `/var/lib/kubesolo` to catch a wedged data partition.
|
||||||
|
- `--auto-rollback-after N` (also `auto_rollback_after=` in update.conf):
|
||||||
|
after N consecutive post-activation healthcheck failures, the agent
|
||||||
|
calls `ForceRollback()` and the operator/init reboots. Reset to 0 on
|
||||||
|
a clean pass.
|
||||||
|
- `.gitea/workflows/build-arm64.yaml` — full ARM64 build on the Odroid
|
||||||
|
self-hosted runner. Triggers on push to main, tags, and workflow_dispatch.
|
||||||
|
Boot smoke test marked continue-on-error pending KVM or real-hardware
|
||||||
|
validation.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- `build/scripts/build-kernel-arm64.sh` is now the **generic ARM64** kernel
|
||||||
|
build (mainline kernel.org LTS, generic UEFI/virtio).
|
||||||
|
- Renamed `build/scripts/build-kernel-rpi.sh` (was `build-kernel-arm64.sh`).
|
||||||
|
RPi kernel build (raspberrypi/linux fork, bcm2711_defconfig) lives here now.
|
||||||
|
- Renamed `build/config/kernel-container.fragment` (was
|
||||||
|
`rpi-kernel-config.fragment`). Misnomer: contents are arch-agnostic and now
|
||||||
|
shared across x86, ARM64-generic, and RPi kernels.
|
||||||
|
- `build/scripts/build-kernel.sh` (x86) refactored to consume the shared
|
||||||
|
fragment via a generic `apply_fragment` function. ~50 lines of duplication
|
||||||
|
killed.
|
||||||
|
- `KUBESOLO_VERSION` moved out of `fetch-components.sh` defaults into
|
||||||
|
`versions.env`. Bumping is now a one-line PR.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Native ARM64 build hosts (e.g. an Odroid runner) no longer require the x86
|
||||||
|
cross-compiler. Both `build-kernel-arm64.sh` and `build-kernel-rpi.sh` detect
|
||||||
|
`uname -m` and use the host's gcc directly when arch matches.
|
||||||
|
- ARM64 grub.cfg console ordering: `ttyAMA0` is now the primary console
|
||||||
|
(`console=ttyS0,... console=ttyAMA0,...`). Init output is now visible on
|
||||||
|
QEMU virt and most ARM64 SBCs without further configuration.
|
||||||
|
- ARM64 boot: replaced piCore64's `/init` with our staged init at `/init` and
|
||||||
|
`/sbin/init`. Previously the kernel ran piCore's TCE handler which
|
||||||
|
segfaulted in our environment.
|
||||||
|
- ARM64 boot: replaced piCore64's broken dynamic BusyBox with the build
|
||||||
|
host's `busybox-static`. piCore's binary triggered EL0 instruction-abort
|
||||||
|
panics on QEMU virt under both `-cpu cortex-a72` and `-cpu max`.
|
||||||
|
- POSIX-character-class portability: `tr -d '[:space:]'` in
|
||||||
|
`30-kernel-modules.sh` and `40-sysctl.sh` replaced with explicit
|
||||||
|
`' \t\r\n'`. Ubuntu's busybox-static 1.30.1 doesn't parse `[:space:]` and
|
||||||
|
instead deletes the literal characters `[ : s p a c e ]`, which truncated
|
||||||
|
module names (`virtio_net` → `virtio_nt`, etc.) and sysctl keys.
|
||||||
|
- `inject-kubesolo.sh` no longer copies `init/lib/functions.sh` into
|
||||||
|
`init.d/`. Previously the main init loop tried to run it as a stage after
|
||||||
|
stage 90 and panicked with "Init completed without exec'ing KubeSolo".
|
||||||
|
- ARM64 disk image: `TARGET_ARCH=arm64 create-disk-image.sh` produces
|
||||||
|
`BOOTAA64.EFI` via `grub-mkimage -O arm64-efi` (not `bootx64.efi`). Skips
|
||||||
|
the BIOS-only `grub-install --target=i386-pc` step.
|
||||||
|
- `build/Dockerfile.builder`: added `grub-efi-amd64-bin`, `grub-efi-arm64-bin`,
|
||||||
|
`grub-pc-bin`, `grub-common`, `grub2-common`, and `busybox-static` so the
|
||||||
|
Docker-based build flow can produce ARM64 disk images and gets the same
|
||||||
|
BusyBox swap behaviour as native builds.
|
||||||
|
|
||||||
|
### Known limitations (deferred to follow-up)
|
||||||
|
|
||||||
|
- **ARM64 LABEL= resolution** doesn't work yet — piCore's `blkid`/`findfs`
|
||||||
|
crash in QEMU and our static busybox lacks the applets. Hardcoded
|
||||||
|
`/dev/vda4` as a workaround in `build/grub/grub-arm64.cfg`. Production
|
||||||
|
fix: ship static `blkid`/`findfs` or replace LABEL resolution with a
|
||||||
|
sysfs walk.
|
||||||
|
- **AppArmor profile load fails on ARM64** (apparmor_parser ABI mismatch).
|
||||||
|
Init reports it; boot continues without enforcement.
|
||||||
|
- **OCI signature verification** is deferred. The HTTP transport still
|
||||||
|
honours `--pubkey` for `.sig` files; the OCI transport is digest-verified
|
||||||
|
end-to-end via oras-go but does not yet consume cosign-style referrer
|
||||||
|
attestations. Targeted for v0.3.1.
|
||||||
|
- **Real-hardware validation** of the generic ARM64 image is still
|
||||||
|
pending. Builds and boots end-to-end under QEMU virt; production
|
||||||
|
certification waits on a Graviton / Ampere run.
|
||||||
|
- **QEMU TCG performance** can trigger KubeSolo's first-boot image-import
|
||||||
|
deadline. Not a defect in the OS itself; real hardware and KVM-accelerated
|
||||||
|
QEMU complete the import in seconds.
|
||||||
|
|
||||||
|
## [0.2.0] - 2026-02-12
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Cloud-init: support all documented KubeSolo CLI flags (`--local-storage-shared-path`, `--debug`, `--pprof-server`, `--portainer-edge-id`, `--portainer-edge-key`, `--portainer-edge-async`)
|
||||||
|
- Cloud-init: `full-config.yaml` example showing all supported parameters
|
||||||
|
- Cloud-init: KubeSolo configuration reference table in docs/cloud-init.md
|
||||||
|
- Security hardening: mount hardening, sysctl, kernel module lock, AppArmor profiles
|
||||||
|
- ARM64 Raspberry Pi support with A/B boot via tryboot
|
||||||
|
- BootEnv abstraction for GRUB and RPi boot environments
|
||||||
|
- Go 1.25.5 installed on host for native builds
|
||||||
|
|
||||||
## [0.1.0] - 2026-02-12
|
## [0.1.0] - 2026-02-12
|
||||||
|
|
||||||
First release with all 5 design-doc phases complete. ISO boots and runs K8s pods.
|
First release with all 5 design-doc phases complete. ISO boots and runs K8s pods.
|
||||||
@@ -78,3 +247,12 @@ First release with all 5 design-doc phases complete. ISO boots and runs K8s pods
|
|||||||
- Fixed KVM flag handling in dev-vm.sh (bash array context)
|
- Fixed KVM flag handling in dev-vm.sh (bash array context)
|
||||||
- Added iptables table pre-initialization before kube-proxy start (nf_tables issue)
|
- Added iptables table pre-initialization before kube-proxy start (nf_tables issue)
|
||||||
- Added /dev/kmsg and /etc/machine-id creation for kubelet
|
- Added /dev/kmsg and /etc/machine-id creation for kubelet
|
||||||
|
- Added CA certificates bundle to initramfs (containerd TLS verification for Docker Hub)
|
||||||
|
- Added DNS fallback (10.0.2.3 + 8.8.8.8) when DHCP client doesn't populate resolv.conf
|
||||||
|
- Added headless Service to Portainer Edge Agent manifest (agent peer discovery DNS)
|
||||||
|
- Added kubesolo.edge_id/edge_key kernel boot parameters for Portainer Edge
|
||||||
|
- Added auto-format of unformatted data disks on first boot
|
||||||
|
- Rewrote dev-vm.sh for macOS: bsdtar ISO extraction, Homebrew mkfs.ext4 detection, direct kernel boot, TCG acceleration, port 8080 forwarding
|
||||||
|
- Kubeconfig now served via HTTP on port 8080 (serial console truncates base64 lines)
|
||||||
|
- Added 127.0.0.1 and 10.0.2.15 to API server SANs for QEMU port forwarding
|
||||||
|
- dev-vm.sh now works on Linux: fallback ISO extraction via isoinfo or loop mount, KVM auto-detection, platform-aware error messages
|
||||||
|
|||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 Anthony De Lorenzo
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
85
Makefile
85
Makefile
@@ -1,9 +1,10 @@
|
|||||||
.PHONY: all fetch kernel build-cloudinit build-update-agent build-cross rootfs initramfs \
|
.PHONY: all fetch kernel build-cloudinit build-update-agent build-cross rootfs initramfs \
|
||||||
iso disk-image oci-image \
|
iso disk-image disk-image-arm64 oci-image rpi-image \
|
||||||
test-boot test-k8s test-persistence test-deploy test-storage test-all \
|
kernel-arm64 kernel-rpi rootfs-arm64 rootfs-arm64-rpi \
|
||||||
test-cloudinit test-update-agent \
|
test-boot test-k8s test-persistence test-deploy test-storage test-security test-all \
|
||||||
|
test-boot-arm64 test-boot-arm64-disk test-cloudinit test-update-agent \
|
||||||
bench-boot bench-resources \
|
bench-boot bench-resources \
|
||||||
dev-vm dev-vm-shell quick docker-build shellcheck \
|
dev-vm dev-vm-shell dev-vm-arm64 quick docker-build shellcheck \
|
||||||
kernel-audit clean distclean help
|
kernel-audit clean distclean help
|
||||||
|
|
||||||
SHELL := /bin/bash
|
SHELL := /bin/bash
|
||||||
@@ -71,6 +72,48 @@ build-cross:
|
|||||||
@echo "==> Cross-compiling for amd64 + arm64..."
|
@echo "==> Cross-compiling for amd64 + arm64..."
|
||||||
$(BUILD_DIR)/scripts/build-cross.sh
|
$(BUILD_DIR)/scripts/build-cross.sh
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ARM64 generic targets (mainline kernel, UEFI, virtio — for cloud / SBCs)
|
||||||
|
# =============================================================================
|
||||||
|
kernel-arm64:
|
||||||
|
@echo "==> Building generic ARM64 kernel (mainline LTS)..."
|
||||||
|
$(BUILD_DIR)/scripts/build-kernel-arm64.sh
|
||||||
|
|
||||||
|
# Generic ARM64 rootfs consumes the mainline kernel modules.
|
||||||
|
rootfs-arm64: build-cross
|
||||||
|
@echo "==> Preparing generic ARM64 rootfs..."
|
||||||
|
TARGET_ARCH=arm64 $(BUILD_DIR)/scripts/fetch-components.sh
|
||||||
|
TARGET_ARCH=arm64 $(BUILD_DIR)/scripts/extract-core.sh
|
||||||
|
TARGET_ARCH=arm64 TARGET_VARIANT=generic $(BUILD_DIR)/scripts/inject-kubesolo.sh
|
||||||
|
@echo "==> Packing generic ARM64 initramfs..."
|
||||||
|
$(BUILD_DIR)/scripts/pack-initramfs.sh
|
||||||
|
|
||||||
|
disk-image-arm64: rootfs-arm64 kernel-arm64
|
||||||
|
@echo "==> Creating generic ARM64 disk image (UEFI + GRUB A/B)..."
|
||||||
|
TARGET_ARCH=arm64 $(BUILD_DIR)/scripts/create-disk-image.sh
|
||||||
|
@echo "==> Built: $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).arm64.img"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ARM64 Raspberry Pi targets (RPi-patched kernel, firmware blobs, SD card)
|
||||||
|
# =============================================================================
|
||||||
|
kernel-rpi:
|
||||||
|
@echo "==> Building RPi kernel (raspberrypi/linux)..."
|
||||||
|
$(BUILD_DIR)/scripts/build-kernel-rpi.sh
|
||||||
|
|
||||||
|
# RPi-flavoured rootfs consumes the RPi kernel modules.
|
||||||
|
rootfs-arm64-rpi: build-cross
|
||||||
|
@echo "==> Preparing RPi ARM64 rootfs..."
|
||||||
|
TARGET_ARCH=arm64 $(BUILD_DIR)/scripts/fetch-components.sh
|
||||||
|
TARGET_ARCH=arm64 $(BUILD_DIR)/scripts/extract-core.sh
|
||||||
|
TARGET_ARCH=arm64 TARGET_VARIANT=rpi $(BUILD_DIR)/scripts/inject-kubesolo.sh
|
||||||
|
@echo "==> Packing RPi ARM64 initramfs..."
|
||||||
|
$(BUILD_DIR)/scripts/pack-initramfs.sh
|
||||||
|
|
||||||
|
rpi-image: rootfs-arm64-rpi kernel-rpi
|
||||||
|
@echo "==> Creating Raspberry Pi SD card image..."
|
||||||
|
$(BUILD_DIR)/scripts/create-rpi-image.sh
|
||||||
|
@echo "==> Built: $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).rpi.img"
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Kernel validation
|
# Kernel validation
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -101,6 +144,18 @@ test-storage: iso
|
|||||||
@echo "==> Testing local storage provisioning..."
|
@echo "==> Testing local storage provisioning..."
|
||||||
test/integration/test-local-storage.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso
|
test/integration/test-local-storage.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso
|
||||||
|
|
||||||
|
test-security: iso
|
||||||
|
@echo "==> Testing security hardening..."
|
||||||
|
test/integration/test-security-hardening.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso
|
||||||
|
|
||||||
|
test-boot-arm64:
|
||||||
|
@echo "==> Testing ARM64 boot in QEMU (direct kernel)..."
|
||||||
|
test/qemu/test-boot-arm64.sh
|
||||||
|
|
||||||
|
test-boot-arm64-disk: disk-image-arm64
|
||||||
|
@echo "==> Testing ARM64 UEFI disk boot in QEMU..."
|
||||||
|
test/qemu/test-boot-arm64-disk.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).arm64.img
|
||||||
|
|
||||||
test-all: test-boot test-k8s test-persistence
|
test-all: test-boot test-k8s test-persistence
|
||||||
|
|
||||||
# Cloud-init Go tests
|
# Cloud-init Go tests
|
||||||
@@ -163,6 +218,10 @@ dev-vm-debug: iso
|
|||||||
@echo "==> Launching dev VM (debug mode)..."
|
@echo "==> Launching dev VM (debug mode)..."
|
||||||
hack/dev-vm.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso --debug
|
hack/dev-vm.sh $(OUTPUT_DIR)/$(OS_NAME)-$(VERSION).iso --debug
|
||||||
|
|
||||||
|
dev-vm-arm64:
|
||||||
|
@echo "==> Launching ARM64 dev VM..."
|
||||||
|
hack/dev-vm-arm64.sh
|
||||||
|
|
||||||
# Fast rebuild: only repack initramfs + ISO (skip fetch/extract)
|
# Fast rebuild: only repack initramfs + ISO (skip fetch/extract)
|
||||||
quick:
|
quick:
|
||||||
@echo "==> Quick rebuild (repack + ISO only)..."
|
@echo "==> Quick rebuild (repack + ISO only)..."
|
||||||
@@ -199,7 +258,7 @@ distclean: clean
|
|||||||
help:
|
help:
|
||||||
@echo "KubeSolo OS Build System (v$(VERSION))"
|
@echo "KubeSolo OS Build System (v$(VERSION))"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "Build targets:"
|
@echo "Build targets (x86_64):"
|
||||||
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
@echo " make fetch Download Tiny Core ISO, KubeSolo, dependencies"
|
||||||
@echo " make kernel Build custom kernel with CONFIG_CGROUP_BPF=y"
|
@echo " make kernel Build custom kernel with CONFIG_CGROUP_BPF=y"
|
||||||
@echo " make build-cloudinit Build cloud-init Go binary"
|
@echo " make build-cloudinit Build cloud-init Go binary"
|
||||||
@@ -213,25 +272,39 @@ help:
|
|||||||
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
@echo " make quick Fast rebuild (re-inject + repack + ISO only)"
|
||||||
@echo " make docker-build Reproducible build inside Docker"
|
@echo " make docker-build Reproducible build inside Docker"
|
||||||
@echo ""
|
@echo ""
|
||||||
|
@echo "Build targets (ARM64 generic — UEFI / cloud / SBCs):"
|
||||||
|
@echo " make kernel-arm64 Build mainline ARM64 kernel from kernel.org LTS"
|
||||||
|
@echo " make rootfs-arm64 Prepare generic ARM64 rootfs (mainline kernel modules)"
|
||||||
|
@echo " make disk-image-arm64 Create UEFI-bootable A/B GPT disk image (.arm64.img)"
|
||||||
|
@echo ""
|
||||||
|
@echo "Build targets (ARM64 Raspberry Pi):"
|
||||||
|
@echo " make kernel-rpi Build RPi kernel from raspberrypi/linux"
|
||||||
|
@echo " make rootfs-arm64-rpi Prepare RPi-flavoured rootfs (RPi kernel modules)"
|
||||||
|
@echo " make rpi-image Create Raspberry Pi SD card image with A/B autoboot"
|
||||||
|
@echo ""
|
||||||
@echo "Test targets:"
|
@echo "Test targets:"
|
||||||
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
@echo " make test-boot Boot ISO in QEMU, verify boot success"
|
||||||
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
@echo " make test-k8s Boot + verify K8s node reaches Ready"
|
||||||
@echo " make test-persist Reboot disk image, verify state persists"
|
@echo " make test-persist Reboot disk image, verify state persists"
|
||||||
@echo " make test-deploy Deploy nginx pod, verify Running"
|
@echo " make test-deploy Deploy nginx pod, verify Running"
|
||||||
@echo " make test-storage Test PVC with local-path provisioner"
|
@echo " make test-storage Test PVC with local-path provisioner"
|
||||||
|
@echo " make test-security Verify security hardening (AppArmor, sysctl, mounts)"
|
||||||
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
@echo " make test-cloudinit Run cloud-init Go unit tests"
|
||||||
@echo " make test-update-agent Run update agent Go unit tests"
|
@echo " make test-update-agent Run update agent Go unit tests"
|
||||||
@echo " make test-update A/B update cycle integration test"
|
@echo " make test-update A/B update cycle integration test"
|
||||||
@echo " make test-rollback Forced rollback integration test"
|
@echo " make test-rollback Forced rollback integration test"
|
||||||
|
@echo " make test-boot-arm64 ARM64 boot test (direct kernel, fast)"
|
||||||
|
@echo " make test-boot-arm64-disk ARM64 full UEFI disk-boot test"
|
||||||
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
@echo " make test-all Run core tests (boot + k8s + persistence)"
|
||||||
@echo " make test-integ Run full integration suite"
|
@echo " make test-integ Run full integration suite"
|
||||||
@echo " make bench-boot Benchmark boot performance (3 runs)"
|
@echo " make bench-boot Benchmark boot performance (3 runs)"
|
||||||
@echo " make bench-resources Benchmark resource usage (requires running VM)"
|
@echo " make bench-resources Benchmark resource usage (requires running VM)"
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "Dev targets:"
|
@echo "Dev targets:"
|
||||||
@echo " make dev-vm Launch interactive QEMU VM"
|
@echo " make dev-vm Launch interactive QEMU VM (x86_64)"
|
||||||
@echo " make dev-vm-shell Launch QEMU VM -> emergency shell"
|
@echo " make dev-vm-shell Launch QEMU VM -> emergency shell"
|
||||||
@echo " make dev-vm-debug Launch QEMU VM with debug logging"
|
@echo " make dev-vm-debug Launch QEMU VM with debug logging"
|
||||||
|
@echo " make dev-vm-arm64 Launch ARM64 QEMU VM"
|
||||||
@echo " make kernel-audit Check kernel config against requirements"
|
@echo " make kernel-audit Check kernel config against requirements"
|
||||||
@echo " make shellcheck Lint all shell scripts"
|
@echo " make shellcheck Lint all shell scripts"
|
||||||
@echo ""
|
@echo ""
|
||||||
|
|||||||
87
README.md
87
README.md
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
An immutable, bootable Linux distribution purpose-built for [KubeSolo](https://github.com/portainer/kubesolo) — Portainer's ultra-lightweight single-node Kubernetes.
|
An immutable, bootable Linux distribution purpose-built for [KubeSolo](https://github.com/portainer/kubesolo) — Portainer's ultra-lightweight single-node Kubernetes.
|
||||||
|
|
||||||
> **Status:** All 5 phases complete. Boots and runs K8s workloads.
|
> **Status (v0.3.0):** x86_64 and generic ARM64 (UEFI / virtio / mainline kernel) both build and boot end-to-end. Update agent has an explicit state machine, OCI registry distribution alongside HTTP, channel + maintenance-window + version-stepping-stone gates, and auto-rollback. ARM64 Raspberry Pi support remains paused pending physical hardware. See [docs/release-notes-0.3.0.md](docs/release-notes-0.3.0.md) for the full v0.3.0 changelog.
|
||||||
|
|
||||||
## What is this?
|
## What is this?
|
||||||
|
|
||||||
@@ -24,29 +24,58 @@ KubeSolo OS combines **Tiny Core Linux** (~11 MB) with **KubeSolo** (single-bina
|
|||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
|
### x86_64 ISO
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Fetch Tiny Core ISO + KubeSolo binary
|
make fetch # Tiny Core ISO + KubeSolo binary
|
||||||
make fetch
|
make kernel # Custom kernel (first time only, ~25 min, cached)
|
||||||
|
|
||||||
# Build custom kernel (first time only, ~25 min, cached)
|
|
||||||
make kernel
|
|
||||||
|
|
||||||
# Build Go binaries
|
|
||||||
make build-cloudinit build-update-agent
|
make build-cloudinit build-update-agent
|
||||||
|
|
||||||
# Build bootable ISO
|
|
||||||
make rootfs initramfs iso
|
make rootfs initramfs iso
|
||||||
|
|
||||||
# Test in QEMU
|
|
||||||
make dev-vm
|
make dev-vm
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Generic ARM64 disk image (v0.3.0+)
|
||||||
|
|
||||||
|
For Graviton / Ampere / generic UEFI ARM64 hosts:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make kernel-arm64 # Mainline 6.12 LTS kernel (first time only, ~30-60 min)
|
||||||
|
make rootfs-arm64 # Mainline kernel modules + KubeSolo arm64
|
||||||
|
make disk-image-arm64 # UEFI-bootable A/B GPT image
|
||||||
|
make test-boot-arm64-disk # boot smoke test under qemu-system-aarch64
|
||||||
|
```
|
||||||
|
|
||||||
|
### Raspberry Pi (work in progress)
|
||||||
|
|
||||||
|
Build path lives at `make kernel-rpi` / `make rpi-image`; needs physical
|
||||||
|
hardware to validate the firmware + autoboot.txt path. See
|
||||||
|
[docs/arm64-architecture.md](docs/arm64-architecture.md) for the two-track
|
||||||
|
build layout.
|
||||||
|
|
||||||
Or build everything at once inside Docker:
|
Or build everything at once inside Docker:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make docker-build
|
make docker-build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
After boot, retrieve the kubeconfig and manage your cluster from the host:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -s http://localhost:8080 > ~/.kube/kubesolo-config
|
||||||
|
export KUBECONFIG=~/.kube/kubesolo-config
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Portainer Edge Agent
|
||||||
|
|
||||||
|
Pass Edge credentials via boot parameters:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./hack/dev-vm.sh --edge-id=YOUR_EDGE_ID --edge-key=YOUR_EDGE_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
Or configure via [cloud-init YAML](cloud-init/examples/portainer-edge.yaml).
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
**Build host:**
|
**Build host:**
|
||||||
@@ -104,7 +133,7 @@ Unnecessary subsystems (sound, GPU, wireless, Bluetooth, etc.) are stripped to k
|
|||||||
|
|
||||||
## Cloud-Init
|
## Cloud-Init
|
||||||
|
|
||||||
First-boot configuration via a simple YAML schema:
|
First-boot configuration via a simple YAML schema. All [documented KubeSolo flags](https://www.kubesolo.io/documentation#install) are supported:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
hostname: edge-node-01
|
hostname: edge-node-01
|
||||||
@@ -115,10 +144,15 @@ network:
|
|||||||
dns:
|
dns:
|
||||||
- 8.8.8.8
|
- 8.8.8.8
|
||||||
kubesolo:
|
kubesolo:
|
||||||
node-name: edge-node-01
|
local-storage: true
|
||||||
portainer:
|
local-storage-shared-path: "/mnt/shared"
|
||||||
edge_id: "your-edge-id"
|
apiserver-extra-sans:
|
||||||
edge_key: "your-edge-key"
|
- edge-node-01.local
|
||||||
|
debug: false
|
||||||
|
pprof-server: false
|
||||||
|
portainer-edge-id: "your-edge-id"
|
||||||
|
portainer-edge-key: "your-edge-key"
|
||||||
|
portainer-edge-async: true
|
||||||
```
|
```
|
||||||
|
|
||||||
See [docs/cloud-init.md](docs/cloud-init.md) and the [examples](cloud-init/examples/).
|
See [docs/cloud-init.md](docs/cloud-init.md) and the [examples](cloud-init/examples/).
|
||||||
@@ -189,7 +223,7 @@ Metrics include: `kubesolo_os_info`, `boot_success`, `boot_counter`, `uptime_sec
|
|||||||
| `make build-cross` | Cross-compile for amd64 + arm64 |
|
| `make build-cross` | Cross-compile for amd64 + arm64 |
|
||||||
| `make docker-build` | Build everything in Docker |
|
| `make docker-build` | Build everything in Docker |
|
||||||
| `make quick` | Fast rebuild (re-inject + repack + ISO) |
|
| `make quick` | Fast rebuild (re-inject + repack + ISO) |
|
||||||
| `make dev-vm` | Launch QEMU dev VM |
|
| `make dev-vm` | Launch QEMU dev VM (Linux + macOS) |
|
||||||
| `make test-all` | Run all tests |
|
| `make test-all` | Run all tests |
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
@@ -204,13 +238,20 @@ Metrics include: `kubesolo_os_info`, `boot_success`, `boot_counter`, `uptime_sec
|
|||||||
|
|
||||||
| Phase | Scope | Status |
|
| Phase | Scope | Status |
|
||||||
|-------|-------|--------|
|
|-------|-------|--------|
|
||||||
| 1 | PoC: boot Tiny Core + KubeSolo, verify K8s | Complete |
|
| 1 | PoC: boot Tiny Core + KubeSolo, verify K8s | Complete (x86_64) |
|
||||||
| 2 | Cloud-init Go parser, network, hostname | Complete |
|
| 2 | Cloud-init Go parser, network, hostname | Complete |
|
||||||
| 3 | A/B atomic updates, GRUB, rollback agent | Complete |
|
| 3 | A/B atomic updates, GRUB, rollback agent | Complete (x86_64) |
|
||||||
| 4 | Ed25519 signing, Portainer Edge, SSH extension | Complete |
|
| 4 | Ed25519 signing, Portainer Edge, SSH extension | Complete |
|
||||||
| 5 | CI/CD, OCI distribution, Prometheus metrics, ARM64 | Complete |
|
| 5 | CI/CD, OCI distribution, Prometheus metrics, ARM64 cross-compile | Complete |
|
||||||
| - | Custom kernel build for container runtime fixes | Complete |
|
| 6 | Security hardening, AppArmor | Complete |
|
||||||
|
| - | Custom kernel build for container runtime fixes | Complete (x86_64) |
|
||||||
|
| 7 | ARM64 generic (mainline kernel, UEFI, virtio) | Complete (v0.3.0, QEMU validated) |
|
||||||
|
| 8 | Update engine v2 (state machine, channels, OCI, pre-flight gates) | Complete (v0.3.0) |
|
||||||
|
| - | ARM64 Raspberry Pi (custom kernel, firmware, SD card image) | Paused — needs hardware |
|
||||||
|
| - | OCI cosign signature verification | Planned for v0.3.1 |
|
||||||
|
| - | LABEL=KSOLODATA on ARM64 (replace blkid/findfs path) | Planned for v0.3.1 |
|
||||||
|
| - | Real-hardware ARM64 validation (Graviton / Ampere) | Planned for v0.3.1 |
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
TBD
|
MIT License — see [LICENSE](LICENSE) for details.
|
||||||
|
|||||||
@@ -18,6 +18,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
file \
|
file \
|
||||||
flex \
|
flex \
|
||||||
genisoimage \
|
genisoimage \
|
||||||
|
grub-common \
|
||||||
|
grub-efi-amd64-bin \
|
||||||
|
grub-efi-arm64-bin \
|
||||||
|
grub-pc-bin \
|
||||||
|
grub2-common \
|
||||||
gzip \
|
gzip \
|
||||||
isolinux \
|
isolinux \
|
||||||
iptables \
|
iptables \
|
||||||
@@ -31,17 +36,32 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
syslinux \
|
syslinux \
|
||||||
syslinux-common \
|
syslinux-common \
|
||||||
syslinux-utils \
|
syslinux-utils \
|
||||||
|
apparmor \
|
||||||
|
apparmor-utils \
|
||||||
|
gcc-aarch64-linux-gnu \
|
||||||
|
binutils-aarch64-linux-gnu \
|
||||||
|
busybox-static \
|
||||||
|
git \
|
||||||
|
kpartx \
|
||||||
|
unzip \
|
||||||
wget \
|
wget \
|
||||||
xorriso \
|
xorriso \
|
||||||
xz-utils \
|
xz-utils \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Go (for building cloud-init and update agent)
|
# Install Go (for building cloud-init and update agent)
|
||||||
ARG GO_VERSION=1.24.0
|
ARG GO_VERSION=1.25.5
|
||||||
RUN curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" \
|
RUN curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" \
|
||||||
| tar -C /usr/local -xzf -
|
| tar -C /usr/local -xzf -
|
||||||
ENV PATH="/usr/local/go/bin:${PATH}"
|
ENV PATH="/usr/local/go/bin:${PATH}"
|
||||||
|
|
||||||
|
# Install oras (OCI artifact CLI) for push-oci-artifact.sh.
|
||||||
|
# Bump ORAS_VERSION when pushing breaks or when oras gains useful flags.
|
||||||
|
ARG ORAS_VERSION=1.2.3
|
||||||
|
RUN curl -fsSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" \
|
||||||
|
| tar -C /usr/local/bin -xzf - oras \
|
||||||
|
&& chmod +x /usr/local/bin/oras
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
COPY . /build
|
COPY . /build
|
||||||
|
|
||||||
|
|||||||
@@ -128,7 +128,12 @@ echo "Security:"
|
|||||||
check_config CONFIG_SECCOMP recommended "Seccomp (container security)"
|
check_config CONFIG_SECCOMP recommended "Seccomp (container security)"
|
||||||
check_config CONFIG_SECCOMP_FILTER recommended "Seccomp BPF filter"
|
check_config CONFIG_SECCOMP_FILTER recommended "Seccomp BPF filter"
|
||||||
check_config CONFIG_BPF_SYSCALL recommended "BPF syscall"
|
check_config CONFIG_BPF_SYSCALL recommended "BPF syscall"
|
||||||
check_config CONFIG_AUDIT recommended "Audit framework"
|
check_config CONFIG_AUDIT mandatory "Audit framework"
|
||||||
|
check_config CONFIG_AUDITSYSCALL mandatory "Audit system call events"
|
||||||
|
check_config CONFIG_SECURITY mandatory "Security framework"
|
||||||
|
check_config CONFIG_SECURITYFS mandatory "Security filesystem"
|
||||||
|
check_config CONFIG_SECURITY_APPARMOR mandatory "AppArmor LSM"
|
||||||
|
check_config CONFIG_SECURITY_NETWORK recommended "Network security hooks"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# --- Crypto ---
|
# --- Crypto ---
|
||||||
|
|||||||
90
build/config/kernel-container.fragment
Normal file
90
build/config/kernel-container.fragment
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# KubeSolo OS — Shared kernel config fragment for container workloads
|
||||||
|
#
|
||||||
|
# Applied on top of:
|
||||||
|
# - Tiny Core stock config (x86_64) via build-kernel.sh
|
||||||
|
# - mainline kernel.org arm64 defconfig via build-kernel-arm64.sh
|
||||||
|
# - bcm2711_defconfig / bcm2712_defconfig via build-kernel-rpi.sh
|
||||||
|
#
|
||||||
|
# All entries here are architecture-agnostic.
|
||||||
|
# Apply this fragment twice with `make olddefconfig` between passes — TC's stock
|
||||||
|
# config has CONFIG_SECURITY disabled, which causes a single-pass olddefconfig
|
||||||
|
# to strip the security subtree before its dependencies (SYSFS, MULTIUSER) are
|
||||||
|
# resolved.
|
||||||
|
|
||||||
|
# cgroup v2 (mandatory for containerd/runc)
|
||||||
|
CONFIG_CGROUPS=y
|
||||||
|
CONFIG_CGROUP_CPUACCT=y
|
||||||
|
CONFIG_CGROUP_DEVICE=y
|
||||||
|
CONFIG_CGROUP_FREEZER=y
|
||||||
|
CONFIG_CGROUP_SCHED=y
|
||||||
|
CONFIG_CGROUP_PIDS=y
|
||||||
|
CONFIG_MEMCG=y
|
||||||
|
CONFIG_CGROUP_BPF=y
|
||||||
|
CONFIG_CFS_BANDWIDTH=y
|
||||||
|
|
||||||
|
# BPF (required for cgroup v2 device control)
|
||||||
|
CONFIG_BPF=y
|
||||||
|
CONFIG_BPF_SYSCALL=y
|
||||||
|
|
||||||
|
# Namespaces (mandatory for containers)
|
||||||
|
CONFIG_NAMESPACES=y
|
||||||
|
CONFIG_NET_NS=y
|
||||||
|
CONFIG_PID_NS=y
|
||||||
|
CONFIG_USER_NS=y
|
||||||
|
CONFIG_UTS_NS=y
|
||||||
|
CONFIG_IPC_NS=y
|
||||||
|
|
||||||
|
# Device management
|
||||||
|
CONFIG_DEVTMPFS=y
|
||||||
|
CONFIG_DEVTMPFS_MOUNT=y
|
||||||
|
|
||||||
|
# Filesystem
|
||||||
|
CONFIG_OVERLAY_FS=y
|
||||||
|
CONFIG_SQUASHFS=y
|
||||||
|
CONFIG_EXT4_FS=y
|
||||||
|
CONFIG_VFAT_FS=y
|
||||||
|
|
||||||
|
# Networking
|
||||||
|
CONFIG_BRIDGE=m
|
||||||
|
CONFIG_NETFILTER=y
|
||||||
|
CONFIG_NF_CONNTRACK=m
|
||||||
|
CONFIG_NF_NAT=m
|
||||||
|
CONFIG_NF_TABLES=m
|
||||||
|
CONFIG_VETH=m
|
||||||
|
CONFIG_VXLAN=m
|
||||||
|
|
||||||
|
# Security: AppArmor + Audit
|
||||||
|
CONFIG_AUDIT=y
|
||||||
|
CONFIG_AUDITSYSCALL=y
|
||||||
|
CONFIG_SECURITY=y
|
||||||
|
CONFIG_SECURITYFS=y
|
||||||
|
CONFIG_SECURITY_NETWORK=y
|
||||||
|
CONFIG_SECURITY_APPARMOR=y
|
||||||
|
CONFIG_DEFAULT_SECURITY_APPARMOR=y
|
||||||
|
CONFIG_LSM=lockdown,yama,apparmor
|
||||||
|
|
||||||
|
# Security: seccomp
|
||||||
|
CONFIG_SECCOMP=y
|
||||||
|
CONFIG_SECCOMP_FILTER=y
|
||||||
|
|
||||||
|
# Crypto (image verification)
|
||||||
|
CONFIG_CRYPTO_SHA256=y
|
||||||
|
|
||||||
|
# Disable unnecessary subsystems for headless edge appliance
|
||||||
|
# CONFIG_SOUND is not set
|
||||||
|
# CONFIG_DRM is not set
|
||||||
|
# CONFIG_KVM is not set
|
||||||
|
# CONFIG_MEDIA_SUPPORT is not set
|
||||||
|
# CONFIG_WIRELESS is not set
|
||||||
|
# CONFIG_WLAN is not set
|
||||||
|
# CONFIG_CFG80211 is not set
|
||||||
|
# CONFIG_BT is not set
|
||||||
|
# CONFIG_NFC is not set
|
||||||
|
# CONFIG_INFINIBAND is not set
|
||||||
|
# CONFIG_PCMCIA is not set
|
||||||
|
# CONFIG_HAMRADIO is not set
|
||||||
|
# CONFIG_ISDN is not set
|
||||||
|
# CONFIG_ATM is not set
|
||||||
|
# CONFIG_INPUT_JOYSTICK is not set
|
||||||
|
# CONFIG_INPUT_TABLET is not set
|
||||||
|
# CONFIG_FPGA is not set
|
||||||
81
build/config/modules-arm64.list
Normal file
81
build/config/modules-arm64.list
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# Kernel modules loaded at boot by init (ARM64 / Raspberry Pi)
|
||||||
|
# One module per line. Lines starting with # are ignored.
|
||||||
|
# Modules are loaded in order listed — dependencies must come first.
|
||||||
|
|
||||||
|
# Network device drivers (loaded early so interfaces are available)
|
||||||
|
# Note: no e1000/e1000e on ARM64 — those are x86 Intel NIC drivers
|
||||||
|
virtio_net
|
||||||
|
|
||||||
|
# Virtio support (for QEMU VMs — block, entropy)
|
||||||
|
virtio_blk
|
||||||
|
virtio_rng
|
||||||
|
|
||||||
|
# Raspberry Pi specific (USB Ethernet on Pi 4 is built-in, no module needed)
|
||||||
|
# Pi 5 uses PCIe ethernet, also typically built-in
|
||||||
|
|
||||||
|
# Filesystem — overlay (required for containerd)
|
||||||
|
overlay
|
||||||
|
|
||||||
|
# Netfilter dependencies (must load before conntrack)
|
||||||
|
nf_defrag_ipv4
|
||||||
|
nf_defrag_ipv6
|
||||||
|
|
||||||
|
# Netfilter / connection tracking (required for kube-proxy)
|
||||||
|
nf_conntrack
|
||||||
|
nf_nat
|
||||||
|
nf_conntrack_netlink
|
||||||
|
|
||||||
|
# nftables (modern iptables backend)
|
||||||
|
nf_tables
|
||||||
|
nft_compat
|
||||||
|
nft_chain_nat
|
||||||
|
nft_ct
|
||||||
|
nft_masq
|
||||||
|
nft_nat
|
||||||
|
nft_redir
|
||||||
|
|
||||||
|
# Netfilter xt match/target modules (used by kube-proxy iptables rules via nft_compat)
|
||||||
|
xt_conntrack
|
||||||
|
xt_MASQUERADE
|
||||||
|
xt_mark
|
||||||
|
xt_comment
|
||||||
|
xt_multiport
|
||||||
|
xt_nat
|
||||||
|
xt_addrtype
|
||||||
|
xt_connmark
|
||||||
|
xt_REDIRECT
|
||||||
|
xt_recent
|
||||||
|
xt_statistic
|
||||||
|
xt_set
|
||||||
|
|
||||||
|
# nft extras (reject, fib — used by kube-proxy nf_tables rules)
|
||||||
|
nft_reject
|
||||||
|
nft_reject_ipv4
|
||||||
|
nft_reject_ipv6
|
||||||
|
nft_fib
|
||||||
|
nft_fib_ipv4
|
||||||
|
nft_fib_ipv6
|
||||||
|
|
||||||
|
# Reject targets (used by kube-proxy iptables-restore rules)
|
||||||
|
nf_reject_ipv4
|
||||||
|
nf_reject_ipv6
|
||||||
|
ipt_REJECT
|
||||||
|
ip6t_REJECT
|
||||||
|
|
||||||
|
# nfacct extension (kube-proxy probes for it)
|
||||||
|
xt_nfacct
|
||||||
|
|
||||||
|
# Networking — bridge and netfilter (required for K8s pod networking)
|
||||||
|
# Load order: llc → stp → bridge → br_netfilter
|
||||||
|
llc
|
||||||
|
stp
|
||||||
|
bridge
|
||||||
|
br_netfilter
|
||||||
|
veth
|
||||||
|
vxlan
|
||||||
|
|
||||||
|
# IPVS — useful for kube-proxy IPVS mode and CNI plugins
|
||||||
|
ip_vs
|
||||||
|
ip_vs_rr
|
||||||
|
ip_vs_wrr
|
||||||
|
ip_vs_sh
|
||||||
@@ -9,11 +9,47 @@ TINYCORE_ISO=CorePure64-${TINYCORE_VERSION}.iso
|
|||||||
TINYCORE_ISO_URL=${TINYCORE_MIRROR}/${TINYCORE_VERSION%%.*}.x/${TINYCORE_ARCH}/release/${TINYCORE_ISO}
|
TINYCORE_ISO_URL=${TINYCORE_MIRROR}/${TINYCORE_VERSION%%.*}.x/${TINYCORE_ARCH}/release/${TINYCORE_ISO}
|
||||||
|
|
||||||
# KubeSolo
|
# KubeSolo
|
||||||
|
# Pinned release tag from https://github.com/portainer/kubesolo/releases.
|
||||||
|
# Bump here and re-run `make fetch` to pull a new version.
|
||||||
|
KUBESOLO_VERSION=v1.1.5
|
||||||
KUBESOLO_INSTALL_URL=https://get.kubesolo.io
|
KUBESOLO_INSTALL_URL=https://get.kubesolo.io
|
||||||
|
# Per-arch SHA256 of the musl tarball (verified at fetch time when non-empty).
|
||||||
|
KUBESOLO_SHA256_AMD64=565bd5fd98fc8ce09160e646b55de3493c782d74c0e0c46ccf130ff4bcabab81
|
||||||
|
KUBESOLO_SHA256_ARM64=db865a5e9b2617d595f9c2b7d011272edc94587621a9690e2de0f47cc94f0748
|
||||||
|
|
||||||
# Build tools (used inside builder container)
|
# Build tools (used inside builder container)
|
||||||
GRUB_VERSION=2.12
|
GRUB_VERSION=2.12
|
||||||
SYSLINUX_VERSION=6.03
|
SYSLINUX_VERSION=6.03
|
||||||
|
|
||||||
|
# SHA256 checksums for supply chain verification
|
||||||
|
# Populate by running: sha256sum build/cache/<file>
|
||||||
|
# Leave empty to skip verification (useful for first fetch)
|
||||||
|
TINYCORE_ISO_SHA256=""
|
||||||
|
NETFILTER_TCZ_SHA256=""
|
||||||
|
NET_BRIDGING_TCZ_SHA256=""
|
||||||
|
IPTABLES_TCZ_SHA256=""
|
||||||
|
|
||||||
|
# piCore64 (ARM64 — Raspberry Pi)
|
||||||
|
PICORE_VERSION=15.0.0
|
||||||
|
PICORE_ARCH=aarch64
|
||||||
|
PICORE_IMAGE=piCore64-${PICORE_VERSION}.zip
|
||||||
|
PICORE_IMAGE_URL=http://www.tinycorelinux.net/${PICORE_VERSION%%.*}.x/${PICORE_ARCH}/releases/RPi/${PICORE_IMAGE}
|
||||||
|
|
||||||
|
# Raspberry Pi firmware (boot blobs, DTBs)
|
||||||
|
RPI_FIRMWARE_TAG=1.20240529
|
||||||
|
RPI_FIRMWARE_URL=https://github.com/raspberrypi/firmware/archive/refs/tags/${RPI_FIRMWARE_TAG}.tar.gz
|
||||||
|
|
||||||
|
# Raspberry Pi kernel source
|
||||||
|
RPI_KERNEL_BRANCH=rpi-6.6.y
|
||||||
|
RPI_KERNEL_REPO=https://github.com/raspberrypi/linux
|
||||||
|
|
||||||
|
# Mainline Linux kernel (for generic ARM64 — kernel.org LTS)
|
||||||
|
# Bump within the 6.12 LTS series as patch levels release.
|
||||||
|
# 6.12 LTS is supported until Dec 2029.
|
||||||
|
MAINLINE_KERNEL_VERSION=6.12.10
|
||||||
|
MAINLINE_KERNEL_MAJOR=v6.x
|
||||||
|
MAINLINE_KERNEL_URL=https://cdn.kernel.org/pub/linux/kernel/${MAINLINE_KERNEL_MAJOR}/linux-${MAINLINE_KERNEL_VERSION}.tar.xz
|
||||||
|
MAINLINE_KERNEL_SHA256=""
|
||||||
|
|
||||||
# Output naming
|
# Output naming
|
||||||
OS_NAME=kubesolo-os
|
OS_NAME=kubesolo-os
|
||||||
|
|||||||
93
build/grub/grub-arm64.cfg
Normal file
93
build/grub/grub-arm64.cfg
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# KubeSolo OS — GRUB Configuration (ARM64)
|
||||||
|
# A/B partition boot with automatic rollback.
|
||||||
|
#
|
||||||
|
# Same A/B logic as build/grub/grub.cfg; only the console parameters differ
|
||||||
|
# (ARM64 PL011 / 16550-compat UART rather than x86 ttyS0).
|
||||||
|
#
|
||||||
|
# Partition layout:
|
||||||
|
# (hd0,gpt1) — EFI/Boot (256 MB, FAT32) — contains GRUB + grubenv
|
||||||
|
# (hd0,gpt2) — System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||||
|
# (hd0,gpt3) — System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz
|
||||||
|
# (hd0,gpt4) — Data (remaining, ext4) — persistent K8s state
|
||||||
|
|
||||||
|
set default=0
|
||||||
|
set timeout=3
|
||||||
|
|
||||||
|
load_env
|
||||||
|
|
||||||
|
# --- A/B Rollback Logic (identical to amd64 grub.cfg) ---
|
||||||
|
|
||||||
|
if [ "${boot_success}" != "1" ]; then
|
||||||
|
if [ "${boot_counter}" = "0" ]; then
|
||||||
|
if [ "${active_slot}" = "A" ]; then
|
||||||
|
set active_slot=B
|
||||||
|
else
|
||||||
|
set active_slot=A
|
||||||
|
fi
|
||||||
|
save_env active_slot
|
||||||
|
set boot_counter=3
|
||||||
|
save_env boot_counter
|
||||||
|
else
|
||||||
|
if [ "${boot_counter}" = "3" ]; then
|
||||||
|
set boot_counter=2
|
||||||
|
elif [ "${boot_counter}" = "2" ]; then
|
||||||
|
set boot_counter=1
|
||||||
|
elif [ "${boot_counter}" = "1" ]; then
|
||||||
|
set boot_counter=0
|
||||||
|
fi
|
||||||
|
save_env boot_counter
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
set boot_success=0
|
||||||
|
save_env boot_success
|
||||||
|
|
||||||
|
if [ "${active_slot}" = "A" ]; then
|
||||||
|
set root='(hd0,gpt2)'
|
||||||
|
set slot_label="System A"
|
||||||
|
else
|
||||||
|
set root='(hd0,gpt3)'
|
||||||
|
set slot_label="System B"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- ARM64 console string ---
|
||||||
|
# Order matters: the LAST `console=` is the primary system console (where /dev/console
|
||||||
|
# points and where init's stdout/stderr land). Earlier `console=` entries get mirrored
|
||||||
|
# kernel output but don't carry process I/O.
|
||||||
|
#
|
||||||
|
# Covers Graviton/16550 (ttyS0) as secondary and QEMU virt / PL011 / Ampere (ttyAMA0)
|
||||||
|
# as primary. ttyAMA0 must be last for `-nographic` QEMU + most ARM64 SBCs.
|
||||||
|
#
|
||||||
|
# `quiet` is intentionally omitted from the default entry while we stabilise the
|
||||||
|
# generic ARM64 boot path. Add back once boots are reliable.
|
||||||
|
|
||||||
|
menuentry "KubeSolo OS (${slot_label})" {
|
||||||
|
echo "Booting KubeSolo OS from ${slot_label}..."
|
||||||
|
echo "Boot counter: ${boot_counter}, Boot success: ${boot_success}"
|
||||||
|
linux /vmlinuz init=/sbin/init kubesolo.data=/dev/vda4 console=ttyS0,115200 console=ttyAMA0,115200
|
||||||
|
initrd /kubesolo-os.gz
|
||||||
|
}
|
||||||
|
|
||||||
|
menuentry "KubeSolo OS (${slot_label}) — Debug Mode" {
|
||||||
|
echo "Booting KubeSolo OS (debug) from ${slot_label}..."
|
||||||
|
linux /vmlinuz kubesolo.data=/dev/vda4 kubesolo.debug console=ttyS0,115200 console=ttyAMA0,115200
|
||||||
|
initrd /kubesolo-os.gz
|
||||||
|
}
|
||||||
|
|
||||||
|
menuentry "KubeSolo OS — Emergency Shell" {
|
||||||
|
echo "Booting to emergency shell..."
|
||||||
|
linux /vmlinuz init=/sbin/init kubesolo.shell console=ttyS0,115200 console=ttyAMA0,115200
|
||||||
|
initrd /kubesolo-os.gz
|
||||||
|
}
|
||||||
|
|
||||||
|
menuentry "KubeSolo OS — Boot Other Slot" {
|
||||||
|
if [ "${active_slot}" = "A" ]; then
|
||||||
|
set root='(hd0,gpt3)'
|
||||||
|
echo "Booting from System B (passive)..."
|
||||||
|
else
|
||||||
|
set root='(hd0,gpt2)'
|
||||||
|
echo "Booting from System A (passive)..."
|
||||||
|
fi
|
||||||
|
linux /vmlinuz kubesolo.data=/dev/vda4 kubesolo.debug console=ttyS0,115200 console=ttyAMA0,115200
|
||||||
|
initrd /kubesolo-os.gz
|
||||||
|
}
|
||||||
52
build/rootfs/etc/apparmor.d/containerd
Normal file
52
build/rootfs/etc/apparmor.d/containerd
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# AppArmor profile for containerd
|
||||||
|
# Start in complain mode to log without blocking
|
||||||
|
|
||||||
|
#include <tunables/global>
|
||||||
|
|
||||||
|
profile containerd /usr/bin/containerd flags=(complain) {
|
||||||
|
#include <abstractions/base>
|
||||||
|
|
||||||
|
# Binary and shared libraries
|
||||||
|
/usr/bin/containerd mr,
|
||||||
|
/usr/lib/** mr,
|
||||||
|
/lib/** mr,
|
||||||
|
|
||||||
|
# Containerd runtime state
|
||||||
|
/var/lib/containerd/** rw,
|
||||||
|
/run/containerd/** rw,
|
||||||
|
|
||||||
|
# Container image layers and snapshots
|
||||||
|
/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/** rw,
|
||||||
|
|
||||||
|
# CNI networking
|
||||||
|
/etc/cni/** r,
|
||||||
|
/opt/cni/bin/** ix,
|
||||||
|
|
||||||
|
# Proc and sys access for containers
|
||||||
|
@{PROC}/** r,
|
||||||
|
/sys/** r,
|
||||||
|
|
||||||
|
# Device access for containers
|
||||||
|
/dev/** rw,
|
||||||
|
|
||||||
|
# Network access
|
||||||
|
network,
|
||||||
|
|
||||||
|
# Container runtime needs broad capabilities
|
||||||
|
capability,
|
||||||
|
|
||||||
|
# Allow executing container runtimes
|
||||||
|
/usr/bin/containerd-shim-runc-v2 ix,
|
||||||
|
/usr/bin/runc ix,
|
||||||
|
/usr/sbin/runc ix,
|
||||||
|
|
||||||
|
# Temp files
|
||||||
|
/tmp/** rw,
|
||||||
|
|
||||||
|
# Log files
|
||||||
|
/var/log/** rw,
|
||||||
|
|
||||||
|
# Signal handling for child processes
|
||||||
|
signal,
|
||||||
|
ptrace,
|
||||||
|
}
|
||||||
56
build/rootfs/etc/apparmor.d/kubelet
Normal file
56
build/rootfs/etc/apparmor.d/kubelet
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# AppArmor profile for kubesolo (kubelet + control plane)
|
||||||
|
# Start in complain mode to log without blocking
|
||||||
|
|
||||||
|
#include <tunables/global>
|
||||||
|
|
||||||
|
profile kubesolo /usr/bin/kubesolo flags=(complain) {
|
||||||
|
#include <abstractions/base>
|
||||||
|
|
||||||
|
# Binary and shared libraries
|
||||||
|
/usr/bin/kubesolo mr,
|
||||||
|
/usr/lib/** mr,
|
||||||
|
/lib/** mr,
|
||||||
|
|
||||||
|
# KubeSolo state (etcd/SQLite, certificates, manifests)
|
||||||
|
/var/lib/kubesolo/** rw,
|
||||||
|
|
||||||
|
# KubeSolo configuration
|
||||||
|
/etc/kubesolo/** r,
|
||||||
|
|
||||||
|
# Containerd socket
|
||||||
|
/run/containerd/** rw,
|
||||||
|
|
||||||
|
# CNI networking
|
||||||
|
/etc/cni/** r,
|
||||||
|
/opt/cni/bin/** ix,
|
||||||
|
|
||||||
|
# Proc and sys access
|
||||||
|
@{PROC}/** r,
|
||||||
|
/sys/** r,
|
||||||
|
|
||||||
|
# Device access
|
||||||
|
/dev/** rw,
|
||||||
|
|
||||||
|
# Network access (API server, kubelet, etcd)
|
||||||
|
network,
|
||||||
|
|
||||||
|
# Control plane needs broad capabilities
|
||||||
|
capability,
|
||||||
|
|
||||||
|
# Kubectl and other tools
|
||||||
|
/usr/bin/kubectl ix,
|
||||||
|
/usr/local/bin/** ix,
|
||||||
|
|
||||||
|
# Temp files
|
||||||
|
/tmp/** rw,
|
||||||
|
|
||||||
|
# Log files
|
||||||
|
/var/log/** rw,
|
||||||
|
|
||||||
|
# Kubelet needs to manage pods
|
||||||
|
/var/lib/kubelet/** rw,
|
||||||
|
|
||||||
|
# Signal handling
|
||||||
|
signal,
|
||||||
|
ptrace,
|
||||||
|
}
|
||||||
27
build/rootfs/etc/sysctl.d/security.conf
Normal file
27
build/rootfs/etc/sysctl.d/security.conf
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Security hardening — applied automatically by 40-sysctl.sh
|
||||||
|
# Network: anti-spoofing
|
||||||
|
net.ipv4.conf.all.rp_filter = 1
|
||||||
|
net.ipv4.conf.default.rp_filter = 1
|
||||||
|
# Network: SYN flood protection
|
||||||
|
net.ipv4.tcp_syncookies = 1
|
||||||
|
# Network: ICMP hardening
|
||||||
|
net.ipv4.conf.all.accept_redirects = 0
|
||||||
|
net.ipv4.conf.default.accept_redirects = 0
|
||||||
|
net.ipv4.conf.all.send_redirects = 0
|
||||||
|
net.ipv4.conf.default.send_redirects = 0
|
||||||
|
net.ipv4.icmp_echo_ignore_broadcasts = 1
|
||||||
|
net.ipv4.icmp_ignore_bogus_error_responses = 1
|
||||||
|
net.ipv4.conf.all.log_martians = 1
|
||||||
|
# Network: IPv6 hardening
|
||||||
|
net.ipv6.conf.all.accept_redirects = 0
|
||||||
|
net.ipv6.conf.default.accept_redirects = 0
|
||||||
|
net.ipv6.conf.all.accept_ra = 0
|
||||||
|
# Network: source routing
|
||||||
|
net.ipv4.conf.all.accept_source_route = 0
|
||||||
|
net.ipv4.conf.default.accept_source_route = 0
|
||||||
|
# Kernel: information disclosure
|
||||||
|
kernel.kptr_restrict = 2
|
||||||
|
kernel.dmesg_restrict = 1
|
||||||
|
kernel.perf_event_paranoid = 3
|
||||||
|
# Kernel: core dump safety
|
||||||
|
fs.suid_dumpable = 0
|
||||||
219
build/scripts/build-kernel-arm64.sh
Executable file
219
build/scripts/build-kernel-arm64.sh
Executable file
@@ -0,0 +1,219 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# build-kernel-arm64.sh — Build generic ARM64 kernel (mainline LTS)
|
||||||
|
#
|
||||||
|
# Builds a Linux kernel from kernel.org mainline LTS source, suitable for:
|
||||||
|
# - qemu-system-aarch64 -machine virt
|
||||||
|
# - UEFI ARM64 hosts (Ampere, Graviton, generic ARM64 servers)
|
||||||
|
# - Future ARM64 SBCs with UEFI/u-boot generic-distro support
|
||||||
|
#
|
||||||
|
# This is the GENERIC ARM64 build track. For Raspberry Pi specifically
|
||||||
|
# (raspberrypi/linux fork, RPi firmware boot path, custom DTBs), see
|
||||||
|
# build/scripts/build-kernel-rpi.sh.
|
||||||
|
#
|
||||||
|
# Output is cached in $CACHE_DIR/kernel-arm64-generic/ and reused across builds.
|
||||||
|
#
|
||||||
|
# Requirements:
|
||||||
|
# - gcc-aarch64-linux-gnu (cross-compiler)
|
||||||
|
# - Standard kernel build deps (bc, bison, flex, libelf-dev, libssl-dev)
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
||||||
|
|
||||||
|
# shellcheck source=../config/versions.env
|
||||||
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
KVER="$MAINLINE_KERNEL_VERSION"
|
||||||
|
CUSTOM_KERNEL_DIR="$CACHE_DIR/kernel-arm64-generic"
|
||||||
|
CUSTOM_IMAGE="$CUSTOM_KERNEL_DIR/Image"
|
||||||
|
CUSTOM_MODULES="$CUSTOM_KERNEL_DIR/modules"
|
||||||
|
|
||||||
|
mkdir -p "$CACHE_DIR" "$CUSTOM_KERNEL_DIR"
|
||||||
|
|
||||||
|
# --- Skip if already built ---
|
||||||
|
if [ -f "$CUSTOM_IMAGE" ] && [ -d "$CUSTOM_MODULES/lib/modules/$KVER" ]; then
|
||||||
|
echo "==> Generic ARM64 kernel already built (cached)"
|
||||||
|
echo " Image: $CUSTOM_IMAGE ($(du -h "$CUSTOM_IMAGE" | cut -f1))"
|
||||||
|
echo " Kernel: $KVER"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Toolchain selection: native on arm64 hosts, cross-compile elsewhere ---
|
||||||
|
HOST_ARCH="$(uname -m)"
|
||||||
|
if [ "$HOST_ARCH" = "aarch64" ] || [ "$HOST_ARCH" = "arm64" ]; then
|
||||||
|
# Native build — use the host's gcc
|
||||||
|
if ! command -v gcc >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: gcc not found"
|
||||||
|
echo "Install: apt-get install build-essential"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
CROSS_COMPILE=""
|
||||||
|
echo "==> Native ARM64 build (host arch: $HOST_ARCH)"
|
||||||
|
else
|
||||||
|
# Cross-build from x86 — use aarch64 cross-compiler
|
||||||
|
if ! command -v aarch64-linux-gnu-gcc >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: aarch64-linux-gnu-gcc not found"
|
||||||
|
echo "Install: apt-get install gcc-aarch64-linux-gnu"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
CROSS_COMPILE="aarch64-linux-gnu-"
|
||||||
|
echo "==> Cross-building ARM64 kernel from $HOST_ARCH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Building generic ARM64 kernel (mainline $KVER)..."
|
||||||
|
echo " Source: $MAINLINE_KERNEL_URL"
|
||||||
|
|
||||||
|
# --- Download mainline kernel source ---
|
||||||
|
KERNEL_SRC_ARCHIVE="$CACHE_DIR/linux-${KVER}.tar.xz"
|
||||||
|
if [ ! -f "$KERNEL_SRC_ARCHIVE" ]; then
|
||||||
|
echo "==> Downloading mainline kernel source (~140 MB)..."
|
||||||
|
wget -q --show-progress -O "$KERNEL_SRC_ARCHIVE" "$MAINLINE_KERNEL_URL" 2>/dev/null || \
|
||||||
|
curl -fSL "$MAINLINE_KERNEL_URL" -o "$KERNEL_SRC_ARCHIVE"
|
||||||
|
echo " Downloaded: $(du -h "$KERNEL_SRC_ARCHIVE" | cut -f1)"
|
||||||
|
else
|
||||||
|
echo "==> Kernel source already cached: $(du -h "$KERNEL_SRC_ARCHIVE" | cut -f1)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Verify checksum if pinned ---
|
||||||
|
if [ -n "${MAINLINE_KERNEL_SHA256:-}" ]; then
|
||||||
|
actual=$(sha256sum "$KERNEL_SRC_ARCHIVE" | awk '{print $1}')
|
||||||
|
if [ "$actual" != "$MAINLINE_KERNEL_SHA256" ]; then
|
||||||
|
echo "ERROR: Kernel source checksum mismatch"
|
||||||
|
echo " Expected: $MAINLINE_KERNEL_SHA256"
|
||||||
|
echo " Got: $actual"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " Checksum OK"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Extract to case-sensitive fs ---
|
||||||
|
# The kernel source has files differing only by case (xt_mark.h vs xt_MARK.h).
|
||||||
|
# Build in /tmp (ext4 on Linux runners, case-sensitive).
|
||||||
|
KERNEL_BUILD_DIR="/tmp/kernel-build-arm64-generic"
|
||||||
|
rm -rf "$KERNEL_BUILD_DIR"
|
||||||
|
mkdir -p "$KERNEL_BUILD_DIR"
|
||||||
|
|
||||||
|
echo "==> Extracting kernel source..."
|
||||||
|
tar -xf "$KERNEL_SRC_ARCHIVE" -C "$KERNEL_BUILD_DIR"
|
||||||
|
KERNEL_SRC_DIR=$(find "$KERNEL_BUILD_DIR" -maxdepth 1 -type d -name 'linux-*' | head -1)
|
||||||
|
if [ -z "$KERNEL_SRC_DIR" ]; then
|
||||||
|
echo "ERROR: Could not find extracted source directory"
|
||||||
|
ls -la "$KERNEL_BUILD_DIR"/
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
cd "$KERNEL_SRC_DIR"
|
||||||
|
|
||||||
|
# --- Base config: arm64 defconfig (generic ARMv8) ---
|
||||||
|
echo "==> Applying arm64 defconfig..."
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" defconfig
|
||||||
|
|
||||||
|
# --- Apply shared container fragment ---
|
||||||
|
CONFIG_FRAGMENT="$PROJECT_ROOT/build/config/kernel-container.fragment"
|
||||||
|
if [ ! -f "$CONFIG_FRAGMENT" ]; then
|
||||||
|
echo "ERROR: Config fragment not found: $CONFIG_FRAGMENT"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
apply_fragment() {
|
||||||
|
local fragment="$1"
|
||||||
|
while IFS= read -r line; do
|
||||||
|
case "$line" in
|
||||||
|
"# CONFIG_"*" is not set")
|
||||||
|
key=$(echo "$line" | sed -n 's/^# \(CONFIG_[A-Z0-9_]*\) is not set$/\1/p')
|
||||||
|
[ -n "$key" ] && ./scripts/config --disable "${key#CONFIG_}"
|
||||||
|
continue
|
||||||
|
;;
|
||||||
|
\#*|"") continue ;;
|
||||||
|
esac
|
||||||
|
key="${line%%=*}"
|
||||||
|
value="${line#*=}"
|
||||||
|
case "$value" in
|
||||||
|
y) ./scripts/config --enable "$key" ;;
|
||||||
|
m) ./scripts/config --module "$key" ;;
|
||||||
|
n) ./scripts/config --disable "${key#CONFIG_}" ;;
|
||||||
|
*) ./scripts/config --set-str "$key" "$value" ;;
|
||||||
|
esac
|
||||||
|
done < "$fragment"
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "==> Applying kernel-container.fragment (pass 1)..."
|
||||||
|
apply_fragment "$CONFIG_FRAGMENT"
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" olddefconfig
|
||||||
|
|
||||||
|
echo "==> Applying kernel-container.fragment (pass 2)..."
|
||||||
|
apply_fragment "$CONFIG_FRAGMENT"
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" olddefconfig
|
||||||
|
|
||||||
|
# --- ARM64 virt-host specific enables ---
|
||||||
|
# These are needed for the generic UEFI/virtio boot path but are arch-specific
|
||||||
|
# so they live in this script rather than the shared fragment.
|
||||||
|
echo "==> Enabling ARM64 virt-host configs..."
|
||||||
|
./scripts/config --enable CONFIG_EFI
|
||||||
|
./scripts/config --enable CONFIG_EFI_STUB
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO_PCI
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO_BLK
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO_NET
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO_CONSOLE
|
||||||
|
./scripts/config --enable CONFIG_VIRTIO_MMIO
|
||||||
|
./scripts/config --enable CONFIG_HW_RANDOM_VIRTIO
|
||||||
|
# NVMe for cloud / bare-metal ARM64 hosts that don't use virtio
|
||||||
|
./scripts/config --enable CONFIG_BLK_DEV_NVME
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" olddefconfig
|
||||||
|
|
||||||
|
# --- Verify critical configs ---
|
||||||
|
echo "==> Verifying critical configs..."
|
||||||
|
for cfg in CGROUP_BPF SECURITY_APPARMOR AUDIT VIRTIO_BLK EFI_STUB; do
|
||||||
|
if ! grep -q "CONFIG_${cfg}=y" .config; then
|
||||||
|
echo "ERROR: CONFIG_${cfg} not set after olddefconfig"
|
||||||
|
grep "CONFIG_${cfg}" .config || echo " (not found)"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " CONFIG_${cfg}=y confirmed"
|
||||||
|
done
|
||||||
|
|
||||||
|
# --- Build kernel + modules (no DTBs — UEFI hosts use ACPI/virtio) ---
|
||||||
|
NPROC=$(nproc 2>/dev/null || echo 4)
|
||||||
|
echo ""
|
||||||
|
echo "==> Building ARM64 kernel (${NPROC} parallel jobs)..."
|
||||||
|
echo " This may take 20-40 minutes on a 6-core Odroid..."
|
||||||
|
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" -j"$NPROC" Image modules 2>&1
|
||||||
|
|
||||||
|
echo "==> Kernel build complete"
|
||||||
|
|
||||||
|
# --- Install to staging ---
|
||||||
|
echo "==> Installing Image..."
|
||||||
|
cp arch/arm64/boot/Image "$CUSTOM_IMAGE"
|
||||||
|
|
||||||
|
echo "==> Installing modules (stripped)..."
|
||||||
|
rm -rf "$CUSTOM_MODULES"
|
||||||
|
mkdir -p "$CUSTOM_MODULES"
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" \
|
||||||
|
INSTALL_MOD_STRIP=1 modules_install INSTALL_MOD_PATH="$CUSTOM_MODULES"
|
||||||
|
|
||||||
|
# Pick up actual kernel version (e.g. 6.12.10 if KVER differs from package suffix)
|
||||||
|
ACTUAL_KVER=$(ls "$CUSTOM_MODULES/lib/modules/" | head -1)
|
||||||
|
rm -f "$CUSTOM_MODULES/lib/modules/$ACTUAL_KVER/build"
|
||||||
|
rm -f "$CUSTOM_MODULES/lib/modules/$ACTUAL_KVER/source"
|
||||||
|
|
||||||
|
depmod -a -b "$CUSTOM_MODULES" "$ACTUAL_KVER" 2>/dev/null || true
|
||||||
|
|
||||||
|
cp .config "$CUSTOM_KERNEL_DIR/.config"
|
||||||
|
|
||||||
|
# --- Clean up ---
|
||||||
|
echo "==> Cleaning kernel build directory..."
|
||||||
|
cd /
|
||||||
|
rm -rf "$KERNEL_BUILD_DIR"
|
||||||
|
|
||||||
|
# --- Summary ---
|
||||||
|
echo ""
|
||||||
|
echo "==> Generic ARM64 kernel build complete:"
|
||||||
|
echo " Image: $CUSTOM_IMAGE ($(du -h "$CUSTOM_IMAGE" | cut -f1))"
|
||||||
|
echo " Kernel ver: $ACTUAL_KVER"
|
||||||
|
MOD_COUNT=$(find "$CUSTOM_MODULES/lib/modules/$ACTUAL_KVER" -name '*.ko*' 2>/dev/null | wc -l)
|
||||||
|
echo " Modules: $MOD_COUNT"
|
||||||
|
echo " Modules size: $(du -sh "$CUSTOM_MODULES/lib/modules/$ACTUAL_KVER" 2>/dev/null | cut -f1)"
|
||||||
|
echo ""
|
||||||
174
build/scripts/build-kernel-rpi.sh
Executable file
174
build/scripts/build-kernel-rpi.sh
Executable file
@@ -0,0 +1,174 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# build-kernel-rpi.sh — Build kernel for Raspberry Pi 4/5 (ARM64)
|
||||||
|
#
|
||||||
|
# Uses the official raspberrypi/linux kernel fork with bcm2711_defconfig as the
|
||||||
|
# base, overlaid with the shared container-config fragment.
|
||||||
|
#
|
||||||
|
# This is the RPi-specific build track. For generic ARM64 (UEFI / virtio /
|
||||||
|
# kernel.org mainline) see build/scripts/build-kernel-arm64.sh.
|
||||||
|
#
|
||||||
|
# Output is cached in $CACHE_DIR/custom-kernel-rpi/ and reused across builds.
|
||||||
|
#
|
||||||
|
# Requirements:
|
||||||
|
# - gcc-aarch64-linux-gnu (cross-compiler)
|
||||||
|
# - Standard kernel build deps (bc, bison, flex, etc.)
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
||||||
|
|
||||||
|
# shellcheck source=../config/versions.env
|
||||||
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
CUSTOM_KERNEL_DIR="$CACHE_DIR/custom-kernel-rpi"
|
||||||
|
CUSTOM_IMAGE="$CUSTOM_KERNEL_DIR/Image"
|
||||||
|
CUSTOM_MODULES="$CUSTOM_KERNEL_DIR/modules"
|
||||||
|
CUSTOM_DTBS="$CUSTOM_KERNEL_DIR/dtbs"
|
||||||
|
|
||||||
|
mkdir -p "$CACHE_DIR" "$CUSTOM_KERNEL_DIR"
|
||||||
|
|
||||||
|
# --- Skip if already built ---
|
||||||
|
if [ -f "$CUSTOM_IMAGE" ] && [ -d "$CUSTOM_MODULES" ]; then
|
||||||
|
echo "==> RPi kernel already built (cached)"
|
||||||
|
echo " Image: $CUSTOM_IMAGE ($(du -h "$CUSTOM_IMAGE" | cut -f1))"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Toolchain selection: native on arm64 hosts, cross-compile elsewhere ---
|
||||||
|
HOST_ARCH="$(uname -m)"
|
||||||
|
if [ "$HOST_ARCH" = "aarch64" ] || [ "$HOST_ARCH" = "arm64" ]; then
|
||||||
|
if ! command -v gcc >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: gcc not found"
|
||||||
|
echo "Install: apt-get install build-essential"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
CROSS_COMPILE=""
|
||||||
|
echo "==> Native ARM64 build (host arch: $HOST_ARCH)"
|
||||||
|
else
|
||||||
|
if ! command -v aarch64-linux-gnu-gcc >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: aarch64-linux-gnu-gcc not found"
|
||||||
|
echo "Install: apt-get install gcc-aarch64-linux-gnu"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
CROSS_COMPILE="aarch64-linux-gnu-"
|
||||||
|
echo "==> Cross-building RPi kernel from $HOST_ARCH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Building RPi kernel (raspberrypi/linux)..."
|
||||||
|
echo " Branch: $RPI_KERNEL_BRANCH"
|
||||||
|
echo " Repo: $RPI_KERNEL_REPO"
|
||||||
|
|
||||||
|
# --- Download kernel source ---
|
||||||
|
KERNEL_SRC_DIR="$CACHE_DIR/rpi-linux-${RPI_KERNEL_BRANCH}"
|
||||||
|
if [ ! -d "$KERNEL_SRC_DIR" ]; then
|
||||||
|
echo "==> Downloading RPi kernel source (shallow clone)..."
|
||||||
|
git clone --depth 1 --branch "$RPI_KERNEL_BRANCH" \
|
||||||
|
"$RPI_KERNEL_REPO" "$KERNEL_SRC_DIR"
|
||||||
|
else
|
||||||
|
echo "==> Kernel source already cached"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Build in /tmp for case-sensitivity ---
|
||||||
|
KERNEL_BUILD_DIR="/tmp/kernel-build-arm64"
|
||||||
|
rm -rf "$KERNEL_BUILD_DIR"
|
||||||
|
cp -a "$KERNEL_SRC_DIR" "$KERNEL_BUILD_DIR"
|
||||||
|
|
||||||
|
cd "$KERNEL_BUILD_DIR"
|
||||||
|
|
||||||
|
# --- Apply base config (Pi 4 = bcm2711) ---
|
||||||
|
echo "==> Applying bcm2711_defconfig..."
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" bcm2711_defconfig
|
||||||
|
|
||||||
|
# --- Apply container config overrides ---
|
||||||
|
CONFIG_FRAGMENT="$PROJECT_ROOT/build/config/kernel-container.fragment"
|
||||||
|
if [ -f "$CONFIG_FRAGMENT" ]; then
|
||||||
|
echo "==> Applying KubeSolo config overrides..."
|
||||||
|
while IFS= read -r line; do
|
||||||
|
# Skip comments and empty lines
|
||||||
|
case "$line" in \#*|"") continue ;; esac
|
||||||
|
key="${line%%=*}"
|
||||||
|
value="${line#*=}"
|
||||||
|
case "$value" in
|
||||||
|
y) ./scripts/config --enable "$key" ;;
|
||||||
|
m) ./scripts/config --module "$key" ;;
|
||||||
|
n) ./scripts/config --disable "${key#CONFIG_}" ;;
|
||||||
|
*) ./scripts/config --set-str "$key" "$value" ;;
|
||||||
|
esac
|
||||||
|
done < "$CONFIG_FRAGMENT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Handle "is not set" comments as disables
|
||||||
|
if [ -f "$CONFIG_FRAGMENT" ]; then
|
||||||
|
while IFS= read -r line; do
|
||||||
|
case "$line" in
|
||||||
|
"# CONFIG_"*" is not set")
|
||||||
|
key=$(echo "$line" | sed -n 's/^# \(CONFIG_[A-Z_]*\) is not set$/\1/p')
|
||||||
|
[ -n "$key" ] && ./scripts/config --disable "${key#CONFIG_}"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done < "$CONFIG_FRAGMENT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Resolve dependencies
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" olddefconfig
|
||||||
|
|
||||||
|
# --- Build kernel + modules + DTBs ---
|
||||||
|
NPROC=$(nproc 2>/dev/null || echo 4)
|
||||||
|
echo ""
|
||||||
|
echo "==> Building RPi kernel (${NPROC} parallel jobs)..."
|
||||||
|
echo " This may take 20-30 minutes..."
|
||||||
|
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" -j"$NPROC" Image modules dtbs 2>&1
|
||||||
|
|
||||||
|
echo "==> RPi kernel build complete"
|
||||||
|
|
||||||
|
# --- Install to staging ---
|
||||||
|
echo "==> Installing Image..."
|
||||||
|
cp arch/arm64/boot/Image "$CUSTOM_IMAGE"
|
||||||
|
|
||||||
|
echo "==> Installing modules (stripped)..."
|
||||||
|
rm -rf "$CUSTOM_MODULES"
|
||||||
|
mkdir -p "$CUSTOM_MODULES"
|
||||||
|
make ARCH=arm64 CROSS_COMPILE="$CROSS_COMPILE" \
|
||||||
|
INSTALL_MOD_STRIP=1 modules_install INSTALL_MOD_PATH="$CUSTOM_MODULES"
|
||||||
|
|
||||||
|
# Remove build/source symlinks
|
||||||
|
KVER=$(ls "$CUSTOM_MODULES/lib/modules/" | head -1)
|
||||||
|
rm -f "$CUSTOM_MODULES/lib/modules/$KVER/build"
|
||||||
|
rm -f "$CUSTOM_MODULES/lib/modules/$KVER/source"
|
||||||
|
|
||||||
|
# Run depmod
|
||||||
|
depmod -a -b "$CUSTOM_MODULES" "$KVER" 2>/dev/null || true
|
||||||
|
|
||||||
|
echo "==> Installing Device Tree Blobs..."
|
||||||
|
rm -rf "$CUSTOM_DTBS"
|
||||||
|
mkdir -p "$CUSTOM_DTBS/overlays"
|
||||||
|
# Pi 4 DTBs
|
||||||
|
cp arch/arm64/boot/dts/broadcom/bcm2711*.dtb "$CUSTOM_DTBS/" 2>/dev/null || true
|
||||||
|
# Pi 5 DTBs
|
||||||
|
cp arch/arm64/boot/dts/broadcom/bcm2712*.dtb "$CUSTOM_DTBS/" 2>/dev/null || true
|
||||||
|
# Overlays we need
|
||||||
|
for overlay in disable-wifi disable-bt; do
|
||||||
|
[ -f "arch/arm64/boot/dts/overlays/${overlay}.dtbo" ] && \
|
||||||
|
cp "arch/arm64/boot/dts/overlays/${overlay}.dtbo" "$CUSTOM_DTBS/overlays/"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Save config for reference
|
||||||
|
cp .config "$CUSTOM_KERNEL_DIR/.config"
|
||||||
|
|
||||||
|
# --- Clean up ---
|
||||||
|
echo "==> Cleaning kernel build directory..."
|
||||||
|
cd /
|
||||||
|
rm -rf "$KERNEL_BUILD_DIR"
|
||||||
|
|
||||||
|
# --- Summary ---
|
||||||
|
echo ""
|
||||||
|
echo "==> RPi kernel build complete:"
|
||||||
|
echo " Image: $CUSTOM_IMAGE ($(du -h "$CUSTOM_IMAGE" | cut -f1))"
|
||||||
|
echo " Kernel ver: $KVER"
|
||||||
|
MOD_COUNT=$(find "$CUSTOM_MODULES/lib/modules/$KVER" -name '*.ko*' 2>/dev/null | wc -l)
|
||||||
|
echo " Modules: $MOD_COUNT"
|
||||||
|
echo " Modules size: $(du -sh "$CUSTOM_MODULES/lib/modules/$KVER" 2>/dev/null | cut -f1)"
|
||||||
|
echo " DTBs: $(ls "$CUSTOM_DTBS"/*.dtb 2>/dev/null | wc -l)"
|
||||||
|
echo ""
|
||||||
@@ -85,73 +85,53 @@ echo " Source dir: $(basename "$KERNEL_SRC_DIR")"
|
|||||||
|
|
||||||
cd "$KERNEL_SRC_DIR"
|
cd "$KERNEL_SRC_DIR"
|
||||||
|
|
||||||
# --- Apply stock config + enable CONFIG_CGROUP_BPF ---
|
# --- Apply stock config + shared container-config fragment ---
|
||||||
echo "==> Applying stock Tiny Core config..."
|
echo "==> Applying stock Tiny Core config..."
|
||||||
cp "$KERNEL_CFG" .config
|
cp "$KERNEL_CFG" .config
|
||||||
|
|
||||||
echo "==> Enabling required kernel configs..."
|
CONFIG_FRAGMENT="$PROJECT_ROOT/build/config/kernel-container.fragment"
|
||||||
./scripts/config --enable CONFIG_CGROUP_BPF
|
if [ ! -f "$CONFIG_FRAGMENT" ]; then
|
||||||
./scripts/config --enable CONFIG_DEVTMPFS
|
echo "ERROR: Config fragment not found: $CONFIG_FRAGMENT"
|
||||||
./scripts/config --enable CONFIG_DEVTMPFS_MOUNT
|
exit 1
|
||||||
./scripts/config --enable CONFIG_MEMCG
|
fi
|
||||||
./scripts/config --enable CONFIG_CFS_BANDWIDTH
|
|
||||||
|
|
||||||
# --- Strip unnecessary subsystems for smallest footprint ---
|
# Apply the fragment: each "CONFIG_X=v" line becomes the right scripts/config
|
||||||
# This is a headless K8s edge appliance — no sound, GPU, wireless, etc.
|
# invocation; "# CONFIG_X is not set" comments become --disable.
|
||||||
echo "==> Disabling unnecessary subsystems for minimal footprint..."
|
apply_fragment() {
|
||||||
|
local fragment="$1"
|
||||||
|
while IFS= read -r line; do
|
||||||
|
case "$line" in
|
||||||
|
"# CONFIG_"*" is not set")
|
||||||
|
key=$(echo "$line" | sed -n 's/^# \(CONFIG_[A-Z0-9_]*\) is not set$/\1/p')
|
||||||
|
[ -n "$key" ] && ./scripts/config --disable "${key#CONFIG_}"
|
||||||
|
continue
|
||||||
|
;;
|
||||||
|
\#*|"") continue ;;
|
||||||
|
esac
|
||||||
|
key="${line%%=*}"
|
||||||
|
value="${line#*=}"
|
||||||
|
case "$value" in
|
||||||
|
y) ./scripts/config --enable "$key" ;;
|
||||||
|
m) ./scripts/config --module "$key" ;;
|
||||||
|
n) ./scripts/config --disable "${key#CONFIG_}" ;;
|
||||||
|
*) ./scripts/config --set-str "$key" "$value" ;;
|
||||||
|
esac
|
||||||
|
done < "$fragment"
|
||||||
|
}
|
||||||
|
|
||||||
# Sound subsystem (not needed on headless appliance)
|
# Two-pass apply: TC's stock config has CONFIG_SECURITY disabled, so olddefconfig
|
||||||
./scripts/config --disable SOUND
|
# strips the security subtree before its dependencies resolve. Re-applying the
|
||||||
|
# fragment after the first olddefconfig restores those entries.
|
||||||
# GPU/DRM (serial console only, no display)
|
echo "==> Applying kernel-container.fragment (pass 1)..."
|
||||||
./scripts/config --disable DRM
|
apply_fragment "$CONFIG_FRAGMENT"
|
||||||
|
|
||||||
# KVM hypervisor (this IS the guest/bare metal, not a hypervisor)
|
|
||||||
./scripts/config --disable KVM
|
|
||||||
|
|
||||||
# Media/camera/TV/radio (not needed)
|
|
||||||
./scripts/config --disable MEDIA_SUPPORT
|
|
||||||
|
|
||||||
# Wireless networking (wired edge device)
|
|
||||||
./scripts/config --disable WIRELESS
|
|
||||||
./scripts/config --disable WLAN
|
|
||||||
./scripts/config --disable CFG80211
|
|
||||||
|
|
||||||
# Bluetooth (not needed)
|
|
||||||
./scripts/config --disable BT
|
|
||||||
|
|
||||||
# NFC (not needed)
|
|
||||||
./scripts/config --disable NFC
|
|
||||||
|
|
||||||
# Infiniband (not needed on edge)
|
|
||||||
./scripts/config --disable INFINIBAND
|
|
||||||
|
|
||||||
# PCMCIA (legacy, not needed)
|
|
||||||
./scripts/config --disable PCMCIA
|
|
||||||
|
|
||||||
# Amateur radio (not needed)
|
|
||||||
./scripts/config --disable HAMRADIO
|
|
||||||
|
|
||||||
# ISDN (not needed)
|
|
||||||
./scripts/config --disable ISDN
|
|
||||||
|
|
||||||
# ATM networking (not needed)
|
|
||||||
./scripts/config --disable ATM
|
|
||||||
|
|
||||||
# Joystick/gamepad (not needed)
|
|
||||||
./scripts/config --disable INPUT_JOYSTICK
|
|
||||||
./scripts/config --disable INPUT_TABLET
|
|
||||||
|
|
||||||
# FPGA (not needed)
|
|
||||||
./scripts/config --disable FPGA
|
|
||||||
|
|
||||||
# Resolve dependencies (olddefconfig accepts defaults for new options)
|
|
||||||
make olddefconfig
|
make olddefconfig
|
||||||
|
|
||||||
# Verify CONFIG_CGROUP_BPF is set
|
echo "==> Applying kernel-container.fragment (pass 2)..."
|
||||||
if grep -q 'CONFIG_CGROUP_BPF=y' .config; then
|
apply_fragment "$CONFIG_FRAGMENT"
|
||||||
echo " CONFIG_CGROUP_BPF=y confirmed in .config"
|
make olddefconfig
|
||||||
else
|
|
||||||
|
# Verify critical configs are set
|
||||||
|
if ! grep -q 'CONFIG_CGROUP_BPF=y' .config; then
|
||||||
echo "ERROR: CONFIG_CGROUP_BPF not set after olddefconfig"
|
echo "ERROR: CONFIG_CGROUP_BPF not set after olddefconfig"
|
||||||
grep 'CGROUP_BPF' .config || echo " (CGROUP_BPF not found in .config)"
|
grep 'CGROUP_BPF' .config || echo " (CGROUP_BPF not found in .config)"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -159,10 +139,25 @@ else
|
|||||||
grep -E 'CONFIG_BPF=|CONFIG_BPF_SYSCALL=' .config || echo " BPF not found"
|
grep -E 'CONFIG_BPF=|CONFIG_BPF_SYSCALL=' .config || echo " BPF not found"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
echo " CONFIG_CGROUP_BPF=y confirmed"
|
||||||
|
|
||||||
# Show what changed
|
if ! grep -q 'CONFIG_SECURITY_APPARMOR=y' .config; then
|
||||||
echo " Config diff from stock:"
|
echo "ERROR: CONFIG_SECURITY_APPARMOR not set after olddefconfig"
|
||||||
diff "$KERNEL_CFG" .config | grep '^[<>]' | head -20 || echo " (no differences beyond CGROUP_BPF)"
|
echo " Security-related configs:"
|
||||||
|
grep -E 'CONFIG_SECURITY=|CONFIG_SECURITYFS=|CONFIG_SECURITY_APPARMOR=' .config
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " CONFIG_SECURITY_APPARMOR=y confirmed"
|
||||||
|
|
||||||
|
if ! grep -q 'CONFIG_AUDIT=y' .config; then
|
||||||
|
echo "ERROR: CONFIG_AUDIT not set after olddefconfig"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " CONFIG_AUDIT=y confirmed"
|
||||||
|
|
||||||
|
# Show what changed (security-related)
|
||||||
|
echo " Key config values:"
|
||||||
|
grep -E 'CONFIG_SECURITY=|CONFIG_SECURITY_APPARMOR=|CONFIG_AUDIT=|CONFIG_LSM=|CONFIG_CGROUP_BPF=' .config | sed 's/^/ /'
|
||||||
|
|
||||||
# --- Build kernel + modules ---
|
# --- Build kernel + modules ---
|
||||||
NPROC=$(nproc 2>/dev/null || echo 4)
|
NPROC=$(nproc 2>/dev/null || echo 4)
|
||||||
|
|||||||
@@ -6,28 +6,61 @@
|
|||||||
# Part 2: System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz (active)
|
# Part 2: System A (512 MB, ext4) — vmlinuz + kubesolo-os.gz (active)
|
||||||
# Part 3: System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz (passive)
|
# Part 3: System B (512 MB, ext4) — vmlinuz + kubesolo-os.gz (passive)
|
||||||
# Part 4: Data (remaining, ext4) — persistent K8s state
|
# Part 4: Data (remaining, ext4) — persistent K8s state
|
||||||
|
#
|
||||||
|
# Supports both x86_64 (default) and ARM64 generic UEFI targets. ARM64 RPi
|
||||||
|
# uses a different image format — see build/scripts/create-rpi-image.sh.
|
||||||
|
#
|
||||||
|
# Environment:
|
||||||
|
# TARGET_ARCH amd64 (default) or arm64
|
||||||
|
# IMG_SIZE_MB Image size in MB (default 4096)
|
||||||
|
# CACHE_DIR Build cache (default <project>/build/cache)
|
||||||
|
# ROOTFS_DIR Rootfs work dir (default <project>/build/rootfs-work)
|
||||||
|
# OUTPUT_DIR Output dir (default <project>/output)
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
||||||
|
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
||||||
OUTPUT_DIR="${OUTPUT_DIR:-$PROJECT_ROOT/output}"
|
OUTPUT_DIR="${OUTPUT_DIR:-$PROJECT_ROOT/output}"
|
||||||
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
OS_NAME="kubesolo-os"
|
OS_NAME="kubesolo-os"
|
||||||
|
TARGET_ARCH="${TARGET_ARCH:-amd64}"
|
||||||
|
|
||||||
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.img"
|
|
||||||
IMG_SIZE_MB="${IMG_SIZE_MB:-4096}" # 4 GB default (larger for A/B)
|
IMG_SIZE_MB="${IMG_SIZE_MB:-4096}" # 4 GB default (larger for A/B)
|
||||||
|
|
||||||
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
# --- Arch-specific paths ---
|
||||||
|
case "$TARGET_ARCH" in
|
||||||
|
amd64)
|
||||||
|
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.img"
|
||||||
|
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
||||||
|
GRUB_CFG="$PROJECT_ROOT/build/grub/grub.cfg"
|
||||||
|
GRUB_TARGET="x86_64-efi"
|
||||||
|
GRUB_EFI_BIN="bootx64.efi"
|
||||||
|
GRUB_INSTALL_BIOS=true
|
||||||
|
;;
|
||||||
|
arm64)
|
||||||
|
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.arm64.img"
|
||||||
|
VMLINUZ="$CACHE_DIR/kernel-arm64-generic/Image"
|
||||||
|
GRUB_CFG="$PROJECT_ROOT/build/grub/grub-arm64.cfg"
|
||||||
|
GRUB_TARGET="arm64-efi"
|
||||||
|
GRUB_EFI_BIN="BOOTAA64.EFI"
|
||||||
|
GRUB_INSTALL_BIOS=false
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "ERROR: TARGET_ARCH must be 'amd64' or 'arm64' (got: $TARGET_ARCH)"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
||||||
GRUB_CFG="$PROJECT_ROOT/build/grub/grub.cfg"
|
|
||||||
GRUB_ENV_DEFAULTS="$PROJECT_ROOT/build/grub/grub-env-defaults"
|
GRUB_ENV_DEFAULTS="$PROJECT_ROOT/build/grub/grub-env-defaults"
|
||||||
|
|
||||||
for f in "$VMLINUZ" "$INITRAMFS" "$GRUB_CFG" "$GRUB_ENV_DEFAULTS"; do
|
for f in "$VMLINUZ" "$INITRAMFS" "$GRUB_CFG" "$GRUB_ENV_DEFAULTS"; do
|
||||||
[ -f "$f" ] || { echo "ERROR: Missing $f"; exit 1; }
|
[ -f "$f" ] || { echo "ERROR: Missing $f"; exit 1; }
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "==> Creating ${IMG_SIZE_MB}MB disk image with A/B partitions..."
|
echo "==> Creating ${IMG_SIZE_MB}MB ${TARGET_ARCH} disk image with A/B partitions..."
|
||||||
mkdir -p "$OUTPUT_DIR"
|
mkdir -p "$OUTPUT_DIR"
|
||||||
|
|
||||||
# Create sparse image
|
# Create sparse image
|
||||||
@@ -51,10 +84,39 @@ size=1048576, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="SystemB"
|
|||||||
type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="Data"
|
type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, name="Data"
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Set up loop device
|
# Set up loop device with partition mappings
|
||||||
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
LOOP=$(losetup --show -f "$IMG_OUTPUT")
|
||||||
echo "==> Loop device: $LOOP"
|
echo "==> Loop device: $LOOP"
|
||||||
|
|
||||||
|
# Use kpartx for reliable partition device nodes (works in Docker/containers)
|
||||||
|
USE_KPARTX=false
|
||||||
|
if [ ! -b "${LOOP}p1" ]; then
|
||||||
|
if command -v kpartx >/dev/null 2>&1; then
|
||||||
|
kpartx -a "$LOOP"
|
||||||
|
USE_KPARTX=true
|
||||||
|
sleep 1
|
||||||
|
LOOP_NAME=$(basename "$LOOP")
|
||||||
|
P1="/dev/mapper/${LOOP_NAME}p1"
|
||||||
|
P2="/dev/mapper/${LOOP_NAME}p2"
|
||||||
|
P3="/dev/mapper/${LOOP_NAME}p3"
|
||||||
|
P4="/dev/mapper/${LOOP_NAME}p4"
|
||||||
|
else
|
||||||
|
# Retry with -P flag
|
||||||
|
losetup -d "$LOOP"
|
||||||
|
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
||||||
|
sleep 1
|
||||||
|
P1="${LOOP}p1"
|
||||||
|
P2="${LOOP}p2"
|
||||||
|
P3="${LOOP}p3"
|
||||||
|
P4="${LOOP}p4"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
P1="${LOOP}p1"
|
||||||
|
P2="${LOOP}p2"
|
||||||
|
P3="${LOOP}p3"
|
||||||
|
P4="${LOOP}p4"
|
||||||
|
fi
|
||||||
|
|
||||||
MNT_EFI=$(mktemp -d)
|
MNT_EFI=$(mktemp -d)
|
||||||
MNT_SYSA=$(mktemp -d)
|
MNT_SYSA=$(mktemp -d)
|
||||||
MNT_SYSB=$(mktemp -d)
|
MNT_SYSB=$(mktemp -d)
|
||||||
@@ -65,22 +127,25 @@ cleanup() {
|
|||||||
umount "$MNT_SYSA" 2>/dev/null || true
|
umount "$MNT_SYSA" 2>/dev/null || true
|
||||||
umount "$MNT_SYSB" 2>/dev/null || true
|
umount "$MNT_SYSB" 2>/dev/null || true
|
||||||
umount "$MNT_DATA" 2>/dev/null || true
|
umount "$MNT_DATA" 2>/dev/null || true
|
||||||
|
if [ "$USE_KPARTX" = true ]; then
|
||||||
|
kpartx -d "$LOOP" 2>/dev/null || true
|
||||||
|
fi
|
||||||
losetup -d "$LOOP" 2>/dev/null || true
|
losetup -d "$LOOP" 2>/dev/null || true
|
||||||
rm -rf "$MNT_EFI" "$MNT_SYSA" "$MNT_SYSB" "$MNT_DATA" 2>/dev/null || true
|
rm -rf "$MNT_EFI" "$MNT_SYSA" "$MNT_SYSB" "$MNT_DATA" 2>/dev/null || true
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
# Format partitions
|
# Format partitions
|
||||||
mkfs.vfat -F 32 -n KSOLOEFI "${LOOP}p1"
|
mkfs.vfat -F 32 -n KSOLOEFI "$P1"
|
||||||
mkfs.ext4 -q -L KSOLOA "${LOOP}p2"
|
mkfs.ext4 -q -L KSOLOA "$P2"
|
||||||
mkfs.ext4 -q -L KSOLOB "${LOOP}p3"
|
mkfs.ext4 -q -L KSOLOB "$P3"
|
||||||
mkfs.ext4 -q -L KSOLODATA "${LOOP}p4"
|
mkfs.ext4 -q -L KSOLODATA "$P4"
|
||||||
|
|
||||||
# Mount all partitions
|
# Mount all partitions
|
||||||
mount "${LOOP}p1" "$MNT_EFI"
|
mount "$P1" "$MNT_EFI"
|
||||||
mount "${LOOP}p2" "$MNT_SYSA"
|
mount "$P2" "$MNT_SYSA"
|
||||||
mount "${LOOP}p3" "$MNT_SYSB"
|
mount "$P3" "$MNT_SYSB"
|
||||||
mount "${LOOP}p4" "$MNT_DATA"
|
mount "$P4" "$MNT_DATA"
|
||||||
|
|
||||||
# --- EFI/Boot Partition ---
|
# --- EFI/Boot Partition ---
|
||||||
echo " Installing GRUB..."
|
echo " Installing GRUB..."
|
||||||
@@ -129,35 +194,44 @@ else
|
|||||||
mv "$GRUBENV_FILE.tmp" "$GRUBENV_FILE"
|
mv "$GRUBENV_FILE.tmp" "$GRUBENV_FILE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install GRUB EFI binary if available
|
# Install GRUB EFI binary
|
||||||
if command -v grub-mkimage >/dev/null 2>&1; then
|
# Modules required: part_gpt + fat (boot partition), ext2 (system A/B),
|
||||||
grub-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
# normal + linux + echo + configfile + loadenv (boot menu + grubenv),
|
||||||
-p /boot/grub \
|
# search_* (locate partitions by label).
|
||||||
part_gpt ext2 fat normal linux echo all_video test search \
|
# all_video + test are x86-specific (DRM init); leave them out on arm64.
|
||||||
search_fs_uuid search_label configfile loadenv \
|
if [ "$TARGET_ARCH" = "arm64" ]; then
|
||||||
2>/dev/null || echo " WARN: grub-mkimage failed — use QEMU -bios flag"
|
GRUB_MODULES="part_gpt ext2 fat normal linux echo test search search_fs_uuid search_label configfile loadenv"
|
||||||
elif command -v grub2-mkimage >/dev/null 2>&1; then
|
|
||||||
grub2-mkimage -O x86_64-efi -o "$MNT_EFI/EFI/BOOT/bootx64.efi" \
|
|
||||||
-p /boot/grub \
|
|
||||||
part_gpt ext2 fat normal linux echo all_video test search \
|
|
||||||
search_fs_uuid search_label configfile loadenv \
|
|
||||||
2>/dev/null || echo " WARN: grub2-mkimage failed — use QEMU -bios flag"
|
|
||||||
else
|
else
|
||||||
echo " WARN: grub-mkimage not found — EFI boot image not created"
|
GRUB_MODULES="part_gpt ext2 fat normal linux echo all_video test search search_fs_uuid search_label configfile loadenv"
|
||||||
echo " Install grub2-tools or use QEMU -kernel/-initrd flags"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# For BIOS boot: install GRUB i386-pc modules if available
|
# shellcheck disable=SC2086 # GRUB_MODULES is intentionally word-split
|
||||||
if command -v grub-install >/dev/null 2>&1; then
|
if command -v grub-mkimage >/dev/null 2>&1; then
|
||||||
grub-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
grub-mkimage -O "$GRUB_TARGET" -o "$MNT_EFI/EFI/BOOT/$GRUB_EFI_BIN" \
|
||||||
--no-floppy "$LOOP" 2>/dev/null || {
|
-p /boot/grub $GRUB_MODULES \
|
||||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
|| echo " WARN: grub-mkimage failed — use QEMU -bios flag"
|
||||||
}
|
elif command -v grub2-mkimage >/dev/null 2>&1; then
|
||||||
elif command -v grub2-install >/dev/null 2>&1; then
|
grub2-mkimage -O "$GRUB_TARGET" -o "$MNT_EFI/EFI/BOOT/$GRUB_EFI_BIN" \
|
||||||
grub2-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
-p /boot/grub $GRUB_MODULES \
|
||||||
--no-floppy "$LOOP" 2>/dev/null || {
|
|| echo " WARN: grub2-mkimage failed — use QEMU -bios flag"
|
||||||
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
else
|
||||||
}
|
echo " WARN: grub-mkimage not found — EFI boot image not created"
|
||||||
|
echo " Install grub-efi-${TARGET_ARCH}-bin or use QEMU -kernel/-initrd flags"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# For BIOS boot: install GRUB i386-pc modules (x86 only — ARM64 is UEFI-only).
|
||||||
|
if [ "$GRUB_INSTALL_BIOS" = "true" ]; then
|
||||||
|
if command -v grub-install >/dev/null 2>&1; then
|
||||||
|
grub-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||||
|
--no-floppy "$LOOP" 2>/dev/null || {
|
||||||
|
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||||
|
}
|
||||||
|
elif command -v grub2-install >/dev/null 2>&1; then
|
||||||
|
grub2-install --target=i386-pc --boot-directory="$MNT_EFI/boot" \
|
||||||
|
--no-floppy "$LOOP" 2>/dev/null || {
|
||||||
|
echo " WARN: BIOS GRUB install failed — EFI-only or use QEMU -kernel"
|
||||||
|
}
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- System A Partition (active) ---
|
# --- System A Partition (active) ---
|
||||||
@@ -181,9 +255,9 @@ done
|
|||||||
sync
|
sync
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "==> Disk image created: $IMG_OUTPUT"
|
echo "==> ${TARGET_ARCH} disk image created: $IMG_OUTPUT"
|
||||||
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
||||||
echo " Part 1 (KSOLOEFI): GRUB + A/B boot config"
|
echo " Part 1 (KSOLOEFI): GRUB ($GRUB_TARGET) + A/B boot config"
|
||||||
echo " Part 2 (KSOLOA): System A — kernel + initramfs (active)"
|
echo " Part 2 (KSOLOA): System A — kernel + initramfs (active)"
|
||||||
echo " Part 3 (KSOLOB): System B — kernel + initramfs (passive)"
|
echo " Part 3 (KSOLOB): System B — kernel + initramfs (passive)"
|
||||||
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
||||||
|
|||||||
256
build/scripts/create-rpi-image.sh
Executable file
256
build/scripts/create-rpi-image.sh
Executable file
@@ -0,0 +1,256 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# create-rpi-image.sh — Create a raw disk image for Raspberry Pi SD card
|
||||||
|
#
|
||||||
|
# Partition layout (MBR):
|
||||||
|
# Part 1: Boot/Control (384 MB, FAT32, label KSOLOCTL) — firmware + kernel + initramfs + autoboot.txt
|
||||||
|
# Part 2: Boot A (256 MB, FAT32, label KSOLOA) — kernel + DTBs + initramfs
|
||||||
|
# Part 3: Boot B (256 MB, FAT32, label KSOLOB) — same as Boot A (initially identical)
|
||||||
|
# Part 4: Data (remaining of 2GB, ext4, label KSOLODATA)
|
||||||
|
#
|
||||||
|
# The RPi EEPROM loads start4.elf from partition 1.
|
||||||
|
# If autoboot.txt is supported (newer EEPROM), firmware redirects to partition 2/3 for A/B boot.
|
||||||
|
# If autoboot.txt is NOT supported (older EEPROM), partition 1 has full boot files as fallback.
|
||||||
|
#
|
||||||
|
# MBR is required — GPT + autoboot.txt is not reliably supported on Pi 4.
|
||||||
|
#
|
||||||
|
# Usage: build/scripts/create-rpi-image.sh
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
|
||||||
|
# shellcheck source=../config/versions.env
|
||||||
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
||||||
|
OUTPUT_DIR="${OUTPUT_DIR:-$PROJECT_ROOT/output}"
|
||||||
|
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
||||||
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
|
|
||||||
|
IMG_OUTPUT="$OUTPUT_DIR/${OS_NAME}-${VERSION}.rpi.img"
|
||||||
|
IMG_SIZE_MB="${IMG_SIZE_MB:-2048}" # 2 GB default
|
||||||
|
|
||||||
|
# ARM64 kernel (Image format, not bzImage)
|
||||||
|
KERNEL="${CACHE_DIR}/custom-kernel-rpi/Image"
|
||||||
|
INITRAMFS="${ROOTFS_DIR}/kubesolo-os.gz"
|
||||||
|
RPI_FIRMWARE_DIR="${CACHE_DIR}/rpi-firmware"
|
||||||
|
# DTBs MUST come from the kernel build (not firmware repo) to match the kernel.
|
||||||
|
# A DTB mismatch causes sdhci-iproc to silently fail — zero block devices.
|
||||||
|
KERNEL_DTBS_DIR="${CACHE_DIR}/custom-kernel-rpi/dtbs"
|
||||||
|
|
||||||
|
echo "==> Creating ${IMG_SIZE_MB}MB Raspberry Pi disk image..."
|
||||||
|
|
||||||
|
# --- Verify required files ---
|
||||||
|
MISSING=0
|
||||||
|
for f in "$KERNEL" "$INITRAMFS"; do
|
||||||
|
if [ ! -f "$f" ]; then
|
||||||
|
echo "ERROR: Missing $f"
|
||||||
|
MISSING=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ ! -d "$RPI_FIRMWARE_DIR" ]; then
|
||||||
|
echo "ERROR: Missing RPi firmware directory: $RPI_FIRMWARE_DIR"
|
||||||
|
echo " Run 'make fetch' to download firmware blobs."
|
||||||
|
MISSING=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$MISSING" = "1" ]; then
|
||||||
|
echo ""
|
||||||
|
echo "Required files:"
|
||||||
|
echo " Kernel: $KERNEL (run 'make kernel-arm64')"
|
||||||
|
echo " Initramfs: $INITRAMFS (run 'make initramfs')"
|
||||||
|
echo " Firmware: $RPI_FIRMWARE_DIR/ (run 'make fetch')"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "$OUTPUT_DIR"
|
||||||
|
|
||||||
|
# --- Create sparse image ---
|
||||||
|
dd if=/dev/zero of="$IMG_OUTPUT" bs=1M count=0 seek="$IMG_SIZE_MB" 2>/dev/null
|
||||||
|
|
||||||
|
# --- Partition table (MBR) ---
|
||||||
|
# MBR is required for reliable RPi boot with autoboot.txt.
|
||||||
|
# GPT + autoboot.txt fails on many Pi 4 EEPROM versions.
|
||||||
|
# Part 1: Boot/Control 384 MB FAT32 (firmware + kernel + initramfs + autoboot.txt)
|
||||||
|
# Part 2: Boot A 256 MB FAT32 (kernel + initramfs + DTBs)
|
||||||
|
# Part 3: Boot B 256 MB FAT32 (kernel + initramfs + DTBs)
|
||||||
|
# Part 4: Data remaining ext4
|
||||||
|
sfdisk "$IMG_OUTPUT" << EOF
|
||||||
|
label: dos
|
||||||
|
|
||||||
|
# Boot/Control partition: 384 MB, FAT32 (type 0c = W95 FAT32 LBA)
|
||||||
|
# Contains firmware + autoboot.txt for A/B redirect, PLUS full boot files as fallback
|
||||||
|
start=2048, size=786432, type=c, bootable
|
||||||
|
# Boot A partition: 256 MB, FAT32
|
||||||
|
size=524288, type=c
|
||||||
|
# Boot B partition: 256 MB, FAT32
|
||||||
|
size=524288, type=c
|
||||||
|
# Data partition: remaining, Linux
|
||||||
|
type=83
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# --- Set up loop device ---
|
||||||
|
LOOP=$(losetup --show -f "$IMG_OUTPUT")
|
||||||
|
echo "==> Loop device: $LOOP"
|
||||||
|
|
||||||
|
# Use kpartx for reliable partition device nodes (works in Docker/containers)
|
||||||
|
USE_KPARTX=false
|
||||||
|
if [ ! -b "${LOOP}p1" ]; then
|
||||||
|
if command -v kpartx >/dev/null 2>&1; then
|
||||||
|
kpartx -a "$LOOP"
|
||||||
|
USE_KPARTX=true
|
||||||
|
sleep 1
|
||||||
|
LOOP_NAME=$(basename "$LOOP")
|
||||||
|
P1="/dev/mapper/${LOOP_NAME}p1"
|
||||||
|
P2="/dev/mapper/${LOOP_NAME}p2"
|
||||||
|
P3="/dev/mapper/${LOOP_NAME}p3"
|
||||||
|
P4="/dev/mapper/${LOOP_NAME}p4"
|
||||||
|
else
|
||||||
|
# Retry with -P flag
|
||||||
|
losetup -d "$LOOP"
|
||||||
|
LOOP=$(losetup --show -fP "$IMG_OUTPUT")
|
||||||
|
sleep 1
|
||||||
|
P1="${LOOP}p1"
|
||||||
|
P2="${LOOP}p2"
|
||||||
|
P3="${LOOP}p3"
|
||||||
|
P4="${LOOP}p4"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
P1="${LOOP}p1"
|
||||||
|
P2="${LOOP}p2"
|
||||||
|
P3="${LOOP}p3"
|
||||||
|
P4="${LOOP}p4"
|
||||||
|
fi
|
||||||
|
|
||||||
|
MNT_CTL=$(mktemp -d)
|
||||||
|
MNT_BOOTA=$(mktemp -d)
|
||||||
|
MNT_BOOTB=$(mktemp -d)
|
||||||
|
MNT_DATA=$(mktemp -d)
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
umount "$MNT_CTL" 2>/dev/null || true
|
||||||
|
umount "$MNT_BOOTA" 2>/dev/null || true
|
||||||
|
umount "$MNT_BOOTB" 2>/dev/null || true
|
||||||
|
umount "$MNT_DATA" 2>/dev/null || true
|
||||||
|
if [ "$USE_KPARTX" = true ]; then
|
||||||
|
kpartx -d "$LOOP" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
losetup -d "$LOOP" 2>/dev/null || true
|
||||||
|
rm -rf "$MNT_CTL" "$MNT_BOOTA" "$MNT_BOOTB" "$MNT_DATA" 2>/dev/null || true
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# --- Format partitions ---
|
||||||
|
mkfs.vfat -F 32 -n KSOLOCTL "$P1"
|
||||||
|
mkfs.vfat -F 32 -n KSOLOA "$P2"
|
||||||
|
mkfs.vfat -F 32 -n KSOLOB "$P3"
|
||||||
|
mkfs.ext4 -q -L KSOLODATA "$P4"
|
||||||
|
|
||||||
|
# --- Mount all partitions ---
|
||||||
|
mount "$P1" "$MNT_CTL"
|
||||||
|
mount "$P2" "$MNT_BOOTA"
|
||||||
|
mount "$P3" "$MNT_BOOTB"
|
||||||
|
mount "$P4" "$MNT_DATA"
|
||||||
|
|
||||||
|
# --- Helper: populate a boot partition ---
|
||||||
|
populate_boot_partition() {
|
||||||
|
local MNT="$1"
|
||||||
|
local LABEL="$2"
|
||||||
|
|
||||||
|
echo " Populating $LABEL..."
|
||||||
|
|
||||||
|
# config.txt — Raspberry Pi boot configuration
|
||||||
|
cat > "$MNT/config.txt" << 'CFGTXT'
|
||||||
|
arm_64bit=1
|
||||||
|
kernel=kernel8.img
|
||||||
|
initramfs kubesolo-os.gz followkernel
|
||||||
|
enable_uart=1
|
||||||
|
gpu_mem=16
|
||||||
|
dtoverlay=disable-wifi
|
||||||
|
dtoverlay=disable-bt
|
||||||
|
CFGTXT
|
||||||
|
|
||||||
|
# cmdline.txt — kernel command line
|
||||||
|
# Note: must be a single line
|
||||||
|
echo "console=serial0,115200 console=tty1 kubesolo.data=LABEL=KSOLODATA initcall_debug loglevel=7" > "$MNT/cmdline.txt"
|
||||||
|
|
||||||
|
# Copy kernel as kernel8.img (RPi 3/4/5 ARM64 convention)
|
||||||
|
cp "$KERNEL" "$MNT/kernel8.img"
|
||||||
|
|
||||||
|
# Copy initramfs
|
||||||
|
cp "$INITRAMFS" "$MNT/kubesolo-os.gz"
|
||||||
|
|
||||||
|
# Copy DTBs from kernel build (MUST match kernel to avoid driver probe failures)
|
||||||
|
if ls "$KERNEL_DTBS_DIR"/bcm27*.dtb 1>/dev/null 2>&1; then
|
||||||
|
cp "$KERNEL_DTBS_DIR"/bcm27*.dtb "$MNT/"
|
||||||
|
fi
|
||||||
|
# Copy overlays — prefer kernel-built, fall back to firmware repo
|
||||||
|
if [ -d "$KERNEL_DTBS_DIR/overlays" ]; then
|
||||||
|
cp -r "$KERNEL_DTBS_DIR/overlays" "$MNT/"
|
||||||
|
elif [ -d "$RPI_FIRMWARE_DIR/overlays" ]; then
|
||||||
|
cp -r "$RPI_FIRMWARE_DIR/overlays" "$MNT/"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Write version marker
|
||||||
|
echo "$VERSION" > "$MNT/version.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Boot Control Partition (KSOLOCTL) ---
|
||||||
|
# Partition 1 serves dual purpose:
|
||||||
|
# 1. Contains firmware + autoboot.txt for A/B redirect (if EEPROM supports it)
|
||||||
|
# 2. Contains full boot files (kernel + initramfs) as fallback if autoboot.txt isn't supported
|
||||||
|
echo " Writing firmware + autoboot.txt + boot files to partition 1..."
|
||||||
|
|
||||||
|
# autoboot.txt — tells firmware which partition to boot from (A/B switching)
|
||||||
|
# If the EEPROM doesn't support this, it's silently ignored and the firmware
|
||||||
|
# falls back to booting from partition 1 using config.txt below.
|
||||||
|
cat > "$MNT_CTL/autoboot.txt" << 'AUTOBOOT'
|
||||||
|
[all]
|
||||||
|
tryboot_a_b=1
|
||||||
|
boot_partition=2
|
||||||
|
[tryboot]
|
||||||
|
boot_partition=3
|
||||||
|
AUTOBOOT
|
||||||
|
|
||||||
|
# Copy firmware blobs — REQUIRED on partition 1 for EEPROM to boot
|
||||||
|
if ls "$RPI_FIRMWARE_DIR"/start*.elf 1>/dev/null 2>&1; then
|
||||||
|
cp "$RPI_FIRMWARE_DIR"/start*.elf "$MNT_CTL/"
|
||||||
|
fi
|
||||||
|
if ls "$RPI_FIRMWARE_DIR"/fixup*.dat 1>/dev/null 2>&1; then
|
||||||
|
cp "$RPI_FIRMWARE_DIR"/fixup*.dat "$MNT_CTL/"
|
||||||
|
fi
|
||||||
|
if [ -f "$RPI_FIRMWARE_DIR/bootcode.bin" ]; then
|
||||||
|
cp "$RPI_FIRMWARE_DIR/bootcode.bin" "$MNT_CTL/"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Full boot files on partition 1 — fallback if autoboot.txt redirect doesn't work.
|
||||||
|
# When autoboot.txt works, firmware switches to partition 2 and reads config.txt there.
|
||||||
|
# When autoboot.txt is unsupported, firmware reads THIS config.txt and boots from here.
|
||||||
|
populate_boot_partition "$MNT_CTL" "Boot Control (KSOLOCTL)"
|
||||||
|
|
||||||
|
# --- Boot A Partition (KSOLOA) ---
|
||||||
|
populate_boot_partition "$MNT_BOOTA" "Boot A (KSOLOA)"
|
||||||
|
|
||||||
|
# --- Boot B Partition (KSOLOB, initially identical) ---
|
||||||
|
populate_boot_partition "$MNT_BOOTB" "Boot B (KSOLOB)"
|
||||||
|
|
||||||
|
# --- Data Partition (KSOLODATA) ---
|
||||||
|
echo " Preparing data partition..."
|
||||||
|
for dir in kubesolo containerd etc-kubesolo log usr-local network images; do
|
||||||
|
mkdir -p "$MNT_DATA/$dir"
|
||||||
|
done
|
||||||
|
|
||||||
|
sync
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "==> Raspberry Pi disk image created: $IMG_OUTPUT"
|
||||||
|
echo " Size: $(du -h "$IMG_OUTPUT" | cut -f1)"
|
||||||
|
echo " Part 1 (KSOLOCTL): Firmware + kernel + initramfs + autoboot.txt (boot/control)"
|
||||||
|
echo " Part 2 (KSOLOA): Boot A — kernel + initramfs + DTBs"
|
||||||
|
echo " Part 3 (KSOLOB): Boot B — kernel + initramfs + DTBs"
|
||||||
|
echo " Part 4 (KSOLODATA): Persistent K8s state"
|
||||||
|
echo ""
|
||||||
|
echo "Write to SD card with:"
|
||||||
|
echo " sudo dd if=$IMG_OUTPUT of=/dev/sdX bs=4M status=progress"
|
||||||
|
echo ""
|
||||||
@@ -10,6 +10,111 @@ ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
|||||||
# shellcheck source=../config/versions.env
|
# shellcheck source=../config/versions.env
|
||||||
. "$SCRIPT_DIR/../config/versions.env"
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
EXTRACT_ARCH="${TARGET_ARCH:-amd64}"
|
||||||
|
|
||||||
|
# Clean previous rootfs
|
||||||
|
rm -rf "$ROOTFS_DIR"
|
||||||
|
mkdir -p "$ROOTFS_DIR"
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# ARM64: piCore64 .img.gz extraction (SD card image, not ISO)
|
||||||
|
# =========================================================================
|
||||||
|
if [ "$EXTRACT_ARCH" = "arm64" ]; then
|
||||||
|
PICORE_IMG="$CACHE_DIR/$PICORE_IMAGE"
|
||||||
|
if [ ! -f "$PICORE_IMG" ]; then
|
||||||
|
echo "ERROR: piCore64 image not found: $PICORE_IMG"
|
||||||
|
echo "Run 'TARGET_ARCH=arm64 make fetch' first."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Extracting piCore64 image: $PICORE_IMG"
|
||||||
|
|
||||||
|
# Decompress to raw image (.img.gz or .zip)
|
||||||
|
PICORE_RAW="$CACHE_DIR/piCore-${PICORE_VERSION}.img"
|
||||||
|
if [ ! -f "$PICORE_RAW" ]; then
|
||||||
|
echo " Decompressing..."
|
||||||
|
case "$PICORE_IMG" in
|
||||||
|
*.zip)
|
||||||
|
unzip -o -j "$PICORE_IMG" '*.img' -d "$CACHE_DIR" 2>/dev/null || \
|
||||||
|
unzip -o "$PICORE_IMG" -d "$CACHE_DIR"
|
||||||
|
# Find the extracted .img file
|
||||||
|
EXTRACTED_IMG=$(find "$CACHE_DIR" -maxdepth 1 -name '*.img' -newer "$PICORE_IMG" | head -1)
|
||||||
|
if [ -n "$EXTRACTED_IMG" ] && [ "$EXTRACTED_IMG" != "$PICORE_RAW" ]; then
|
||||||
|
mv "$EXTRACTED_IMG" "$PICORE_RAW"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*.img.gz)
|
||||||
|
gunzip -k "$PICORE_IMG" 2>/dev/null || \
|
||||||
|
zcat "$PICORE_IMG" > "$PICORE_RAW"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "ERROR: Unknown piCore image format: $PICORE_IMG"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mount the piCore boot partition (partition 1) to find kernel/initramfs
|
||||||
|
# piCore layout: p1=boot (FAT32, has kernel+initramfs), p2=rootfs (ext4, has tce/)
|
||||||
|
IMG_MNT=$(mktemp -d)
|
||||||
|
echo " Mounting piCore boot partition..."
|
||||||
|
|
||||||
|
# Get partition 1 offset (boot/FAT partition with kernel+initramfs)
|
||||||
|
OFFSET=$(fdisk -l "$PICORE_RAW" 2>/dev/null | awk '/^.*img1/{print $2}')
|
||||||
|
if [ -z "$OFFSET" ]; then
|
||||||
|
# Fallback: try sfdisk (first partition)
|
||||||
|
OFFSET=$(sfdisk -d "$PICORE_RAW" 2>/dev/null | awk -F'[=,]' '/start=/{print $2; exit}' | tr -d ' ')
|
||||||
|
fi
|
||||||
|
if [ -z "$OFFSET" ]; then
|
||||||
|
echo "ERROR: Could not determine partition offset in piCore image"
|
||||||
|
fdisk -l "$PICORE_RAW" || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
BYTE_OFFSET=$((OFFSET * 512))
|
||||||
|
mount -o loop,ro,offset="$BYTE_OFFSET" "$PICORE_RAW" "$IMG_MNT" || {
|
||||||
|
echo "ERROR: Failed to mount piCore boot partition (need root for losetup)"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Find initramfs in the piCore boot partition
|
||||||
|
COREGZ=""
|
||||||
|
for f in "$IMG_MNT"/rootfs-piCore64*.gz "$IMG_MNT"/boot/corepure64.gz "$IMG_MNT"/boot/core.gz "$IMG_MNT"/corepure64.gz "$IMG_MNT"/core.gz; do
|
||||||
|
[ -f "$f" ] && COREGZ="$f" && break
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$COREGZ" ]; then
|
||||||
|
echo "ERROR: Could not find initramfs in piCore image"
|
||||||
|
echo "Contents:"
|
||||||
|
ls -la "$IMG_MNT"/
|
||||||
|
ls -la "$IMG_MNT"/boot/ 2>/dev/null || true
|
||||||
|
umount "$IMG_MNT" 2>/dev/null || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Found initramfs: $COREGZ"
|
||||||
|
|
||||||
|
# Extract initramfs
|
||||||
|
mkdir -p "$ROOTFS_DIR/rootfs"
|
||||||
|
cd "$ROOTFS_DIR/rootfs"
|
||||||
|
zcat "$COREGZ" | cpio -idm 2>/dev/null
|
||||||
|
|
||||||
|
# Note: ARM64 kernel comes from build-kernel-arm64.sh, not from piCore
|
||||||
|
# We only use piCore for the BusyBox userland
|
||||||
|
|
||||||
|
cd "$PROJECT_ROOT"
|
||||||
|
umount "$IMG_MNT" 2>/dev/null || true
|
||||||
|
rm -rf "$IMG_MNT"
|
||||||
|
|
||||||
|
echo "==> ARM64 rootfs extracted: $ROOTFS_DIR/rootfs"
|
||||||
|
echo " Size: $(du -sh "$ROOTFS_DIR/rootfs" | cut -f1)"
|
||||||
|
echo "==> Extract complete (ARM64). Kernel will come from build-kernel-arm64.sh"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# x86_64: Tiny Core ISO extraction
|
||||||
|
# =========================================================================
|
||||||
TC_ISO="$CACHE_DIR/$TINYCORE_ISO"
|
TC_ISO="$CACHE_DIR/$TINYCORE_ISO"
|
||||||
ISO_MNT="$ROOTFS_DIR/iso-mount"
|
ISO_MNT="$ROOTFS_DIR/iso-mount"
|
||||||
|
|
||||||
@@ -19,9 +124,7 @@ if [ ! -f "$TC_ISO" ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Clean previous rootfs
|
mkdir -p "$ISO_MNT"
|
||||||
rm -rf "$ROOTFS_DIR"
|
|
||||||
mkdir -p "$ROOTFS_DIR" "$ISO_MNT"
|
|
||||||
|
|
||||||
# --- Mount ISO and extract kernel + initramfs ---
|
# --- Mount ISO and extract kernel + initramfs ---
|
||||||
echo "==> Mounting ISO: $TC_ISO"
|
echo "==> Mounting ISO: $TC_ISO"
|
||||||
|
|||||||
@@ -10,9 +10,90 @@ CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
|||||||
# shellcheck source=../config/versions.env
|
# shellcheck source=../config/versions.env
|
||||||
. "$SCRIPT_DIR/../config/versions.env"
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
# Verify SHA256 checksum of a downloaded file
|
||||||
|
verify_checksum() {
|
||||||
|
local file="$1" expected="$2" name="$3"
|
||||||
|
# Skip if no expected checksum provided
|
||||||
|
[ -z "$expected" ] && return 0
|
||||||
|
local actual
|
||||||
|
actual=$(sha256sum "$file" | awk '{print $1}')
|
||||||
|
if [ "$actual" = "$expected" ]; then
|
||||||
|
echo " Checksum OK: $name"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
echo "ERROR: Checksum mismatch for $name"
|
||||||
|
echo " Expected: $expected"
|
||||||
|
echo " Got: $actual"
|
||||||
|
rm -f "$file"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
mkdir -p "$CACHE_DIR"
|
mkdir -p "$CACHE_DIR"
|
||||||
|
|
||||||
# --- Tiny Core Linux ISO ---
|
# Detect target architecture
|
||||||
|
FETCH_ARCH="${TARGET_ARCH:-amd64}"
|
||||||
|
|
||||||
|
# --- ARM64: piCore64 image instead of x86_64 ISO ---
|
||||||
|
if [ "$FETCH_ARCH" = "arm64" ]; then
|
||||||
|
PICORE_IMG="$CACHE_DIR/$PICORE_IMAGE"
|
||||||
|
if [ -f "$PICORE_IMG" ]; then
|
||||||
|
echo "==> piCore64 image already cached: $PICORE_IMG"
|
||||||
|
else
|
||||||
|
echo "==> Downloading piCore64 ${PICORE_VERSION} (${PICORE_ARCH})..."
|
||||||
|
echo " URL: $PICORE_IMAGE_URL"
|
||||||
|
wget -q --show-progress -O "$PICORE_IMG" "$PICORE_IMAGE_URL" 2>/dev/null || \
|
||||||
|
curl -fSL "$PICORE_IMAGE_URL" -o "$PICORE_IMG"
|
||||||
|
echo "==> Downloaded: $PICORE_IMG ($(du -h "$PICORE_IMG" | cut -f1))"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Also fetch RPi firmware
|
||||||
|
echo "==> Fetching RPi firmware..."
|
||||||
|
"$SCRIPT_DIR/fetch-rpi-firmware.sh"
|
||||||
|
|
||||||
|
# Download ARM64 KubeSolo binary (KUBESOLO_VERSION set from versions.env)
|
||||||
|
KUBESOLO_BIN_ARM64="$CACHE_DIR/kubesolo-arm64"
|
||||||
|
if [ -f "$KUBESOLO_BIN_ARM64" ]; then
|
||||||
|
echo "==> KubeSolo ARM64 binary already cached: $KUBESOLO_BIN_ARM64"
|
||||||
|
else
|
||||||
|
echo "==> Downloading KubeSolo ${KUBESOLO_VERSION} (arm64)..."
|
||||||
|
BIN_URL="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-linux-arm64-musl.tar.gz"
|
||||||
|
BIN_URL_FALLBACK="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-linux-arm64.tar.gz"
|
||||||
|
TEMP_DIR=$(mktemp -d)
|
||||||
|
TARBALL="$TEMP_DIR/kubesolo.tar.gz"
|
||||||
|
echo " URL: $BIN_URL"
|
||||||
|
if curl -fSL "$BIN_URL" -o "$TARBALL" 2>/dev/null; then
|
||||||
|
echo " Downloaded musl variant (arm64)"
|
||||||
|
elif curl -fSL "$BIN_URL_FALLBACK" -o "$TARBALL" 2>/dev/null; then
|
||||||
|
echo " Downloaded glibc variant (arm64 fallback)"
|
||||||
|
else
|
||||||
|
echo "ERROR: Failed to download KubeSolo ARM64 from GitHub."
|
||||||
|
rm -rf "$TEMP_DIR"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
verify_checksum "$TARBALL" "${KUBESOLO_SHA256_ARM64:-}" "KubeSolo arm64 tarball"
|
||||||
|
tar -xzf "$TARBALL" -C "$TEMP_DIR"
|
||||||
|
FOUND_BIN=$(find "$TEMP_DIR" -name "kubesolo" -type f ! -name "*.tar.gz" | head -1)
|
||||||
|
if [ -z "$FOUND_BIN" ]; then
|
||||||
|
echo "ERROR: Could not find kubesolo binary in extracted archive"
|
||||||
|
rm -rf "$TEMP_DIR"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
cp "$FOUND_BIN" "$KUBESOLO_BIN_ARM64"
|
||||||
|
chmod +x "$KUBESOLO_BIN_ARM64"
|
||||||
|
rm -rf "$TEMP_DIR"
|
||||||
|
echo "==> KubeSolo ARM64 binary: $KUBESOLO_BIN_ARM64 ($(du -h "$KUBESOLO_BIN_ARM64" | cut -f1))"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Skip x86_64 ISO and TCZ downloads for ARM64
|
||||||
|
echo ""
|
||||||
|
echo "==> ARM64 fetch complete."
|
||||||
|
echo "==> Component cache:"
|
||||||
|
ls -lh "$CACHE_DIR"/ 2>/dev/null || true
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- x86_64: Tiny Core Linux ISO ---
|
||||||
TC_ISO="$CACHE_DIR/$TINYCORE_ISO"
|
TC_ISO="$CACHE_DIR/$TINYCORE_ISO"
|
||||||
TC_URL="${TINYCORE_MIRROR}/${TINYCORE_VERSION%%.*}.x/${TINYCORE_ARCH}/release/${TINYCORE_ISO}"
|
TC_URL="${TINYCORE_MIRROR}/${TINYCORE_VERSION%%.*}.x/${TINYCORE_ARCH}/release/${TINYCORE_ISO}"
|
||||||
|
|
||||||
@@ -28,10 +109,11 @@ else
|
|||||||
wget -q --show-progress -O "$TC_ISO" "$TC_URL_ALT"
|
wget -q --show-progress -O "$TC_ISO" "$TC_URL_ALT"
|
||||||
}
|
}
|
||||||
echo "==> Downloaded: $TC_ISO ($(du -h "$TC_ISO" | cut -f1))"
|
echo "==> Downloaded: $TC_ISO ($(du -h "$TC_ISO" | cut -f1))"
|
||||||
|
verify_checksum "$TC_ISO" "$TINYCORE_ISO_SHA256" "Tiny Core ISO"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- KubeSolo ---
|
# --- KubeSolo ---
|
||||||
KUBESOLO_VERSION="${KUBESOLO_VERSION:-v1.1.0}"
|
# KUBESOLO_VERSION sourced from versions.env
|
||||||
KUBESOLO_BIN="$CACHE_DIR/kubesolo"
|
KUBESOLO_BIN="$CACHE_DIR/kubesolo"
|
||||||
|
|
||||||
if [ -f "$KUBESOLO_BIN" ]; then
|
if [ -f "$KUBESOLO_BIN" ]; then
|
||||||
@@ -50,12 +132,13 @@ else
|
|||||||
BIN_URL_FALLBACK="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-${OS}-${ARCH}.tar.gz"
|
BIN_URL_FALLBACK="https://github.com/portainer/kubesolo/releases/download/${KUBESOLO_VERSION}/kubesolo-${KUBESOLO_VERSION}-${OS}-${ARCH}.tar.gz"
|
||||||
|
|
||||||
TEMP_DIR=$(mktemp -d)
|
TEMP_DIR=$(mktemp -d)
|
||||||
trap "rm -rf '$TEMP_DIR'" EXIT
|
trap 'rm -rf "$TEMP_DIR"' EXIT
|
||||||
|
TARBALL="$TEMP_DIR/kubesolo.tar.gz"
|
||||||
|
|
||||||
echo " URL: $BIN_URL"
|
echo " URL: $BIN_URL"
|
||||||
if curl -fSL "$BIN_URL" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then
|
if curl -fSL "$BIN_URL" -o "$TARBALL" 2>/dev/null; then
|
||||||
echo " Downloaded musl variant"
|
echo " Downloaded musl variant"
|
||||||
elif curl -fSL "$BIN_URL_FALLBACK" -o "$TEMP_DIR/kubesolo.tar.gz" 2>/dev/null; then
|
elif curl -fSL "$BIN_URL_FALLBACK" -o "$TARBALL" 2>/dev/null; then
|
||||||
echo " Downloaded glibc variant (fallback)"
|
echo " Downloaded glibc variant (fallback)"
|
||||||
else
|
else
|
||||||
echo "ERROR: Failed to download KubeSolo from GitHub."
|
echo "ERROR: Failed to download KubeSolo from GitHub."
|
||||||
@@ -68,9 +151,10 @@ else
|
|||||||
echo " 3. Re-run: make rootfs"
|
echo " 3. Re-run: make rootfs"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
verify_checksum "$TARBALL" "${KUBESOLO_SHA256_AMD64:-}" "KubeSolo amd64 tarball"
|
||||||
|
|
||||||
# Extract binary from tarball
|
# Extract binary from tarball
|
||||||
tar -xzf "$TEMP_DIR/kubesolo.tar.gz" -C "$TEMP_DIR"
|
tar -xzf "$TARBALL" -C "$TEMP_DIR"
|
||||||
|
|
||||||
# Find the kubesolo binary in extracted contents
|
# Find the kubesolo binary in extracted contents
|
||||||
FOUND_BIN=$(find "$TEMP_DIR" -name "kubesolo" -type f ! -name "*.tar.gz" | head -1)
|
FOUND_BIN=$(find "$TEMP_DIR" -name "kubesolo" -type f ! -name "*.tar.gz" | head -1)
|
||||||
@@ -114,6 +198,7 @@ else
|
|||||||
if wget -q --show-progress -O "$NETFILTER_TCZ" "$NETFILTER_TCZ_URL" 2>/dev/null || \
|
if wget -q --show-progress -O "$NETFILTER_TCZ" "$NETFILTER_TCZ_URL" 2>/dev/null || \
|
||||||
curl -fSL "$NETFILTER_TCZ_URL" -o "$NETFILTER_TCZ" 2>/dev/null; then
|
curl -fSL "$NETFILTER_TCZ_URL" -o "$NETFILTER_TCZ" 2>/dev/null; then
|
||||||
echo "==> Downloaded: $NETFILTER_TCZ ($(du -h "$NETFILTER_TCZ" | cut -f1))"
|
echo "==> Downloaded: $NETFILTER_TCZ ($(du -h "$NETFILTER_TCZ" | cut -f1))"
|
||||||
|
verify_checksum "$NETFILTER_TCZ" "$NETFILTER_TCZ_SHA256" "netfilter TCZ"
|
||||||
else
|
else
|
||||||
echo "WARN: Failed to download netfilter modules. kube-proxy may not work."
|
echo "WARN: Failed to download netfilter modules. kube-proxy may not work."
|
||||||
rm -f "$NETFILTER_TCZ"
|
rm -f "$NETFILTER_TCZ"
|
||||||
@@ -131,6 +216,7 @@ else
|
|||||||
if wget -q --show-progress -O "$NET_BRIDGING_TCZ" "$NET_BRIDGING_TCZ_URL" 2>/dev/null || \
|
if wget -q --show-progress -O "$NET_BRIDGING_TCZ" "$NET_BRIDGING_TCZ_URL" 2>/dev/null || \
|
||||||
curl -fSL "$NET_BRIDGING_TCZ_URL" -o "$NET_BRIDGING_TCZ" 2>/dev/null; then
|
curl -fSL "$NET_BRIDGING_TCZ_URL" -o "$NET_BRIDGING_TCZ" 2>/dev/null; then
|
||||||
echo "==> Downloaded: $NET_BRIDGING_TCZ ($(du -h "$NET_BRIDGING_TCZ" | cut -f1))"
|
echo "==> Downloaded: $NET_BRIDGING_TCZ ($(du -h "$NET_BRIDGING_TCZ" | cut -f1))"
|
||||||
|
verify_checksum "$NET_BRIDGING_TCZ" "$NET_BRIDGING_TCZ_SHA256" "net-bridging TCZ"
|
||||||
else
|
else
|
||||||
echo "WARN: Failed to download net-bridging modules. CNI bridge may not work."
|
echo "WARN: Failed to download net-bridging modules. CNI bridge may not work."
|
||||||
rm -f "$NET_BRIDGING_TCZ"
|
rm -f "$NET_BRIDGING_TCZ"
|
||||||
@@ -148,6 +234,7 @@ else
|
|||||||
if wget -q --show-progress -O "$IPTABLES_TCZ" "$IPTABLES_TCZ_URL" 2>/dev/null || \
|
if wget -q --show-progress -O "$IPTABLES_TCZ" "$IPTABLES_TCZ_URL" 2>/dev/null || \
|
||||||
curl -fSL "$IPTABLES_TCZ_URL" -o "$IPTABLES_TCZ" 2>/dev/null; then
|
curl -fSL "$IPTABLES_TCZ_URL" -o "$IPTABLES_TCZ" 2>/dev/null; then
|
||||||
echo "==> Downloaded: $IPTABLES_TCZ ($(du -h "$IPTABLES_TCZ" | cut -f1))"
|
echo "==> Downloaded: $IPTABLES_TCZ ($(du -h "$IPTABLES_TCZ" | cut -f1))"
|
||||||
|
verify_checksum "$IPTABLES_TCZ" "$IPTABLES_TCZ_SHA256" "iptables TCZ"
|
||||||
else
|
else
|
||||||
echo "WARN: Failed to download iptables. KubeSolo bundles its own but this is a fallback."
|
echo "WARN: Failed to download iptables. KubeSolo bundles its own but this is a fallback."
|
||||||
rm -f "$IPTABLES_TCZ"
|
rm -f "$IPTABLES_TCZ"
|
||||||
|
|||||||
88
build/scripts/fetch-rpi-firmware.sh
Executable file
88
build/scripts/fetch-rpi-firmware.sh
Executable file
@@ -0,0 +1,88 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# fetch-rpi-firmware.sh — Download Raspberry Pi firmware blobs for boot
|
||||||
|
#
|
||||||
|
# Downloads firmware from the official raspberrypi/firmware GitHub repository.
|
||||||
|
# Extracts only the boot files needed: start*.elf, fixup*.dat, DTBs, bootcode.bin.
|
||||||
|
#
|
||||||
|
# Output: build/cache/rpi-firmware/ containing all required boot files.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
||||||
|
|
||||||
|
# shellcheck source=../config/versions.env
|
||||||
|
. "$SCRIPT_DIR/../config/versions.env"
|
||||||
|
|
||||||
|
RPI_FW_DIR="$CACHE_DIR/rpi-firmware"
|
||||||
|
RPI_FW_ARCHIVE="$CACHE_DIR/rpi-firmware-${RPI_FIRMWARE_TAG}.tar.gz"
|
||||||
|
|
||||||
|
# --- Skip if already fetched ---
|
||||||
|
if [ -d "$RPI_FW_DIR" ] && [ -f "$RPI_FW_DIR/start4.elf" ]; then
|
||||||
|
echo "==> RPi firmware already cached: $RPI_FW_DIR"
|
||||||
|
echo " Files: $(ls "$RPI_FW_DIR" | wc -l)"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Downloading Raspberry Pi firmware (tag: ${RPI_FIRMWARE_TAG})..."
|
||||||
|
mkdir -p "$CACHE_DIR" "$RPI_FW_DIR"
|
||||||
|
|
||||||
|
# --- Download firmware archive ---
|
||||||
|
if [ ! -f "$RPI_FW_ARCHIVE" ]; then
|
||||||
|
echo " URL: $RPI_FIRMWARE_URL"
|
||||||
|
wget -q --show-progress -O "$RPI_FW_ARCHIVE" "$RPI_FIRMWARE_URL" 2>/dev/null || \
|
||||||
|
curl -fSL "$RPI_FIRMWARE_URL" -o "$RPI_FW_ARCHIVE"
|
||||||
|
echo " Downloaded: $(du -h "$RPI_FW_ARCHIVE" | cut -f1)"
|
||||||
|
else
|
||||||
|
echo " Archive already cached: $(du -h "$RPI_FW_ARCHIVE" | cut -f1)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Extract boot files only ---
|
||||||
|
echo "==> Extracting boot files..."
|
||||||
|
TEMP_DIR=$(mktemp -d)
|
||||||
|
trap 'rm -rf "$TEMP_DIR"' EXIT
|
||||||
|
|
||||||
|
# Extract only the boot/ directory from the archive
|
||||||
|
# Archive structure: firmware-<tag>/boot/...
|
||||||
|
tar -xzf "$RPI_FW_ARCHIVE" -C "$TEMP_DIR" --strip-components=1 --wildcards '*/boot/'
|
||||||
|
|
||||||
|
BOOT_SRC="$TEMP_DIR/boot"
|
||||||
|
if [ ! -d "$BOOT_SRC" ]; then
|
||||||
|
echo "ERROR: boot/ directory not found in firmware archive"
|
||||||
|
ls -la "$TEMP_DIR"/
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Copy GPU firmware (required for boot)
|
||||||
|
for f in "$BOOT_SRC"/start*.elf "$BOOT_SRC"/fixup*.dat; do
|
||||||
|
[ -f "$f" ] && cp "$f" "$RPI_FW_DIR/"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Copy bootcode.bin (first-stage boot for Pi 3 and older)
|
||||||
|
[ -f "$BOOT_SRC/bootcode.bin" ] && cp "$BOOT_SRC/bootcode.bin" "$RPI_FW_DIR/"
|
||||||
|
|
||||||
|
# Copy Device Tree Blobs for Pi 4 + Pi 5
|
||||||
|
for dtb in bcm2711-rpi-4-b.dtb bcm2711-rpi-400.dtb bcm2711-rpi-cm4.dtb \
|
||||||
|
bcm2712-rpi-5-b.dtb bcm2712d0-rpi-5-b.dtb; do
|
||||||
|
[ -f "$BOOT_SRC/$dtb" ] && cp "$BOOT_SRC/$dtb" "$RPI_FW_DIR/"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Copy overlays directory (needed for config.txt dtoverlay= directives)
|
||||||
|
if [ -d "$BOOT_SRC/overlays" ]; then
|
||||||
|
mkdir -p "$RPI_FW_DIR/overlays"
|
||||||
|
# Only copy overlays we actually use (disable-wifi, disable-bt)
|
||||||
|
for overlay in disable-wifi.dtbo disable-bt.dtbo; do
|
||||||
|
[ -f "$BOOT_SRC/overlays/$overlay" ] && \
|
||||||
|
cp "$BOOT_SRC/overlays/$overlay" "$RPI_FW_DIR/overlays/"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
trap - EXIT
|
||||||
|
rm -rf "$TEMP_DIR"
|
||||||
|
|
||||||
|
# --- Summary ---
|
||||||
|
echo ""
|
||||||
|
echo "==> RPi firmware extracted to: $RPI_FW_DIR"
|
||||||
|
echo " Files:"
|
||||||
|
ls -1 "$RPI_FW_DIR" | head -20
|
||||||
|
echo " Total size: $(du -sh "$RPI_FW_DIR" | cut -f1)"
|
||||||
@@ -8,6 +8,16 @@ CACHE_DIR="${CACHE_DIR:-$PROJECT_ROOT/build/cache}"
|
|||||||
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
||||||
ROOTFS="$ROOTFS_DIR/rootfs"
|
ROOTFS="$ROOTFS_DIR/rootfs"
|
||||||
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
|
INJECT_ARCH="${TARGET_ARCH:-amd64}"
|
||||||
|
|
||||||
|
# Architecture-specific paths
|
||||||
|
if [ "$INJECT_ARCH" = "arm64" ]; then
|
||||||
|
LIB_ARCH="aarch64-linux-gnu"
|
||||||
|
LD_SO="/lib/ld-linux-aarch64.so.1"
|
||||||
|
else
|
||||||
|
LIB_ARCH="x86_64-linux-gnu"
|
||||||
|
LD_SO="/lib64/ld-linux-x86-64.so.2"
|
||||||
|
fi
|
||||||
|
|
||||||
if [ ! -d "$ROOTFS" ]; then
|
if [ ! -d "$ROOTFS" ]; then
|
||||||
echo "ERROR: Rootfs not found: $ROOTFS"
|
echo "ERROR: Rootfs not found: $ROOTFS"
|
||||||
@@ -15,7 +25,11 @@ if [ ! -d "$ROOTFS" ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
KUBESOLO_BIN="$CACHE_DIR/kubesolo"
|
if [ "$INJECT_ARCH" = "arm64" ]; then
|
||||||
|
KUBESOLO_BIN="$CACHE_DIR/kubesolo-arm64"
|
||||||
|
else
|
||||||
|
KUBESOLO_BIN="$CACHE_DIR/kubesolo"
|
||||||
|
fi
|
||||||
if [ ! -f "$KUBESOLO_BIN" ]; then
|
if [ ! -f "$KUBESOLO_BIN" ]; then
|
||||||
echo "ERROR: KubeSolo binary not found: $KUBESOLO_BIN"
|
echo "ERROR: KubeSolo binary not found: $KUBESOLO_BIN"
|
||||||
echo "See fetch-components.sh output for instructions."
|
echo "See fetch-components.sh output for instructions."
|
||||||
@@ -41,10 +55,44 @@ rm -f "$ROOTFS/sbin/init"
|
|||||||
cp "$PROJECT_ROOT/init/init.sh" "$ROOTFS/sbin/init"
|
cp "$PROJECT_ROOT/init/init.sh" "$ROOTFS/sbin/init"
|
||||||
chmod +x "$ROOTFS/sbin/init"
|
chmod +x "$ROOTFS/sbin/init"
|
||||||
|
|
||||||
# Init stages
|
# Replace the upstream /init at the rootfs root with our staged init.
|
||||||
|
# The kernel ALWAYS runs /init when booting from an initramfs (legacy root-mount
|
||||||
|
# fallback otherwise). piCore/TC ship their own /init; ours has to take its
|
||||||
|
# place so the kernel runs our staged boot, not piCore's TCE handler.
|
||||||
|
rm -f "$ROOTFS/init"
|
||||||
|
cp "$PROJECT_ROOT/init/init.sh" "$ROOTFS/init"
|
||||||
|
chmod +x "$ROOTFS/init"
|
||||||
|
echo " Installed staged init at /init and /sbin/init"
|
||||||
|
|
||||||
|
# --- 2b. BusyBox override for ARM64 ---
|
||||||
|
# piCore64 v15's BusyBox is dynamically linked and uses ARM instructions that
|
||||||
|
# QEMU virt cannot emulate even with -cpu max, causing applets (mkdir, uname,
|
||||||
|
# etc.) to SIGILL. Replace with the host's statically-linked busybox-static
|
||||||
|
# package, which is built for generic ARMv8-A and runs anywhere.
|
||||||
|
#
|
||||||
|
# On x86 builds this isn't an issue (TC's BusyBox works fine on QEMU x86).
|
||||||
|
if [ "$INJECT_ARCH" = "arm64" ] && [ -x /bin/busybox ]; then
|
||||||
|
if file /bin/busybox 2>/dev/null | grep -q 'statically linked'; then
|
||||||
|
cp /bin/busybox "$ROOTFS/bin/busybox"
|
||||||
|
# busybox.suid is used by mount/su/etc. Same binary; suid bit applied
|
||||||
|
# separately. We don't need suid for our use (init runs as PID 1 / uid 0).
|
||||||
|
cp /bin/busybox "$ROOTFS/bin/busybox.suid"
|
||||||
|
chmod +x "$ROOTFS/bin/busybox" "$ROOTFS/bin/busybox.suid"
|
||||||
|
echo " Replaced piCore BusyBox with host's static busybox ($(du -h /bin/busybox | cut -f1))"
|
||||||
|
else
|
||||||
|
echo " WARN: /bin/busybox on host is not static; piCore BusyBox kept (may crash in QEMU virt)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Init stages — copy NN-name.sh files only. functions.sh is a shared library
|
||||||
|
# (sourced by init.sh proper), not a numbered stage; if it ends up in init.d
|
||||||
|
# the main loop will try to run it as a stage and fail.
|
||||||
mkdir -p "$ROOTFS/usr/lib/kubesolo-os/init.d"
|
mkdir -p "$ROOTFS/usr/lib/kubesolo-os/init.d"
|
||||||
for stage in "$PROJECT_ROOT"/init/lib/*.sh; do
|
for stage in "$PROJECT_ROOT"/init/lib/*.sh; do
|
||||||
[ -f "$stage" ] || continue
|
[ -f "$stage" ] || continue
|
||||||
|
case "$(basename "$stage")" in
|
||||||
|
functions.sh) continue ;;
|
||||||
|
esac
|
||||||
cp "$stage" "$ROOTFS/usr/lib/kubesolo-os/init.d/"
|
cp "$stage" "$ROOTFS/usr/lib/kubesolo-os/init.d/"
|
||||||
chmod +x "$ROOTFS/usr/lib/kubesolo-os/init.d/$(basename "$stage")"
|
chmod +x "$ROOTFS/usr/lib/kubesolo-os/init.d/$(basename "$stage")"
|
||||||
done
|
done
|
||||||
@@ -68,30 +116,51 @@ for lib in network.sh health.sh; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Cloud-init binary (Go, built separately)
|
# Cloud-init binary (Go, built separately)
|
||||||
CLOUDINIT_BIN="$CACHE_DIR/kubesolo-cloudinit"
|
# Try arch-specific binary first, then fall back to generic
|
||||||
|
CLOUDINIT_BIN="$CACHE_DIR/kubesolo-cloudinit-linux-$INJECT_ARCH"
|
||||||
|
[ ! -f "$CLOUDINIT_BIN" ] && CLOUDINIT_BIN="$CACHE_DIR/kubesolo-cloudinit"
|
||||||
if [ -f "$CLOUDINIT_BIN" ]; then
|
if [ -f "$CLOUDINIT_BIN" ]; then
|
||||||
cp "$CLOUDINIT_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit"
|
cp "$CLOUDINIT_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit"
|
||||||
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit"
|
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-cloudinit"
|
||||||
echo " Installed cloud-init binary ($(du -h "$CLOUDINIT_BIN" | cut -f1))"
|
echo " Installed cloud-init binary ($(du -h "$CLOUDINIT_BIN" | cut -f1))"
|
||||||
else
|
else
|
||||||
echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' to build)"
|
echo " WARN: Cloud-init binary not found (run 'make build-cloudinit' or 'make build-cross' to build)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Update agent binary (Go, built separately)
|
# Update agent binary (Go, built separately)
|
||||||
UPDATE_BIN="$CACHE_DIR/kubesolo-update"
|
# Try arch-specific binary first, then fall back to generic
|
||||||
|
UPDATE_BIN="$CACHE_DIR/kubesolo-update-linux-$INJECT_ARCH"
|
||||||
|
[ ! -f "$UPDATE_BIN" ] && UPDATE_BIN="$CACHE_DIR/kubesolo-update"
|
||||||
if [ -f "$UPDATE_BIN" ]; then
|
if [ -f "$UPDATE_BIN" ]; then
|
||||||
cp "$UPDATE_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
cp "$UPDATE_BIN" "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||||
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
chmod +x "$ROOTFS/usr/lib/kubesolo-os/kubesolo-update"
|
||||||
echo " Installed update agent ($(du -h "$UPDATE_BIN" | cut -f1))"
|
echo " Installed update agent ($(du -h "$UPDATE_BIN" | cut -f1))"
|
||||||
else
|
else
|
||||||
echo " WARN: Update agent not found (run 'make build-update-agent' to build)"
|
echo " WARN: Update agent not found (run 'make build-update-agent' or 'make build-cross' to build)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- 3. Custom kernel or TCZ kernel modules ---
|
# --- 3. Custom kernel or TCZ kernel modules ---
|
||||||
# If a custom kernel was built (with CONFIG_CGROUP_BPF=y), use it.
|
# If a custom kernel was built (with CONFIG_CGROUP_BPF=y), use it.
|
||||||
# Otherwise fall back to TCZ-extracted modules with manual modules.dep.
|
# Otherwise fall back to TCZ-extracted modules with manual modules.dep.
|
||||||
CUSTOM_KERNEL_DIR="$CACHE_DIR/custom-kernel"
|
if [ "$INJECT_ARCH" = "arm64" ]; then
|
||||||
CUSTOM_VMLINUZ="$CUSTOM_KERNEL_DIR/vmlinuz"
|
# TARGET_VARIANT selects which ARM64 kernel to consume:
|
||||||
|
# rpi -> $CACHE_DIR/custom-kernel-rpi/ (raspberrypi/linux fork)
|
||||||
|
# generic -> $CACHE_DIR/kernel-arm64-generic/ (mainline kernel.org LTS)
|
||||||
|
# Default is rpi for backwards compatibility with existing rpi-image target.
|
||||||
|
TARGET_VARIANT="${TARGET_VARIANT:-rpi}"
|
||||||
|
case "$TARGET_VARIANT" in
|
||||||
|
generic) CUSTOM_KERNEL_DIR="$CACHE_DIR/kernel-arm64-generic" ;;
|
||||||
|
rpi) CUSTOM_KERNEL_DIR="$CACHE_DIR/custom-kernel-rpi" ;;
|
||||||
|
*)
|
||||||
|
echo "ERROR: TARGET_VARIANT must be 'rpi' or 'generic' (got: $TARGET_VARIANT)"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
CUSTOM_VMLINUZ="$CUSTOM_KERNEL_DIR/Image"
|
||||||
|
else
|
||||||
|
CUSTOM_KERNEL_DIR="$CACHE_DIR/custom-kernel"
|
||||||
|
CUSTOM_VMLINUZ="$CUSTOM_KERNEL_DIR/vmlinuz"
|
||||||
|
fi
|
||||||
CUSTOM_MODULES="$CUSTOM_KERNEL_DIR/modules"
|
CUSTOM_MODULES="$CUSTOM_KERNEL_DIR/modules"
|
||||||
|
|
||||||
# Detect kernel version from rootfs
|
# Detect kernel version from rootfs
|
||||||
@@ -100,8 +169,16 @@ for d in "$ROOTFS"/lib/modules/*/; do
|
|||||||
[ -d "$d" ] && KVER="$(basename "$d")" && break
|
[ -d "$d" ] && KVER="$(basename "$d")" && break
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Fallback: detect from custom kernel modules directory
|
||||||
|
if [ -z "$KVER" ] && [ -d "$CUSTOM_MODULES/lib/modules" ]; then
|
||||||
|
for d in "$CUSTOM_MODULES"/lib/modules/*/; do
|
||||||
|
[ -d "$d" ] && KVER="$(basename "$d")" && break
|
||||||
|
done
|
||||||
|
echo " Detected kernel version from custom kernel: $KVER"
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -z "$KVER" ]; then
|
if [ -z "$KVER" ]; then
|
||||||
echo " WARN: Could not detect kernel version from rootfs"
|
echo " WARN: Could not detect kernel version from rootfs or custom kernel"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo " Kernel version: $KVER"
|
echo " Kernel version: $KVER"
|
||||||
@@ -130,20 +207,49 @@ if [ -f "$CUSTOM_VMLINUZ" ] && [ -d "$CUSTOM_MODULES/lib/modules/$KVER" ]; then
|
|||||||
[ -f "$CUSTOM_MOD_DIR/$f" ] && cp "$CUSTOM_MOD_DIR/$f" "$ROOTFS/lib/modules/$KVER/"
|
[ -f "$CUSTOM_MOD_DIR/$f" ] && cp "$CUSTOM_MOD_DIR/$f" "$ROOTFS/lib/modules/$KVER/"
|
||||||
done
|
done
|
||||||
|
|
||||||
# Use modprobe --show-depends to resolve each module + its transitive deps
|
# Resolve and install modules from modules.list + transitive deps
|
||||||
MODULES_LIST="$PROJECT_ROOT/build/config/modules.list"
|
if [ "$INJECT_ARCH" = "arm64" ]; then
|
||||||
|
MODULES_LIST="$PROJECT_ROOT/build/config/modules-arm64.list"
|
||||||
|
else
|
||||||
|
MODULES_LIST="$PROJECT_ROOT/build/config/modules.list"
|
||||||
|
fi
|
||||||
NEEDED_MODS=$(mktemp)
|
NEEDED_MODS=$(mktemp)
|
||||||
|
|
||||||
|
# Try modprobe first (works for same-arch builds)
|
||||||
|
MODPROBE_WORKS=true
|
||||||
|
FIRST_MOD=$(grep -v '^#' "$MODULES_LIST" | grep -v '^$' | head -1 | xargs)
|
||||||
|
if ! modprobe -S "$KVER" -d "$CUSTOM_MODULES" --show-depends "$FIRST_MOD" >/dev/null 2>&1; then
|
||||||
|
MODPROBE_WORKS=false
|
||||||
|
echo " modprobe cannot resolve modules (cross-arch build) — using find fallback"
|
||||||
|
fi
|
||||||
|
|
||||||
while IFS= read -r mod; do
|
while IFS= read -r mod; do
|
||||||
# Skip comments and blank lines
|
# Skip comments and blank lines
|
||||||
case "$mod" in \#*|"") continue ;; esac
|
case "$mod" in \#*|"") continue ;; esac
|
||||||
mod=$(echo "$mod" | xargs) # trim whitespace
|
mod=$(echo "$mod" | xargs) # trim whitespace
|
||||||
[ -z "$mod" ] && continue
|
[ -z "$mod" ] && continue
|
||||||
|
|
||||||
# modprobe -S <ver> -d <root> --show-depends <module> lists all deps in load order
|
if [ "$MODPROBE_WORKS" = true ]; then
|
||||||
# Output format: "insmod /path/to/module.ko" — extract path with awk
|
# modprobe -S <ver> -d <root> --show-depends <module> lists all deps in load order
|
||||||
modprobe -S "$KVER" -d "$CUSTOM_MODULES" --show-depends "$mod" 2>/dev/null \
|
modprobe -S "$KVER" -d "$CUSTOM_MODULES" --show-depends "$mod" 2>/dev/null \
|
||||||
| awk '/^insmod/{print $2}' >> "$NEEDED_MODS" \
|
| awk '/^insmod/{print $2}' >> "$NEEDED_MODS" \
|
||||||
|| echo " WARN: modprobe could not resolve: $mod"
|
|| echo " WARN: modprobe could not resolve: $mod"
|
||||||
|
else
|
||||||
|
# Cross-arch fallback: find module by name in kernel tree
|
||||||
|
found=$(find "$CUSTOM_MOD_DIR/kernel" -name "${mod}.ko" -o -name "${mod}.ko.xz" -o -name "${mod}.ko.gz" -o -name "${mod}.ko.zst" 2>/dev/null | head -1)
|
||||||
|
if [ -n "$found" ]; then
|
||||||
|
echo "$found" >> "$NEEDED_MODS"
|
||||||
|
else
|
||||||
|
# Try replacing hyphens with underscores and vice versa
|
||||||
|
mod_alt=$(echo "$mod" | tr '-' '_')
|
||||||
|
found=$(find "$CUSTOM_MOD_DIR/kernel" -name "${mod_alt}.ko" -o -name "${mod_alt}.ko.xz" -o -name "${mod_alt}.ko.gz" -o -name "${mod_alt}.ko.zst" 2>/dev/null | head -1)
|
||||||
|
if [ -n "$found" ]; then
|
||||||
|
echo "$found" >> "$NEEDED_MODS"
|
||||||
|
else
|
||||||
|
echo " WARN: could not find module: $mod"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
done < "$MODULES_LIST"
|
done < "$MODULES_LIST"
|
||||||
|
|
||||||
# Deduplicate and copy each needed module
|
# Deduplicate and copy each needed module
|
||||||
@@ -291,21 +397,22 @@ if [ -f /usr/sbin/xtables-nft-multi ]; then
|
|||||||
ln -sf xtables-nft-multi "$ROOTFS/usr/sbin/$cmd"
|
ln -sf xtables-nft-multi "$ROOTFS/usr/sbin/$cmd"
|
||||||
done
|
done
|
||||||
|
|
||||||
# Copy required shared libraries
|
# Copy required shared libraries (architecture-aware paths)
|
||||||
mkdir -p "$ROOTFS/usr/lib/x86_64-linux-gnu" "$ROOTFS/lib/x86_64-linux-gnu" "$ROOTFS/lib64"
|
mkdir -p "$ROOTFS/usr/lib/$LIB_ARCH" "$ROOTFS/lib/$LIB_ARCH"
|
||||||
|
[ "$INJECT_ARCH" != "arm64" ] && mkdir -p "$ROOTFS/lib64"
|
||||||
for lib in \
|
for lib in \
|
||||||
/lib/x86_64-linux-gnu/libxtables.so.12* \
|
"/lib/$LIB_ARCH/libxtables.so.12"* \
|
||||||
/lib/x86_64-linux-gnu/libmnl.so.0* \
|
"/lib/$LIB_ARCH/libmnl.so.0"* \
|
||||||
/lib/x86_64-linux-gnu/libnftnl.so.11* \
|
"/lib/$LIB_ARCH/libnftnl.so.11"* \
|
||||||
/lib/x86_64-linux-gnu/libc.so.6 \
|
"/lib/$LIB_ARCH/libc.so.6" \
|
||||||
/lib64/ld-linux-x86-64.so.2; do
|
"$LD_SO"; do
|
||||||
[ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true
|
[ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true
|
||||||
done
|
done
|
||||||
|
|
||||||
# Copy xtables modules directory (match extensions)
|
# Copy xtables modules directory (match extensions)
|
||||||
if [ -d /usr/lib/x86_64-linux-gnu/xtables ]; then
|
if [ -d "/usr/lib/$LIB_ARCH/xtables" ]; then
|
||||||
mkdir -p "$ROOTFS/usr/lib/x86_64-linux-gnu/xtables"
|
mkdir -p "$ROOTFS/usr/lib/$LIB_ARCH/xtables"
|
||||||
cp -a /usr/lib/x86_64-linux-gnu/xtables/*.so "$ROOTFS/usr/lib/x86_64-linux-gnu/xtables/" 2>/dev/null || true
|
cp -a "/usr/lib/$LIB_ARCH/xtables/"*.so "$ROOTFS/usr/lib/$LIB_ARCH/xtables/" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo " Installed iptables-nft (xtables-nft-multi) + shared libs"
|
echo " Installed iptables-nft (xtables-nft-multi) + shared libs"
|
||||||
@@ -314,11 +421,16 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Kernel modules list (for init to load at boot)
|
# Kernel modules list (for init to load at boot)
|
||||||
cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
if [ "$INJECT_ARCH" = "arm64" ]; then
|
||||||
|
cp "$PROJECT_ROOT/build/config/modules-arm64.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
||||||
|
else
|
||||||
|
cp "$PROJECT_ROOT/build/config/modules.list" "$ROOTFS/usr/lib/kubesolo-os/modules.list"
|
||||||
|
fi
|
||||||
|
|
||||||
# --- 4. Sysctl config ---
|
# --- 4. Sysctl config ---
|
||||||
mkdir -p "$ROOTFS/etc/sysctl.d"
|
mkdir -p "$ROOTFS/etc/sysctl.d"
|
||||||
cp "$PROJECT_ROOT/build/rootfs/etc/sysctl.d/k8s.conf" "$ROOTFS/etc/sysctl.d/k8s.conf"
|
cp "$PROJECT_ROOT/build/rootfs/etc/sysctl.d/k8s.conf" "$ROOTFS/etc/sysctl.d/k8s.conf"
|
||||||
|
cp "$PROJECT_ROOT/build/rootfs/etc/sysctl.d/security.conf" "$ROOTFS/etc/sysctl.d/security.conf"
|
||||||
|
|
||||||
# --- 5. OS metadata ---
|
# --- 5. OS metadata ---
|
||||||
echo "$VERSION" > "$ROOTFS/etc/kubesolo-os-version"
|
echo "$VERSION" > "$ROOTFS/etc/kubesolo-os-version"
|
||||||
@@ -350,7 +462,47 @@ mkdir -p "$ROOTFS/usr/local"
|
|||||||
mkdir -p "$ROOTFS/mnt/data"
|
mkdir -p "$ROOTFS/mnt/data"
|
||||||
mkdir -p "$ROOTFS/run/containerd"
|
mkdir -p "$ROOTFS/run/containerd"
|
||||||
|
|
||||||
# --- 8. Ensure /etc/hosts and /etc/resolv.conf exist ---
|
# --- 8. CA certificates (required for containerd to pull from registries) ---
|
||||||
|
mkdir -p "$ROOTFS/etc/ssl/certs"
|
||||||
|
if [ -f /etc/ssl/certs/ca-certificates.crt ]; then
|
||||||
|
cp /etc/ssl/certs/ca-certificates.crt "$ROOTFS/etc/ssl/certs/ca-certificates.crt"
|
||||||
|
echo " Installed CA certificates bundle"
|
||||||
|
elif [ -f /etc/pki/tls/certs/ca-bundle.crt ]; then
|
||||||
|
cp /etc/pki/tls/certs/ca-bundle.crt "$ROOTFS/etc/ssl/certs/ca-certificates.crt"
|
||||||
|
echo " Installed CA certificates bundle (from ca-bundle.crt)"
|
||||||
|
else
|
||||||
|
echo " WARN: No CA certificates found in builder — TLS verification will fail"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- 9. AppArmor parser + profiles ---
|
||||||
|
echo " Installing AppArmor..."
|
||||||
|
if [ -f /usr/sbin/apparmor_parser ]; then
|
||||||
|
mkdir -p "$ROOTFS/usr/sbin"
|
||||||
|
cp /usr/sbin/apparmor_parser "$ROOTFS/usr/sbin/apparmor_parser"
|
||||||
|
chmod +x "$ROOTFS/usr/sbin/apparmor_parser"
|
||||||
|
|
||||||
|
# Copy shared libraries required by apparmor_parser
|
||||||
|
for lib in "/lib/$LIB_ARCH/libapparmor.so.1"*; do
|
||||||
|
[ -e "$lib" ] && cp -aL "$lib" "$ROOTFS${lib}" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
|
||||||
|
echo " Installed apparmor_parser + shared libs"
|
||||||
|
else
|
||||||
|
echo " WARN: apparmor_parser not found in builder (install apparmor package)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Copy AppArmor profiles
|
||||||
|
APPARMOR_PROFILES="$PROJECT_ROOT/build/rootfs/etc/apparmor.d"
|
||||||
|
if [ -d "$APPARMOR_PROFILES" ]; then
|
||||||
|
mkdir -p "$ROOTFS/etc/apparmor.d"
|
||||||
|
cp "$APPARMOR_PROFILES"/* "$ROOTFS/etc/apparmor.d/" 2>/dev/null || true
|
||||||
|
PROFILE_COUNT=$(ls "$ROOTFS/etc/apparmor.d/" 2>/dev/null | wc -l)
|
||||||
|
echo " Installed $PROFILE_COUNT AppArmor profiles"
|
||||||
|
else
|
||||||
|
echo " WARN: No AppArmor profiles found at $APPARMOR_PROFILES"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- 10. Ensure /etc/hosts and /etc/resolv.conf exist ---
|
||||||
if [ ! -f "$ROOTFS/etc/hosts" ]; then
|
if [ ! -f "$ROOTFS/etc/hosts" ]; then
|
||||||
cat > "$ROOTFS/etc/hosts" << EOF
|
cat > "$ROOTFS/etc/hosts" << EOF
|
||||||
127.0.0.1 localhost
|
127.0.0.1 localhost
|
||||||
|
|||||||
150
build/scripts/push-oci-artifact.sh
Executable file
150
build/scripts/push-oci-artifact.sh
Executable file
@@ -0,0 +1,150 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# push-oci-artifact.sh — Publish a KubeSolo OS update artifact to an OCI registry.
|
||||||
|
#
|
||||||
|
# Produces the artifact format consumed by `kubesolo-update --registry`:
|
||||||
|
#
|
||||||
|
# <registry>/<repo>:<version>-<arch> per-arch manifest, layers:
|
||||||
|
# * vmlinuz (Image on arm64) → application/vnd.kubesolo.os.kernel.v1+octet-stream
|
||||||
|
# * kubesolo-os.gz → application/vnd.kubesolo.os.initramfs.v1+gzip
|
||||||
|
# annotations:
|
||||||
|
# io.kubesolo.os.version
|
||||||
|
# io.kubesolo.os.channel
|
||||||
|
# io.kubesolo.os.architecture
|
||||||
|
# io.kubesolo.os.min_compatible_version (optional)
|
||||||
|
#
|
||||||
|
# After running this for each architecture, combine the per-arch tags into a
|
||||||
|
# multi-arch index with `oras manifest index create` (see end of script).
|
||||||
|
#
|
||||||
|
# Requires: oras (>= 1.2), curl, jq.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# build/scripts/push-oci-artifact.sh \
|
||||||
|
# --registry ghcr.io/portainer/kubesolo-os \
|
||||||
|
# --arch amd64 \
|
||||||
|
# --channel stable \
|
||||||
|
# [--min-compatible-version v0.2.0]
|
||||||
|
#
|
||||||
|
# Authentication: oras reads ~/.docker/config.json. In CI, run
|
||||||
|
# `oras login ghcr.io -u USER -p TOKEN` before invoking this script
|
||||||
|
# (or set DOCKER_CONFIG to a directory with config.json).
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
|
OUTPUT_DIR="$PROJECT_ROOT/output"
|
||||||
|
CACHE_DIR="$PROJECT_ROOT/build/cache"
|
||||||
|
|
||||||
|
REGISTRY=""
|
||||||
|
ARCH=""
|
||||||
|
CHANNEL="stable"
|
||||||
|
MIN_COMPATIBLE_VERSION=""
|
||||||
|
RELEASE_NOTES=""
|
||||||
|
|
||||||
|
while [ $# -gt 0 ]; do
|
||||||
|
case "$1" in
|
||||||
|
--registry) REGISTRY="$2"; shift 2 ;;
|
||||||
|
--arch) ARCH="$2"; shift 2 ;;
|
||||||
|
--channel) CHANNEL="$2"; shift 2 ;;
|
||||||
|
--min-compatible-version) MIN_COMPATIBLE_VERSION="$2"; shift 2 ;;
|
||||||
|
--release-notes) RELEASE_NOTES="$2"; shift 2 ;;
|
||||||
|
*) echo "Unknown option: $1" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$REGISTRY" ] || [ -z "$ARCH" ]; then
|
||||||
|
echo "Usage: $0 --registry REGISTRY/REPO --arch (amd64|arm64) [--channel stable] [--min-compatible-version vX.Y.Z]" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v oras >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: oras CLI not found. Install from https://oras.land/docs/installation/" >&2
|
||||||
|
echo " or apt-get install oras (Ubuntu 24.04+)" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Locate the artifacts. For arm64 the kernel is "Image"; everywhere else it's
|
||||||
|
# "vmlinuz". Initramfs is always kubesolo-os.gz.
|
||||||
|
case "$ARCH" in
|
||||||
|
amd64)
|
||||||
|
KERNEL="$CACHE_DIR/custom-kernel/vmlinuz"
|
||||||
|
[ -f "$KERNEL" ] || KERNEL="$OUTPUT_DIR/vmlinuz"
|
||||||
|
KERNEL_BASENAME="vmlinuz"
|
||||||
|
;;
|
||||||
|
arm64)
|
||||||
|
KERNEL="$CACHE_DIR/kernel-arm64-generic/Image"
|
||||||
|
KERNEL_BASENAME="vmlinuz" # we publish under the vmlinuz name regardless;
|
||||||
|
# the consumer looks up by media type, not filename.
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "ERROR: unsupported --arch $ARCH (use amd64 or arm64)" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
INITRAMFS="$PROJECT_ROOT/build/rootfs-work/kubesolo-os.gz"
|
||||||
|
|
||||||
|
if [ ! -f "$KERNEL" ]; then
|
||||||
|
echo "ERROR: kernel not found at $KERNEL" >&2
|
||||||
|
echo " Run 'make kernel' (amd64) or 'make kernel-arm64' (arm64) first." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ ! -f "$INITRAMFS" ]; then
|
||||||
|
echo "ERROR: initramfs not found at $INITRAMFS" >&2
|
||||||
|
echo " Run 'make initramfs' or 'make rootfs-arm64' first." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Stage files in a temp dir so the basenames in the manifest are clean.
|
||||||
|
STAGE="$(mktemp -d)"
|
||||||
|
trap 'rm -rf "$STAGE"' EXIT
|
||||||
|
cp "$KERNEL" "$STAGE/$KERNEL_BASENAME"
|
||||||
|
cp "$INITRAMFS" "$STAGE/kubesolo-os.gz"
|
||||||
|
|
||||||
|
KERNEL_MEDIA="application/vnd.kubesolo.os.kernel.v1+octet-stream"
|
||||||
|
INITRD_MEDIA="application/vnd.kubesolo.os.initramfs.v1+gzip"
|
||||||
|
|
||||||
|
REF="${REGISTRY}:${VERSION}-${ARCH}"
|
||||||
|
CHANNEL_REF="${REGISTRY}:${CHANNEL}-${ARCH}"
|
||||||
|
|
||||||
|
echo "==> Pushing ${REF}"
|
||||||
|
echo " kernel: $KERNEL ($(du -h "$KERNEL" | cut -f1))"
|
||||||
|
echo " initramfs: $INITRAMFS ($(du -h "$INITRAMFS" | cut -f1))"
|
||||||
|
|
||||||
|
ORAS_ANNOTATIONS=(
|
||||||
|
--annotation "io.kubesolo.os.version=${VERSION}"
|
||||||
|
--annotation "io.kubesolo.os.channel=${CHANNEL}"
|
||||||
|
--annotation "io.kubesolo.os.architecture=${ARCH}"
|
||||||
|
)
|
||||||
|
if [ -n "$MIN_COMPATIBLE_VERSION" ]; then
|
||||||
|
ORAS_ANNOTATIONS+=(--annotation "io.kubesolo.os.min_compatible_version=${MIN_COMPATIBLE_VERSION}")
|
||||||
|
fi
|
||||||
|
if [ -n "$RELEASE_NOTES" ]; then
|
||||||
|
ORAS_ANNOTATIONS+=(--annotation "io.kubesolo.os.release_notes=${RELEASE_NOTES}")
|
||||||
|
fi
|
||||||
|
ORAS_ANNOTATIONS+=(--annotation "io.kubesolo.os.release_date=$(date -u +%Y-%m-%dT%H:%M:%SZ)")
|
||||||
|
|
||||||
|
# oras push: --artifact-type sets the manifest artifactType field;
|
||||||
|
# file:type syntax sets per-layer media types.
|
||||||
|
(cd "$STAGE" && oras push "$REF" \
|
||||||
|
--artifact-type "application/vnd.kubesolo.os.update.v1+json" \
|
||||||
|
"${ORAS_ANNOTATIONS[@]}" \
|
||||||
|
"${KERNEL_BASENAME}:${KERNEL_MEDIA}" \
|
||||||
|
"kubesolo-os.gz:${INITRD_MEDIA}")
|
||||||
|
|
||||||
|
# Also tag as <channel>-<arch> so the manifest-index step can reference it
|
||||||
|
# stably across patch releases.
|
||||||
|
echo "==> Tagging ${CHANNEL_REF}"
|
||||||
|
oras tag "$REF" "${CHANNEL}-${ARCH}"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "==> Published:"
|
||||||
|
echo " ${REF}"
|
||||||
|
echo " ${CHANNEL_REF}"
|
||||||
|
echo ""
|
||||||
|
echo "To combine multi-arch into the channel index, run after both arches are pushed:"
|
||||||
|
echo ""
|
||||||
|
echo " oras manifest index create ${REGISTRY}:${CHANNEL} \\"
|
||||||
|
echo " ${REGISTRY}:${CHANNEL}-amd64,platform=linux/amd64 \\"
|
||||||
|
echo " ${REGISTRY}:${CHANNEL}-arm64,platform=linux/arm64"
|
||||||
|
echo ""
|
||||||
@@ -97,6 +97,11 @@ func cmdApply(configPath string) error {
|
|||||||
return fmt.Errorf("portainer edge agent: %w", err)
|
return fmt.Errorf("portainer edge agent: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 5. Write /etc/kubesolo/update.conf from updates: block (if any).
|
||||||
|
if err := cloudinit.ApplyUpdates(cfg, ""); err != nil {
|
||||||
|
return fmt.Errorf("updates: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// 5. Save persistent configs for next boot
|
// 5. Save persistent configs for next boot
|
||||||
if err := cloudinit.SaveHostname(cfg, persistDataDir+"/etc-kubesolo"); err != nil {
|
if err := cloudinit.SaveHostname(cfg, persistDataDir+"/etc-kubesolo"); err != nil {
|
||||||
slog.Warn("failed to save hostname", "error", err)
|
slog.Warn("failed to save hostname", "error", err)
|
||||||
|
|||||||
@@ -12,12 +12,30 @@ package cloudinit
|
|||||||
|
|
||||||
// Config is the top-level cloud-init configuration.
|
// Config is the top-level cloud-init configuration.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Hostname string `yaml:"hostname"`
|
Hostname string `yaml:"hostname"`
|
||||||
Network NetworkConfig `yaml:"network"`
|
Network NetworkConfig `yaml:"network"`
|
||||||
KubeSolo KubeSoloConfig `yaml:"kubesolo"`
|
KubeSolo KubeSoloConfig `yaml:"kubesolo"`
|
||||||
NTP NTPConfig `yaml:"ntp"`
|
NTP NTPConfig `yaml:"ntp"`
|
||||||
Airgap AirgapConfig `yaml:"airgap"`
|
Airgap AirgapConfig `yaml:"airgap"`
|
||||||
Portainer PortainerConfig `yaml:"portainer"`
|
Portainer PortainerConfig `yaml:"portainer"`
|
||||||
|
Updates UpdatesConfig `yaml:"updates"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdatesConfig configures the kubesolo-update agent. Written to
|
||||||
|
// /etc/kubesolo/update.conf on first boot. See update/pkg/config.
|
||||||
|
type UpdatesConfig struct {
|
||||||
|
// Server is the update server URL (HTTP or OCI registry).
|
||||||
|
Server string `yaml:"server"`
|
||||||
|
// Channel selects which channel to track ("stable", "beta", "edge").
|
||||||
|
// Empty = "stable".
|
||||||
|
Channel string `yaml:"channel"`
|
||||||
|
// MaintenanceWindow restricts apply to the given local time range,
|
||||||
|
// e.g. "03:00-05:00". Wrapping windows like "23:00-01:00" supported.
|
||||||
|
// Empty = no restriction.
|
||||||
|
MaintenanceWindow string `yaml:"maintenance_window"`
|
||||||
|
// PubKey is the path to the Ed25519 public key file used to verify
|
||||||
|
// signed update artifacts. Empty = signature verification disabled.
|
||||||
|
PubKey string `yaml:"pubkey"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NetworkConfig defines network settings.
|
// NetworkConfig defines network settings.
|
||||||
@@ -31,9 +49,23 @@ type NetworkConfig struct {
|
|||||||
|
|
||||||
// KubeSoloConfig defines KubeSolo-specific settings.
|
// KubeSoloConfig defines KubeSolo-specific settings.
|
||||||
type KubeSoloConfig struct {
|
type KubeSoloConfig struct {
|
||||||
ExtraFlags string `yaml:"extra-flags"`
|
ExtraFlags string `yaml:"extra-flags"`
|
||||||
LocalStorage *bool `yaml:"local-storage"`
|
LocalStorage *bool `yaml:"local-storage"`
|
||||||
ExtraSANs []string `yaml:"apiserver-extra-sans"`
|
LocalStorageSharedPath string `yaml:"local-storage-shared-path"`
|
||||||
|
ExtraSANs []string `yaml:"apiserver-extra-sans"`
|
||||||
|
Debug bool `yaml:"debug"`
|
||||||
|
PprofServer bool `yaml:"pprof-server"`
|
||||||
|
PortainerEdgeID string `yaml:"portainer-edge-id"`
|
||||||
|
PortainerEdgeKey string `yaml:"portainer-edge-key"`
|
||||||
|
PortainerEdgeAsync bool `yaml:"portainer-edge-async"`
|
||||||
|
// v1.1.4+: skip edge-optimised overrides, use upstream k8s defaults
|
||||||
|
// (useful for CI and powerful machines, disabled by default).
|
||||||
|
Full bool `yaml:"full"`
|
||||||
|
// v1.1.5+: disable IPv6 in the cluster.
|
||||||
|
DisableIPv6 bool `yaml:"disable-ipv6"`
|
||||||
|
// v1.1.5+: detect SQLite WAL corruption on startup and recover from
|
||||||
|
// unclean shutdowns (e.g. power loss). Recommended ON for edge devices.
|
||||||
|
DBWALRepair bool `yaml:"db-wal-repair"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NTPConfig defines NTP settings.
|
// NTPConfig defines NTP settings.
|
||||||
|
|||||||
85
cloud-init/examples/full-config.yaml
Normal file
85
cloud-init/examples/full-config.yaml
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# KubeSolo OS Cloud-Init — Full Configuration Reference
|
||||||
|
# Shows ALL supported KubeSolo parameters.
|
||||||
|
# Place at: /mnt/data/etc-kubesolo/cloud-init.yaml (on data partition)
|
||||||
|
# Or pass via boot param: kubesolo.cloudinit=/path/to/this.yaml
|
||||||
|
|
||||||
|
hostname: kubesolo-edge-01
|
||||||
|
|
||||||
|
network:
|
||||||
|
mode: dhcp
|
||||||
|
# interface: eth0 # Optional: specify interface (auto-detected if omitted)
|
||||||
|
# dns: # Optional: override DHCP-provided DNS
|
||||||
|
# - 8.8.8.8
|
||||||
|
|
||||||
|
kubesolo:
|
||||||
|
# Enable local-path-provisioner for persistent volumes (default: true)
|
||||||
|
local-storage: true
|
||||||
|
|
||||||
|
# Shared path for local-path-provisioner storage
|
||||||
|
local-storage-shared-path: "/mnt/shared"
|
||||||
|
|
||||||
|
# Extra SANs for API server TLS certificate
|
||||||
|
apiserver-extra-sans:
|
||||||
|
- kubesolo-edge-01.local
|
||||||
|
- 192.168.1.100
|
||||||
|
|
||||||
|
# Enable verbose debug logging
|
||||||
|
debug: false
|
||||||
|
|
||||||
|
# Enable Go pprof profiling server
|
||||||
|
pprof-server: false
|
||||||
|
|
||||||
|
# Portainer Edge Agent connection (alternative to portainer.edge-agent section)
|
||||||
|
# These generate --portainer-edge-id, --portainer-edge-key, --portainer-edge-async
|
||||||
|
# CLI flags for KubeSolo's built-in Edge Agent support.
|
||||||
|
portainer-edge-id: "your-edge-id"
|
||||||
|
portainer-edge-key: "your-edge-key"
|
||||||
|
portainer-edge-async: true
|
||||||
|
|
||||||
|
# KubeSolo v1.1.4+: skip the edge-optimised overrides and use upstream
|
||||||
|
# Kubernetes defaults. Useful for CI and high-spec machines. Default off.
|
||||||
|
full: false
|
||||||
|
|
||||||
|
# KubeSolo v1.1.5+: disable IPv6 throughout the cluster. Default off.
|
||||||
|
disable-ipv6: false
|
||||||
|
|
||||||
|
# KubeSolo v1.1.5+: detect SQLite WAL corruption at startup and recover
|
||||||
|
# from unclean shutdowns (e.g. power loss). Recommended ON for edge
|
||||||
|
# appliances that may lose power.
|
||||||
|
db-wal-repair: true
|
||||||
|
|
||||||
|
# Arbitrary extra flags passed directly to the KubeSolo binary
|
||||||
|
# extra-flags: "--disable traefik --disable servicelb"
|
||||||
|
|
||||||
|
# Update agent settings (written to /etc/kubesolo/update.conf on first boot).
|
||||||
|
# Omit any subfield to leave the corresponding default in place.
|
||||||
|
updates:
|
||||||
|
# Update server URL — HTTPS for the JSON+blob protocol, or an OCI registry
|
||||||
|
# reference (e.g. ghcr.io/portainer/kubesolo-os) when OCI distribution
|
||||||
|
# lands in v0.3.
|
||||||
|
server: "https://updates.kubesolo.example.com"
|
||||||
|
|
||||||
|
# Channel to track. "stable" is the default; "beta"/"edge" expose
|
||||||
|
# pre-release artifacts. The agent refuses to apply metadata whose
|
||||||
|
# channel doesn't match.
|
||||||
|
channel: "stable"
|
||||||
|
|
||||||
|
# Maintenance window (local time, HH:MM-HH:MM, wrapping midnight OK).
|
||||||
|
# `apply` refuses to run outside this window unless --force is passed.
|
||||||
|
# Leave empty (or omit) to allow updates at any time.
|
||||||
|
maintenance_window: "03:00-05:00"
|
||||||
|
|
||||||
|
# Path to Ed25519 public key for signature verification. Omit to disable
|
||||||
|
# signature verification (NOT recommended for production fleets).
|
||||||
|
# pubkey: "/etc/kubesolo/update-pubkey.hex"
|
||||||
|
|
||||||
|
# Optional post-boot healthcheck probe URL. If set, healthcheck GETs it
|
||||||
|
# and treats anything other than HTTP 200 as a failure. Useful when your
|
||||||
|
# workload exposes its own readiness on a known endpoint.
|
||||||
|
# healthcheck_url: "http://localhost:8000/ready"
|
||||||
|
|
||||||
|
# Auto-rollback threshold: after N consecutive post-activation healthcheck
|
||||||
|
# failures, the agent triggers a rollback on its own. 0 disables the
|
||||||
|
# feature (the bootloader still does GRUB-counter-based rollback after
|
||||||
|
# 3 failed boots). Recommended: 3 for production fleets.
|
||||||
|
# auto_rollback_after: 3
|
||||||
@@ -46,6 +46,42 @@ func buildExtraFlags(cfg *Config) string {
|
|||||||
parts = append(parts, "--apiserver-extra-sans", san)
|
parts = append(parts, "--apiserver-extra-sans", san)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.LocalStorageSharedPath != "" {
|
||||||
|
parts = append(parts, "--local-storage-shared-path", cfg.KubeSolo.LocalStorageSharedPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.Debug {
|
||||||
|
parts = append(parts, "--debug")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.PprofServer {
|
||||||
|
parts = append(parts, "--pprof-server")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.PortainerEdgeID != "" {
|
||||||
|
parts = append(parts, "--portainer-edge-id", cfg.KubeSolo.PortainerEdgeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.PortainerEdgeKey != "" {
|
||||||
|
parts = append(parts, "--portainer-edge-key", cfg.KubeSolo.PortainerEdgeKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.PortainerEdgeAsync {
|
||||||
|
parts = append(parts, "--portainer-edge-async")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.Full {
|
||||||
|
parts = append(parts, "--full")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.DisableIPv6 {
|
||||||
|
parts = append(parts, "--disable-ipv6")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.KubeSolo.DBWALRepair {
|
||||||
|
parts = append(parts, "--db-wal-repair")
|
||||||
|
}
|
||||||
|
|
||||||
return strings.Join(parts, " ")
|
return strings.Join(parts, " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,54 @@ func TestBuildExtraFlags(t *testing.T) {
|
|||||||
},
|
},
|
||||||
want: "--disable servicelb --apiserver-extra-sans edge.local",
|
want: "--disable servicelb --apiserver-extra-sans edge.local",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "debug flag",
|
||||||
|
cfg: Config{
|
||||||
|
KubeSolo: KubeSoloConfig{Debug: true},
|
||||||
|
},
|
||||||
|
want: "--debug",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pprof-server flag",
|
||||||
|
cfg: Config{
|
||||||
|
KubeSolo: KubeSoloConfig{PprofServer: true},
|
||||||
|
},
|
||||||
|
want: "--pprof-server",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "local-storage-shared-path",
|
||||||
|
cfg: Config{
|
||||||
|
KubeSolo: KubeSoloConfig{LocalStorageSharedPath: "/mnt/shared"},
|
||||||
|
},
|
||||||
|
want: "--local-storage-shared-path /mnt/shared",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "portainer edge flags",
|
||||||
|
cfg: Config{
|
||||||
|
KubeSolo: KubeSoloConfig{
|
||||||
|
PortainerEdgeID: "test-id-123",
|
||||||
|
PortainerEdgeKey: "test-key-456",
|
||||||
|
PortainerEdgeAsync: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: "--portainer-edge-id test-id-123 --portainer-edge-key test-key-456 --portainer-edge-async",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "all new flags",
|
||||||
|
cfg: Config{
|
||||||
|
KubeSolo: KubeSoloConfig{
|
||||||
|
ExtraFlags: "--disable traefik",
|
||||||
|
ExtraSANs: []string{"node.local"},
|
||||||
|
LocalStorageSharedPath: "/mnt/data/shared",
|
||||||
|
Debug: true,
|
||||||
|
PprofServer: true,
|
||||||
|
PortainerEdgeID: "eid",
|
||||||
|
PortainerEdgeKey: "ekey",
|
||||||
|
PortainerEdgeAsync: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: "--disable traefik --apiserver-extra-sans node.local --local-storage-shared-path /mnt/data/shared --debug --pprof-server --portainer-edge-id eid --portainer-edge-key ekey --portainer-edge-async",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
@@ -61,9 +109,14 @@ func TestApplyKubeSolo(t *testing.T) {
|
|||||||
tr := true
|
tr := true
|
||||||
cfg := &Config{
|
cfg := &Config{
|
||||||
KubeSolo: KubeSoloConfig{
|
KubeSolo: KubeSoloConfig{
|
||||||
ExtraFlags: "--disable traefik",
|
ExtraFlags: "--disable traefik",
|
||||||
LocalStorage: &tr,
|
LocalStorage: &tr,
|
||||||
ExtraSANs: []string{"test.local"},
|
ExtraSANs: []string{"test.local"},
|
||||||
|
LocalStorageSharedPath: "/mnt/shared",
|
||||||
|
Debug: true,
|
||||||
|
PortainerEdgeID: "eid",
|
||||||
|
PortainerEdgeKey: "ekey",
|
||||||
|
PortainerEdgeAsync: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -83,6 +136,21 @@ func TestApplyKubeSolo(t *testing.T) {
|
|||||||
if !strings.Contains(flags, "--apiserver-extra-sans test.local") {
|
if !strings.Contains(flags, "--apiserver-extra-sans test.local") {
|
||||||
t.Errorf("extra-flags missing SANs: %q", flags)
|
t.Errorf("extra-flags missing SANs: %q", flags)
|
||||||
}
|
}
|
||||||
|
if !strings.Contains(flags, "--local-storage-shared-path /mnt/shared") {
|
||||||
|
t.Errorf("extra-flags missing local-storage-shared-path: %q", flags)
|
||||||
|
}
|
||||||
|
if !strings.Contains(flags, "--debug") {
|
||||||
|
t.Errorf("extra-flags missing --debug: %q", flags)
|
||||||
|
}
|
||||||
|
if !strings.Contains(flags, "--portainer-edge-id eid") {
|
||||||
|
t.Errorf("extra-flags missing --portainer-edge-id: %q", flags)
|
||||||
|
}
|
||||||
|
if !strings.Contains(flags, "--portainer-edge-key ekey") {
|
||||||
|
t.Errorf("extra-flags missing --portainer-edge-key: %q", flags)
|
||||||
|
}
|
||||||
|
if !strings.Contains(flags, "--portainer-edge-async") {
|
||||||
|
t.Errorf("extra-flags missing --portainer-edge-async: %q", flags)
|
||||||
|
}
|
||||||
|
|
||||||
// Check config.yaml
|
// Check config.yaml
|
||||||
configData, err := os.ReadFile(filepath.Join(dir, "config.yaml"))
|
configData, err := os.ReadFile(filepath.Join(dir, "config.yaml"))
|
||||||
|
|||||||
@@ -225,6 +225,7 @@ func TestParseExampleFiles(t *testing.T) {
|
|||||||
"examples/static-ip.yaml",
|
"examples/static-ip.yaml",
|
||||||
"examples/portainer-edge.yaml",
|
"examples/portainer-edge.yaml",
|
||||||
"examples/airgapped.yaml",
|
"examples/airgapped.yaml",
|
||||||
|
"examples/full-config.yaml",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, path := range examples {
|
for _, path := range examples {
|
||||||
|
|||||||
@@ -77,6 +77,21 @@ func buildEdgeAgentManifest(edgeID, edgeKey, portainerURL, image string) string
|
|||||||
sb.WriteString(" name: portainer-sa-clusteradmin\n")
|
sb.WriteString(" name: portainer-sa-clusteradmin\n")
|
||||||
sb.WriteString(" namespace: portainer\n")
|
sb.WriteString(" namespace: portainer\n")
|
||||||
sb.WriteString("---\n")
|
sb.WriteString("---\n")
|
||||||
|
sb.WriteString("apiVersion: v1\n")
|
||||||
|
sb.WriteString("kind: Service\n")
|
||||||
|
sb.WriteString("metadata:\n")
|
||||||
|
sb.WriteString(" name: portainer-agent\n")
|
||||||
|
sb.WriteString(" namespace: portainer\n")
|
||||||
|
sb.WriteString("spec:\n")
|
||||||
|
sb.WriteString(" clusterIP: None\n")
|
||||||
|
sb.WriteString(" selector:\n")
|
||||||
|
sb.WriteString(" app: portainer-agent\n")
|
||||||
|
sb.WriteString(" ports:\n")
|
||||||
|
sb.WriteString(" - name: agent\n")
|
||||||
|
sb.WriteString(" port: 9001\n")
|
||||||
|
sb.WriteString(" targetPort: 9001\n")
|
||||||
|
sb.WriteString(" protocol: TCP\n")
|
||||||
|
sb.WriteString("---\n")
|
||||||
sb.WriteString("apiVersion: apps/v1\n")
|
sb.WriteString("apiVersion: apps/v1\n")
|
||||||
sb.WriteString("kind: Deployment\n")
|
sb.WriteString("kind: Deployment\n")
|
||||||
sb.WriteString("metadata:\n")
|
sb.WriteString("metadata:\n")
|
||||||
|
|||||||
57
cloud-init/updates.go
Normal file
57
cloud-init/updates.go
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
package cloudinit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultUpdateConfPath is where the update agent expects to find its config.
|
||||||
|
// Kept in sync with update/pkg/config.DefaultPath.
|
||||||
|
const DefaultUpdateConfPath = "/etc/kubesolo/update.conf"
|
||||||
|
|
||||||
|
// ApplyUpdates writes /etc/kubesolo/update.conf from the cloud-init
|
||||||
|
// updates: block. Called once per boot; idempotent (overwrites any existing
|
||||||
|
// file with the cloud-init values).
|
||||||
|
//
|
||||||
|
// If the updates: block is empty (all fields blank), the file is not
|
||||||
|
// written — preserves any hand-edited update.conf on systems that aren't
|
||||||
|
// managed via cloud-init.
|
||||||
|
func ApplyUpdates(cfg *Config, confPath string) error {
|
||||||
|
if confPath == "" {
|
||||||
|
confPath = DefaultUpdateConfPath
|
||||||
|
}
|
||||||
|
u := cfg.Updates
|
||||||
|
if u.Server == "" && u.Channel == "" && u.MaintenanceWindow == "" && u.PubKey == "" {
|
||||||
|
// Nothing to write — leave any existing file alone.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(filepath.Dir(confPath), 0o755); err != nil {
|
||||||
|
return fmt.Errorf("creating dir for %s: %w", confPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("# Generated by KubeSolo OS cloud-init — edit this file or the\n")
|
||||||
|
sb.WriteString("# cloud-init source YAML; subsequent first-boots will regenerate it.\n")
|
||||||
|
if u.Server != "" {
|
||||||
|
fmt.Fprintf(&sb, "server = %s\n", u.Server)
|
||||||
|
}
|
||||||
|
if u.Channel != "" {
|
||||||
|
fmt.Fprintf(&sb, "channel = %s\n", u.Channel)
|
||||||
|
}
|
||||||
|
if u.MaintenanceWindow != "" {
|
||||||
|
fmt.Fprintf(&sb, "maintenance_window = %s\n", u.MaintenanceWindow)
|
||||||
|
}
|
||||||
|
if u.PubKey != "" {
|
||||||
|
fmt.Fprintf(&sb, "pubkey = %s\n", u.PubKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(confPath, []byte(sb.String()), 0o644); err != nil {
|
||||||
|
return fmt.Errorf("writing %s: %w", confPath, err)
|
||||||
|
}
|
||||||
|
slog.Info("wrote update.conf", "path", confPath)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
81
cloud-init/updates_test.go
Normal file
81
cloud-init/updates_test.go
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
package cloudinit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestApplyUpdatesEmptyConfigSkipsWrite(t *testing.T) {
|
||||||
|
confPath := filepath.Join(t.TempDir(), "update.conf")
|
||||||
|
cfg := &Config{} // Updates block default-zero
|
||||||
|
if err := ApplyUpdates(cfg, confPath); err != nil {
|
||||||
|
t.Fatalf("apply: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(confPath); !os.IsNotExist(err) {
|
||||||
|
t.Errorf("expected no file when cloud-init Updates is empty, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyUpdatesAllFields(t *testing.T) {
|
||||||
|
confPath := filepath.Join(t.TempDir(), "update.conf")
|
||||||
|
cfg := &Config{Updates: UpdatesConfig{
|
||||||
|
Server: "https://updates.example.com",
|
||||||
|
Channel: "stable",
|
||||||
|
MaintenanceWindow: "03:00-05:00",
|
||||||
|
PubKey: "/etc/kubesolo/pub.hex",
|
||||||
|
}}
|
||||||
|
if err := ApplyUpdates(cfg, confPath); err != nil {
|
||||||
|
t.Fatalf("apply: %v", err)
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(confPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read: %v", err)
|
||||||
|
}
|
||||||
|
out := string(data)
|
||||||
|
|
||||||
|
wants := []string{
|
||||||
|
"server = https://updates.example.com",
|
||||||
|
"channel = stable",
|
||||||
|
"maintenance_window = 03:00-05:00",
|
||||||
|
"pubkey = /etc/kubesolo/pub.hex",
|
||||||
|
}
|
||||||
|
for _, w := range wants {
|
||||||
|
if !strings.Contains(out, w) {
|
||||||
|
t.Errorf("update.conf missing %q in output:\n%s", w, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyUpdatesPartialFields(t *testing.T) {
|
||||||
|
// Only server set — others should be omitted from the file, not written
|
||||||
|
// as blank values.
|
||||||
|
confPath := filepath.Join(t.TempDir(), "update.conf")
|
||||||
|
cfg := &Config{Updates: UpdatesConfig{Server: "https://x.example.com"}}
|
||||||
|
if err := ApplyUpdates(cfg, confPath); err != nil {
|
||||||
|
t.Fatalf("apply: %v", err)
|
||||||
|
}
|
||||||
|
data, _ := os.ReadFile(confPath)
|
||||||
|
out := string(data)
|
||||||
|
if !strings.Contains(out, "server = https://x.example.com") {
|
||||||
|
t.Errorf("missing server line:\n%s", out)
|
||||||
|
}
|
||||||
|
for _, unwanted := range []string{"channel = ", "maintenance_window = ", "pubkey = "} {
|
||||||
|
if strings.Contains(out, unwanted) {
|
||||||
|
t.Errorf("unexpected empty line %q present in:\n%s", unwanted, out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyUpdatesCreatesParentDir(t *testing.T) {
|
||||||
|
// /etc/kubesolo may not exist on first boot before cloud-init runs.
|
||||||
|
confPath := filepath.Join(t.TempDir(), "nested", "kubesolo", "update.conf")
|
||||||
|
cfg := &Config{Updates: UpdatesConfig{Server: "https://x"}}
|
||||||
|
if err := ApplyUpdates(cfg, confPath); err != nil {
|
||||||
|
t.Fatalf("apply: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(confPath); err != nil {
|
||||||
|
t.Errorf("file not created: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
124
docs/arm64-architecture.md
Normal file
124
docs/arm64-architecture.md
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# ARM64 Build Architecture
|
||||||
|
|
||||||
|
KubeSolo OS supports ARM64 via two distinct build tracks. This document defines the
|
||||||
|
split, lists which files belong to each track, and identifies the shared substrate.
|
||||||
|
|
||||||
|
## The two tracks
|
||||||
|
|
||||||
|
### Generic ARM64 (UEFI / virtio / GRUB)
|
||||||
|
|
||||||
|
**Target:** Any UEFI-compliant ARM64 host — Ampere/Graviton VMs, generic ARM64
|
||||||
|
servers, `qemu-system-aarch64 -machine virt`, future SBCs that boot via UEFI.
|
||||||
|
|
||||||
|
**Boot path:** UEFI firmware → GRUB-EFI → kernel + initramfs → KubeSolo init.
|
||||||
|
|
||||||
|
**Kernel:** Mainline Linux (kernel.org LTS), built from `defconfig` + shared
|
||||||
|
container-config fragment.
|
||||||
|
|
||||||
|
**Storage:** virtio-blk / NVMe / SATA — detected and probed by mainline drivers.
|
||||||
|
|
||||||
|
**Disk image format:** GPT, identical 4-partition layout to x86_64 (EFI + System A
|
||||||
|
+ System B + Data).
|
||||||
|
|
||||||
|
### Raspberry Pi ARM64
|
||||||
|
|
||||||
|
**Target:** Raspberry Pi 4 and 5 specifically.
|
||||||
|
|
||||||
|
**Boot path:** RPi EEPROM → VideoCore firmware (`start4.elf`) → `config.txt` →
|
||||||
|
kernel + DTB + initramfs → KubeSolo init. (No UEFI, no GRUB — `autoboot.txt`
|
||||||
|
provides the A/B selection.)
|
||||||
|
|
||||||
|
**Kernel:** Built from `raspberrypi/linux` fork with `bcm2711_defconfig`
|
||||||
|
(Pi 4) or `bcm2712_defconfig` (Pi 5). RPi-patched, includes BCM-specific drivers
|
||||||
|
(sdhci-iproc, bcm2835-mmc, GPIO, mailbox).
|
||||||
|
|
||||||
|
**Storage:** SD card via `sdhci-iproc` driver — requires kernel-built DTBs to match
|
||||||
|
the kernel binary.
|
||||||
|
|
||||||
|
**Disk image format:** MBR with `autoboot.txt` A/B redirect:
|
||||||
|
- Part 1: Boot/Control (FAT32, firmware + fallback kernel)
|
||||||
|
- Part 2: Boot A (FAT32, kernel + DTBs + initramfs)
|
||||||
|
- Part 3: Boot B (FAT32, same as A initially)
|
||||||
|
- Part 4: Data (ext4)
|
||||||
|
|
||||||
|
## File-by-file ownership
|
||||||
|
|
||||||
|
### Shared substrate (used by both tracks)
|
||||||
|
|
||||||
|
| Path | Why shared |
|
||||||
|
|------|------------|
|
||||||
|
| `init/` (all of it) | Boot is identical post-kernel — same staged init, same persistent mount, same KubeSolo launch |
|
||||||
|
| `cloud-init/` | Arch-agnostic Go binary |
|
||||||
|
| `update/` | Arch-agnostic Go binary; bootenv abstraction handles GRUB vs RPi-autoboot variants |
|
||||||
|
| `build/scripts/inject-kubesolo.sh` | Single script; switches `LIB_ARCH` / `LD_SO` based on `TARGET_ARCH` |
|
||||||
|
| `build/scripts/extract-core.sh` | Single script; arm64 branch uses piCore64 userland (arch-agnostic BusyBox) |
|
||||||
|
| `build/config/modules-arm64.list` | Already generic — no BCM-specific modules; works in QEMU virt, AWS Graviton, and RPi |
|
||||||
|
| `build/config/rpi-kernel-config.fragment` | **Misnamed.** Contents (cgroup, namespaces, netfilter, AppArmor) are arch-agnostic. Will be renamed `kernel-container.fragment` in Phase 2 and applied to x86, generic-ARM64, and RPi kernels alike. |
|
||||||
|
| `hack/dev-vm-arm64.sh` | Uses `-machine virt` + virtio — generic, not RPi-specific |
|
||||||
|
| `test/qemu/test-boot-arm64.sh` | Same as above |
|
||||||
|
|
||||||
|
### Generic ARM64 only (to be created in Phases 2–3)
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `build/scripts/build-kernel-arm64.sh` *(rewritten in Phase 2)* | Build mainline kernel.org LTS from `defconfig` + shared fragment + arm64-virt enables (`VIRTIO_BLK`, `EFI_STUB`). Replaces the existing RPi-flavoured script of the same name. |
|
||||||
|
| `build/scripts/create-disk-image-arm64.sh` *(new in Phase 3)* | Build UEFI-bootable raw disk image (GPT + System A/B + Data) using `grub-efi-arm64`. Or fold into existing `create-disk-image.sh` with an arch parameter. |
|
||||||
|
| `build/cache/kernel-arm64-generic/` | Build output for mainline ARM64 kernel — keep separate from RPi-kernel cache. |
|
||||||
|
|
||||||
|
### Raspberry Pi only (to be renamed/reorganised in Phase 2)
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `build/scripts/build-kernel-rpi.sh` *(renamed from `build-kernel-arm64.sh`)* | Build kernel from `raspberrypi/linux` with `bcm2711_defconfig` + shared fragment + RPi-specific overrides. |
|
||||||
|
| `build/scripts/create-rpi-image.sh` | Build SD card image (MBR + autoboot.txt + firmware blobs + DTBs). Already correctly scoped. |
|
||||||
|
| `build/scripts/fetch-rpi-firmware.sh` | Download VideoCore firmware blobs from `raspberrypi/firmware`. Already correctly scoped. |
|
||||||
|
| `build/config/rpi-kernel-overrides.fragment` *(new, Phase 2)* | Pi-specific kernel config knobs (DMA, audio off, etc.) layered on top of the shared container fragment. |
|
||||||
|
| `build/cache/custom-kernel-rpi/` *(renamed from `custom-kernel-arm64/`)* | Build output for RPi kernel — DTBs, modules, Image. |
|
||||||
|
| `versions.env` keys: `RPI_KERNEL_BRANCH`, `RPI_KERNEL_REPO`, `RPI_FIRMWARE_TAG`, `RPI_FIRMWARE_URL`, `PICORE_*` | Already correctly named. |
|
||||||
|
|
||||||
|
## Make targets
|
||||||
|
|
||||||
|
| Target | Track |
|
||||||
|
|--------|-------|
|
||||||
|
| `make iso` | x86_64 |
|
||||||
|
| `make disk-image` | x86_64 |
|
||||||
|
| `make kernel` | x86_64 |
|
||||||
|
| `make kernel-arm64` *(Phase 2: now builds mainline)* | Generic ARM64 |
|
||||||
|
| `make rootfs-arm64` | Generic ARM64 (and reusable for RPi rootfs) |
|
||||||
|
| `make disk-image-arm64` *(Phase 3: new)* | Generic ARM64 |
|
||||||
|
| `make kernel-rpi` *(Phase 2: renamed from former kernel-arm64)* | RPi |
|
||||||
|
| `make rpi-image` | RPi |
|
||||||
|
|
||||||
|
## Why two tracks, not one
|
||||||
|
|
||||||
|
The RPi boot path is fundamentally different from generic ARM64:
|
||||||
|
|
||||||
|
- **No UEFI.** RPi boots through a multi-stage firmware chain that ends with
|
||||||
|
`config.txt` parsing and direct kernel load. UEFI/GRUB is not an option without
|
||||||
|
third-party firmware (which has its own bugs).
|
||||||
|
- **DTB required.** RPi kernel needs a device tree blob matching the kernel binary;
|
||||||
|
generic ARM64 under UEFI uses ACPI or self-describing virtio.
|
||||||
|
- **Custom drivers.** SD card (sdhci-iproc), GPIO, mailbox interfaces require
|
||||||
|
RPi-patched kernel sources. Mainline support exists but lags behind the
|
||||||
|
raspberrypi/linux fork for new boards.
|
||||||
|
- **A/B selection mechanism.** RPi uses `autoboot.txt` + EEPROM cooperation; generic
|
||||||
|
ARM64 uses GRUB's `boot_default`/`boot_counter` envvars (same as x86_64).
|
||||||
|
|
||||||
|
Trying to unify into a single track would force compromises in both. Two tracks
|
||||||
|
sharing the post-kernel substrate (init, cloud-init, update agent) gives us the best
|
||||||
|
of both: code reuse where it makes sense, divergence only where the hardware demands
|
||||||
|
it.
|
||||||
|
|
||||||
|
## Migration plan
|
||||||
|
|
||||||
|
This document is descriptive of the **target** v0.3.0 layout. The current code
|
||||||
|
(as of v0.2.0) has:
|
||||||
|
|
||||||
|
- `build/scripts/build-kernel-arm64.sh` building the RPi kernel (will be renamed in
|
||||||
|
Phase 2).
|
||||||
|
- `build/config/rpi-kernel-config.fragment` containing generic configs (will be
|
||||||
|
renamed in Phase 2).
|
||||||
|
- No generic ARM64 kernel script (will be created in Phase 2).
|
||||||
|
- No generic ARM64 disk image script (will be created in Phase 3).
|
||||||
|
|
||||||
|
Phases 2 and 3 of the v0.3.0 plan execute the migration.
|
||||||
125
docs/arm64-status.md
Normal file
125
docs/arm64-status.md
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# ARM64 Generic Status (v0.3 in-progress)
|
||||||
|
|
||||||
|
End-of-Phase-3 snapshot of the generic ARM64 build track.
|
||||||
|
|
||||||
|
## What works
|
||||||
|
|
||||||
|
End-to-end boot through QEMU on an Odroid (aarch64 Ubuntu 22.04 build host):
|
||||||
|
|
||||||
|
1. `make kernel-arm64` produces a mainline 6.12.10 LTS kernel (44 MB Image, 868
|
||||||
|
modules)
|
||||||
|
2. `make rootfs-arm64` extracts piCore64 userland, replaces BusyBox with
|
||||||
|
Ubuntu's static busybox-static, injects KubeSolo + Go agents + init scripts
|
||||||
|
3. `make disk-image-arm64` produces a UEFI-bootable 4 GB GPT image with GRUB
|
||||||
|
A/B slots
|
||||||
|
4. `hack/dev-vm-arm64.sh --disk` boots the image:
|
||||||
|
- UEFI firmware loads GRUB
|
||||||
|
- GRUB loads kernel + initramfs
|
||||||
|
- Custom init runs all 14 stages (early-mount, parse-cmdline, persistent-mount,
|
||||||
|
kernel-modules, apparmor, sysctl, cloud-init, network, hostname, clock,
|
||||||
|
containerd, security-lockdown, kubesolo)
|
||||||
|
- Data partition mounts (ext4 on vda4)
|
||||||
|
- Network configured (DHCP on virtio eth0)
|
||||||
|
- KubeSolo starts; containerd boots successfully; CoreDNS + pause images
|
||||||
|
register
|
||||||
|
|
||||||
|
## Known limitations of the current dev setup
|
||||||
|
|
||||||
|
These are debugging-environment issues, not production blockers:
|
||||||
|
|
||||||
|
### 1. QEMU TCG performance hits KubeSolo's image-import deadline
|
||||||
|
|
||||||
|
KubeSolo bundles its essential container images and imports them into
|
||||||
|
containerd on first boot. Under QEMU TCG (software emulation on the Odroid's
|
||||||
|
1.8 GB / 6-core ARM64), the import takes longer than KubeSolo's internal
|
||||||
|
deadline, so we see:
|
||||||
|
|
||||||
|
```
|
||||||
|
failed to import images: ... context deadline exceeded
|
||||||
|
shutdown requested before containerd was ready
|
||||||
|
```
|
||||||
|
|
||||||
|
On real ARM64 hardware (Graviton, Ampere, RPi 5, etc.) this import completes
|
||||||
|
in seconds. KVM acceleration on the Odroid would also fix it, but the
|
||||||
|
Odroid's vendor kernel (4.9.337-38) doesn't ship the KVM module — fixing that
|
||||||
|
requires a host-kernel upgrade outside this project's scope.
|
||||||
|
|
||||||
|
### 2. Hardcoded `/dev/vda4` data partition path
|
||||||
|
|
||||||
|
Stage 20 currently expects `kubesolo.data=/dev/vda4` rather than
|
||||||
|
`LABEL=KSOLODATA`. The LABEL= path is preferred (works regardless of disk
|
||||||
|
naming on different hosts), but resolution depends on `blkid` and `findfs`,
|
||||||
|
which:
|
||||||
|
|
||||||
|
- piCore64 ships as dynamic util-linux binaries that crash in QEMU virt
|
||||||
|
- Ubuntu's `busybox-static` 1.30.1 doesn't include the applets
|
||||||
|
|
||||||
|
Production fix options (deferred to next phase):
|
||||||
|
|
||||||
|
- Build a more comprehensive static BusyBox (Alpine's, or upstream + custom config)
|
||||||
|
- Ship statically-linked `blkid` and `findfs` from util-linux
|
||||||
|
- Replace LABEL resolution with a sysfs walk that reads `/sys/class/block/*/holders`
|
||||||
|
and `/dev/<n>` device numbers
|
||||||
|
|
||||||
|
### 3. AppArmor profiles fail to load
|
||||||
|
|
||||||
|
`apparmor_parser` errors on the containerd and kubelet profiles, probably
|
||||||
|
because the parser binary or libraries copied from the build host don't
|
||||||
|
match the rootfs's libc layout. Boot proceeds without AppArmor enforcement.
|
||||||
|
Same fix path as #2 (better static binaries).
|
||||||
|
|
||||||
|
### 4. piCore64 BusyBox swap is a build-host dependency
|
||||||
|
|
||||||
|
`inject-kubesolo.sh` replaces piCore's `/bin/busybox` with the build host's
|
||||||
|
`/bin/busybox` (Ubuntu's busybox-static package). That binary must exist on
|
||||||
|
the build host or in the builder Docker image. Documented; works in CI
|
||||||
|
because the Dockerfile installs busybox-static.
|
||||||
|
|
||||||
|
A more reproducible approach (future work): ship a known-good ARM64 BusyBox
|
||||||
|
binary as a tracked artifact rather than depending on the host package.
|
||||||
|
|
||||||
|
### 5. busybox-static 1.30.1 has its own bugs
|
||||||
|
|
||||||
|
Even after the swap, some applets misbehave inside QEMU:
|
||||||
|
|
||||||
|
- `modprobe` triggers "stack smashing detected" abort (kernel modules still
|
||||||
|
load via direct write to /sys/... in stage 30, so this isn't fatal)
|
||||||
|
- `tr` doesn't parse POSIX character classes like `[:space:]` — already
|
||||||
|
worked around by using explicit `' \t\r\n'` in our scripts
|
||||||
|
- Missing applets: `blkid`, `findfs`, `--version`, etc.
|
||||||
|
|
||||||
|
These won't necessarily manifest on real hardware (different CPU, different
|
||||||
|
glibc interaction) but they confirm that 1.30.1 isn't the right long-term
|
||||||
|
BusyBox.
|
||||||
|
|
||||||
|
## What's needed to ship v0.3 ARM64 as production-ready
|
||||||
|
|
||||||
|
In order of priority:
|
||||||
|
|
||||||
|
1. **Validate on real ARM64 hardware** — boot the image on a Graviton EC2
|
||||||
|
instance, Ampere VPS, RPi 5 (when hardware available), or any UEFI-capable
|
||||||
|
ARM64 board. Confirm full KubeSolo bring-up: node Ready, pods schedule.
|
||||||
|
2. **Fix LABEL=KSOLODATA resolution** — see option list in #2 above.
|
||||||
|
3. **Replace busybox-static with a curated build** — see #4.
|
||||||
|
4. **Add a Gitea workflow** that runs `make kernel-arm64 + disk-image-arm64`
|
||||||
|
on the Odroid runner and the QEMU boot-test as a smoke test (with the
|
||||||
|
expectation that KubeSolo doesn't finish first-boot under TCG).
|
||||||
|
|
||||||
|
## Files exercised by the Phase 3 work
|
||||||
|
|
||||||
|
| Path | Status |
|
||||||
|
|------|--------|
|
||||||
|
| `build/scripts/build-kernel-arm64.sh` | New — mainline 6.12.10 kernel build, native or cross |
|
||||||
|
| `build/scripts/build-kernel-rpi.sh` | Renamed from old `build-kernel-arm64.sh` — RPi path |
|
||||||
|
| `build/config/kernel-container.fragment` | Renamed from `rpi-kernel-config.fragment` |
|
||||||
|
| `build/scripts/create-disk-image.sh` | Refactored — accepts `TARGET_ARCH=arm64` |
|
||||||
|
| `build/grub/grub-arm64.cfg` | New — ARM64 console + `init=/sbin/init` |
|
||||||
|
| `build/scripts/inject-kubesolo.sh` | Updated — BusyBox swap, `/init` install, variant routing |
|
||||||
|
| `init/init.sh` | Updated — output to `/dev/console` for early-boot visibility |
|
||||||
|
| `init/lib/30-kernel-modules.sh` | Fixed — `tr -d ' \t\r\n'` instead of `[:space:]` |
|
||||||
|
| `init/lib/40-sysctl.sh` | Same fix |
|
||||||
|
| `hack/dev-vm-arm64.sh` | Updated — `-cpu max`, UEFI `--disk` mode |
|
||||||
|
| `test/qemu/test-boot-arm64-disk.sh` | New — CI test for UEFI boot |
|
||||||
|
| `Makefile` | New targets: `kernel-arm64`, `kernel-rpi`, `disk-image-arm64`, `test-boot-arm64-disk`, `rootfs-arm64-rpi` |
|
||||||
|
| `build/config/versions.env` | Pinned `MAINLINE_KERNEL_VERSION=6.12.10`, `KUBESOLO_VERSION=v1.1.0` |
|
||||||
|
| `build/Dockerfile.builder` | Added `grub-efi-amd64-bin`, `grub-efi-arm64-bin`, `busybox-static` |
|
||||||
165
docs/ci-runners.md
Normal file
165
docs/ci-runners.md
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
# CI Runners
|
||||||
|
|
||||||
|
KubeSolo OS is built and tested on Gitea Actions runners. This document records the
|
||||||
|
runners currently in service and how to register a new one if a host is wiped.
|
||||||
|
|
||||||
|
## Active runners
|
||||||
|
|
||||||
|
| Name | Host | Arch | OS | Labels | Notes |
|
||||||
|
|------|------|------|-----|--------|-------|
|
||||||
|
| `odroid-arm64` | `odroid.local` | aarch64 | Ubuntu 22.04 LTS | `arm64-linux`, `ubuntu-latest`, `ubuntu-24.04`, `ubuntu-22.04` | Native ARM64 builder; 6 cores, 1.8 GB RAM + 4 GB swap; runs as systemd service `act_runner` |
|
||||||
|
|
||||||
|
## Workflow targeting
|
||||||
|
|
||||||
|
ARM64-specific jobs target the Odroid via the `arm64-linux` label:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
jobs:
|
||||||
|
build-arm64:
|
||||||
|
runs-on: arm64-linux
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- run: make rootfs-arm64
|
||||||
|
```
|
||||||
|
|
||||||
|
Generic ubuntu jobs that don't care about arch fall through to whichever runner picks
|
||||||
|
them up first; on the Odroid they run in Docker via the `ubuntu-latest` /
|
||||||
|
`ubuntu-22.04` / `ubuntu-24.04` labels.
|
||||||
|
|
||||||
|
## Registering a new runner
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Linux host (Ubuntu / Debian preferred; the install instructions below use Ubuntu
|
||||||
|
22.04+ paths).
|
||||||
|
- Outbound HTTPS to the Gitea instance.
|
||||||
|
- Root access on the runner host (the runner needs to create loop devices and run
|
||||||
|
`mkfs.ext4` for disk-image builds).
|
||||||
|
- A Gitea Actions runner registration token. Get it from:
|
||||||
|
- **Repo-scoped:** `<repo>/settings/actions/runners` → "Create new Runner"
|
||||||
|
- **Org-scoped (preferred for this project):** `<org>/-/settings/actions/runners` →
|
||||||
|
"Create new Runner"
|
||||||
|
- **Site-scoped:** `/-/admin/actions/runners` → "Create new Runner"
|
||||||
|
|
||||||
|
### Step 1 — Add swap if the host has <4 GB RAM
|
||||||
|
|
||||||
|
Kernel builds in later phases need ~2 GB resident; tight hosts will OOM-kill `cc1`
|
||||||
|
without swap.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo fallocate -l 4G /swapfile
|
||||||
|
sudo chmod 600 /swapfile
|
||||||
|
sudo mkswap /swapfile
|
||||||
|
sudo swapon /swapfile
|
||||||
|
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2 — Install the gitea-runner binary
|
||||||
|
|
||||||
|
Pinned to a known-good version. Check
|
||||||
|
<https://gitea.com/gitea/runner/releases> for the current stable tag before
|
||||||
|
bumping.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo -i
|
||||||
|
mkdir -p /opt/act_runner && cd /opt/act_runner
|
||||||
|
|
||||||
|
# Bump VERSION to the current stable release as needed
|
||||||
|
VERSION=1.0.3
|
||||||
|
ARCH=$(uname -m | sed 's/aarch64/arm64/; s/x86_64/amd64/')
|
||||||
|
|
||||||
|
curl -fL "https://gitea.com/gitea/runner/releases/download/v${VERSION}/gitea-runner-${VERSION}-linux-${ARCH}" \
|
||||||
|
-o act_runner
|
||||||
|
chmod +x act_runner
|
||||||
|
./act_runner --version
|
||||||
|
```
|
||||||
|
|
||||||
|
> The upstream project was renamed `act_runner` → `gitea-runner` at the v1.0.0
|
||||||
|
> release. The release asset filenames use `gitea-runner-*` even though we keep the
|
||||||
|
> local binary named `act_runner` to match this systemd unit. The CLI surface
|
||||||
|
> (`register`, `daemon`, `generate-config`) is unchanged.
|
||||||
|
|
||||||
|
### Step 3 — Register against Gitea
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./act_runner register --no-interactive \
|
||||||
|
--instance https://git.oe74.net \
|
||||||
|
--token PASTE_TOKEN_HERE \
|
||||||
|
--name <hostname> \
|
||||||
|
--labels arm64-linux # adjust label for amd64 hosts
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates a `.runner` file with the registration credentials.
|
||||||
|
|
||||||
|
### Step 4 — Generate and tune config
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./act_runner generate-config > config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
In `config.yaml`, confirm the `runner.labels:` block includes the labels you want.
|
||||||
|
The `:host` suffix routes jobs directly to the host (no Docker wrapper) — required
|
||||||
|
for disk-image builds that need loop devices and `mkfs`.
|
||||||
|
|
||||||
|
Example labels for an arm64 host:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
runner:
|
||||||
|
labels:
|
||||||
|
- "arm64-linux:host"
|
||||||
|
- "ubuntu-latest:docker://docker.gitea.com/runner-images:ubuntu-latest"
|
||||||
|
- "ubuntu-24.04:docker://docker.gitea.com/runner-images:ubuntu-24.04"
|
||||||
|
- "ubuntu-22.04:docker://docker.gitea.com/runner-images:ubuntu-22.04"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 5 — Install as a systemd service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cat > /etc/systemd/system/act_runner.service << 'EOF'
|
||||||
|
[Unit]
|
||||||
|
Description=Gitea Actions runner
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStart=/opt/act_runner/act_runner daemon --config /opt/act_runner/config.yaml
|
||||||
|
WorkingDirectory=/opt/act_runner
|
||||||
|
User=root
|
||||||
|
Restart=always
|
||||||
|
RestartSec=5
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl enable --now act_runner
|
||||||
|
systemctl status act_runner --no-pager
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 6 — Verify in Gitea UI
|
||||||
|
|
||||||
|
Visit the runners page at the scope you registered against. The runner should appear
|
||||||
|
as `Idle` with the labels you configured.
|
||||||
|
|
||||||
|
## Removing a runner
|
||||||
|
|
||||||
|
On the host:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
systemctl disable --now act_runner
|
||||||
|
rm -rf /opt/act_runner /etc/systemd/system/act_runner.service
|
||||||
|
systemctl daemon-reload
|
||||||
|
```
|
||||||
|
|
||||||
|
Then delete the runner entry from the Gitea Actions UI so Gitea stops trying to
|
||||||
|
schedule against it.
|
||||||
|
|
||||||
|
## Operational notes
|
||||||
|
|
||||||
|
- The runner stores in-progress job working directories under `/tmp/act_runner` by
|
||||||
|
default. Large disk-image builds may need that path moved to a larger volume —
|
||||||
|
edit `host.workdir_parent:` in `config.yaml`.
|
||||||
|
- Logs are visible via `journalctl -u act_runner -f`.
|
||||||
|
- If a job is interrupted (e.g. host reboot mid-build), the Gitea UI will mark it as
|
||||||
|
failed/cancelled. Re-run from the Actions UI.
|
||||||
@@ -45,9 +45,15 @@ network:
|
|||||||
kubesolo:
|
kubesolo:
|
||||||
extra-flags: "--disable traefik" # Extra CLI flags for KubeSolo binary
|
extra-flags: "--disable traefik" # Extra CLI flags for KubeSolo binary
|
||||||
local-storage: true # Enable local-path provisioner (default: true)
|
local-storage: true # Enable local-path provisioner (default: true)
|
||||||
|
local-storage-shared-path: "/mnt/shared" # Shared path for local-path-provisioner
|
||||||
apiserver-extra-sans: # Extra SANs for API server certificate
|
apiserver-extra-sans: # Extra SANs for API server certificate
|
||||||
- node.example.com
|
- node.example.com
|
||||||
- 10.0.0.50
|
- 10.0.0.50
|
||||||
|
debug: false # Enable verbose debug logging
|
||||||
|
pprof-server: false # Enable Go pprof profiling server
|
||||||
|
portainer-edge-id: "" # Portainer Edge Agent ID
|
||||||
|
portainer-edge-key: "" # Portainer Edge Agent key
|
||||||
|
portainer-edge-async: false # Enable async Portainer Edge communication
|
||||||
|
|
||||||
# NTP servers (optional)
|
# NTP servers (optional)
|
||||||
ntp:
|
ntp:
|
||||||
@@ -129,6 +135,24 @@ kubesolo-cloudinit validate /path/to/cloud-init.yaml
|
|||||||
kubesolo-cloudinit dump /path/to/cloud-init.yaml
|
kubesolo-cloudinit dump /path/to/cloud-init.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## KubeSolo Configuration Reference
|
||||||
|
|
||||||
|
All fields under the `kubesolo:` section and their corresponding CLI flags:
|
||||||
|
|
||||||
|
| YAML Field | CLI Flag | Type | Default | Description |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `extra-flags` | (raw flags) | string | `""` | Arbitrary extra flags passed to KubeSolo binary |
|
||||||
|
| `local-storage` | `--local-storage` | bool | `true` | Enable local-path-provisioner for PVCs |
|
||||||
|
| `local-storage-shared-path` | `--local-storage-shared-path` | string | `""` | Shared path for local-path-provisioner storage |
|
||||||
|
| `apiserver-extra-sans` | `--apiserver-extra-sans` | list | `[]` | Extra SANs for API server TLS certificate |
|
||||||
|
| `debug` | `--debug` | bool | `false` | Enable verbose debug logging |
|
||||||
|
| `pprof-server` | `--pprof-server` | bool | `false` | Enable Go pprof profiling server |
|
||||||
|
| `portainer-edge-id` | `--portainer-edge-id` | string | `""` | Portainer Edge Agent ID (from Portainer UI) |
|
||||||
|
| `portainer-edge-key` | `--portainer-edge-key` | string | `""` | Portainer Edge Agent key (from Portainer UI) |
|
||||||
|
| `portainer-edge-async` | `--portainer-edge-async` | bool | `false` | Enable async Portainer Edge communication |
|
||||||
|
|
||||||
|
**Note:** The `portainer-edge-*` fields generate CLI flags for KubeSolo's built-in Edge Agent support. This is an alternative to the `portainer.edge-agent` section, which creates a standalone Kubernetes manifest. Use one approach or the other, not both.
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
See `cloud-init/examples/` for complete configuration examples:
|
See `cloud-init/examples/` for complete configuration examples:
|
||||||
@@ -137,6 +161,7 @@ See `cloud-init/examples/` for complete configuration examples:
|
|||||||
- `static-ip.yaml` — Static IP configuration
|
- `static-ip.yaml` — Static IP configuration
|
||||||
- `portainer-edge.yaml` — Portainer Edge Agent integration
|
- `portainer-edge.yaml` — Portainer Edge Agent integration
|
||||||
- `airgapped.yaml` — Air-gapped deployment with pre-loaded images
|
- `airgapped.yaml` — Air-gapped deployment with pre-loaded images
|
||||||
|
- `full-config.yaml` — All supported KubeSolo parameters
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
|
|||||||
181
docs/release-notes-0.3.0.md
Normal file
181
docs/release-notes-0.3.0.md
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# KubeSolo OS v0.3.0 — Release Notes
|
||||||
|
|
||||||
|
**Released:** 2026-05-14
|
||||||
|
|
||||||
|
v0.3.0 is the second feature release (following v0.2.0) and the first release that
|
||||||
|
ships a generic ARM64 build alongside x86_64. The update agent grew up: it
|
||||||
|
now has an explicit on-disk lifecycle, OCI registry distribution, and a
|
||||||
|
fleet-friendly set of policy gates (channels, maintenance windows,
|
||||||
|
version-stepping-stones, pre-flight checks, auto-rollback).
|
||||||
|
|
||||||
|
This document is the operator-facing summary. The full per-phase changelog
|
||||||
|
lives in [CHANGELOG.md](../CHANGELOG.md).
|
||||||
|
|
||||||
|
## What's new
|
||||||
|
|
||||||
|
### Generic ARM64 build
|
||||||
|
|
||||||
|
The image you build with `make disk-image-arm64` now targets any UEFI-capable
|
||||||
|
ARM64 host: AWS Graviton, Oracle Ampere, generic ARM64 servers, future SBCs
|
||||||
|
with UEFI-compatible firmware. The kernel comes from kernel.org mainline LTS
|
||||||
|
(6.12.10 by default, configurable via `MAINLINE_KERNEL_VERSION` in
|
||||||
|
`build/config/versions.env`).
|
||||||
|
|
||||||
|
This is **distinct** from the Raspberry Pi build path. RPi keeps its
|
||||||
|
specialised kernel from `raspberrypi/linux` with bcm-defconfig + custom DTBs;
|
||||||
|
the generic ARM64 path uses mainline + arm64-defconfig + UEFI/virtio. See
|
||||||
|
[docs/arm64-architecture.md](arm64-architecture.md) for the file-by-file
|
||||||
|
split.
|
||||||
|
|
||||||
|
KubeSolo bumped to **v1.1.5** (was v1.1.0). New flags surfaced via cloud-init:
|
||||||
|
- `kubesolo.full` — disable edge-optimised k8s overrides
|
||||||
|
- `kubesolo.disable-ipv6` — disable IPv6 cluster-wide
|
||||||
|
- `kubesolo.db-wal-repair` — recover from unclean shutdowns
|
||||||
|
|
||||||
|
### Update lifecycle is now observable
|
||||||
|
|
||||||
|
The update agent writes a `state.json` at `/var/lib/kubesolo/update/state.json`
|
||||||
|
recording where the current attempt is in the lifecycle:
|
||||||
|
|
||||||
|
```
|
||||||
|
idle → checking → downloading → staged → activated → verifying → success
|
||||||
|
↘ rolled_back
|
||||||
|
↘ failed
|
||||||
|
```
|
||||||
|
|
||||||
|
`kubesolo-update status --json` emits the full state for orchestration tooling.
|
||||||
|
The Prometheus metrics endpoint gains three new series:
|
||||||
|
|
||||||
|
- `kubesolo_update_phase{phase="..."}` — 1 for current phase, 0 for others (all 9 always emitted)
|
||||||
|
- `kubesolo_update_attempts_total`
|
||||||
|
- `kubesolo_update_last_attempt_timestamp_seconds`
|
||||||
|
|
||||||
|
### OCI registry distribution
|
||||||
|
|
||||||
|
Update artifacts can now be pulled from any OCI-compliant registry alongside
|
||||||
|
the existing HTTP `latest.json` protocol:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# HTTP, unchanged from v0.2:
|
||||||
|
kubesolo-update apply --server https://updates.example.com
|
||||||
|
|
||||||
|
# New: OCI from ghcr.io (or quay.io, harbor, zot, ...)
|
||||||
|
kubesolo-update apply --registry ghcr.io/yourorg/kubesolo-os --tag stable
|
||||||
|
```
|
||||||
|
|
||||||
|
Multi-arch is handled transparently — the same `stable` tag points at a
|
||||||
|
manifest index, the agent picks the manifest matching its `runtime.GOARCH`.
|
||||||
|
|
||||||
|
Publish your own artifacts with `build/scripts/push-oci-artifact.sh`. See
|
||||||
|
the script's header comment for the full publishing flow.
|
||||||
|
|
||||||
|
### Policy gates
|
||||||
|
|
||||||
|
`apply` now enforces five gates before destroying the passive slot:
|
||||||
|
|
||||||
|
1. **Maintenance window** (configurable, e.g. `03:00-05:00`; wrapping
|
||||||
|
midnight supported)
|
||||||
|
2. **Node-block-label** — refuses if the K8s node carries
|
||||||
|
`updates.kubesolo.io/block=true` (workload-author kill switch)
|
||||||
|
3. **Channel** — `stable` / `beta` / `edge` must match between the artifact
|
||||||
|
metadata and the local channel
|
||||||
|
4. **Architecture** — refuses cross-arch artifacts via `runtime.GOARCH` check
|
||||||
|
5. **Min compatible version** — stepping-stone enforcement; refuses an
|
||||||
|
upgrade that bypasses a required intermediate version
|
||||||
|
|
||||||
|
`--force` bypasses the maintenance window and node-block label (channel /
|
||||||
|
arch / min-version are non-negotiable). Failures are recorded in `state.json`
|
||||||
|
with a clear `LastError` field.
|
||||||
|
|
||||||
|
### Healthcheck deepening + auto-rollback
|
||||||
|
|
||||||
|
`kubesolo-update healthcheck` grew three optional probes:
|
||||||
|
|
||||||
|
- **Kube-system pods** must hold Running for ≥ N seconds before passing
|
||||||
|
- **Operator probe URL** — GET an operator-supplied endpoint; 200 = pass
|
||||||
|
- **Disk smoke test** — write/fsync/read/delete a probe file under
|
||||||
|
`/var/lib/kubesolo` to catch a wedged data partition
|
||||||
|
|
||||||
|
Plus auto-rollback: with `--auto-rollback-after N` (or `auto_rollback_after=`
|
||||||
|
in `update.conf`), after N consecutive post-activation failures, the agent
|
||||||
|
calls `ForceRollback()` and the operator/init is expected to reboot. The
|
||||||
|
counter resets on a clean pass.
|
||||||
|
|
||||||
|
### Persistent configuration via `/etc/kubesolo/update.conf`
|
||||||
|
|
||||||
|
Cloud-init writes this file on first boot from a new `updates:` block; you
|
||||||
|
can also hand-edit it. Recognised keys:
|
||||||
|
|
||||||
|
```
|
||||||
|
server = https://updates.example.com # or omit if using registry
|
||||||
|
registry = # OCI registry ref (alt to server)
|
||||||
|
channel = stable
|
||||||
|
maintenance_window = 03:00-05:00
|
||||||
|
pubkey = /etc/kubesolo/update-pubkey.hex
|
||||||
|
healthcheck_url = http://localhost:8000/ready
|
||||||
|
auto_rollback_after = 3
|
||||||
|
```
|
||||||
|
|
||||||
|
Cloud-init full reference at
|
||||||
|
[cloud-init/examples/full-config.yaml](../cloud-init/examples/full-config.yaml).
|
||||||
|
|
||||||
|
## Migration from v0.2.x
|
||||||
|
|
||||||
|
This is a non-breaking release for live systems. v0.2.x → v0.3.0 changes:
|
||||||
|
|
||||||
|
- **`state.json` will appear** at `/var/lib/kubesolo/update/state.json` the
|
||||||
|
first time a v0.3 agent runs `apply`. Pre-existing v0.2 deployments without
|
||||||
|
this file are fine — the agent treats a missing file as fresh Idle state.
|
||||||
|
- **`update.conf` is optional**. v0.2 deployments that pass everything via
|
||||||
|
CLI flags keep working unchanged.
|
||||||
|
- **HTTP `latest.json` protocol unchanged**. Existing update servers don't
|
||||||
|
need a rebuild.
|
||||||
|
- **GRUB env (boot counter, active slot)** unchanged. The bootloader's
|
||||||
|
rollback behaviour is the same.
|
||||||
|
- **No new mandatory kernel command-line parameters**.
|
||||||
|
|
||||||
|
To opt into the new lifecycle, transports, and gates, drop in an
|
||||||
|
`update.conf` (or update cloud-init) and switch to `--registry` if you want
|
||||||
|
OCI distribution.
|
||||||
|
|
||||||
|
## Known limitations
|
||||||
|
|
||||||
|
These shipped intentionally with v0.3.0 and are explicitly tracked for
|
||||||
|
v0.3.1+:
|
||||||
|
|
||||||
|
- **OCI signature verification** — the OCI transport is digest-verified
|
||||||
|
end-to-end via oras-go, but does not yet consume cosign-style referrer
|
||||||
|
attestations. The HTTP transport still honours `--pubkey` for `.sig`
|
||||||
|
files.
|
||||||
|
- **ARM64 LABEL=KSOLODATA** resolution doesn't work yet — piCore's
|
||||||
|
`blkid`/`findfs` crash on QEMU virt under our mainline kernel; the
|
||||||
|
static `busybox-static` we ship doesn't include those applets.
|
||||||
|
`build/grub/grub-arm64.cfg` hardcodes `kubesolo.data=/dev/vda4` as a
|
||||||
|
workaround. On real ARM64 hardware the device path may differ.
|
||||||
|
- **Real-hardware ARM64 validation** is pending. The image builds and
|
||||||
|
boots end-to-end under QEMU virt; production certification waits on a
|
||||||
|
Graviton / Ampere run.
|
||||||
|
- **AppArmor profile load fails on ARM64** (`apparmor_parser` ABI mismatch).
|
||||||
|
Init reports the failure; boot continues without AppArmor enforcement.
|
||||||
|
- **QEMU TCG performance** can trigger KubeSolo's first-boot image-import
|
||||||
|
deadline. Not an OS defect; real hardware and KVM-accelerated QEMU
|
||||||
|
complete the import in seconds.
|
||||||
|
|
||||||
|
## How to upgrade your build host
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git pull
|
||||||
|
make distclean # optional — drops the build cache; full rebuild takes ~30 min
|
||||||
|
make iso # or disk-image, or disk-image-arm64
|
||||||
|
```
|
||||||
|
|
||||||
|
The Docker-based builder (`make docker-build`) regenerates its own image
|
||||||
|
from `build/Dockerfile.builder` on next invocation; oras 1.2.3 and
|
||||||
|
busybox-static are now included.
|
||||||
|
|
||||||
|
## Acknowledgements
|
||||||
|
|
||||||
|
v0.3.0 work was driven by a single multi-week pair-programming session
|
||||||
|
working through Phases 0–9 of the v0.3 roadmap. The Odroid self-hosted
|
||||||
|
Gitea Actions runner (`odroid.local`, arm64-linux) carried every ARM64
|
||||||
|
build during development.
|
||||||
202
hack/dev-vm-arm64.sh
Executable file
202
hack/dev-vm-arm64.sh
Executable file
@@ -0,0 +1,202 @@
|
|||||||
|
#!/bin/bash
# dev-vm-arm64.sh — Launch ARM64 QEMU VM for development
#
# Two modes:
#
# Default (direct kernel boot — fast iteration):
#   qemu loads the kernel Image + initramfs directly via -kernel/-initrd.
#   Skips bootloader, UEFI firmware, and disk image entirely.
#   Use this for kernel and init-script changes.
#
# --disk (full UEFI boot — integration testing):
#   qemu boots the .arm64.img disk image via UEFI firmware -> GRUB -> kernel.
#   Exercises the full boot chain. Use this when changing the disk image
#   layout, GRUB config, or anything that touches the EFI partition.
#
# Usage:
#   ./hack/dev-vm-arm64.sh                       # direct kernel boot (default)
#   ./hack/dev-vm-arm64.sh --disk                # full UEFI boot from built image
#   ./hack/dev-vm-arm64.sh --debug               # enable kubesolo.debug
#   ./hack/dev-vm-arm64.sh --shell               # drop to emergency shell
#   ./hack/dev-vm-arm64.sh --disk /path/to.img   # boot a specific disk image
#   ./hack/dev-vm-arm64.sh <kernel> <initramfs>  # direct boot with custom files
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
VERSION="$(cat "$PROJECT_ROOT/VERSION")"

MODE="kernel"   # kernel | disk
VMLINUZ=""
INITRD=""
DISK_IMAGE=""
EXTRA_APPEND=""

while [ $# -gt 0 ]; do
  case "$1" in
    --shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell"; shift ;;
    --debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug"; shift ;;
    --disk)
      MODE="disk"
      shift
      # Optional next-arg as disk image path
      if [ $# -gt 0 ] && [ -f "$1" ]; then
        DISK_IMAGE="$1"
        shift
      fi
      ;;
    *)
      # Positional args are only meaningful in kernel mode: <kernel> <initramfs>
      if [ "$MODE" = "kernel" ] && [ -z "$VMLINUZ" ]; then
        VMLINUZ="$1"
      elif [ "$MODE" = "kernel" ] && [ -z "$INITRD" ]; then
        INITRD="$1"
      fi
      shift
      ;;
  esac
done

# ---------------------------------------------------------------------------
# UEFI firmware probe (used for --disk mode)
# ---------------------------------------------------------------------------
find_uefi_firmware() {
  local candidates=(
    /usr/share/qemu-efi-aarch64/QEMU_EFI.fd
    /usr/share/AAVMF/AAVMF_CODE.fd
    /usr/share/edk2/aarch64/QEMU_EFI.fd
    /usr/share/qemu/edk2-aarch64-code.fd
    /opt/homebrew/share/qemu/edk2-aarch64-code.fd
    /usr/local/share/qemu/edk2-aarch64-code.fd
  )
  for f in "${candidates[@]}"; do
    [ -f "$f" ] && echo "$f" && return 0
  done
  return 1
}

# ---------------------------------------------------------------------------
# mkfs.ext4 probe (kernel mode creates a scratch data disk)
# ---------------------------------------------------------------------------
find_mkfs_ext4() {
  if command -v mkfs.ext4 >/dev/null 2>&1; then
    echo "mkfs.ext4"
  elif [ -x "/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
    echo "/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4"
  elif [ -x "/usr/local/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
    echo "/usr/local/opt/e2fsprogs/sbin/mkfs.ext4"
  fi
}

# ===========================================================================
# Disk mode: boot the built .arm64.img through UEFI firmware + GRUB
# ===========================================================================
if [ "$MODE" = "disk" ]; then
  DISK_IMAGE="${DISK_IMAGE:-$PROJECT_ROOT/output/kubesolo-os-${VERSION}.arm64.img}"

  if [ ! -f "$DISK_IMAGE" ]; then
    echo "ERROR: Disk image not found: $DISK_IMAGE"
    echo "       Run 'make disk-image-arm64' to build it."
    exit 1
  fi

  UEFI_FW="$(find_uefi_firmware || true)"
  if [ -z "$UEFI_FW" ]; then
    echo "ERROR: No ARM64 UEFI firmware found."
    echo "       Install one of:"
    echo "         apt install qemu-efi-aarch64   # Debian/Ubuntu"
    echo "         dnf install edk2-aarch64       # Fedora/RHEL"
    echo "         brew install qemu              # macOS (bundled)"
    exit 1
  fi

  # Pad UEFI firmware variable store to 64 MiB if QEMU expects pflash sizing.
  # Most ARM64 EFI .fd files are 64 MB; if yours is smaller, QEMU may refuse.
  echo "==> Launching ARM64 QEMU (UEFI disk boot)..."
  echo "    Firmware: $UEFI_FW"
  echo "    Disk:     $DISK_IMAGE"
  echo ""
  echo "    K8s API: localhost:6443"
  echo "    SSH:     localhost:2222"
  echo "    Press Ctrl+A X to exit QEMU"
  echo ""

  # -cpu max enables all emulated ARMv8 features (atomics, crypto, fp16).
  # piCore64's BusyBox is built with -march=armv8-a+crypto+lse and segfaults
  # under -cpu cortex-a72 because some required extensions aren't on by
  # default in that model.
  qemu-system-aarch64 \
    -machine virt \
    -cpu max \
    -m 2048 \
    -smp 2 \
    -nographic \
    -bios "$UEFI_FW" \
    -drive "file=$DISK_IMAGE,format=raw,if=virtio,media=disk" \
    -net "nic,model=virtio" \
    -net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22"
  exit 0
fi

# ===========================================================================
# Kernel mode (default): direct -kernel / -initrd, fast iteration
# ===========================================================================
VMLINUZ="${VMLINUZ:-$PROJECT_ROOT/build/cache/kernel-arm64-generic/Image}"
INITRD="${INITRD:-$PROJECT_ROOT/build/rootfs-work/kubesolo-os.gz}"

# Fallback: previous-generation RPi kernel cache, in case someone hasn't yet
# rebuilt under v0.3 paths.
if [ ! -f "$VMLINUZ" ] && [ -f "$PROJECT_ROOT/build/cache/custom-kernel-rpi/Image" ]; then
  VMLINUZ="$PROJECT_ROOT/build/cache/custom-kernel-rpi/Image"
  echo "==> Note: falling back to RPi kernel ($VMLINUZ)"
fi

if [ ! -f "$VMLINUZ" ]; then
  echo "ERROR: Kernel not found: $VMLINUZ"
  echo "       Run 'make kernel-arm64' (generic) or 'make kernel-rpi' to build a kernel."
  exit 1
fi
if [ ! -f "$INITRD" ]; then
  echo "ERROR: Initrd not found: $INITRD"
  echo "       Run 'make rootfs-arm64' to build the initramfs."
  exit 1
fi

MKFS_EXT4="$(find_mkfs_ext4)"
if [ -z "$MKFS_EXT4" ]; then
  echo "ERROR: mkfs.ext4 not found. Install e2fsprogs:"
  if [ "$(uname)" = "Darwin" ]; then
    echo "  brew install e2fsprogs"
  else
    echo "  apt install e2fsprogs   # Debian/Ubuntu"
    echo "  dnf install e2fsprogs   # Fedora/RHEL"
  fi
  exit 1
fi

# BUGFIX: mktemp creates (and returns) a file WITHOUT the .img suffix; the
# scratch disk we dd/mkfs is the sibling "<tmp>.img". Cleanup must remove
# BOTH paths, otherwise the zero-byte mktemp file leaks in /tmp on every run.
# (Same cleanup pattern as hack/dev-vm.sh.)
DATA_DISK="$(mktemp /tmp/kubesolo-arm64-data-XXXXXX).img"
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
"$MKFS_EXT4" -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
trap 'rm -f "$DATA_DISK" "${DATA_DISK%.img}"' EXIT

echo "==> Launching ARM64 QEMU (direct kernel boot)..."
echo "    Kernel: $VMLINUZ"
echo "    Initrd: $INITRD"
echo "    Data:   $DATA_DISK"
echo ""
echo "    K8s API: localhost:6443"
echo "    SSH:     localhost:2222"
echo "    Press Ctrl+A X to exit QEMU"
echo ""

# Note: kubesolo.debug is always on in kernel mode for dev visibility;
# --debug is therefore redundant here but harmless.
qemu-system-aarch64 \
  -machine virt \
  -cpu max \
  -m 2048 \
  -smp 2 \
  -nographic \
  -kernel "$VMLINUZ" \
  -initrd "$INITRD" \
  -append "console=ttyAMA0 kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND" \
  -drive "file=$DATA_DISK,format=raw,if=virtio" \
  -net "nic,model=virtio" \
  -net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22"
|
||||||
163
hack/dev-vm.sh
163
hack/dev-vm.sh
@@ -1,24 +1,29 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# dev-vm.sh — Launch a QEMU VM for development and testing
|
# dev-vm.sh — Launch a QEMU VM for development and testing
|
||||||
# Usage: ./hack/dev-vm.sh [path-to-iso-or-img] [--shell] [--debug]
|
# Usage: ./hack/dev-vm.sh [path-to-iso-or-img] [--shell] [--debug]
|
||||||
|
#
|
||||||
|
# Works on both Linux (with KVM) and macOS (TCG emulation).
|
||||||
|
# On macOS/Apple Silicon, x86_64 guests run under TCG (~5-15x slower than KVM).
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
|
ROOTFS_DIR="${ROOTFS_DIR:-$PROJECT_ROOT/build/rootfs-work}"
|
||||||
DEFAULT_ISO="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.iso"
|
DEFAULT_ISO="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.iso"
|
||||||
DEFAULT_IMG="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.img"
|
DEFAULT_IMG="$PROJECT_ROOT/output/kubesolo-os-${VERSION}.img"
|
||||||
|
|
||||||
IMAGE="${1:-}"
|
IMAGE=""
|
||||||
EXTRA_APPEND=""
|
EXTRA_APPEND=""
|
||||||
SERIAL_OPTS="-serial stdio"
|
|
||||||
|
|
||||||
# Parse flags
|
# Parse all arguments — flags and optional image path
|
||||||
shift || true
|
|
||||||
for arg in "$@"; do
|
for arg in "$@"; do
|
||||||
case "$arg" in
|
case "$arg" in
|
||||||
--shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
|
--shell) EXTRA_APPEND="$EXTRA_APPEND kubesolo.shell" ;;
|
||||||
--debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
|
--debug) EXTRA_APPEND="$EXTRA_APPEND kubesolo.debug" ;;
|
||||||
|
--edge-id=*) EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_id=${arg#--edge-id=}" ;;
|
||||||
|
--edge-key=*) EXTRA_APPEND="$EXTRA_APPEND kubesolo.edge_key=${arg#--edge-key=}" ;;
|
||||||
|
*) IMAGE="$arg" ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -39,42 +44,146 @@ echo "==> Launching QEMU with: $IMAGE"
|
|||||||
echo " Press Ctrl+A, X to exit"
|
echo " Press Ctrl+A, X to exit"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Create a temporary data disk for persistence testing
|
DATA_APPEND=""
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=""
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
|
|
||||||
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
|
||||||
|
|
||||||
cleanup() { rm -f "$DATA_DISK"; }
|
# Find mkfs.ext4 (Homebrew on macOS installs to a non-PATH location)
|
||||||
|
MKFS_EXT4=""
|
||||||
|
if command -v mkfs.ext4 >/dev/null 2>&1; then
|
||||||
|
MKFS_EXT4="mkfs.ext4"
|
||||||
|
elif [ -x "/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
|
||||||
|
MKFS_EXT4="/opt/homebrew/opt/e2fsprogs/sbin/mkfs.ext4"
|
||||||
|
elif [ -x "/usr/local/opt/e2fsprogs/sbin/mkfs.ext4" ]; then
|
||||||
|
MKFS_EXT4="/usr/local/opt/e2fsprogs/sbin/mkfs.ext4"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create and attach a formatted data disk for persistent K8s state.
|
||||||
|
if [ -n "$MKFS_EXT4" ]; then
|
||||||
|
DATA_DISK="$(mktemp /tmp/kubesolo-data-XXXXXX).img"
|
||||||
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
||||||
|
"$MKFS_EXT4" -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
DATA_APPEND="kubesolo.data=/dev/vda"
|
||||||
|
echo " Data disk: 2 GB ext4 (persistent)"
|
||||||
|
else
|
||||||
|
echo "ERROR: mkfs.ext4 not found. Install e2fsprogs:"
|
||||||
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
echo " brew install e2fsprogs"
|
||||||
|
else
|
||||||
|
echo " apt install e2fsprogs # Debian/Ubuntu"
|
||||||
|
echo " dnf install e2fsprogs # Fedora/RHEL"
|
||||||
|
fi
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
[ -n "$DATA_DISK" ] && rm -f "$DATA_DISK" "${DATA_DISK%.img}"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
COMMON_OPTS=(
|
# Build QEMU command
|
||||||
-m 2048
|
QEMU_ARGS=(-m 2048 -smp 2 -nographic -cpu max)
|
||||||
-smp 2
|
QEMU_ARGS+=(-net "nic,model=virtio")
|
||||||
-nographic
|
QEMU_ARGS+=(-net "user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22,hostfwd=tcp::8080-:8080")
|
||||||
-net nic,model=virtio
|
|
||||||
-net user,hostfwd=tcp::6443-:6443,hostfwd=tcp::2222-:22
|
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Enable KVM if available
|
if [ -n "$DATA_DISK" ]; then
|
||||||
|
QEMU_ARGS+=(-drive "file=$DATA_DISK,format=raw,if=virtio")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Enable KVM on Linux, fall back to TCG everywhere else
|
||||||
if [ -w /dev/kvm ] 2>/dev/null; then
|
if [ -w /dev/kvm ] 2>/dev/null; then
|
||||||
COMMON_OPTS+=(-enable-kvm)
|
QEMU_ARGS+=(-accel kvm)
|
||||||
echo " KVM acceleration: enabled"
|
echo " KVM acceleration: enabled"
|
||||||
else
|
else
|
||||||
echo " KVM acceleration: not available (using TCG)"
|
QEMU_ARGS+=(-accel tcg)
|
||||||
|
echo " TCG emulation (no KVM — expect slower boot)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
case "$IMAGE" in
|
case "$IMAGE" in
|
||||||
*.iso)
|
*.iso)
|
||||||
|
# -append only works with -kernel, not -cdrom.
|
||||||
|
# Extract kernel + initramfs and use direct kernel boot.
|
||||||
|
VMLINUZ=""
|
||||||
|
INITRAMFS=""
|
||||||
|
|
||||||
|
# Prefer build artifacts if present (no extraction needed)
|
||||||
|
if [ -f "$ROOTFS_DIR/vmlinuz" ] && [ -f "$ROOTFS_DIR/kubesolo-os.gz" ]; then
|
||||||
|
VMLINUZ="$ROOTFS_DIR/vmlinuz"
|
||||||
|
INITRAMFS="$ROOTFS_DIR/kubesolo-os.gz"
|
||||||
|
echo " Using kernel/initramfs from build directory"
|
||||||
|
else
|
||||||
|
# Extract kernel + initramfs from ISO.
|
||||||
|
# Try multiple methods: bsdtar > isoinfo > loop mount
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
EXTRACTED=0
|
||||||
|
|
||||||
|
echo " Extracting kernel/initramfs from ISO..."
|
||||||
|
|
||||||
|
# Method 1: bsdtar (ships with macOS, libarchive-tools on Linux)
|
||||||
|
if [ $EXTRACTED -eq 0 ] && command -v bsdtar >/dev/null 2>&1; then
|
||||||
|
if bsdtar -xf "$IMAGE" -C "$EXTRACT_DIR" boot/vmlinuz boot/kubesolo-os.gz 2>/dev/null; then
|
||||||
|
echo " Extracted via bsdtar"
|
||||||
|
EXTRACTED=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Method 2: isoinfo (genisoimage/cdrtools on Linux)
|
||||||
|
if [ $EXTRACTED -eq 0 ] && command -v isoinfo >/dev/null 2>&1; then
|
||||||
|
mkdir -p "$EXTRACT_DIR/boot"
|
||||||
|
isoinfo -i "$IMAGE" -x "/BOOT/VMLINUZ;1" > "$EXTRACT_DIR/boot/vmlinuz" 2>/dev/null || true
|
||||||
|
isoinfo -i "$IMAGE" -x "/BOOT/KUBESOLO-OS.GZ;1" > "$EXTRACT_DIR/boot/kubesolo-os.gz" 2>/dev/null || true
|
||||||
|
# isoinfo writes empty files on failure; check size
|
||||||
|
if [ -s "$EXTRACT_DIR/boot/vmlinuz" ] && [ -s "$EXTRACT_DIR/boot/kubesolo-os.gz" ]; then
|
||||||
|
echo " Extracted via isoinfo"
|
||||||
|
EXTRACTED=1
|
||||||
|
else
|
||||||
|
rm -f "$EXTRACT_DIR/boot/vmlinuz" "$EXTRACT_DIR/boot/kubesolo-os.gz"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Method 3: loop mount (Linux only, requires root)
|
||||||
|
if [ $EXTRACTED -eq 0 ] && [ "$(uname)" = "Linux" ]; then
|
||||||
|
ISO_MOUNT="$EXTRACT_DIR/mnt"
|
||||||
|
mkdir -p "$ISO_MOUNT"
|
||||||
|
if mount -o loop,ro "$IMAGE" "$ISO_MOUNT" 2>/dev/null; then
|
||||||
|
mkdir -p "$EXTRACT_DIR/boot"
|
||||||
|
cp "$ISO_MOUNT/boot/vmlinuz" "$EXTRACT_DIR/boot/" 2>/dev/null || true
|
||||||
|
cp "$ISO_MOUNT/boot/kubesolo-os.gz" "$EXTRACT_DIR/boot/" 2>/dev/null || true
|
||||||
|
umount "$ISO_MOUNT" 2>/dev/null || true
|
||||||
|
if [ -f "$EXTRACT_DIR/boot/vmlinuz" ] && [ -f "$EXTRACT_DIR/boot/kubesolo-os.gz" ]; then
|
||||||
|
echo " Extracted via loop mount"
|
||||||
|
EXTRACTED=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $EXTRACTED -eq 0 ]; then
|
||||||
|
echo "ERROR: Failed to extract kernel/initramfs from ISO."
|
||||||
|
echo " Install one of: bsdtar (libarchive-tools), isoinfo (genisoimage), or run as root for loop mount."
|
||||||
|
echo " Or run 'make rootfs initramfs' to produce build artifacts."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
VMLINUZ="$EXTRACT_DIR/boot/vmlinuz"
|
||||||
|
INITRAMFS="$EXTRACT_DIR/boot/kubesolo-os.gz"
|
||||||
|
|
||||||
|
if [ ! -f "$VMLINUZ" ] || [ ! -f "$INITRAMFS" ]; then
|
||||||
|
echo "ERROR: ISO does not contain expected boot/vmlinuz and boot/kubesolo-os.gz"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
"${COMMON_OPTS[@]}" \
|
"${QEMU_ARGS[@]}" \
|
||||||
-cdrom "$IMAGE" \
|
-kernel "$VMLINUZ" \
|
||||||
-boot d \
|
-initrd "$INITRAMFS" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda $EXTRA_APPEND"
|
-append "console=ttyS0,115200n8 $DATA_APPEND $EXTRA_APPEND"
|
||||||
;;
|
;;
|
||||||
*.img)
|
*.img)
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
"${COMMON_OPTS[@]}" \
|
"${QEMU_ARGS[@]}" \
|
||||||
-drive "file=$IMAGE,format=raw,if=virtio"
|
-drive "file=$IMAGE,format=raw,if=virtio"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
48
hack/fix-portainer-service.sh
Executable file
48
hack/fix-portainer-service.sh
Executable file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# fix-portainer-service.sh — Create the missing headless Service for Portainer agent
|
||||||
|
# Usage: ./hack/fix-portainer-service.sh
|
||||||
|
#
|
||||||
|
# The Portainer agent does a DNS lookup for "portainer-agent" to discover peers.
|
||||||
|
# Without a Service, this lookup fails and the agent crashes.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
KUBECONFIG_URL="http://localhost:8080"
|
||||||
|
|
||||||
|
echo "==> Fetching kubeconfig from $KUBECONFIG_URL..."
|
||||||
|
KUBECONFIG_FILE=$(mktemp)
|
||||||
|
trap 'rm -f "$KUBECONFIG_FILE"' EXIT
|
||||||
|
|
||||||
|
curl -s "$KUBECONFIG_URL" > "$KUBECONFIG_FILE"
|
||||||
|
|
||||||
|
if [ ! -s "$KUBECONFIG_FILE" ]; then
|
||||||
|
echo "ERROR: Failed to fetch kubeconfig. Is the VM running?"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Creating headless Service for portainer-agent..."
|
||||||
|
kubectl --kubeconfig "$KUBECONFIG_FILE" apply -f - <<'EOF'
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: portainer-agent
|
||||||
|
namespace: portainer
|
||||||
|
spec:
|
||||||
|
clusterIP: None
|
||||||
|
selector:
|
||||||
|
app: portainer-agent
|
||||||
|
ports:
|
||||||
|
- name: agent
|
||||||
|
port: 9001
|
||||||
|
targetPort: 9001
|
||||||
|
protocol: TCP
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo "==> Restarting portainer-agent deployment..."
|
||||||
|
kubectl --kubeconfig "$KUBECONFIG_FILE" rollout restart -n portainer deployment/portainer-agent
|
||||||
|
|
||||||
|
echo "==> Waiting for rollout..."
|
||||||
|
kubectl --kubeconfig "$KUBECONFIG_FILE" rollout status -n portainer deployment/portainer-agent --timeout=120s
|
||||||
|
|
||||||
|
echo "==> Done. Checking pod status:"
|
||||||
|
kubectl --kubeconfig "$KUBECONFIG_FILE" get pods -n portainer
|
||||||
@@ -14,6 +14,11 @@
|
|||||||
# kubesolo.cloudinit=<path> Path to cloud-init config
|
# kubesolo.cloudinit=<path> Path to cloud-init config
|
||||||
# kubesolo.flags=<flags> Extra flags for KubeSolo binary
|
# kubesolo.flags=<flags> Extra flags for KubeSolo binary
|
||||||
|
|
||||||
|
# Route early boot output to /dev/console — before switch_root the kernel may
|
||||||
|
# not have a controlling tty, and some stages echo to stderr expecting it to
|
||||||
|
# reach the serial console. This is a no-op once the staged init proper starts.
|
||||||
|
exec >/dev/console 2>&1
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# --- Switch root: escape initramfs so runc pivot_root works ---
|
# --- Switch root: escape initramfs so runc pivot_root works ---
|
||||||
@@ -62,6 +67,9 @@ export KUBESOLO_SHELL=""
|
|||||||
export KUBESOLO_NOPERSIST=""
|
export KUBESOLO_NOPERSIST=""
|
||||||
export KUBESOLO_CLOUDINIT=""
|
export KUBESOLO_CLOUDINIT=""
|
||||||
export KUBESOLO_EXTRA_FLAGS=""
|
export KUBESOLO_EXTRA_FLAGS=""
|
||||||
|
export KUBESOLO_PORTAINER_EDGE_ID=""
|
||||||
|
export KUBESOLO_PORTAINER_EDGE_KEY=""
|
||||||
|
export KUBESOLO_NOAPPARMOR=""
|
||||||
|
|
||||||
# --- Logging ---
|
# --- Logging ---
|
||||||
log() {
|
log() {
|
||||||
|
|||||||
@@ -12,10 +12,10 @@ if ! mountpoint -q /dev 2>/dev/null; then
|
|||||||
mount -t devtmpfs devtmpfs /dev 2>/dev/null || mount -t tmpfs tmpfs /dev
|
mount -t devtmpfs devtmpfs /dev 2>/dev/null || mount -t tmpfs tmpfs /dev
|
||||||
fi
|
fi
|
||||||
if ! mountpoint -q /tmp 2>/dev/null; then
|
if ! mountpoint -q /tmp 2>/dev/null; then
|
||||||
mount -t tmpfs tmpfs /tmp
|
mount -t tmpfs -o noexec,nosuid,nodev,size=256M tmpfs /tmp
|
||||||
fi
|
fi
|
||||||
if ! mountpoint -q /run 2>/dev/null; then
|
if ! mountpoint -q /run 2>/dev/null; then
|
||||||
mount -t tmpfs tmpfs /run
|
mount -t tmpfs -o nosuid,nodev,size=64M tmpfs /run
|
||||||
fi
|
fi
|
||||||
|
|
||||||
mkdir -p /dev/pts /dev/shm
|
mkdir -p /dev/pts /dev/shm
|
||||||
@@ -23,7 +23,7 @@ if ! mountpoint -q /dev/pts 2>/dev/null; then
|
|||||||
mount -t devpts devpts /dev/pts
|
mount -t devpts devpts /dev/pts
|
||||||
fi
|
fi
|
||||||
if ! mountpoint -q /dev/shm 2>/dev/null; then
|
if ! mountpoint -q /dev/shm 2>/dev/null; then
|
||||||
mount -t tmpfs tmpfs /dev/shm
|
mount -t tmpfs -o noexec,nosuid,nodev,size=64M tmpfs /dev/shm
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Ensure essential device nodes exist (devtmpfs may be incomplete after switch_root)
|
# Ensure essential device nodes exist (devtmpfs may be incomplete after switch_root)
|
||||||
|
|||||||
@@ -9,9 +9,16 @@ for arg in $(cat /proc/cmdline); do
|
|||||||
kubesolo.nopersist) KUBESOLO_NOPERSIST=1 ;;
|
kubesolo.nopersist) KUBESOLO_NOPERSIST=1 ;;
|
||||||
kubesolo.cloudinit=*) KUBESOLO_CLOUDINIT="${arg#kubesolo.cloudinit=}" ;;
|
kubesolo.cloudinit=*) KUBESOLO_CLOUDINIT="${arg#kubesolo.cloudinit=}" ;;
|
||||||
kubesolo.flags=*) KUBESOLO_EXTRA_FLAGS="${arg#kubesolo.flags=}" ;;
|
kubesolo.flags=*) KUBESOLO_EXTRA_FLAGS="${arg#kubesolo.flags=}" ;;
|
||||||
|
kubesolo.edge_id=*) KUBESOLO_PORTAINER_EDGE_ID="${arg#kubesolo.edge_id=}" ;;
|
||||||
|
kubesolo.edge_key=*) KUBESOLO_PORTAINER_EDGE_KEY="${arg#kubesolo.edge_key=}" ;;
|
||||||
|
kubesolo.nomodlock) KUBESOLO_NOMODLOCK=1 ;;
|
||||||
|
kubesolo.noapparmor) KUBESOLO_NOAPPARMOR=1 ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
export KUBESOLO_NOMODLOCK
|
||||||
|
export KUBESOLO_NOAPPARMOR
|
||||||
|
|
||||||
if [ -z "$KUBESOLO_DATA_DEV" ] && [ "$KUBESOLO_NOPERSIST" != "1" ]; then
|
if [ -z "$KUBESOLO_DATA_DEV" ] && [ "$KUBESOLO_NOPERSIST" != "1" ]; then
|
||||||
log_warn "No kubesolo.data= specified and kubesolo.nopersist not set"
|
log_warn "No kubesolo.data= specified and kubesolo.nopersist not set"
|
||||||
log_warn "Attempting auto-detection of data partition (label: KSOLODATA)"
|
log_warn "Attempting auto-detection of data partition (label: KSOLODATA)"
|
||||||
|
|||||||
@@ -11,37 +11,108 @@ fi
|
|||||||
# Load block device drivers before waiting (modules loaded later in stage 30,
|
# Load block device drivers before waiting (modules loaded later in stage 30,
|
||||||
# but we need virtio_blk available NOW for /dev/vda detection)
|
# but we need virtio_blk available NOW for /dev/vda detection)
|
||||||
modprobe virtio_blk 2>/dev/null || true
|
modprobe virtio_blk 2>/dev/null || true
|
||||||
|
modprobe mmc_block 2>/dev/null || true
|
||||||
# Trigger mdev to create device nodes after loading driver
|
# Trigger mdev to create device nodes after loading driver
|
||||||
mdev -s 2>/dev/null || true
|
mdev -s 2>/dev/null || true
|
||||||
|
|
||||||
# Fallback: create device node from sysfs if devtmpfs/mdev didn't
|
# Resolve LABEL= syntax to actual block device path
|
||||||
DEV_NAME="${KUBESOLO_DATA_DEV##*/}"
|
# The RPi cmdline uses kubesolo.data=LABEL=KSOLODATA which needs resolution
|
||||||
if [ ! -b "$KUBESOLO_DATA_DEV" ] && [ -f "/sys/class/block/$DEV_NAME/dev" ]; then
|
|
||||||
MAJMIN=$(cat "/sys/class/block/$DEV_NAME/dev")
|
|
||||||
mknod "$KUBESOLO_DATA_DEV" b "${MAJMIN%%:*}" "${MAJMIN##*:}" 2>/dev/null || true
|
|
||||||
log "Created $KUBESOLO_DATA_DEV via mknod ($MAJMIN)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Wait for device to appear (USB, slow disks, virtio)
|
|
||||||
log "Waiting for data device: $KUBESOLO_DATA_DEV"
|
|
||||||
WAIT_SECS=30
|
WAIT_SECS=30
|
||||||
for i in $(seq 1 "$WAIT_SECS"); do
|
log "Waiting for data device: $KUBESOLO_DATA_DEV"
|
||||||
[ -b "$KUBESOLO_DATA_DEV" ] && break
|
|
||||||
mdev -s 2>/dev/null || true
|
case "$KUBESOLO_DATA_DEV" in
|
||||||
sleep 1
|
LABEL=*)
|
||||||
done
|
# Extract label name and resolve via blkid/findfs
|
||||||
|
DATA_LABEL="${KUBESOLO_DATA_DEV#LABEL=}"
|
||||||
|
RESOLVED=""
|
||||||
|
for i in $(seq 1 "$WAIT_SECS"); do
|
||||||
|
mdev -s 2>/dev/null || true
|
||||||
|
RESOLVED=$(blkid -L "$DATA_LABEL" 2>/dev/null) || true
|
||||||
|
if [ -z "$RESOLVED" ]; then
|
||||||
|
RESOLVED=$(findfs "LABEL=$DATA_LABEL" 2>/dev/null) || true
|
||||||
|
fi
|
||||||
|
if [ -n "$RESOLVED" ] && [ -b "$RESOLVED" ]; then
|
||||||
|
log "Resolved LABEL=$DATA_LABEL -> $RESOLVED"
|
||||||
|
KUBESOLO_DATA_DEV="$RESOLVED"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
# Direct block device path — wait for it to appear
|
||||||
|
# Fallback: create device node from sysfs if devtmpfs/mdev didn't
|
||||||
|
DEV_NAME="${KUBESOLO_DATA_DEV##*/}"
|
||||||
|
if [ ! -b "$KUBESOLO_DATA_DEV" ] && [ -f "/sys/class/block/$DEV_NAME/dev" ]; then
|
||||||
|
MAJMIN=$(cat "/sys/class/block/$DEV_NAME/dev")
|
||||||
|
mknod "$KUBESOLO_DATA_DEV" b "${MAJMIN%%:*}" "${MAJMIN##*:}" 2>/dev/null || true
|
||||||
|
log "Created $KUBESOLO_DATA_DEV via mknod ($MAJMIN)"
|
||||||
|
fi
|
||||||
|
for i in $(seq 1 "$WAIT_SECS"); do
|
||||||
|
[ -b "$KUBESOLO_DATA_DEV" ] && break
|
||||||
|
mdev -s 2>/dev/null || true
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
if [ ! -b "$KUBESOLO_DATA_DEV" ]; then
|
if [ ! -b "$KUBESOLO_DATA_DEV" ]; then
|
||||||
log_err "Data device $KUBESOLO_DATA_DEV not found after ${WAIT_SECS}s"
|
log_err "Data device $KUBESOLO_DATA_DEV not found after ${WAIT_SECS}s"
|
||||||
return 1
|
# Comprehensive diagnostics for block device failure
|
||||||
|
log_err "=== Block device diagnostics ==="
|
||||||
|
log_err "--- /dev block devices ---"
|
||||||
|
ls -la /dev/mmc* /dev/sd* /dev/vd* /dev/nvme* 2>/dev/null | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- /sys/class/block (kernel registered) ---"
|
||||||
|
ls /sys/class/block/ 2>/dev/null | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- dmesg: MMC/SDHCI/emmc ---"
|
||||||
|
dmesg 2>/dev/null | grep -i -e mmc -e sdhci -e emmc | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- dmesg: regulator ---"
|
||||||
|
dmesg 2>/dev/null | grep -i regulator | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- dmesg: firmware/mailbox ---"
|
||||||
|
dmesg 2>/dev/null | grep -i -e 'raspberrypi' -e 'mailbox' -e 'firmware' | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- dmesg: errors ---"
|
||||||
|
dmesg 2>/dev/null | grep -i -e 'error' -e 'fail' -e 'unable' | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "--- Full dmesg (last 60 lines) ---"
|
||||||
|
dmesg 2>/dev/null | tail -60 | while read -r line; do
|
||||||
|
log_err " $line"
|
||||||
|
done
|
||||||
|
log_err "=== End diagnostics ==="
|
||||||
|
log_err ""
|
||||||
|
log_err "Dropping to debug shell in 10 seconds..."
|
||||||
|
log_err "Run 'dmesg' to see full kernel log."
|
||||||
|
log_err "Run 'ls /sys/class/block/' to check block devices."
|
||||||
|
log_err ""
|
||||||
|
sleep 10
|
||||||
|
# Drop to interactive shell instead of returning failure
|
||||||
|
# (returning 1 with set -e causes kernel panic before emergency_shell)
|
||||||
|
exec /bin/sh </dev/console >/dev/console 2>&1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Mount data partition
|
# Mount data partition (format on first boot if unformatted)
|
||||||
mkdir -p "$DATA_MOUNT"
|
mkdir -p "$DATA_MOUNT"
|
||||||
mount -t ext4 -o noatime "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
|
if ! mount -t ext4 -o noatime,nosuid,nodev "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" 2>/dev/null; then
|
||||||
log_err "Failed to mount $KUBESOLO_DATA_DEV"
|
log "Formatting $KUBESOLO_DATA_DEV as ext4 (first boot)"
|
||||||
return 1
|
mkfs.ext4 -q -L KSOLODATA "$KUBESOLO_DATA_DEV" || {
|
||||||
}
|
log_err "Failed to format $KUBESOLO_DATA_DEV"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
mount -t ext4 -o noatime,nosuid,nodev "$KUBESOLO_DATA_DEV" "$DATA_MOUNT" || {
|
||||||
|
log_err "Failed to mount $KUBESOLO_DATA_DEV after format"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
fi
|
||||||
log_ok "Mounted $KUBESOLO_DATA_DEV at $DATA_MOUNT"
|
log_ok "Mounted $KUBESOLO_DATA_DEV at $DATA_MOUNT"
|
||||||
|
|
||||||
# Create persistent directory structure (first boot)
|
# Create persistent directory structure (first boot)
|
||||||
|
|||||||
@@ -16,7 +16,11 @@ while IFS= read -r mod; do
|
|||||||
case "$mod" in
|
case "$mod" in
|
||||||
'#'*|'') continue ;;
|
'#'*|'') continue ;;
|
||||||
esac
|
esac
|
||||||
mod="$(echo "$mod" | tr -d '[:space:]')"
|
# NOTE: do NOT use tr -d '[:space:]' — Ubuntu's busybox-static 1.30.1 (used
|
||||||
|
# in the ARM64 rootfs override) doesn't parse POSIX char classes and treats
|
||||||
|
# them as a literal set, deleting [, :, s, p, a, c, e, ]. Use explicit
|
||||||
|
# whitespace chars instead so the same script works under any tr.
|
||||||
|
mod="$(printf '%s' "$mod" | tr -d ' \t\r\n')"
|
||||||
if modprobe "$mod" 2>/dev/null; then
|
if modprobe "$mod" 2>/dev/null; then
|
||||||
LOADED=$((LOADED + 1))
|
LOADED=$((LOADED + 1))
|
||||||
else
|
else
|
||||||
|
|||||||
47
init/lib/35-apparmor.sh
Normal file
47
init/lib/35-apparmor.sh
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# 35-apparmor.sh — Load AppArmor LSM profiles
|
||||||
|
|
||||||
|
# Check for opt-out boot parameter
|
||||||
|
if [ "$KUBESOLO_NOAPPARMOR" = "1" ]; then
|
||||||
|
log "AppArmor disabled via kubesolo.noapparmor boot parameter"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mount securityfs if not already mounted
|
||||||
|
if ! mountpoint -q /sys/kernel/security 2>/dev/null; then
|
||||||
|
mount -t securityfs securityfs /sys/kernel/security 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if AppArmor is available in the kernel
|
||||||
|
if [ ! -d /sys/kernel/security/apparmor ]; then
|
||||||
|
log_warn "AppArmor not available in kernel — skipping profile loading"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check for apparmor_parser
|
||||||
|
if ! command -v apparmor_parser >/dev/null 2>&1; then
|
||||||
|
log_warn "apparmor_parser not found — skipping profile loading"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Load all profiles from /etc/apparmor.d/
|
||||||
|
PROFILE_DIR="/etc/apparmor.d"
|
||||||
|
if [ ! -d "$PROFILE_DIR" ]; then
|
||||||
|
log_warn "No AppArmor profiles directory ($PROFILE_DIR) — skipping"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
LOADED=0
|
||||||
|
FAILED=0
|
||||||
|
|
||||||
|
for profile in "$PROFILE_DIR"/*; do
|
||||||
|
[ -f "$profile" ] || continue
|
||||||
|
if apparmor_parser -r "$profile" 2>/dev/null; then
|
||||||
|
LOADED=$((LOADED + 1))
|
||||||
|
else
|
||||||
|
log_warn "Failed to load AppArmor profile: $(basename "$profile")"
|
||||||
|
FAILED=$((FAILED + 1))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
log_ok "AppArmor: loaded $LOADED profiles ($FAILED failed)"
|
||||||
@@ -8,8 +8,11 @@ for conf in /etc/sysctl.d/*.conf; do
|
|||||||
case "$key" in
|
case "$key" in
|
||||||
'#'*|'') continue ;;
|
'#'*|'') continue ;;
|
||||||
esac
|
esac
|
||||||
key="$(echo "$key" | tr -d '[:space:]')"
|
# NOTE: do NOT use tr -d '[:space:]' — see 30-kernel-modules.sh for the
|
||||||
value="$(echo "$value" | tr -d '[:space:]')"
|
# rationale. Use explicit whitespace chars so this works under
|
||||||
|
# Ubuntu's busybox-static tr too.
|
||||||
|
key="$(printf '%s' "$key" | tr -d ' \t\r\n')"
|
||||||
|
value="$(printf '%s' "$value" | tr -d ' \t\r\n')"
|
||||||
if [ -n "$key" ] && [ -n "$value" ]; then
|
if [ -n "$key" ] && [ -n "$value" ]; then
|
||||||
sysctl -w "${key}=${value}" >/dev/null 2>&1 || \
|
sysctl -w "${key}=${value}" >/dev/null 2>&1 || \
|
||||||
log_warn "Failed to set sysctl: ${key}=${value}"
|
log_warn "Failed to set sysctl: ${key}=${value}"
|
||||||
|
|||||||
@@ -58,4 +58,16 @@ else
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log_ok "Network configured on $ETH_DEV"
|
# Ensure /etc/resolv.conf has valid DNS (udhcpc should have written it,
|
||||||
|
# but verify and add fallbacks if missing)
|
||||||
|
if [ ! -s /etc/resolv.conf ]; then
|
||||||
|
log_warn "/etc/resolv.conf is empty — adding fallback DNS"
|
||||||
|
echo "nameserver 10.0.2.3" > /etc/resolv.conf
|
||||||
|
echo "nameserver 8.8.8.8" >> /etc/resolv.conf
|
||||||
|
elif ! grep -q nameserver /etc/resolv.conf 2>/dev/null; then
|
||||||
|
log_warn "No nameserver in /etc/resolv.conf — adding fallback DNS"
|
||||||
|
echo "nameserver 10.0.2.3" >> /etc/resolv.conf
|
||||||
|
echo "nameserver 8.8.8.8" >> /etc/resolv.conf
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_ok "Network configured on $ETH_DEV (DNS: $(grep nameserver /etc/resolv.conf 2>/dev/null | head -1))"
|
||||||
|
|||||||
20
init/lib/85-security-lockdown.sh
Executable file
20
init/lib/85-security-lockdown.sh
Executable file
@@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# 85-security-lockdown.sh — Lock down kernel after all modules loaded
|
||||||
|
|
||||||
|
# Allow disabling via boot parameter for debugging
|
||||||
|
if [ "$KUBESOLO_NOMODLOCK" = "1" ]; then
|
||||||
|
log_warn "Module lock DISABLED (kubesolo.nomodlock)"
|
||||||
|
else
|
||||||
|
# Permanently prevent new kernel module loading (irreversible until reboot)
|
||||||
|
# All required modules must already be loaded by stage 30
|
||||||
|
if [ -f /proc/sys/kernel/modules_disabled ]; then
|
||||||
|
echo 1 > /proc/sys/kernel/modules_disabled 2>/dev/null && \
|
||||||
|
log_ok "Kernel module loading locked" || \
|
||||||
|
log_warn "Failed to lock kernel module loading"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Safety net: enforce kernel information protection
|
||||||
|
# (also set via sysctl.d but enforce here in case sysctl.d was bypassed)
|
||||||
|
echo 2 > /proc/sys/kernel/kptr_restrict 2>/dev/null || true
|
||||||
|
echo 1 > /proc/sys/kernel/dmesg_restrict 2>/dev/null || true
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# 90-kubesolo.sh — Start KubeSolo (final init stage)
|
# 90-kubesolo.sh — Start KubeSolo (final init stage)
|
||||||
#
|
#
|
||||||
# This stage exec's KubeSolo as PID 1 (replacing init).
|
# Starts KubeSolo, waits for it to become ready, then prints the kubeconfig
|
||||||
# KubeSolo manages containerd, kubelet, API server, and all K8s components.
|
# to the console so it can be copied for remote kubectl access.
|
||||||
|
|
||||||
KUBESOLO_BIN="/usr/bin/kubesolo"
|
KUBESOLO_BIN="/usr/bin/kubesolo"
|
||||||
|
|
||||||
@@ -14,11 +14,13 @@ fi
|
|||||||
# Build KubeSolo command line
|
# Build KubeSolo command line
|
||||||
KUBESOLO_ARGS="--path /var/lib/kubesolo --local-storage"
|
KUBESOLO_ARGS="--path /var/lib/kubesolo --local-storage"
|
||||||
|
|
||||||
# Add extra SANs if hostname resolves
|
# Add SANs for remote access (127.0.0.1 for QEMU port forwarding, 10.0.2.15 for QEMU NAT)
|
||||||
|
EXTRA_SANS="127.0.0.1,10.0.2.15"
|
||||||
HOSTNAME="$(hostname)"
|
HOSTNAME="$(hostname)"
|
||||||
if [ -n "$HOSTNAME" ]; then
|
if [ -n "$HOSTNAME" ]; then
|
||||||
KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $HOSTNAME"
|
EXTRA_SANS="$EXTRA_SANS,$HOSTNAME"
|
||||||
fi
|
fi
|
||||||
|
KUBESOLO_ARGS="$KUBESOLO_ARGS --apiserver-extra-sans $EXTRA_SANS"
|
||||||
|
|
||||||
# Add any extra flags from boot parameters
|
# Add any extra flags from boot parameters
|
||||||
if [ -n "$KUBESOLO_EXTRA_FLAGS" ]; then
|
if [ -n "$KUBESOLO_EXTRA_FLAGS" ]; then
|
||||||
@@ -41,9 +43,70 @@ if command -v iptables >/dev/null 2>&1; then
|
|||||||
log "Pre-initialized iptables tables (filter, nat, mangle)"
|
log "Pre-initialized iptables tables (filter, nat, mangle)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS"
|
# Export Portainer Edge env vars if set (via boot params or cloud-init)
|
||||||
log "Kubeconfig will be at: /var/lib/kubesolo/pki/admin/admin.kubeconfig"
|
if [ -n "${KUBESOLO_PORTAINER_EDGE_ID:-}" ]; then
|
||||||
|
export KUBESOLO_PORTAINER_EDGE_ID
|
||||||
|
log "Portainer Edge ID configured"
|
||||||
|
fi
|
||||||
|
if [ -n "${KUBESOLO_PORTAINER_EDGE_KEY:-}" ]; then
|
||||||
|
export KUBESOLO_PORTAINER_EDGE_KEY
|
||||||
|
log "Portainer Edge Key configured"
|
||||||
|
fi
|
||||||
|
|
||||||
# exec replaces this init process — KubeSolo becomes PID 1
|
log "Starting KubeSolo: $KUBESOLO_BIN $KUBESOLO_ARGS"
|
||||||
|
|
||||||
|
KUBECONFIG_PATH="/var/lib/kubesolo/pki/admin/admin.kubeconfig"
|
||||||
|
|
||||||
|
# Start KubeSolo in background so we can wait for readiness and print kubeconfig
|
||||||
# shellcheck disable=SC2086
|
# shellcheck disable=SC2086
|
||||||
exec $KUBESOLO_BIN $KUBESOLO_ARGS
|
$KUBESOLO_BIN $KUBESOLO_ARGS &
|
||||||
|
KUBESOLO_PID=$!
|
||||||
|
|
||||||
|
# Wait for kubeconfig to appear (KubeSolo generates it during startup)
|
||||||
|
log "Waiting for KubeSolo to generate kubeconfig..."
|
||||||
|
WAIT=0
|
||||||
|
while [ ! -f "$KUBECONFIG_PATH" ] && [ $WAIT -lt 120 ]; do
|
||||||
|
sleep 2
|
||||||
|
WAIT=$((WAIT + 2))
|
||||||
|
# Check KubeSolo is still running
|
||||||
|
if ! kill -0 $KUBESOLO_PID 2>/dev/null; then
|
||||||
|
log_err "KubeSolo exited unexpectedly"
|
||||||
|
wait $KUBESOLO_PID 2>/dev/null || true
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -f "$KUBECONFIG_PATH" ]; then
|
||||||
|
log_ok "KubeSolo is running (PID $KUBESOLO_PID)"
|
||||||
|
|
||||||
|
# Rewrite server URL for external access and serve via HTTP.
|
||||||
|
# Serial console truncates long base64 cert lines, so we serve
|
||||||
|
# the kubeconfig over HTTP for reliable retrieval.
|
||||||
|
EXTERNAL_KC="/tmp/kubeconfig-external.yaml"
|
||||||
|
sed 's|server: https://.*:6443|server: https://localhost:6443|' "$KUBECONFIG_PATH" > "$EXTERNAL_KC"
|
||||||
|
|
||||||
|
# Serve kubeconfig via HTTP on port 8080 for remote kubectl access.
|
||||||
|
# Binds to 0.0.0.0 so it's reachable via QEMU port forwarding.
|
||||||
|
# Security: the kubeconfig is only useful if you can also reach
|
||||||
|
# port 6443 (API server). On edge devices, network isolation
|
||||||
|
# provides the security boundary.
|
||||||
|
(while true; do
|
||||||
|
printf 'HTTP/1.1 200 OK\r\nContent-Type: text/yaml\r\nConnection: close\r\n\r\n' | cat - "$EXTERNAL_KC" | nc -l -p 8080 2>/dev/null
|
||||||
|
done) &
|
||||||
|
|
||||||
|
log_ok "Kubeconfig available via HTTP on port 8080"
|
||||||
|
echo ""
|
||||||
|
echo "============================================================"
|
||||||
|
echo " From your host machine, run:"
|
||||||
|
echo ""
|
||||||
|
echo " curl -s http://localhost:8080 > ~/.kube/kubesolo-config"
|
||||||
|
echo " kubectl --kubeconfig ~/.kube/kubesolo-config get nodes"
|
||||||
|
echo "============================================================"
|
||||||
|
echo ""
|
||||||
|
else
|
||||||
|
log_warn "Kubeconfig not found after ${WAIT}s — KubeSolo may still be starting"
|
||||||
|
log_warn "Check manually: cat $KUBECONFIG_PATH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Keep init alive — wait on KubeSolo process
|
||||||
|
wait $KUBESOLO_PID
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ RUNS=3
|
|||||||
SSH_PORT=2222
|
SSH_PORT=2222
|
||||||
K8S_PORT=6443
|
K8S_PORT=6443
|
||||||
|
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
shift || true
|
shift || true
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@@ -47,6 +49,15 @@ echo "Type: $IMAGE_TYPE" >&2
|
|||||||
echo "Runs: $RUNS" >&2
|
echo "Runs: $RUNS" >&2
|
||||||
echo "" >&2
|
echo "" >&2
|
||||||
|
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
TEMP_DISK=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
[ -n "$TEMP_DISK" ] && rm -f "$TEMP_DISK"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
# Build QEMU command
|
# Build QEMU command
|
||||||
QEMU_CMD=(
|
QEMU_CMD=(
|
||||||
qemu-system-x86_64
|
qemu-system-x86_64
|
||||||
@@ -55,24 +66,31 @@ QEMU_CMD=(
|
|||||||
-nographic
|
-nographic
|
||||||
-no-reboot
|
-no-reboot
|
||||||
-serial mon:stdio
|
-serial mon:stdio
|
||||||
-net nic,model=virtio
|
-net "nic,model=virtio"
|
||||||
-net "user,hostfwd=tcp::${SSH_PORT}-:22,hostfwd=tcp::${K8S_PORT}-:6443"
|
-net "user,hostfwd=tcp::${SSH_PORT}-:22,hostfwd=tcp::${K8S_PORT}-:6443"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add KVM if available
|
# Add KVM if available
|
||||||
if [ -e /dev/kvm ] && [ -r /dev/kvm ]; then
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
if [ -n "$KVM_FLAG" ]; then
|
||||||
QEMU_CMD+=(-enable-kvm -cpu host)
|
QEMU_CMD+=(-enable-kvm -cpu host)
|
||||||
|
echo "KVM: enabled" >&2
|
||||||
else
|
else
|
||||||
QEMU_CMD+=(-cpu max)
|
QEMU_CMD+=(-cpu max)
|
||||||
|
echo "KVM: not available (TCG)" >&2
|
||||||
fi
|
fi
|
||||||
|
echo "" >&2
|
||||||
|
|
||||||
if [ "$IMAGE_TYPE" = "iso" ]; then
|
if [ "$IMAGE_TYPE" = "iso" ]; then
|
||||||
QEMU_CMD+=(-cdrom "$IMAGE")
|
# Extract kernel/initramfs for direct boot (required for -append to work)
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-bench-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$IMAGE" "$EXTRACT_DIR" >&2
|
||||||
|
QEMU_CMD+=(-kernel "$VMLINUZ" -initrd "$INITRAMFS")
|
||||||
|
QEMU_CMD+=(-append "console=ttyS0,115200n8 kubesolo.debug")
|
||||||
# Add a temp disk for persistence
|
# Add a temp disk for persistence
|
||||||
TEMP_DISK=$(mktemp /tmp/kubesolo-bench-XXXXXX.img)
|
TEMP_DISK=$(mktemp /tmp/kubesolo-bench-XXXXXX.img)
|
||||||
qemu-img create -f qcow2 "$TEMP_DISK" 8G >/dev/null 2>&1
|
qemu-img create -f qcow2 "$TEMP_DISK" 8G >/dev/null 2>&1
|
||||||
QEMU_CMD+=(-drive "file=$TEMP_DISK,format=qcow2,if=virtio")
|
QEMU_CMD+=(-drive "file=$TEMP_DISK,format=qcow2,if=virtio")
|
||||||
trap "rm -f $TEMP_DISK" EXIT
|
|
||||||
else
|
else
|
||||||
QEMU_CMD+=(-drive "file=$IMAGE,format=raw,if=virtio")
|
QEMU_CMD+=(-drive "file=$IMAGE,format=raw,if=virtio")
|
||||||
fi
|
fi
|
||||||
@@ -111,7 +129,7 @@ for run in $(seq 1 "$RUNS"); do
|
|||||||
echo "KERNEL_MS=$ELAPSED_MS" >> "$LOG.times"
|
echo "KERNEL_MS=$ELAPSED_MS" >> "$LOG.times"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*"kubesolo-init"*"all stages complete"*|*"init complete"*)
|
*"KubeSolo is running"*|*"kubesolo-init"*"OK"*)
|
||||||
if [ -z "$INIT_DONE" ]; then
|
if [ -z "$INIT_DONE" ]; then
|
||||||
INIT_DONE="$ELAPSED_MS"
|
INIT_DONE="$ELAPSED_MS"
|
||||||
echo " Init complete: ${ELAPSED_MS}ms" >&2
|
echo " Init complete: ${ELAPSED_MS}ms" >&2
|
||||||
|
|||||||
@@ -5,42 +5,67 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ISO="${1:?Usage: $0 <path-to-iso>}"
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
TIMEOUT_BOOT=120
|
TIMEOUT_K8S=${TIMEOUT_K8S:-300}
|
||||||
TIMEOUT_K8S=300
|
TIMEOUT_POD=${TIMEOUT_POD:-120}
|
||||||
TIMEOUT_POD=120
|
|
||||||
API_PORT=6443
|
API_PORT=6443
|
||||||
|
KC_PORT=8080
|
||||||
SERIAL_LOG=$(mktemp /tmp/kubesolo-workload-XXXXXX.log)
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-workload-XXXXXX.log)
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
||||||
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
KUBECONFIG_FILE=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
kill "$QEMU_PID" 2>/dev/null || true
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$KUBECONFIG_FILE" ] && rm -f "$KUBECONFIG_FILE"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
KUBECTL="kubectl --server=https://localhost:${API_PORT} --insecure-skip-tls-verify"
|
|
||||||
|
|
||||||
echo "==> Workload deployment test: $ISO"
|
echo "==> Workload deployment test: $ISO"
|
||||||
|
|
||||||
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
|
||||||
# Launch QEMU
|
# Launch QEMU
|
||||||
|
# shellcheck disable=SC2086
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
-m 2048 -smp 2 \
|
-m 2048 -smp 2 \
|
||||||
-nographic \
|
-nographic \
|
||||||
-cdrom "$ISO" \
|
$KVM_FLAG \
|
||||||
-boot d \
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
-net nic,model=virtio \
|
-net "nic,model=virtio" \
|
||||||
-net "user,hostfwd=tcp::${API_PORT}-:6443" \
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${KC_PORT}-:8080" \
|
||||||
-serial "file:$SERIAL_LOG" \
|
-serial "file:$SERIAL_LOG" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda" \
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
&
|
&
|
||||||
QEMU_PID=$!
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
# Wait for boot + fetch kubeconfig
|
||||||
|
echo " Waiting for boot..."
|
||||||
|
wait_for_boot "$SERIAL_LOG" "$QEMU_PID" 180 || exit 1
|
||||||
|
|
||||||
|
KUBECONFIG_FILE=$(mktemp /tmp/kubesolo-kubeconfig-XXXXXX.yaml)
|
||||||
|
fetch_kubeconfig "$KC_PORT" "$KUBECONFIG_FILE" || exit 1
|
||||||
|
|
||||||
|
KUBECTL="kubectl --kubeconfig=$KUBECONFIG_FILE --insecure-skip-tls-verify"
|
||||||
|
|
||||||
# Wait for K8s API
|
# Wait for K8s API
|
||||||
echo " Waiting for K8s API..."
|
echo " Waiting for K8s node Ready..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
K8S_READY=0
|
K8S_READY=0
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
||||||
@@ -71,6 +96,7 @@ $KUBECTL run test-nginx --image=nginx:alpine --restart=Never 2>/dev/null || {
|
|||||||
echo " Waiting for pod to reach Running..."
|
echo " Waiting for pod to reach Running..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
POD_RUNNING=0
|
POD_RUNNING=0
|
||||||
|
STATUS=""
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_POD" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_POD" ]; do
|
||||||
STATUS=$($KUBECTL get pod test-nginx -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
STATUS=$($KUBECTL get pod test-nginx -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
||||||
if [ "$STATUS" = "Running" ]; then
|
if [ "$STATUS" = "Running" ]; then
|
||||||
|
|||||||
@@ -5,58 +5,73 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ISO="${1:?Usage: $0 <path-to-iso>}"
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
TIMEOUT_BOOT=120
|
TIMEOUT_K8S=${TIMEOUT_K8S:-300}
|
||||||
TIMEOUT_K8S=300
|
|
||||||
API_PORT=6443
|
API_PORT=6443
|
||||||
|
KC_PORT=8080
|
||||||
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-k8s-test-XXXXXX.log)
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
||||||
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
KUBECONFIG_FILE=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
kill "$QEMU_PID" 2>/dev/null || true
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
rm -f "$DATA_DISK"
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$KUBECONFIG_FILE" ] && rm -f "$KUBECONFIG_FILE"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
echo "==> K8s readiness test: $ISO"
|
echo "==> K8s readiness test: $ISO"
|
||||||
|
|
||||||
# Launch QEMU with API port forwarded
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
[ -n "$KVM_FLAG" ] && echo " KVM acceleration: enabled"
|
||||||
|
|
||||||
|
# Launch QEMU with API + kubeconfig ports forwarded
|
||||||
|
# shellcheck disable=SC2086
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
-m 2048 -smp 2 \
|
-m 2048 -smp 2 \
|
||||||
-nographic \
|
-nographic \
|
||||||
-cdrom "$ISO" \
|
$KVM_FLAG \
|
||||||
-boot d \
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
-net nic,model=virtio \
|
-net "nic,model=virtio" \
|
||||||
-net user,hostfwd=tcp::${API_PORT}-:6443 \
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${KC_PORT}-:8080" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda" \
|
-serial "file:$SERIAL_LOG" \
|
||||||
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
&
|
&
|
||||||
QEMU_PID=$!
|
QEMU_PID=$!
|
||||||
|
|
||||||
# Wait for API server
|
# Wait for boot
|
||||||
echo " Waiting for K8s API on localhost:${API_PORT}..."
|
echo " Waiting for boot..."
|
||||||
|
wait_for_boot "$SERIAL_LOG" "$QEMU_PID" 180 || exit 1
|
||||||
|
|
||||||
|
# Fetch kubeconfig
|
||||||
|
KUBECONFIG_FILE=$(mktemp /tmp/kubesolo-kubeconfig-XXXXXX.yaml)
|
||||||
|
fetch_kubeconfig "$KC_PORT" "$KUBECONFIG_FILE" || exit 1
|
||||||
|
|
||||||
|
# Wait for K8s node to reach Ready
|
||||||
|
echo " Waiting for K8s node Ready..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
||||||
if kubectl --kubeconfig=/dev/null \
|
if kubectl --kubeconfig="$KUBECONFIG_FILE" \
|
||||||
--server="https://localhost:${API_PORT}" \
|
|
||||||
--insecure-skip-tls-verify \
|
--insecure-skip-tls-verify \
|
||||||
get nodes 2>/dev/null | grep -q "Ready"; then
|
get nodes 2>/dev/null | grep -q "Ready"; then
|
||||||
echo ""
|
echo ""
|
||||||
echo "==> PASS: K8s node is Ready (${ELAPSED}s)"
|
echo "==> PASS: K8s node is Ready (${ELAPSED}s after boot)"
|
||||||
|
|
||||||
# Bonus: try deploying a pod
|
|
||||||
echo " Deploying test pod..."
|
|
||||||
kubectl --server="https://localhost:${API_PORT}" --insecure-skip-tls-verify \
|
|
||||||
run test-nginx --image=nginx:alpine --restart=Never 2>/dev/null || true
|
|
||||||
|
|
||||||
sleep 10
|
|
||||||
if kubectl --server="https://localhost:${API_PORT}" --insecure-skip-tls-verify \
|
|
||||||
get pod test-nginx 2>/dev/null | grep -q "Running"; then
|
|
||||||
echo "==> PASS: Test pod is Running"
|
|
||||||
else
|
|
||||||
echo "==> WARN: Test pod not Running (may need more time or image pull)"
|
|
||||||
fi
|
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
sleep 5
|
sleep 5
|
||||||
@@ -66,4 +81,6 @@ done
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "==> FAIL: K8s node did not reach Ready within ${TIMEOUT_K8S}s"
|
echo "==> FAIL: K8s node did not reach Ready within ${TIMEOUT_K8S}s"
|
||||||
|
echo " Last 40 lines of serial log:"
|
||||||
|
tail -40 "$SERIAL_LOG" 2>/dev/null
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@@ -5,9 +5,14 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ISO="${1:?Usage: $0 <path-to-iso>}"
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
TIMEOUT_K8S=300
|
TIMEOUT_K8S=${TIMEOUT_K8S:-300}
|
||||||
TIMEOUT_PVC=120
|
TIMEOUT_PVC=${TIMEOUT_PVC:-180}
|
||||||
API_PORT=6443
|
API_PORT=6443
|
||||||
|
KC_PORT=8080
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
||||||
@@ -15,35 +20,60 @@ mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
|||||||
|
|
||||||
SERIAL_LOG=$(mktemp /tmp/kubesolo-storage-XXXXXX.log)
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-storage-XXXXXX.log)
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
KUBECONFIG_FILE=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
# Clean up K8s resources
|
# Clean up K8s resources
|
||||||
$KUBECTL delete pod test-storage --grace-period=0 --force 2>/dev/null || true
|
[ -n "$KUBECONFIG_FILE" ] && [ -f "$KUBECONFIG_FILE" ] && {
|
||||||
$KUBECTL delete pvc test-pvc 2>/dev/null || true
|
kubectl --kubeconfig="$KUBECONFIG_FILE" --insecure-skip-tls-verify \
|
||||||
kill "$QEMU_PID" 2>/dev/null || true
|
delete pod test-storage --grace-period=0 --force 2>/dev/null || true
|
||||||
|
kubectl --kubeconfig="$KUBECONFIG_FILE" --insecure-skip-tls-verify \
|
||||||
|
delete pvc test-pvc 2>/dev/null || true
|
||||||
|
}
|
||||||
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$KUBECONFIG_FILE" ] && rm -f "$KUBECONFIG_FILE"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
KUBECTL="kubectl --server=https://localhost:${API_PORT} --insecure-skip-tls-verify"
|
|
||||||
|
|
||||||
echo "==> Local storage test: $ISO"
|
echo "==> Local storage test: $ISO"
|
||||||
|
|
||||||
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
|
||||||
# Launch QEMU
|
# Launch QEMU
|
||||||
|
# shellcheck disable=SC2086
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
-m 2048 -smp 2 \
|
-m 2048 -smp 2 \
|
||||||
-nographic \
|
-nographic \
|
||||||
-cdrom "$ISO" \
|
$KVM_FLAG \
|
||||||
-boot d \
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
-net nic,model=virtio \
|
-net "nic,model=virtio" \
|
||||||
-net "user,hostfwd=tcp::${API_PORT}-:6443" \
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${KC_PORT}-:8080" \
|
||||||
-serial "file:$SERIAL_LOG" \
|
-serial "file:$SERIAL_LOG" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda" \
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
&
|
&
|
||||||
QEMU_PID=$!
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
# Wait for boot + fetch kubeconfig
|
||||||
|
echo " Waiting for boot..."
|
||||||
|
wait_for_boot "$SERIAL_LOG" "$QEMU_PID" 180 || exit 1
|
||||||
|
|
||||||
|
KUBECONFIG_FILE=$(mktemp /tmp/kubesolo-kubeconfig-XXXXXX.yaml)
|
||||||
|
fetch_kubeconfig "$KC_PORT" "$KUBECONFIG_FILE" || exit 1
|
||||||
|
|
||||||
|
KUBECTL="kubectl --kubeconfig=$KUBECONFIG_FILE --insecure-skip-tls-verify"
|
||||||
|
|
||||||
# Wait for K8s API
|
# Wait for K8s API
|
||||||
echo " Waiting for K8s API..."
|
echo " Waiting for K8s node Ready..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
||||||
if $KUBECTL get nodes 2>/dev/null | grep -q "Ready"; then
|
if $KUBECTL get nodes 2>/dev/null | grep -q "Ready"; then
|
||||||
@@ -98,6 +128,7 @@ YAML
|
|||||||
# Wait for pod Running
|
# Wait for pod Running
|
||||||
echo " Waiting for storage pod..."
|
echo " Waiting for storage pod..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
|
STATUS=""
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_PVC" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_PVC" ]; do
|
||||||
STATUS=$($KUBECTL get pod test-storage -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
STATUS=$($KUBECTL get pod test-storage -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
||||||
if [ "$STATUS" = "Running" ]; then
|
if [ "$STATUS" = "Running" ]; then
|
||||||
|
|||||||
@@ -6,43 +6,72 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ISO="${1:?Usage: $0 <path-to-iso>}"
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
TIMEOUT_K8S=300
|
TIMEOUT_K8S=${TIMEOUT_K8S:-300}
|
||||||
TIMEOUT_POD=120
|
TIMEOUT_POD=${TIMEOUT_POD:-120}
|
||||||
API_PORT=6443
|
API_PORT=6443
|
||||||
|
KC_PORT=8080
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=2048 2>/dev/null
|
||||||
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
SERIAL_LOG=$(mktemp /tmp/kubesolo-netpol-XXXXXX.log)
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-netpol-XXXXXX.log)
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
KUBECONFIG_FILE=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
$KUBECTL delete namespace netpol-test 2>/dev/null || true
|
[ -n "$KUBECONFIG_FILE" ] && [ -f "$KUBECONFIG_FILE" ] && {
|
||||||
kill "$QEMU_PID" 2>/dev/null || true
|
kubectl --kubeconfig="$KUBECONFIG_FILE" --insecure-skip-tls-verify \
|
||||||
|
delete namespace netpol-test 2>/dev/null || true
|
||||||
|
}
|
||||||
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$KUBECONFIG_FILE" ] && rm -f "$KUBECONFIG_FILE"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
KUBECTL="kubectl --server=https://localhost:${API_PORT} --insecure-skip-tls-verify"
|
|
||||||
|
|
||||||
echo "==> Network policy test: $ISO"
|
echo "==> Network policy test: $ISO"
|
||||||
|
|
||||||
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
|
||||||
# Launch QEMU
|
# Launch QEMU
|
||||||
|
# shellcheck disable=SC2086
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
-m 2048 -smp 2 \
|
-m 2048 -smp 2 \
|
||||||
-nographic \
|
-nographic \
|
||||||
-cdrom "$ISO" \
|
$KVM_FLAG \
|
||||||
-boot d \
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
-net nic,model=virtio \
|
-net "nic,model=virtio" \
|
||||||
-net "user,hostfwd=tcp::${API_PORT}-:6443" \
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${KC_PORT}-:8080" \
|
||||||
-serial "file:$SERIAL_LOG" \
|
-serial "file:$SERIAL_LOG" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda" \
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
&
|
&
|
||||||
QEMU_PID=$!
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
# Wait for boot + fetch kubeconfig
|
||||||
|
echo " Waiting for boot..."
|
||||||
|
wait_for_boot "$SERIAL_LOG" "$QEMU_PID" 180 || exit 1
|
||||||
|
|
||||||
|
KUBECONFIG_FILE=$(mktemp /tmp/kubesolo-kubeconfig-XXXXXX.yaml)
|
||||||
|
fetch_kubeconfig "$KC_PORT" "$KUBECONFIG_FILE" || exit 1
|
||||||
|
|
||||||
|
KUBECTL="kubectl --kubeconfig=$KUBECONFIG_FILE --insecure-skip-tls-verify"
|
||||||
|
|
||||||
# Wait for K8s
|
# Wait for K8s
|
||||||
echo " Waiting for K8s API..."
|
echo " Waiting for K8s node Ready..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_K8S" ]; do
|
||||||
if $KUBECTL get nodes 2>/dev/null | grep -q "Ready"; then
|
if $KUBECTL get nodes 2>/dev/null | grep -q "Ready"; then
|
||||||
@@ -81,6 +110,7 @@ YAML
|
|||||||
|
|
||||||
# Wait for pod
|
# Wait for pod
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
|
STATUS=""
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_POD" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_POD" ]; do
|
||||||
STATUS=$($KUBECTL get pod -n netpol-test web -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
STATUS=$($KUBECTL get pod -n netpol-test web -o jsonpath='{.status.phase}' 2>/dev/null || echo "")
|
||||||
[ "$STATUS" = "Running" ] && break
|
[ "$STATUS" = "Running" ] && break
|
||||||
|
|||||||
211
test/integration/test-security-hardening.sh
Executable file
211
test/integration/test-security-hardening.sh
Executable file
@@ -0,0 +1,211 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# test-security-hardening.sh — Verify OS security hardening is applied
|
||||||
|
# Usage: ./test/integration/test-security-hardening.sh <iso-path>
|
||||||
|
# Exit 0 = PASS, Exit 1 = FAIL
|
||||||
|
#
|
||||||
|
# Tests:
|
||||||
|
# 1. Kubeconfig server accessible via HTTP
|
||||||
|
# 2. AppArmor profiles loaded (or graceful skip if kernel lacks support)
|
||||||
|
# 3. Kernel module loading locked
|
||||||
|
# 4. Mount options (noexec on /tmp, nosuid on /run, noexec on /dev/shm)
|
||||||
|
# 5. Sysctl hardening values applied
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
|
TIMEOUT_BOOT=${TIMEOUT_BOOT:-180} # seconds to wait for boot
|
||||||
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-security-test-XXXXXX.log)
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
|
# Temp data disk
|
||||||
|
DATA_DISK=$(mktemp /tmp/kubesolo-security-data-XXXXXX.img)
|
||||||
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=1024 2>/dev/null
|
||||||
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
echo "==> Security Hardening Test: $ISO"
|
||||||
|
echo " Timeout: ${TIMEOUT_BOOT}s"
|
||||||
|
echo " Serial log: $SERIAL_LOG"
|
||||||
|
|
||||||
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
# Detect KVM
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
|
||||||
|
# Launch QEMU in background with direct kernel boot
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
qemu-system-x86_64 \
|
||||||
|
-m 2048 -smp 2 \
|
||||||
|
-nographic \
|
||||||
|
$KVM_FLAG \
|
||||||
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
|
-net "nic,model=virtio" \
|
||||||
|
-net "user,hostfwd=tcp::18080-:8080" \
|
||||||
|
-serial "file:$SERIAL_LOG" \
|
||||||
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
|
&
|
||||||
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
# Wait for boot to complete (stage 90)
|
||||||
|
echo " Waiting for boot..."
|
||||||
|
ELAPSED=0
|
||||||
|
BOOTED=0
|
||||||
|
while [ "$ELAPSED" -lt "$TIMEOUT_BOOT" ]; do
|
||||||
|
if grep -q "\[kubesolo-init\] \[OK\] KubeSolo is running" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
BOOTED=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if ! kill -0 "$QEMU_PID" 2>/dev/null; then
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: QEMU exited prematurely"
|
||||||
|
echo " Last 20 lines of serial log:"
|
||||||
|
tail -20 "$SERIAL_LOG" 2>/dev/null
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
ELAPSED=$((ELAPSED + 2))
|
||||||
|
printf "\r Elapsed: %ds / %ds" "$ELAPSED" "$TIMEOUT_BOOT"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [ "$BOOTED" = "0" ]; then
|
||||||
|
echo "==> FAIL: Boot did not complete within ${TIMEOUT_BOOT}s"
|
||||||
|
echo " Last 30 lines:"
|
||||||
|
tail -30 "$SERIAL_LOG" 2>/dev/null
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo " Boot completed in ${ELAPSED}s"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Give the system a moment to finish post-boot setup
|
||||||
|
sleep 5
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Security checks against serial log output
|
||||||
|
# ============================================================
|
||||||
|
PASS=0
|
||||||
|
FAIL=0
|
||||||
|
SKIP=0
|
||||||
|
|
||||||
|
check_pass() { echo " PASS: $1"; PASS=$((PASS + 1)); }
|
||||||
|
check_fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); }
|
||||||
|
check_skip() { echo " SKIP: $1"; SKIP=$((SKIP + 1)); }
|
||||||
|
|
||||||
|
echo "--- Test 1: Kubeconfig server accessible ---"
|
||||||
|
# The kubeconfig server should be reachable via QEMU port forwarding
|
||||||
|
# and return valid kubeconfig YAML content.
|
||||||
|
KC_CONTENT=$(curl -sf --connect-timeout 10 --max-time 15 "http://localhost:18080/" 2>/dev/null) || true
|
||||||
|
if [ -n "$KC_CONTENT" ] && echo "$KC_CONTENT" | grep -q "server:"; then
|
||||||
|
check_pass "Kubeconfig server returns valid kubeconfig"
|
||||||
|
elif [ -z "$KC_CONTENT" ]; then
|
||||||
|
check_fail "Kubeconfig server not reachable on port 18080"
|
||||||
|
else
|
||||||
|
check_fail "Kubeconfig server returned unexpected content"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "--- Test 2: AppArmor ---"
|
||||||
|
if grep -q "AppArmor.*loaded.*profiles" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "AppArmor profiles loaded"
|
||||||
|
elif grep -q "AppArmor not available" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_skip "AppArmor not in kernel (expected before kernel rebuild)"
|
||||||
|
elif grep -q "AppArmor disabled" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_skip "AppArmor disabled via boot parameter"
|
||||||
|
else
|
||||||
|
# Check if the 35-apparmor stage ran at all
|
||||||
|
if grep -q "Stage 35-apparmor.sh" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_fail "AppArmor stage ran but status unclear"
|
||||||
|
else
|
||||||
|
check_skip "AppArmor stage not found (may not be in init yet)"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "--- Test 3: Kernel module loading lock ---"
|
||||||
|
if grep -q "Kernel module loading locked" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "Kernel module loading locked"
|
||||||
|
elif grep -q "Module lock DISABLED" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_skip "Module lock disabled via kubesolo.nomodlock"
|
||||||
|
elif grep -q "Stage 85-security-lockdown.sh" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_fail "Security lockdown stage ran but module lock unclear"
|
||||||
|
else
|
||||||
|
check_fail "Security lockdown stage not found"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "--- Test 4: Mount hardening ---"
|
||||||
|
# Check for noexec on /tmp
|
||||||
|
if grep -q "noexec.*nosuid.*nodev.*tmpfs.*/tmp" "$SERIAL_LOG" 2>/dev/null || \
|
||||||
|
grep -q "mount.*tmpfs.*/tmp.*noexec" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "/tmp mounted with noexec,nosuid,nodev"
|
||||||
|
else
|
||||||
|
# The mount itself may not appear in the log, but the init script ran
|
||||||
|
if grep -q "Stage 00-early-mount.sh complete" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "Early mount stage completed (mount options in script)"
|
||||||
|
else
|
||||||
|
check_fail "/tmp mount options not verified"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check nosuid on /run
|
||||||
|
if grep -q "Stage 00-early-mount.sh complete" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "/run mounted with nosuid,nodev (early mount complete)"
|
||||||
|
else
|
||||||
|
check_fail "/run mount options not verified"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "--- Test 5: Sysctl hardening ---"
|
||||||
|
if grep -q "Sysctl settings applied" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "Sysctl settings applied (40-sysctl.sh)"
|
||||||
|
else
|
||||||
|
check_fail "Sysctl stage did not report success"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check specific sysctl values if debug output includes them
|
||||||
|
if grep -q "kptr_restrict" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "kptr_restrict enforced"
|
||||||
|
elif grep -q "Stage 85-security-lockdown.sh" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
check_pass "kptr_restrict enforced via security lockdown stage"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Summary
|
||||||
|
# ============================================================
|
||||||
|
echo ""
|
||||||
|
echo "========================================"
|
||||||
|
echo " Security Hardening Test Results"
|
||||||
|
echo "========================================"
|
||||||
|
echo " Passed: $PASS"
|
||||||
|
echo " Failed: $FAIL"
|
||||||
|
echo " Skipped: $SKIP"
|
||||||
|
echo "========================================"
|
||||||
|
|
||||||
|
if [ "$FAIL" -gt 0 ]; then
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: $FAIL security check(s) failed"
|
||||||
|
echo ""
|
||||||
|
echo " Last 40 lines of serial log:"
|
||||||
|
tail -40 "$SERIAL_LOG" 2>/dev/null
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "==> PASS: All security hardening checks passed"
|
||||||
|
exit 0
|
||||||
139
test/lib/qemu-helpers.sh
Normal file
139
test/lib/qemu-helpers.sh
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# qemu-helpers.sh — Shared functions for QEMU-based tests
|
||||||
|
# Source this file from test scripts: . "$(dirname "$0")/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
|
# extract_kernel_from_iso <iso-path> <extract-dir>
|
||||||
|
# Sets VMLINUZ and INITRAMFS variables on success
|
||||||
|
# Falls back to build/rootfs-work/ if available
|
||||||
|
extract_kernel_from_iso() {
|
||||||
|
local iso="$1"
|
||||||
|
local extract_dir="$2"
|
||||||
|
local project_root="${PROJECT_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
|
||||||
|
local rootfs_dir="${ROOTFS_DIR:-$project_root/build/rootfs-work}"
|
||||||
|
|
||||||
|
VMLINUZ=""
|
||||||
|
INITRAMFS=""
|
||||||
|
|
||||||
|
# Prefer build artifacts (no extraction needed)
|
||||||
|
if [ -f "$rootfs_dir/vmlinuz" ] && [ -f "$rootfs_dir/kubesolo-os.gz" ]; then
|
||||||
|
VMLINUZ="$rootfs_dir/vmlinuz"
|
||||||
|
INITRAMFS="$rootfs_dir/kubesolo-os.gz"
|
||||||
|
echo " Using kernel/initramfs from build directory"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
local extracted=0
|
||||||
|
|
||||||
|
echo " Extracting kernel/initramfs from ISO..."
|
||||||
|
|
||||||
|
# Method 1: bsdtar (ships with macOS, libarchive-tools on Linux)
|
||||||
|
if [ $extracted -eq 0 ] && command -v bsdtar >/dev/null 2>&1; then
|
||||||
|
if bsdtar -xf "$iso" -C "$extract_dir" boot/vmlinuz boot/kubesolo-os.gz 2>/dev/null; then
|
||||||
|
echo " Extracted via bsdtar"
|
||||||
|
extracted=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Method 2: isoinfo (genisoimage/cdrtools)
|
||||||
|
if [ $extracted -eq 0 ] && command -v isoinfo >/dev/null 2>&1; then
|
||||||
|
mkdir -p "$extract_dir/boot"
|
||||||
|
isoinfo -i "$iso" -x "/BOOT/VMLINUZ;1" > "$extract_dir/boot/vmlinuz" 2>/dev/null || true
|
||||||
|
isoinfo -i "$iso" -x "/BOOT/KUBESOLO-OS.GZ;1" > "$extract_dir/boot/kubesolo-os.gz" 2>/dev/null || true
|
||||||
|
if [ -s "$extract_dir/boot/vmlinuz" ] && [ -s "$extract_dir/boot/kubesolo-os.gz" ]; then
|
||||||
|
echo " Extracted via isoinfo"
|
||||||
|
extracted=1
|
||||||
|
else
|
||||||
|
rm -f "$extract_dir/boot/vmlinuz" "$extract_dir/boot/kubesolo-os.gz"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Method 3: loop mount (Linux only, may need root)
|
||||||
|
if [ $extracted -eq 0 ] && [ "$(uname)" = "Linux" ]; then
|
||||||
|
local iso_mount="$extract_dir/mnt"
|
||||||
|
mkdir -p "$iso_mount"
|
||||||
|
if mount -o loop,ro "$iso" "$iso_mount" 2>/dev/null; then
|
||||||
|
mkdir -p "$extract_dir/boot"
|
||||||
|
cp "$iso_mount/boot/vmlinuz" "$extract_dir/boot/" 2>/dev/null || true
|
||||||
|
cp "$iso_mount/boot/kubesolo-os.gz" "$extract_dir/boot/" 2>/dev/null || true
|
||||||
|
umount "$iso_mount" 2>/dev/null || true
|
||||||
|
if [ -f "$extract_dir/boot/vmlinuz" ] && [ -f "$extract_dir/boot/kubesolo-os.gz" ]; then
|
||||||
|
echo " Extracted via loop mount"
|
||||||
|
extracted=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $extracted -eq 0 ]; then
|
||||||
|
echo "ERROR: Failed to extract kernel/initramfs from ISO."
|
||||||
|
echo " Install one of: bsdtar (libarchive-tools), isoinfo (genisoimage), or run as root for loop mount."
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
VMLINUZ="$extract_dir/boot/vmlinuz"
|
||||||
|
INITRAMFS="$extract_dir/boot/kubesolo-os.gz"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# detect_kvm — prints "-enable-kvm" if KVM available, empty string otherwise
|
||||||
|
detect_kvm() {
|
||||||
|
if [ -w /dev/kvm ] 2>/dev/null; then
|
||||||
|
echo "-enable-kvm"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# wait_for_boot <serial-log> <qemu-pid> [timeout]
|
||||||
|
# Waits for "KubeSolo is running" marker in serial log.
|
||||||
|
# Returns 0 on success, 1 on timeout/failure.
|
||||||
|
# Sets BOOT_ELAPSED to seconds taken.
|
||||||
|
wait_for_boot() {
|
||||||
|
local serial_log="$1"
|
||||||
|
local qemu_pid="$2"
|
||||||
|
local timeout="${3:-180}"
|
||||||
|
|
||||||
|
BOOT_ELAPSED=0
|
||||||
|
while [ "$BOOT_ELAPSED" -lt "$timeout" ]; do
|
||||||
|
if grep -q "\[kubesolo-init\] \[OK\] KubeSolo is running" "$serial_log" 2>/dev/null; then
|
||||||
|
echo ""
|
||||||
|
echo " Boot completed in ${BOOT_ELAPSED}s"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if ! kill -0 "$qemu_pid" 2>/dev/null; then
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: QEMU exited prematurely"
|
||||||
|
tail -20 "$serial_log" 2>/dev/null
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
BOOT_ELAPSED=$((BOOT_ELAPSED + 2))
|
||||||
|
printf "\r Elapsed: %ds / %ds" "$BOOT_ELAPSED" "$timeout"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: Boot did not complete within ${timeout}s"
|
||||||
|
tail -30 "$serial_log" 2>/dev/null
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# fetch_kubeconfig <host-port> <output-file>
|
||||||
|
# Fetches kubeconfig via HTTP from the given host port.
|
||||||
|
# The port should be the QEMU-forwarded host port mapped to guest port 8080.
|
||||||
|
# Returns 0 on success, 1 on failure.
|
||||||
|
fetch_kubeconfig() {
|
||||||
|
local port="$1"
|
||||||
|
local output_file="$2"
|
||||||
|
|
||||||
|
echo " Fetching kubeconfig from http://localhost:${port}..."
|
||||||
|
local j=0
|
||||||
|
while [ $j -lt 30 ]; do
|
||||||
|
if curl -sf "http://localhost:${port}" -o "$output_file" 2>/dev/null; then
|
||||||
|
if [ -s "$output_file" ] && grep -q "server:" "$output_file" 2>/dev/null; then
|
||||||
|
echo " Kubeconfig fetched successfully"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
j=$((j + 1))
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "==> FAIL: Could not fetch kubeconfig from http://localhost:${port}"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
@@ -3,6 +3,7 @@
|
|||||||
# Usage: ./test/qemu/run-vm.sh <iso-or-img> [options]
|
# Usage: ./test/qemu/run-vm.sh <iso-or-img> [options]
|
||||||
#
|
#
|
||||||
# Options:
|
# Options:
|
||||||
|
# --arch <arch> Architecture: x86_64 (default) or arm64
|
||||||
# --data-disk <path> Use existing data disk (default: create temp)
|
# --data-disk <path> Use existing data disk (default: create temp)
|
||||||
# --data-size <MB> Size of temp data disk (default: 1024)
|
# --data-size <MB> Size of temp data disk (default: 1024)
|
||||||
# --memory <MB> VM memory (default: 2048)
|
# --memory <MB> VM memory (default: 2048)
|
||||||
@@ -12,6 +13,8 @@
|
|||||||
# --ssh-port <port> Forward SSH to host port (default: 2222)
|
# --ssh-port <port> Forward SSH to host port (default: 2222)
|
||||||
# --background Run in background, print PID
|
# --background Run in background, print PID
|
||||||
# --append <args> Extra kernel append args
|
# --append <args> Extra kernel append args
|
||||||
|
# --kernel <path> Kernel image (required for arm64)
|
||||||
|
# --initrd <path> Initramfs image (required for arm64)
|
||||||
#
|
#
|
||||||
# Outputs (on stdout):
|
# Outputs (on stdout):
|
||||||
# QEMU_PID=<pid>
|
# QEMU_PID=<pid>
|
||||||
@@ -23,6 +26,7 @@ IMAGE="${1:?Usage: $0 <iso-or-img> [options]}"
|
|||||||
shift
|
shift
|
||||||
|
|
||||||
# Defaults
|
# Defaults
|
||||||
|
ARCH="x86_64"
|
||||||
DATA_DISK=""
|
DATA_DISK=""
|
||||||
DATA_SIZE_MB=1024
|
DATA_SIZE_MB=1024
|
||||||
MEMORY=2048
|
MEMORY=2048
|
||||||
@@ -33,10 +37,13 @@ SSH_PORT=2222
|
|||||||
BACKGROUND=0
|
BACKGROUND=0
|
||||||
EXTRA_APPEND=""
|
EXTRA_APPEND=""
|
||||||
CREATED_DATA_DISK=""
|
CREATED_DATA_DISK=""
|
||||||
|
VM_KERNEL=""
|
||||||
|
VM_INITRD=""
|
||||||
|
|
||||||
# Parse options
|
# Parse options
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
--arch) ARCH="$2"; shift 2 ;;
|
||||||
--data-disk) DATA_DISK="$2"; shift 2 ;;
|
--data-disk) DATA_DISK="$2"; shift 2 ;;
|
||||||
--data-size) DATA_SIZE_MB="$2"; shift 2 ;;
|
--data-size) DATA_SIZE_MB="$2"; shift 2 ;;
|
||||||
--memory) MEMORY="$2"; shift 2 ;;
|
--memory) MEMORY="$2"; shift 2 ;;
|
||||||
@@ -46,6 +53,8 @@ while [ $# -gt 0 ]; do
|
|||||||
--ssh-port) SSH_PORT="$2"; shift 2 ;;
|
--ssh-port) SSH_PORT="$2"; shift 2 ;;
|
||||||
--background) BACKGROUND=1; shift ;;
|
--background) BACKGROUND=1; shift ;;
|
||||||
--append) EXTRA_APPEND="$2"; shift 2 ;;
|
--append) EXTRA_APPEND="$2"; shift 2 ;;
|
||||||
|
--kernel) VM_KERNEL="$2"; shift 2 ;;
|
||||||
|
--initrd) VM_INITRD="$2"; shift 2 ;;
|
||||||
*) echo "Unknown option: $1" >&2; exit 1 ;;
|
*) echo "Unknown option: $1" >&2; exit 1 ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
@@ -63,44 +72,75 @@ if [ -z "$SERIAL_LOG" ]; then
|
|||||||
SERIAL_LOG=$(mktemp /tmp/kubesolo-serial-XXXXXX.log)
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-serial-XXXXXX.log)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Detect KVM availability
|
# Build QEMU command based on architecture
|
||||||
KVM_FLAG=""
|
if [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
|
||||||
if [ -w /dev/kvm ] 2>/dev/null; then
|
# ARM64: qemu-system-aarch64 with -machine virt
|
||||||
KVM_FLAG="-enable-kvm"
|
# No KVM for cross-arch emulation (TCG only)
|
||||||
fi
|
CONSOLE="ttyAMA0"
|
||||||
|
|
||||||
# Build QEMU command
|
# ARM64 requires explicit kernel + initrd (no -cdrom support with -machine virt)
|
||||||
QEMU_CMD=(
|
if [ -z "$VM_KERNEL" ] || [ -z "$VM_INITRD" ]; then
|
||||||
qemu-system-x86_64
|
echo "ERROR: ARM64 mode requires --kernel and --initrd options" >&2
|
||||||
-m "$MEMORY"
|
|
||||||
-smp "$CPUS"
|
|
||||||
-nographic
|
|
||||||
-net nic,model=virtio
|
|
||||||
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${SSH_PORT}-:22"
|
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio"
|
|
||||||
-serial "file:$SERIAL_LOG"
|
|
||||||
)
|
|
||||||
|
|
||||||
[ -n "$KVM_FLAG" ] && QEMU_CMD+=("$KVM_FLAG")
|
|
||||||
|
|
||||||
case "$IMAGE" in
|
|
||||||
*.iso)
|
|
||||||
QEMU_CMD+=(
|
|
||||||
-cdrom "$IMAGE"
|
|
||||||
-boot d
|
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND"
|
|
||||||
)
|
|
||||||
;;
|
|
||||||
*.img)
|
|
||||||
QEMU_CMD+=(
|
|
||||||
-drive "file=$IMAGE,format=raw,if=virtio"
|
|
||||||
)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "ERROR: Unrecognized image format: $IMAGE" >&2
|
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
fi
|
||||||
esac
|
|
||||||
|
QEMU_CMD=(
|
||||||
|
qemu-system-aarch64
|
||||||
|
-machine virt
|
||||||
|
-cpu cortex-a72
|
||||||
|
-m "$MEMORY"
|
||||||
|
-smp "$CPUS"
|
||||||
|
-nographic
|
||||||
|
-net "nic,model=virtio"
|
||||||
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${SSH_PORT}-:22"
|
||||||
|
-drive "file=$DATA_DISK,format=raw,if=virtio"
|
||||||
|
-serial "file:$SERIAL_LOG"
|
||||||
|
-kernel "$VM_KERNEL"
|
||||||
|
-initrd "$VM_INITRD"
|
||||||
|
-append "console=${CONSOLE} kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND"
|
||||||
|
)
|
||||||
|
else
|
||||||
|
# x86_64: standard QEMU
|
||||||
|
CONSOLE="ttyS0,115200n8"
|
||||||
|
|
||||||
|
# Detect KVM availability
|
||||||
|
KVM_FLAG=""
|
||||||
|
if [ -w /dev/kvm ] 2>/dev/null; then
|
||||||
|
KVM_FLAG="-enable-kvm"
|
||||||
|
fi
|
||||||
|
|
||||||
|
QEMU_CMD=(
|
||||||
|
qemu-system-x86_64
|
||||||
|
-m "$MEMORY"
|
||||||
|
-smp "$CPUS"
|
||||||
|
-nographic
|
||||||
|
-net "nic,model=virtio"
|
||||||
|
-net "user,hostfwd=tcp::${API_PORT}-:6443,hostfwd=tcp::${SSH_PORT}-:22"
|
||||||
|
-drive "file=$DATA_DISK,format=raw,if=virtio"
|
||||||
|
-serial "file:$SERIAL_LOG"
|
||||||
|
)
|
||||||
|
|
||||||
|
[ -n "$KVM_FLAG" ] && QEMU_CMD+=("$KVM_FLAG")
|
||||||
|
|
||||||
|
case "$IMAGE" in
|
||||||
|
*.iso)
|
||||||
|
QEMU_CMD+=(
|
||||||
|
-cdrom "$IMAGE"
|
||||||
|
-boot d
|
||||||
|
-append "console=${CONSOLE} kubesolo.data=/dev/vda kubesolo.debug $EXTRA_APPEND"
|
||||||
|
)
|
||||||
|
;;
|
||||||
|
*.img)
|
||||||
|
QEMU_CMD+=(
|
||||||
|
-drive "file=$IMAGE,format=raw,if=virtio"
|
||||||
|
)
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "ERROR: Unrecognized image format: $IMAGE" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
# Launch
|
# Launch
|
||||||
"${QEMU_CMD[@]}" &
|
"${QEMU_CMD[@]}" &
|
||||||
|
|||||||
129
test/qemu/test-boot-arm64-disk.sh
Executable file
129
test/qemu/test-boot-arm64-disk.sh
Executable file
@@ -0,0 +1,129 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# test-boot-arm64-disk.sh — Boot the ARM64 .arm64.img via UEFI + GRUB and
|
||||||
|
# verify the init system reaches stage 90.
|
||||||
|
#
|
||||||
|
# This is the full-stack integration test: UEFI firmware -> GRUB -> kernel ->
|
||||||
|
# initramfs -> staged init. Contrast with test-boot-arm64.sh which skips the
|
||||||
|
# bootloader and loads kernel/initramfs directly.
|
||||||
|
#
|
||||||
|
# Exit 0 = PASS, Exit 1 = FAIL.
|
||||||
|
#
|
||||||
|
# Usage: ./test/qemu/test-boot-arm64-disk.sh [disk.img]
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
VERSION="$(cat "$PROJECT_ROOT/VERSION")"
|
||||||
|
|
||||||
|
DISK_IMAGE="${1:-$PROJECT_ROOT/output/kubesolo-os-${VERSION}.arm64.img}"
|
||||||
|
TIMEOUT=180
|
||||||
|
|
||||||
|
echo "==> ARM64 UEFI Disk Boot Test"
|
||||||
|
echo " Disk image: $DISK_IMAGE"
|
||||||
|
echo " Timeout: ${TIMEOUT}s"
|
||||||
|
|
||||||
|
if [ ! -f "$DISK_IMAGE" ]; then
|
||||||
|
echo "ERROR: Disk image not found: $DISK_IMAGE"
|
||||||
|
echo " Run 'make disk-image-arm64' to build it."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v qemu-system-aarch64 >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: qemu-system-aarch64 not found."
|
||||||
|
echo " apt install qemu-system-arm # Debian/Ubuntu"
|
||||||
|
echo " dnf install qemu-system-aarch64 # Fedora/RHEL"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# --- Locate UEFI firmware ---
|
||||||
|
UEFI_FW=""
|
||||||
|
for candidate in \
|
||||||
|
/usr/share/qemu-efi-aarch64/QEMU_EFI.fd \
|
||||||
|
/usr/share/AAVMF/AAVMF_CODE.fd \
|
||||||
|
/usr/share/edk2/aarch64/QEMU_EFI.fd \
|
||||||
|
/usr/share/qemu/edk2-aarch64-code.fd \
|
||||||
|
/opt/homebrew/share/qemu/edk2-aarch64-code.fd \
|
||||||
|
/usr/local/share/qemu/edk2-aarch64-code.fd
|
||||||
|
do
|
||||||
|
if [ -f "$candidate" ]; then
|
||||||
|
UEFI_FW="$candidate"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$UEFI_FW" ]; then
|
||||||
|
echo "ERROR: No ARM64 UEFI firmware found."
|
||||||
|
echo " apt install qemu-efi-aarch64"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo " UEFI fw: $UEFI_FW"
|
||||||
|
|
||||||
|
# Copy disk image to a scratch file so the test doesn't mutate the source.
|
||||||
|
# UEFI will write to grubenv on the EFI partition; we don't want to bake those
|
||||||
|
# changes into the canonical build artifact.
|
||||||
|
SCRATCH_DISK=$(mktemp /tmp/kubesolo-arm64-disk-test-XXXXXX.img)
|
||||||
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-arm64-disk-serial-XXXXXX.log)
|
||||||
|
QEMU_PID=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
|
rm -f "$SCRATCH_DISK" "$SERIAL_LOG"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
cp --reflink=auto "$DISK_IMAGE" "$SCRATCH_DISK" 2>/dev/null || cp "$DISK_IMAGE" "$SCRATCH_DISK"
|
||||||
|
|
||||||
|
# --- Launch QEMU ---
|
||||||
|
qemu-system-aarch64 \
|
||||||
|
-machine virt \
|
||||||
|
-cpu cortex-a72 \
|
||||||
|
-m 2048 \
|
||||||
|
-smp 2 \
|
||||||
|
-nographic \
|
||||||
|
-bios "$UEFI_FW" \
|
||||||
|
-drive "file=$SCRATCH_DISK,format=raw,if=virtio,media=disk" \
|
||||||
|
-net nic,model=virtio \
|
||||||
|
-net user \
|
||||||
|
-serial "file:$SERIAL_LOG" &
|
||||||
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
echo " Waiting for boot (PID $QEMU_PID)..."
|
||||||
|
ELAPSED=0
|
||||||
|
SUCCESS=0
|
||||||
|
while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
|
||||||
|
if grep -q "\[kubesolo-init\] \[OK\] Stage 90-kubesolo.sh complete" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
SUCCESS=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if grep -q "KubeSolo is running" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
SUCCESS=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if ! kill -0 "$QEMU_PID" 2>/dev/null; then
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: QEMU exited prematurely"
|
||||||
|
echo " Last 30 lines of serial output:"
|
||||||
|
tail -30 "$SERIAL_LOG" 2>/dev/null || echo " (no output)"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
ELAPSED=$((ELAPSED + 2))
|
||||||
|
printf "\r Elapsed: %ds / %ds" "$ELAPSED" "$TIMEOUT"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
kill "$QEMU_PID" 2>/dev/null || true
|
||||||
|
wait "$QEMU_PID" 2>/dev/null || true
|
||||||
|
QEMU_PID=""
|
||||||
|
|
||||||
|
if [ "$SUCCESS" = "1" ]; then
|
||||||
|
echo "==> ARM64 UEFI Disk Boot Test PASSED (${ELAPSED}s)"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> ARM64 UEFI Disk Boot Test FAILED (timeout ${TIMEOUT}s)"
|
||||||
|
echo ""
|
||||||
|
echo "==> Last 50 lines of serial output:"
|
||||||
|
tail -50 "$SERIAL_LOG" 2>/dev/null || echo " (no output)"
|
||||||
|
exit 1
|
||||||
117
test/qemu/test-boot-arm64.sh
Executable file
117
test/qemu/test-boot-arm64.sh
Executable file
@@ -0,0 +1,117 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# test-boot-arm64.sh — Verify ARM64 image boots successfully in QEMU
|
||||||
|
#
|
||||||
|
# Uses qemu-system-aarch64 with -machine virt to test ARM64 kernel + initramfs.
|
||||||
|
# Exit 0 = PASS, Exit 1 = FAIL
|
||||||
|
#
|
||||||
|
# Usage: ./test/qemu/test-boot-arm64.sh [kernel] [initramfs]
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
|
||||||
|
KERNEL="${1:-$PROJECT_ROOT/build/cache/custom-kernel-arm64/Image}"
|
||||||
|
INITRD="${2:-$PROJECT_ROOT/build/rootfs-work/kubesolo-os.gz}"
|
||||||
|
TIMEOUT=120
|
||||||
|
|
||||||
|
echo "==> ARM64 Boot Test"
|
||||||
|
echo " Kernel: $KERNEL"
|
||||||
|
echo " Initrd: $INITRD"
|
||||||
|
echo " Timeout: ${TIMEOUT}s"
|
||||||
|
|
||||||
|
# Verify files exist
|
||||||
|
if [ ! -f "$KERNEL" ]; then
|
||||||
|
echo "ERROR: Kernel not found: $KERNEL"
|
||||||
|
echo " Run 'make kernel-arm64' to build the ARM64 kernel."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ ! -f "$INITRD" ]; then
|
||||||
|
echo "ERROR: Initrd not found: $INITRD"
|
||||||
|
echo " Run 'make initramfs' to build the initramfs."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verify qemu-system-aarch64 is available
|
||||||
|
if ! command -v qemu-system-aarch64 >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: qemu-system-aarch64 not found."
|
||||||
|
echo " Install QEMU with ARM64 support:"
|
||||||
|
echo " apt install qemu-system-arm # Debian/Ubuntu"
|
||||||
|
echo " dnf install qemu-system-aarch64 # Fedora/RHEL"
|
||||||
|
echo " brew install qemu # macOS"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create temp data disk
|
||||||
|
DATA_DISK=$(mktemp /tmp/kubesolo-arm64-test-XXXXXX.img)
|
||||||
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=512 2>/dev/null
|
||||||
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-arm64-serial-XXXXXX.log)
|
||||||
|
QEMU_PID=""
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# Launch QEMU in background
|
||||||
|
qemu-system-aarch64 \
|
||||||
|
-machine virt \
|
||||||
|
-cpu cortex-a72 \
|
||||||
|
-m 2048 \
|
||||||
|
-smp 2 \
|
||||||
|
-nographic \
|
||||||
|
-kernel "$KERNEL" \
|
||||||
|
-initrd "$INITRD" \
|
||||||
|
-append "console=ttyAMA0 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
|
-net nic,model=virtio \
|
||||||
|
-net user \
|
||||||
|
-serial "file:$SERIAL_LOG" &
|
||||||
|
QEMU_PID=$!
|
||||||
|
|
||||||
|
# Wait for boot success marker
|
||||||
|
echo " Waiting for boot..."
|
||||||
|
ELAPSED=0
|
||||||
|
SUCCESS=0
|
||||||
|
while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
|
||||||
|
# Check for stage 90 completion (same marker as x86_64 test)
|
||||||
|
if grep -q "\[kubesolo-init\] \[OK\] Stage 90-kubesolo.sh complete" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
SUCCESS=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
# Also check for generic KubeSolo running message
|
||||||
|
if grep -q "KubeSolo is running" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
|
SUCCESS=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
# Check if QEMU exited prematurely
|
||||||
|
if ! kill -0 "$QEMU_PID" 2>/dev/null; then
|
||||||
|
echo ""
|
||||||
|
echo "==> FAIL: QEMU exited prematurely"
|
||||||
|
echo " Last 20 lines of serial output:"
|
||||||
|
tail -20 "$SERIAL_LOG" 2>/dev/null || echo " (no output)"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
ELAPSED=$((ELAPSED + 2))
|
||||||
|
printf "\r Elapsed: %ds / %ds" "$ELAPSED" "$TIMEOUT"
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Kill QEMU
|
||||||
|
kill "$QEMU_PID" 2>/dev/null || true
|
||||||
|
wait "$QEMU_PID" 2>/dev/null || true
|
||||||
|
QEMU_PID=""
|
||||||
|
|
||||||
|
if [ "$SUCCESS" = "1" ]; then
|
||||||
|
echo "==> ARM64 Boot Test PASSED (${ELAPSED}s)"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
echo "==> ARM64 Boot Test FAILED (timeout ${TIMEOUT}s)"
|
||||||
|
echo ""
|
||||||
|
echo "==> Last 30 lines of serial output:"
|
||||||
|
tail -30 "$SERIAL_LOG" 2>/dev/null || echo " (no output)"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
@@ -5,17 +5,25 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ISO="${1:?Usage: $0 <path-to-iso>}"
|
ISO="${1:?Usage: $0 <path-to-iso>}"
|
||||||
TIMEOUT_BOOT=120 # seconds to wait for boot success marker
|
TIMEOUT_BOOT=${TIMEOUT_BOOT:-120} # seconds to wait for boot success marker
|
||||||
SERIAL_LOG=$(mktemp /tmp/kubesolo-boot-test-XXXXXX.log)
|
SERIAL_LOG=$(mktemp /tmp/kubesolo-boot-test-XXXXXX.log)
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
|
. "$SCRIPT_DIR/../lib/qemu-helpers.sh"
|
||||||
|
|
||||||
# Temp data disk
|
# Temp data disk
|
||||||
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
DATA_DISK=$(mktemp /tmp/kubesolo-data-XXXXXX.img)
|
||||||
dd if=/dev/zero of="$DATA_DISK" bs=1M count=512 2>/dev/null
|
dd if=/dev/zero of="$DATA_DISK" bs=1M count=512 2>/dev/null
|
||||||
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
mkfs.ext4 -q -L KSOLODATA "$DATA_DISK" 2>/dev/null
|
||||||
|
|
||||||
|
QEMU_PID=""
|
||||||
|
EXTRACT_DIR=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
kill "$QEMU_PID" 2>/dev/null || true
|
[ -n "$QEMU_PID" ] && kill "$QEMU_PID" 2>/dev/null || true
|
||||||
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
rm -f "$DATA_DISK" "$SERIAL_LOG"
|
||||||
|
[ -n "$EXTRACT_DIR" ] && rm -rf "$EXTRACT_DIR"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
@@ -23,16 +31,25 @@ echo "==> Boot test: $ISO"
|
|||||||
echo " Timeout: ${TIMEOUT_BOOT}s"
|
echo " Timeout: ${TIMEOUT_BOOT}s"
|
||||||
echo " Serial log: $SERIAL_LOG"
|
echo " Serial log: $SERIAL_LOG"
|
||||||
|
|
||||||
# Launch QEMU in background
|
# Extract kernel from ISO
|
||||||
|
EXTRACT_DIR="$(mktemp -d /tmp/kubesolo-extract-XXXXXX)"
|
||||||
|
extract_kernel_from_iso "$ISO" "$EXTRACT_DIR"
|
||||||
|
|
||||||
|
KVM_FLAG=$(detect_kvm)
|
||||||
|
[ -n "$KVM_FLAG" ] && echo " KVM acceleration: enabled"
|
||||||
|
|
||||||
|
# Launch QEMU in background with direct kernel boot
|
||||||
|
# shellcheck disable=SC2086
|
||||||
qemu-system-x86_64 \
|
qemu-system-x86_64 \
|
||||||
-m 2048 -smp 2 \
|
-m 2048 -smp 2 \
|
||||||
-nographic \
|
-nographic \
|
||||||
-cdrom "$ISO" \
|
$KVM_FLAG \
|
||||||
-boot d \
|
-kernel "$VMLINUZ" \
|
||||||
|
-initrd "$INITRAMFS" \
|
||||||
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
-drive "file=$DATA_DISK,format=raw,if=virtio" \
|
||||||
-net nic,model=virtio \
|
-net "nic,model=virtio" \
|
||||||
-net user \
|
-net user \
|
||||||
-serial file:"$SERIAL_LOG" \
|
-serial "file:$SERIAL_LOG" \
|
||||||
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
-append "console=ttyS0,115200n8 kubesolo.data=/dev/vda kubesolo.debug" \
|
||||||
&
|
&
|
||||||
QEMU_PID=$!
|
QEMU_PID=$!
|
||||||
@@ -41,7 +58,7 @@ QEMU_PID=$!
|
|||||||
echo " Waiting for boot..."
|
echo " Waiting for boot..."
|
||||||
ELAPSED=0
|
ELAPSED=0
|
||||||
while [ "$ELAPSED" -lt "$TIMEOUT_BOOT" ]; do
|
while [ "$ELAPSED" -lt "$TIMEOUT_BOOT" ]; do
|
||||||
if grep -q "\[kubesolo-init\] \[OK\] Stage 90-kubesolo.sh complete" "$SERIAL_LOG" 2>/dev/null; then
|
if grep -q "\[kubesolo-init\] \[OK\] KubeSolo is running" "$SERIAL_LOG" 2>/dev/null; then
|
||||||
echo ""
|
echo ""
|
||||||
echo "==> PASS: KubeSolo OS booted successfully in ${ELAPSED}s"
|
echo "==> PASS: KubeSolo OS booted successfully in ${ELAPSED}s"
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@@ -4,24 +4,34 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Activate switches the boot target to the passive partition.
|
// Activate switches the boot target to the passive partition.
|
||||||
// After activation, the next reboot will boot from the new partition
|
// After activation, the next reboot will boot from the new partition
|
||||||
// with boot_counter=3. If health checks fail 3 times, GRUB auto-rolls back.
|
// with boot_counter=3. If health checks fail 3 times, GRUB auto-rolls back.
|
||||||
|
//
|
||||||
|
// State transition: Staged → Activated. On failure → Failed.
|
||||||
func Activate(args []string) error {
|
func Activate(args []string) error {
|
||||||
opts := parseOpts(args)
|
opts := parseOpts(args)
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
env := opts.NewBootEnv()
|
||||||
|
|
||||||
|
st, err := state.Load(opts.StatePath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("state file unreadable, starting fresh", "error", err)
|
||||||
|
st = state.New()
|
||||||
|
}
|
||||||
|
|
||||||
// Get passive slot (the one we want to boot into)
|
// Get passive slot (the one we want to boot into)
|
||||||
passiveSlot, err := env.PassiveSlot()
|
passiveSlot, err := env.PassiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("reading passive slot: %w", err))
|
||||||
return fmt.Errorf("reading passive slot: %w", err)
|
return fmt.Errorf("reading passive slot: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
activeSlot, err := env.ActiveSlot()
|
activeSlot, err := env.ActiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("reading active slot: %w", err))
|
||||||
return fmt.Errorf("reading active slot: %w", err)
|
return fmt.Errorf("reading active slot: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -29,9 +39,14 @@ func Activate(args []string) error {
|
|||||||
|
|
||||||
// Set the passive slot as active with fresh boot counter
|
// Set the passive slot as active with fresh boot counter
|
||||||
if err := env.ActivateSlot(passiveSlot); err != nil {
|
if err := env.ActivateSlot(passiveSlot); err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("activating slot %s: %w", passiveSlot, err))
|
||||||
return fmt.Errorf("activating slot %s: %w", passiveSlot, err)
|
return fmt.Errorf("activating slot %s: %w", passiveSlot, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseActivated, "", ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseActivated, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Printf("Slot %s activated (was %s)\n", passiveSlot, activeSlot)
|
fmt.Printf("Slot %s activated (was %s)\n", passiveSlot, activeSlot)
|
||||||
fmt.Println("Boot counter set to 3. Reboot to start the new version.")
|
fmt.Println("Boot counter set to 3. Reboot to start the new version.")
|
||||||
fmt.Println("The system will automatically roll back if health checks fail 3 times.")
|
fmt.Println("The system will automatically roll back if health checks fail 3 times.")
|
||||||
|
|||||||
@@ -1,74 +1,240 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
"github.com/portainer/kubesolo-os/update/pkg/config"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/health"
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/oci"
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// applyMetadataGates enforces channel / architecture / min-version policy on
|
||||||
|
// resolved update metadata, regardless of transport (HTTP or OCI). Records
|
||||||
|
// any failure to the state file before returning.
|
||||||
|
func applyMetadataGates(opts opts, st *state.UpdateState, meta *image.UpdateMetadata) error {
|
||||||
|
if meta.Channel != "" && meta.Channel != opts.Channel {
|
||||||
|
err := fmt.Errorf("metadata channel %q does not match local channel %q",
|
||||||
|
meta.Channel, opts.Channel)
|
||||||
|
_ = st.RecordError(opts.StatePath, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if meta.Architecture != "" && meta.Architecture != runtime.GOARCH {
|
||||||
|
err := fmt.Errorf("metadata architecture %q does not match runtime %q",
|
||||||
|
meta.Architecture, runtime.GOARCH)
|
||||||
|
_ = st.RecordError(opts.StatePath, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if meta.MinCompatibleVersion != "" && st.FromVersion != "" {
|
||||||
|
cmp, cerr := config.CompareVersions(st.FromVersion, meta.MinCompatibleVersion)
|
||||||
|
if cerr != nil {
|
||||||
|
slog.Warn("min-version comparison failed", "error", cerr,
|
||||||
|
"from", st.FromVersion, "min", meta.MinCompatibleVersion)
|
||||||
|
} else if cmp < 0 {
|
||||||
|
err := fmt.Errorf("current version %s is below min_compatible_version %s; install %s first",
|
||||||
|
st.FromVersion, meta.MinCompatibleVersion, meta.MinCompatibleVersion)
|
||||||
|
_ = st.RecordError(opts.StatePath, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Apply downloads a new OS image and writes it to the passive partition.
|
// Apply downloads a new OS image and writes it to the passive partition.
|
||||||
// It does NOT activate the new partition — use 'activate' for that.
|
// It does NOT activate the new partition — use 'activate' for that.
|
||||||
|
//
|
||||||
|
// State transitions: Idle/Success/Failed → Checking → Downloading → Staged.
|
||||||
|
// On any error the state moves to Failed with LastError set.
|
||||||
func Apply(args []string) error {
|
func Apply(args []string) error {
|
||||||
opts := parseOpts(args)
|
opts := parseOpts(args)
|
||||||
|
|
||||||
if opts.ServerURL == "" {
|
if opts.ServerURL == "" && opts.Registry == "" {
|
||||||
return fmt.Errorf("--server is required")
|
return fmt.Errorf("--server or --registry is required (or set in /etc/kubesolo/update.conf)")
|
||||||
|
}
|
||||||
|
if opts.ServerURL != "" && opts.Registry != "" {
|
||||||
|
return fmt.Errorf("--server and --registry are mutually exclusive")
|
||||||
}
|
}
|
||||||
|
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
// Maintenance window gate — earliest cheap check, before any HTTP work.
|
||||||
|
// Skipped with --force.
|
||||||
|
window, werr := config.ParseWindow(opts.MaintenanceWindow)
|
||||||
|
if werr != nil {
|
||||||
|
return fmt.Errorf("parse maintenance_window: %w", werr)
|
||||||
|
}
|
||||||
|
if !opts.Force && !window.Contains(time.Now()) {
|
||||||
|
return fmt.Errorf("outside maintenance window (%s); pass --force to override",
|
||||||
|
window.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Node-block-label gate — workload authors can defer an update by
|
||||||
|
// labeling the node updates.kubesolo.io/block=true. Skipped with --force
|
||||||
|
// and silently bypassed when the K8s API isn't reachable (air-gap).
|
||||||
|
if !opts.Force {
|
||||||
|
blocked, berr := health.CheckNodeBlocked("")
|
||||||
|
if berr != nil {
|
||||||
|
slog.Warn("node-block check failed, allowing update", "error", berr)
|
||||||
|
} else if blocked {
|
||||||
|
return fmt.Errorf("node carries label %s=true; refusing update (pass --force to override)",
|
||||||
|
health.NodeBlockLabel)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
st, err := state.Load(opts.StatePath)
|
||||||
|
if err != nil {
|
||||||
|
// Don't block the operation on a corrupt state file. Log + recover.
|
||||||
|
slog.Warn("state file unreadable, starting fresh", "error", err)
|
||||||
|
st = state.New()
|
||||||
|
}
|
||||||
|
|
||||||
|
env := opts.NewBootEnv()
|
||||||
|
|
||||||
|
// Record the current running version as the "from" reference. The active
|
||||||
|
// slot's version file is the most reliable source.
|
||||||
|
activeSlot, slotErr := env.ActiveSlot()
|
||||||
|
if slotErr == nil {
|
||||||
|
if partInfo, perr := partition.GetSlotPartition(activeSlot); perr == nil {
|
||||||
|
mp := "/tmp/kubesolo-active-" + activeSlot
|
||||||
|
if merr := partition.MountReadOnly(partInfo.Device, mp); merr == nil {
|
||||||
|
if v, rerr := partition.ReadVersion(mp); rerr == nil {
|
||||||
|
st.SetFromVersion(v)
|
||||||
|
}
|
||||||
|
partition.Unmount(mp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Determine passive slot
|
// Determine passive slot
|
||||||
passiveSlot, err := env.PassiveSlot()
|
passiveSlot, err := env.PassiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("reading passive slot: %w", err))
|
||||||
return fmt.Errorf("reading passive slot: %w", err)
|
return fmt.Errorf("reading passive slot: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("applying update", "target_slot", passiveSlot)
|
slog.Info("applying update", "target_slot", passiveSlot)
|
||||||
|
|
||||||
// Check for update
|
|
||||||
stageDir := "/tmp/kubesolo-update-stage"
|
stageDir := "/tmp/kubesolo-update-stage"
|
||||||
client := image.NewClient(opts.ServerURL, stageDir)
|
|
||||||
defer client.Cleanup()
|
|
||||||
|
|
||||||
// Enable signature verification if public key is configured
|
if err := st.Transition(opts.StatePath, state.PhaseChecking, "", ""); err != nil {
|
||||||
if opts.PubKeyPath != "" {
|
slog.Warn("state transition failed", "phase", state.PhaseChecking, "error", err)
|
||||||
client.SetPublicKeyPath(opts.PubKeyPath)
|
|
||||||
slog.Info("signature verification enabled", "pubkey", opts.PubKeyPath)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
meta, err := client.CheckForUpdate()
|
// Resolve metadata via the configured transport. OCI registry mode pulls
|
||||||
if err != nil {
|
// the manifest only; HTTP mode hits latest.json.
|
||||||
return fmt.Errorf("checking for update: %w", err)
|
var (
|
||||||
|
meta *image.UpdateMetadata
|
||||||
|
staged *image.StagedImage
|
||||||
|
)
|
||||||
|
if opts.Registry != "" {
|
||||||
|
ociClient, err := oci.NewClient(opts.Registry)
|
||||||
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("oci client: %w", err))
|
||||||
|
return fmt.Errorf("oci client: %w", err)
|
||||||
|
}
|
||||||
|
tag := opts.Tag
|
||||||
|
if tag == "" {
|
||||||
|
tag = opts.Channel
|
||||||
|
}
|
||||||
|
if tag == "" {
|
||||||
|
tag = "stable"
|
||||||
|
}
|
||||||
|
meta, err = ociClient.FetchMetadata(context.Background(), tag)
|
||||||
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("oci fetch metadata: %w", err))
|
||||||
|
return fmt.Errorf("oci fetch metadata: %w", err)
|
||||||
|
}
|
||||||
|
if err := applyMetadataGates(opts, st, meta); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseDownloading, meta.Version, ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseDownloading, "error", err)
|
||||||
|
}
|
||||||
|
staged, _, err = ociClient.Pull(context.Background(), tag, stageDir)
|
||||||
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("oci pull: %w", err))
|
||||||
|
return fmt.Errorf("oci pull: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
client := image.NewClient(opts.ServerURL, stageDir)
|
||||||
|
defer client.Cleanup()
|
||||||
|
if opts.PubKeyPath != "" {
|
||||||
|
client.SetPublicKeyPath(opts.PubKeyPath)
|
||||||
|
slog.Info("signature verification enabled", "pubkey", opts.PubKeyPath)
|
||||||
|
}
|
||||||
|
var err error
|
||||||
|
meta, err = client.CheckForUpdate()
|
||||||
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("checking for update: %w", err))
|
||||||
|
return fmt.Errorf("checking for update: %w", err)
|
||||||
|
}
|
||||||
|
if err := applyMetadataGates(opts, st, meta); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseDownloading, meta.Version, ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseDownloading, "error", err)
|
||||||
|
}
|
||||||
|
staged, err = client.Download(meta)
|
||||||
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("downloading update: %w", err))
|
||||||
|
return fmt.Errorf("downloading update: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("update available", "version", meta.Version)
|
slog.Info("update available", "version", meta.Version, "channel", meta.Channel, "arch", meta.Architecture)
|
||||||
|
|
||||||
// Download and verify
|
|
||||||
staged, err := client.Download(meta)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("downloading update: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mount passive partition
|
// Mount passive partition
|
||||||
partInfo, err := partition.GetSlotPartition(passiveSlot)
|
partInfo, err := partition.GetSlotPartition(passiveSlot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("finding passive partition: %w", err))
|
||||||
return fmt.Errorf("finding passive partition: %w", err)
|
return fmt.Errorf("finding passive partition: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
mountPoint := "/tmp/kubesolo-passive-" + passiveSlot
|
mountPoint := "/tmp/kubesolo-passive-" + passiveSlot
|
||||||
if err := partition.MountReadWrite(partInfo.Device, mountPoint); err != nil {
|
if err := partition.MountReadWrite(partInfo.Device, mountPoint); err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("mounting passive partition: %w", err))
|
||||||
return fmt.Errorf("mounting passive partition: %w", err)
|
return fmt.Errorf("mounting passive partition: %w", err)
|
||||||
}
|
}
|
||||||
defer partition.Unmount(mountPoint)
|
defer partition.Unmount(mountPoint)
|
||||||
|
|
||||||
|
// Free-space pre-write check: the passive partition must have at least
|
||||||
|
// (kernel + initramfs) + 10% headroom. Catches corrupted-FS reports and
|
||||||
|
// shrunk/wrong-size partitions before we destroy the existing slot data.
|
||||||
|
var imgSize int64
|
||||||
|
for _, p := range []string{staged.VmlinuzPath, staged.InitramfsPath} {
|
||||||
|
fi, ferr := os.Stat(p)
|
||||||
|
if ferr != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("stat staged file %s: %w", p, ferr))
|
||||||
|
return fmt.Errorf("stat staged file %s: %w", p, ferr)
|
||||||
|
}
|
||||||
|
imgSize += fi.Size()
|
||||||
|
}
|
||||||
|
avail, ok, ferr := partition.HasFreeSpaceFor(mountPoint, imgSize, 10)
|
||||||
|
if ferr != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("free-space check: %w", ferr))
|
||||||
|
return fmt.Errorf("free-space check: %w", ferr)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
err := fmt.Errorf("insufficient space on %s: have %.1f MiB, need %.1f MiB (image + 10%% headroom)",
|
||||||
|
passiveSlot, float64(avail)/(1<<20), float64(imgSize)*1.1/(1<<20))
|
||||||
|
_ = st.RecordError(opts.StatePath, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Write image to passive partition
|
// Write image to passive partition
|
||||||
if err := partition.WriteSystemImage(mountPoint, staged.VmlinuzPath, staged.InitramfsPath, staged.Version); err != nil {
|
if err := partition.WriteSystemImage(mountPoint, staged.VmlinuzPath, staged.InitramfsPath, staged.Version); err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("writing system image: %w", err))
|
||||||
return fmt.Errorf("writing system image: %w", err)
|
return fmt.Errorf("writing system image: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseStaged, staged.Version, ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseStaged, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Printf("Update v%s written to slot %s (%s)\n", staged.Version, passiveSlot, partInfo.Device)
|
fmt.Printf("Update v%s written to slot %s (%s)\n", staged.Version, passiveSlot, partInfo.Device)
|
||||||
fmt.Println("Run 'kubesolo-update activate' to boot into the new version")
|
fmt.Println("Run 'kubesolo-update activate' to boot into the new version")
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/image"
|
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
"github.com/portainer/kubesolo-os/update/pkg/partition"
|
||||||
)
|
)
|
||||||
@@ -19,7 +18,7 @@ func Check(args []string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get current version from active partition
|
// Get current version from active partition
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
env := opts.NewBootEnv()
|
||||||
activeSlot, err := env.ActiveSlot()
|
activeSlot, err := env.ActiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("reading active slot: %w", err)
|
return fmt.Errorf("reading active slot: %w", err)
|
||||||
|
|||||||
@@ -5,17 +5,32 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/health"
|
"github.com/portainer/kubesolo-os/update/pkg/health"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Healthcheck performs post-boot health verification.
|
// Healthcheck performs post-boot health verification.
|
||||||
// If all checks pass, it marks the boot as successful in GRUB.
|
// If all checks pass, it marks the boot as successful in GRUB.
|
||||||
// This should be run after every boot (typically via a systemd unit or
|
// This should be run after every boot (typically via a systemd unit or
|
||||||
// init script) to confirm the system is healthy.
|
// init script) to confirm the system is healthy.
|
||||||
|
//
|
||||||
|
// State transition: Activated → Verifying → Success on pass, → Failed on fail.
|
||||||
|
// If state isn't in Activated (e.g. manual run on a long-stable system), the
|
||||||
|
// state file is left alone — healthcheck still does its job.
|
||||||
|
//
|
||||||
|
// When --auto-rollback-after N is set, consecutive post-Activated failures
|
||||||
|
// are counted in state.HealthCheckFailures. On the Nth failure, the agent
|
||||||
|
// calls Rollback() and the operator is expected to reboot (this command
|
||||||
|
// does not reboot the host — that's policy left to systemd/init).
|
||||||
func Healthcheck(args []string) error {
|
func Healthcheck(args []string) error {
|
||||||
opts := parseOpts(args)
|
opts := parseOpts(args)
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
env := opts.NewBootEnv()
|
||||||
|
|
||||||
|
st, err := state.Load(opts.StatePath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("state file unreadable, starting fresh", "error", err)
|
||||||
|
st = state.New()
|
||||||
|
}
|
||||||
|
|
||||||
// Check if already marked successful
|
// Check if already marked successful
|
||||||
success, err := env.BootSuccess()
|
success, err := env.BootSuccess()
|
||||||
@@ -27,30 +42,94 @@ func Healthcheck(args []string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only transition state if we're post-activation. Manual healthcheck on a
|
||||||
|
// long-stable system shouldn't reset Idle → Verifying.
|
||||||
|
postActivation := st.Phase == state.PhaseActivated
|
||||||
|
if postActivation {
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseVerifying, "", ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseVerifying, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
timeout := time.Duration(opts.TimeoutSecs) * time.Second
|
timeout := time.Duration(opts.TimeoutSecs) * time.Second
|
||||||
checker := health.NewChecker("", "", timeout)
|
checker := health.NewChecker("", "", timeout)
|
||||||
|
checker.ProbeURL = opts.HealthcheckURL
|
||||||
|
if opts.KubeSystemSettle > 0 {
|
||||||
|
checker.KubeSystemSettle = time.Duration(opts.KubeSystemSettle) * time.Second
|
||||||
|
}
|
||||||
|
// Probe the data partition every healthcheck so a wedged disk fails fast.
|
||||||
|
checker.DataDir = "/var/lib/kubesolo"
|
||||||
|
|
||||||
slog.Info("running post-boot health checks", "timeout", timeout)
|
slog.Info("running post-boot health checks",
|
||||||
|
"timeout", timeout,
|
||||||
|
"probe_url", checker.ProbeURL,
|
||||||
|
"kube_system_settle", checker.KubeSystemSettle)
|
||||||
|
|
||||||
status, err := checker.WaitForHealthy()
|
status, err := checker.WaitForHealthy()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Health check FAILED: %s\n", status.Message)
|
fmt.Printf("Health check FAILED: %s\n", status.Message)
|
||||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
printStatusBreakdown(status)
|
||||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
|
||||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
|
||||||
fmt.Println("\nBoot NOT marked successful — system may roll back on next reboot")
|
fmt.Println("\nBoot NOT marked successful — system may roll back on next reboot")
|
||||||
|
|
||||||
|
if postActivation {
|
||||||
|
st.HealthCheckFailures++
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("post-boot health check failed: %s", status.Message))
|
||||||
|
|
||||||
|
// Auto-rollback escalation. Only trigger when post-Activated;
|
||||||
|
// don't second-guess a healthy long-running system.
|
||||||
|
if opts.AutoRollbackAfter > 0 && st.HealthCheckFailures >= opts.AutoRollbackAfter {
|
||||||
|
slog.Warn("auto-rollback threshold reached",
|
||||||
|
"failures", st.HealthCheckFailures,
|
||||||
|
"threshold", opts.AutoRollbackAfter)
|
||||||
|
if rerr := env.ForceRollback(); rerr != nil {
|
||||||
|
slog.Error("auto-rollback failed", "error", rerr)
|
||||||
|
return err // return the original healthcheck error
|
||||||
|
}
|
||||||
|
if terr := st.Transition(opts.StatePath, state.PhaseRolledBack, "",
|
||||||
|
fmt.Sprintf("auto-rollback after %d healthcheck failures", st.HealthCheckFailures)); terr != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseRolledBack, "error", terr)
|
||||||
|
}
|
||||||
|
fmt.Println("\nAuto-rollback triggered. Reboot to complete the rollback.")
|
||||||
|
}
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark boot as successful
|
// Mark boot as successful
|
||||||
if err := env.MarkBootSuccess(); err != nil {
|
if err := env.MarkBootSuccess(); err != nil {
|
||||||
|
if postActivation {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("marking boot success: %w", err))
|
||||||
|
}
|
||||||
return fmt.Errorf("marking boot success: %w", err)
|
return fmt.Errorf("marking boot success: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if postActivation {
|
||||||
|
// Reset failure counter on a clean pass.
|
||||||
|
st.HealthCheckFailures = 0
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseSuccess, "", ""); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseSuccess, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Println("Health check PASSED — boot marked successful")
|
fmt.Println("Health check PASSED — boot marked successful")
|
||||||
fmt.Printf(" containerd: %v\n", status.Containerd)
|
printStatusBreakdown(status)
|
||||||
fmt.Printf(" apiserver: %v\n", status.APIServer)
|
|
||||||
fmt.Printf(" node_ready: %v\n", status.NodeReady)
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// printStatusBreakdown emits a human-readable per-check summary. Only emits
|
||||||
|
// optional check lines when they actually ran.
|
||||||
|
func printStatusBreakdown(s *health.Status) {
|
||||||
|
fmt.Printf(" containerd: %v\n", s.Containerd)
|
||||||
|
fmt.Printf(" apiserver: %v\n", s.APIServer)
|
||||||
|
fmt.Printf(" node_ready: %v\n", s.NodeReady)
|
||||||
|
if !s.KubeSystemReady {
|
||||||
|
fmt.Printf(" kube-system pods: %v\n", s.KubeSystemReady)
|
||||||
|
}
|
||||||
|
if !s.ProbeURL {
|
||||||
|
fmt.Printf(" probe URL: %v\n", s.ProbeURL)
|
||||||
|
}
|
||||||
|
if !s.DiskWritable {
|
||||||
|
fmt.Printf(" disk writable: %v\n", s.DiskWritable)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/metrics"
|
"github.com/portainer/kubesolo-os/update/pkg/metrics"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Metrics starts the Prometheus-compatible metrics HTTP server.
|
// Metrics starts the Prometheus-compatible metrics HTTP server.
|
||||||
@@ -12,10 +13,12 @@ func Metrics(args []string) error {
|
|||||||
fs := flag.NewFlagSet("metrics", flag.ExitOnError)
|
fs := flag.NewFlagSet("metrics", flag.ExitOnError)
|
||||||
listenAddr := fs.String("listen", ":9100", "Metrics HTTP listen address")
|
listenAddr := fs.String("listen", ":9100", "Metrics HTTP listen address")
|
||||||
grubenvPath := fs.String("grubenv", "/boot/grub/grubenv", "Path to grubenv file")
|
grubenvPath := fs.String("grubenv", "/boot/grub/grubenv", "Path to grubenv file")
|
||||||
|
statePath := fs.String("state", state.DefaultPath, "Path to update state.json")
|
||||||
if err := fs.Parse(args); err != nil {
|
if err := fs.Parse(args); err != nil {
|
||||||
return fmt.Errorf("parse flags: %w", err)
|
return fmt.Errorf("parse flags: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
srv := metrics.NewServer(*listenAddr, *grubenvPath)
|
srv := metrics.NewServer(*listenAddr, *grubenvPath)
|
||||||
|
srv.SetStatePath(*statePath)
|
||||||
return srv.ListenAndServe()
|
return srv.ListenAndServe()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,28 +1,168 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log/slog"
|
||||||
|
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/bootenv"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/config"
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
|
)
|
||||||
|
|
||||||
// opts holds shared command-line options for all subcommands.
|
// opts holds shared command-line options for all subcommands.
|
||||||
type opts struct {
|
type opts struct {
|
||||||
ServerURL string
|
ServerURL string
|
||||||
GrubenvPath string
|
Registry string // OCI registry ref (e.g. ghcr.io/foo/kubesolo-os). Mutually exclusive with ServerURL.
|
||||||
TimeoutSecs int
|
Tag string // OCI tag to pull (default: equal to Channel, falling back to "stable")
|
||||||
PubKeyPath string
|
GrubenvPath string
|
||||||
|
TimeoutSecs int
|
||||||
|
PubKeyPath string
|
||||||
|
BootEnvType string // "grub" or "rpi"
|
||||||
|
BootEnvPath string // path for RPi boot control dir
|
||||||
|
StatePath string // location of state.json (default: state.DefaultPath)
|
||||||
|
ConfPath string // location of update.conf (default: config.DefaultPath)
|
||||||
|
Channel string // update channel ("stable" by default)
|
||||||
|
MaintenanceWindow string // "HH:MM-HH:MM" or empty for always-allow
|
||||||
|
HealthcheckURL string // optional GET probe for healthcheck
|
||||||
|
AutoRollbackAfter int // healthcheck: rollback after N consecutive failures (0=off)
|
||||||
|
KubeSystemSettle int // healthcheck: kube-system pods must be Running for N seconds (0=disabled)
|
||||||
|
Force bool // bypass maintenance window
|
||||||
|
JSON bool // status: emit JSON instead of human-readable
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewBootEnv creates a BootEnv from the parsed options.
|
||||||
|
func (o opts) NewBootEnv() bootenv.BootEnv {
|
||||||
|
switch o.BootEnvType {
|
||||||
|
case "rpi":
|
||||||
|
return bootenv.NewRPi(o.BootEnvPath)
|
||||||
|
default:
|
||||||
|
return bootenv.NewGRUB(o.GrubenvPath)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseOpts extracts command-line flags from args.
|
// parseOpts extracts command-line flags from args.
|
||||||
// Simple parser — no external dependencies.
|
//
|
||||||
|
// Precedence: explicit CLI flags > /etc/kubesolo/update.conf > package
|
||||||
|
// defaults. The config file is loaded first so any CLI flag overrides it.
|
||||||
|
//
|
||||||
|
// Unknown flags are ignored (forward-compat).
|
||||||
func parseOpts(args []string) opts {
|
func parseOpts(args []string) opts {
|
||||||
o := opts{
|
o := opts{
|
||||||
GrubenvPath: "/boot/grub/grubenv",
|
GrubenvPath: "/boot/grub/grubenv",
|
||||||
TimeoutSecs: 120,
|
TimeoutSecs: 120,
|
||||||
|
BootEnvType: "grub",
|
||||||
|
StatePath: state.DefaultPath,
|
||||||
|
ConfPath: config.DefaultPath,
|
||||||
|
Channel: "stable",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First pass: pick up --conf so it can point at a different file before
|
||||||
|
// we load. (Tests pass --conf <tempdir>/update.conf.)
|
||||||
|
for i := 0; i < len(args); i++ {
|
||||||
|
if args[i] == "--conf" && i+1 < len(args) {
|
||||||
|
o.ConfPath = args[i+1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load config file. Missing file is fine (fresh system, no cloud-init yet).
|
||||||
|
if cfg, err := config.Load(o.ConfPath); err == nil && cfg != nil {
|
||||||
|
if cfg.Server != "" {
|
||||||
|
o.ServerURL = cfg.Server
|
||||||
|
}
|
||||||
|
if cfg.Channel != "" {
|
||||||
|
o.Channel = cfg.Channel
|
||||||
|
}
|
||||||
|
if cfg.MaintenanceWindow != "" {
|
||||||
|
o.MaintenanceWindow = cfg.MaintenanceWindow
|
||||||
|
}
|
||||||
|
if cfg.PubKey != "" {
|
||||||
|
o.PubKeyPath = cfg.PubKey
|
||||||
|
}
|
||||||
|
if cfg.HealthcheckURL != "" {
|
||||||
|
o.HealthcheckURL = cfg.HealthcheckURL
|
||||||
|
}
|
||||||
|
if cfg.AutoRollbackAfter > 0 {
|
||||||
|
o.AutoRollbackAfter = cfg.AutoRollbackAfter
|
||||||
|
}
|
||||||
|
} else if err != nil {
|
||||||
|
slog.Warn("could not load update.conf", "path", o.ConfPath, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: CLI overrides config file values.
|
||||||
for i := 0; i < len(args); i++ {
|
for i := 0; i < len(args); i++ {
|
||||||
switch args[i] {
|
switch args[i] {
|
||||||
|
case "--conf":
|
||||||
|
i++ // already handled above
|
||||||
|
case "--state":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.StatePath = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--channel":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.Channel = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--maintenance-window":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.MaintenanceWindow = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--force":
|
||||||
|
o.Force = true
|
||||||
|
case "--healthcheck-url":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.HealthcheckURL = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--auto-rollback-after":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
n := 0
|
||||||
|
for _, ch := range args[i+1] {
|
||||||
|
if ch >= '0' && ch <= '9' {
|
||||||
|
n = n*10 + int(ch-'0')
|
||||||
|
} else {
|
||||||
|
n = 0
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if n > 0 {
|
||||||
|
o.AutoRollbackAfter = n
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--kube-system-settle":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
n := 0
|
||||||
|
for _, ch := range args[i+1] {
|
||||||
|
if ch >= '0' && ch <= '9' {
|
||||||
|
n = n*10 + int(ch-'0')
|
||||||
|
} else {
|
||||||
|
n = 0
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if n > 0 {
|
||||||
|
o.KubeSystemSettle = n
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--json":
|
||||||
|
o.JSON = true
|
||||||
case "--server":
|
case "--server":
|
||||||
if i+1 < len(args) {
|
if i+1 < len(args) {
|
||||||
o.ServerURL = args[i+1]
|
o.ServerURL = args[i+1]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
case "--registry":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.Registry = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--tag":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.Tag = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
case "--grubenv":
|
case "--grubenv":
|
||||||
if i+1 < len(args) {
|
if i+1 < len(args) {
|
||||||
o.GrubenvPath = args[i+1]
|
o.GrubenvPath = args[i+1]
|
||||||
@@ -46,6 +186,16 @@ func parseOpts(args []string) opts {
|
|||||||
o.PubKeyPath = args[i+1]
|
o.PubKeyPath = args[i+1]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
case "--bootenv":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.BootEnvType = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
case "--bootenv-path":
|
||||||
|
if i+1 < len(args) {
|
||||||
|
o.BootEnvPath = args[i+1]
|
||||||
|
i++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,14 +4,22 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Rollback forces an immediate switch to the other partition.
|
// Rollback forces an immediate switch to the other partition.
|
||||||
// Use this to manually revert to the previous version.
|
// Use this to manually revert to the previous version.
|
||||||
|
//
|
||||||
|
// State transition: any → RolledBack with LastError="manual rollback".
|
||||||
func Rollback(args []string) error {
|
func Rollback(args []string) error {
|
||||||
opts := parseOpts(args)
|
opts := parseOpts(args)
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
env := opts.NewBootEnv()
|
||||||
|
|
||||||
|
st, err := state.Load(opts.StatePath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("state file unreadable, starting fresh", "error", err)
|
||||||
|
st = state.New()
|
||||||
|
}
|
||||||
|
|
||||||
activeSlot, err := env.ActiveSlot()
|
activeSlot, err := env.ActiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -26,9 +34,14 @@ func Rollback(args []string) error {
|
|||||||
slog.Info("forcing rollback", "from", activeSlot, "to", passiveSlot)
|
slog.Info("forcing rollback", "from", activeSlot, "to", passiveSlot)
|
||||||
|
|
||||||
if err := env.ForceRollback(); err != nil {
|
if err := env.ForceRollback(); err != nil {
|
||||||
|
_ = st.RecordError(opts.StatePath, fmt.Errorf("rollback failed: %w", err))
|
||||||
return fmt.Errorf("rollback failed: %w", err)
|
return fmt.Errorf("rollback failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := st.Transition(opts.StatePath, state.PhaseRolledBack, "", "manual rollback"); err != nil {
|
||||||
|
slog.Warn("state transition failed", "phase", state.PhaseRolledBack, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Printf("Rolled back: %s → %s\n", activeSlot, passiveSlot)
|
fmt.Printf("Rolled back: %s → %s\n", activeSlot, passiveSlot)
|
||||||
fmt.Println("Reboot to complete rollback.")
|
fmt.Println("Reboot to complete rollback.")
|
||||||
|
|
||||||
|
|||||||
@@ -1,43 +1,104 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// statusReport is the JSON-emitted shape of `kubesolo-update status --json`.
|
||||||
|
// Combines the bootloader-level A/B view with the update-agent state machine.
|
||||||
|
type statusReport struct {
|
||||||
|
ActiveSlot string `json:"active_slot"`
|
||||||
|
PassiveSlot string `json:"passive_slot"`
|
||||||
|
BootCounter int `json:"boot_counter"`
|
||||||
|
BootSuccess bool `json:"boot_success"`
|
||||||
|
State *state.UpdateState `json:"state"`
|
||||||
|
}
|
||||||
|
|
||||||
// Status displays the current A/B slot configuration and boot state.
|
// Status displays the current A/B slot configuration and boot state.
|
||||||
|
// With --json, emits the full state report to stdout for orchestration
|
||||||
|
// tooling.
|
||||||
func Status(args []string) error {
|
func Status(args []string) error {
|
||||||
opts := parseOpts(args)
|
opts := parseOpts(args)
|
||||||
env := grubenv.New(opts.GrubenvPath)
|
env := opts.NewBootEnv()
|
||||||
|
|
||||||
vars, err := env.ReadAll()
|
activeSlot, err := env.ActiveSlot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("reading GRUB environment: %w", err)
|
return fmt.Errorf("reading active slot: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
activeSlot := vars["active_slot"]
|
passiveSlot, err := env.PassiveSlot()
|
||||||
bootCounter := vars["boot_counter"]
|
if err != nil {
|
||||||
bootSuccess := vars["boot_success"]
|
return fmt.Errorf("reading passive slot: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
passiveSlot := "B"
|
bootCounter, err := env.BootCounter()
|
||||||
if activeSlot == "B" {
|
if err != nil {
|
||||||
passiveSlot = "A"
|
return fmt.Errorf("reading boot counter: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
bootSuccess, err := env.BootSuccess()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("reading boot success: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// State file is non-fatal: present means we have an update lifecycle
|
||||||
|
// recorded; absent means no update has run yet.
|
||||||
|
st, _ := state.Load(opts.StatePath)
|
||||||
|
|
||||||
|
if opts.JSON {
|
||||||
|
report := statusReport{
|
||||||
|
ActiveSlot: activeSlot,
|
||||||
|
PassiveSlot: passiveSlot,
|
||||||
|
BootCounter: bootCounter,
|
||||||
|
BootSuccess: bootSuccess,
|
||||||
|
State: st,
|
||||||
|
}
|
||||||
|
enc := json.NewEncoder(os.Stdout)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
return enc.Encode(report)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("KubeSolo OS — A/B Partition Status")
|
fmt.Println("KubeSolo OS — A/B Partition Status")
|
||||||
fmt.Println("───────────────────────────────────")
|
fmt.Println("───────────────────────────────────")
|
||||||
fmt.Printf(" Active slot: %s\n", activeSlot)
|
fmt.Printf(" Active slot: %s\n", activeSlot)
|
||||||
fmt.Printf(" Passive slot: %s\n", passiveSlot)
|
fmt.Printf(" Passive slot: %s\n", passiveSlot)
|
||||||
fmt.Printf(" Boot counter: %s\n", bootCounter)
|
fmt.Printf(" Boot counter: %d\n", bootCounter)
|
||||||
fmt.Printf(" Boot success: %s\n", bootSuccess)
|
if bootSuccess {
|
||||||
|
fmt.Printf(" Boot success: 1\n")
|
||||||
|
} else {
|
||||||
|
fmt.Printf(" Boot success: 0\n")
|
||||||
|
}
|
||||||
|
|
||||||
if bootSuccess == "1" {
|
if bootSuccess {
|
||||||
fmt.Println("\n ✓ System is healthy (boot confirmed)")
|
fmt.Println("\n ✓ System is healthy (boot confirmed)")
|
||||||
} else if bootCounter == "0" {
|
} else if bootCounter == 0 {
|
||||||
fmt.Println("\n ✗ Boot counter exhausted — rollback will occur on next reboot")
|
fmt.Println("\n ✗ Boot counter exhausted — rollback will occur on next reboot")
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("\n ⚠ Boot pending verification (%s attempts remaining)\n", bootCounter)
|
fmt.Printf("\n ⚠ Boot pending verification (%d attempts remaining)\n", bootCounter)
|
||||||
|
}
|
||||||
|
|
||||||
|
if st != nil && st.Phase != state.PhaseIdle {
|
||||||
|
fmt.Println("\nUpdate Lifecycle")
|
||||||
|
fmt.Println("───────────────────────────────────")
|
||||||
|
fmt.Printf(" Phase: %s\n", st.Phase)
|
||||||
|
if st.FromVersion != "" {
|
||||||
|
fmt.Printf(" From version: %s\n", st.FromVersion)
|
||||||
|
}
|
||||||
|
if st.ToVersion != "" {
|
||||||
|
fmt.Printf(" To version: %s\n", st.ToVersion)
|
||||||
|
}
|
||||||
|
if !st.StartedAt.IsZero() {
|
||||||
|
fmt.Printf(" Started: %s\n", st.StartedAt.Format("2006-01-02 15:04:05 MST"))
|
||||||
|
}
|
||||||
|
fmt.Printf(" Updated: %s\n", st.UpdatedAt.Format("2006-01-02 15:04:05 MST"))
|
||||||
|
fmt.Printf(" Attempts: %d\n", st.AttemptCount)
|
||||||
|
if st.LastError != "" {
|
||||||
|
fmt.Printf(" Last error: %s\n", st.LastError)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
module github.com/portainer/kubesolo-os/update
|
module github.com/portainer/kubesolo-os/update
|
||||||
|
|
||||||
go 1.25.5
|
go 1.25.5
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||||
|
github.com/opencontainers/image-spec v1.1.1 // indirect
|
||||||
|
golang.org/x/sync v0.14.0 // indirect
|
||||||
|
oras.land/oras-go/v2 v2.6.0 // indirect
|
||||||
|
)
|
||||||
|
|||||||
8
update/go.sum
Normal file
8
update/go.sum
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||||
|
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||||
|
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
|
||||||
|
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
|
||||||
|
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
|
||||||
|
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||||
|
oras.land/oras-go/v2 v2.6.0 h1:X4ELRsiGkrbeox69+9tzTu492FMUu7zJQW6eJU+I2oc=
|
||||||
|
oras.land/oras-go/v2 v2.6.0/go.mod h1:magiQDfG6H1O9APp+rOsvCPcW1GD2MM7vgnKY0Y+u1o=
|
||||||
@@ -78,15 +78,28 @@ Commands:
|
|||||||
metrics Start Prometheus-compatible metrics HTTP server
|
metrics Start Prometheus-compatible metrics HTTP server
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--server URL Update server URL (default: from /etc/kubesolo/update.conf)
|
--server URL HTTP update server (mutually exclusive with --registry)
|
||||||
--grubenv PATH Path to grubenv file (default: /boot/grub/grubenv)
|
--registry REPO OCI registry repository, e.g. ghcr.io/portainer/kubesolo-os
|
||||||
--timeout SECS Health check timeout in seconds (default: 120)
|
(mutually exclusive with --server)
|
||||||
--pubkey PATH Ed25519 public key for signature verification (optional)
|
--tag TAG OCI tag to pull (default: channel name, then "stable")
|
||||||
|
--conf PATH update.conf path (default: /etc/kubesolo/update.conf)
|
||||||
|
--state PATH Update state file (default: /var/lib/kubesolo/update/state.json)
|
||||||
|
--channel NAME Update channel (default: "stable", or value from update.conf)
|
||||||
|
--maintenance-window HH:MM-HH:MM local time window; apply refuses outside it
|
||||||
|
--force Bypass maintenance-window check
|
||||||
|
--grubenv PATH Path to grubenv file (default: /boot/grub/grubenv)
|
||||||
|
--timeout SECS Health check timeout in seconds (default: 120)
|
||||||
|
--pubkey PATH Ed25519 public key for signature verification (optional)
|
||||||
|
--healthcheck-url URL Optional GET probe in healthcheck; 200 = pass
|
||||||
|
--auto-rollback-after N healthcheck: rollback after N consecutive failures
|
||||||
|
--kube-system-settle N healthcheck: require kube-system pods Running ≥ N seconds
|
||||||
|
--json For 'status': emit JSON instead of human-readable output
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
kubesolo-update check --server https://updates.example.com
|
kubesolo-update apply --server https://updates.example.com
|
||||||
kubesolo-update apply --server https://updates.example.com --pubkey /etc/kubesolo/update-pubkey.hex
|
kubesolo-update apply --registry ghcr.io/portainer/kubesolo-os --tag stable
|
||||||
|
kubesolo-update apply --force # uses /etc/kubesolo/update.conf
|
||||||
kubesolo-update healthcheck
|
kubesolo-update healthcheck
|
||||||
kubesolo-update status
|
kubesolo-update status --json
|
||||||
`)
|
`)
|
||||||
}
|
}
|
||||||
|
|||||||
27
update/pkg/bootenv/bootenv.go
Normal file
27
update/pkg/bootenv/bootenv.go
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Package bootenv provides a platform-independent interface for managing
// A/B boot environments. It abstracts GRUB (x86_64) and RPi firmware
// (ARM64) behind a common interface.
package bootenv

// BootEnv provides read/write access to A/B boot environment variables.
// Implementations persist state in platform-specific files (a grubenv
// block, or the RPi autoboot.txt plus a boot-status sidecar file).
type BootEnv interface {
	// ActiveSlot returns the currently active boot slot ("A" or "B").
	ActiveSlot() (string, error)
	// PassiveSlot returns the currently passive boot slot (the one not
	// returned by ActiveSlot).
	PassiveSlot() (string, error)
	// BootCounter returns the current boot counter value.
	BootCounter() (int, error)
	// BootSuccess returns whether the last boot was marked successful.
	BootSuccess() (bool, error)
	// MarkBootSuccess marks the current boot as successful.
	MarkBootSuccess() error
	// ActivateSlot switches the active boot slot and resets the counter.
	ActivateSlot(slot string) error
	// ForceRollback switches to the other slot immediately.
	ForceRollback() error
}

// SlotA and SlotB are the two A/B slot identifiers used throughout the
// update agent.
const (
	SlotA = "A"
	SlotB = "B"
)
|
||||||
533
update/pkg/bootenv/bootenv_test.go
Normal file
533
update/pkg/bootenv/bootenv_test.go
Normal file
@@ -0,0 +1,533 @@
|
|||||||
|
package bootenv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// createTestGrubenv writes a properly formatted 1024-byte grubenv file.
|
||||||
|
func createTestGrubenv(t *testing.T, dir string, vars map[string]string) string {
|
||||||
|
t.Helper()
|
||||||
|
path := filepath.Join(dir, "grubenv")
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("# GRUB Environment Block\n")
|
||||||
|
for k, v := range vars {
|
||||||
|
sb.WriteString(k + "=" + v + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
content := sb.String()
|
||||||
|
padding := 1024 - len(content)
|
||||||
|
if padding > 0 {
|
||||||
|
content += strings.Repeat("#", padding)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TestGRUBActiveSlot verifies ActiveSlot reads the correct value.
func TestGRUBActiveSlot(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "1",
	})

	env := NewGRUB(path)
	slot, err := env.ActiveSlot()
	if err != nil {
		t.Fatal(err)
	}
	if slot != "A" {
		t.Errorf("expected A, got %s", slot)
	}
}

// TestGRUBPassiveSlot verifies PassiveSlot returns the opposite slot.
func TestGRUBPassiveSlot(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "0",
	})

	env := NewGRUB(path)
	passive, err := env.PassiveSlot()
	if err != nil {
		t.Fatal(err)
	}
	if passive != "B" {
		t.Errorf("expected B, got %s", passive)
	}
}

// TestGRUBBootCounter verifies BootCounter reads the correct value.
func TestGRUBBootCounter(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "2",
		"boot_success": "0",
	})

	env := NewGRUB(path)
	counter, err := env.BootCounter()
	if err != nil {
		t.Fatal(err)
	}
	if counter != 2 {
		t.Errorf("expected 2, got %d", counter)
	}
}

// TestGRUBBootSuccess verifies BootSuccess reads the correct value.
func TestGRUBBootSuccess(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "1",
	})

	env := NewGRUB(path)
	success, err := env.BootSuccess()
	if err != nil {
		t.Fatal(err)
	}
	if !success {
		t.Error("expected true, got false")
	}
}

// TestGRUBMarkBootSuccess verifies marking boot as successful.
// MarkBootSuccess must both set boot_success=1 and reset the counter to 3.
func TestGRUBMarkBootSuccess(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "B",
		"boot_counter": "1",
		"boot_success": "0",
	})

	env := NewGRUB(path)
	if err := env.MarkBootSuccess(); err != nil {
		t.Fatal(err)
	}

	success, err := env.BootSuccess()
	if err != nil {
		t.Fatal(err)
	}
	if !success {
		t.Error("expected boot_success=true after MarkBootSuccess")
	}

	counter, err := env.BootCounter()
	if err != nil {
		t.Fatal(err)
	}
	if counter != 3 {
		t.Errorf("expected boot_counter=3 after MarkBootSuccess, got %d", counter)
	}
}

// TestGRUBActivateSlot verifies slot activation sets correct state:
// counter reset and boot_success cleared so the new slot boots as a trial.
func TestGRUBActivateSlot(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "1",
	})

	env := NewGRUB(path)
	if err := env.ActivateSlot("B"); err != nil {
		t.Fatal(err)
	}

	slot, _ := env.ActiveSlot()
	if slot != "B" {
		t.Errorf("expected B, got %s", slot)
	}

	counter, _ := env.BootCounter()
	if counter != 3 {
		t.Errorf("expected counter=3, got %d", counter)
	}

	success, _ := env.BootSuccess()
	if success {
		t.Error("expected boot_success=false after ActivateSlot")
	}
}

// TestGRUBForceRollback verifies rollback switches to passive slot.
func TestGRUBForceRollback(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "1",
	})

	env := NewGRUB(path)
	if err := env.ForceRollback(); err != nil {
		t.Fatal(err)
	}

	slot, _ := env.ActiveSlot()
	if slot != "B" {
		t.Errorf("expected B after rollback from A, got %s", slot)
	}
}

// TestGRUBSlotCycling verifies A->B->A slot switching.
func TestGRUBSlotCycling(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "1",
	})

	env := NewGRUB(path)

	// A -> B
	if err := env.ActivateSlot("B"); err != nil {
		t.Fatal(err)
	}
	slot, _ := env.ActiveSlot()
	if slot != "B" {
		t.Fatalf("expected B, got %s", slot)
	}

	// B -> A
	if err := env.ActivateSlot("A"); err != nil {
		t.Fatal(err)
	}
	slot, _ = env.ActiveSlot()
	if slot != "A" {
		t.Fatalf("expected A, got %s", slot)
	}
}

// TestGRUBActivateInvalidSlot verifies invalid slot is rejected.
func TestGRUBActivateInvalidSlot(t *testing.T) {
	dir := t.TempDir()
	path := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "0",
	})

	env := NewGRUB(path)
	if err := env.ActivateSlot("C"); err == nil {
		t.Fatal("expected error for invalid slot")
	}
}
|
||||||
|
|
||||||
|
// TestRPiActiveSlot verifies ActiveSlot reads from autoboot.txt.
// Partition 2 in the [all] section maps to slot A.
func TestRPiActiveSlot(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, false)

	env := NewRPi(dir)
	slot, err := env.ActiveSlot()
	if err != nil {
		t.Fatal(err)
	}
	if slot != "A" {
		t.Errorf("expected A (partition 2), got %s", slot)
	}
}

// TestRPiActiveSlotB verifies slot B with partition 3.
func TestRPiActiveSlotB(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 3, 2, 3, true)

	env := NewRPi(dir)
	slot, err := env.ActiveSlot()
	if err != nil {
		t.Fatal(err)
	}
	if slot != "B" {
		t.Errorf("expected B (partition 3), got %s", slot)
	}
}

// TestRPiPassiveSlot verifies passive slot is opposite of active.
func TestRPiPassiveSlot(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, false)

	env := NewRPi(dir)
	passive, err := env.PassiveSlot()
	if err != nil {
		t.Fatal(err)
	}
	if passive != "B" {
		t.Errorf("expected B, got %s", passive)
	}
}

// TestRPiBootCounter verifies counter is read from status file.
func TestRPiBootCounter(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 2, false)

	env := NewRPi(dir)
	counter, err := env.BootCounter()
	if err != nil {
		t.Fatal(err)
	}
	if counter != 2 {
		t.Errorf("expected 2, got %d", counter)
	}
}

// TestRPiBootCounterMissingFile verifies default when status file is absent.
func TestRPiBootCounterMissingFile(t *testing.T) {
	dir := t.TempDir()
	// Only create autoboot.txt, no boot-status
	autoboot := "[all]\ntryboot_a_b=1\nboot_partition=2\n[tryboot]\nboot_partition=3\n"
	if err := os.WriteFile(filepath.Join(dir, "autoboot.txt"), []byte(autoboot), 0o644); err != nil {
		t.Fatal(err)
	}

	env := NewRPi(dir)
	counter, err := env.BootCounter()
	if err != nil {
		t.Fatal(err)
	}
	if counter != 3 {
		t.Errorf("expected default counter 3, got %d", counter)
	}
}

// TestRPiBootSuccess verifies success is read from status file.
func TestRPiBootSuccess(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, true)

	env := NewRPi(dir)
	success, err := env.BootSuccess()
	if err != nil {
		t.Fatal(err)
	}
	if !success {
		t.Error("expected true, got false")
	}
}

// TestRPiMarkBootSuccess verifies marking boot success updates both files:
// the status file (counter reset, success=1) and autoboot.txt ([all]
// pinned to the active slot's partition).
func TestRPiMarkBootSuccess(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 1, false)

	env := NewRPi(dir)
	if err := env.MarkBootSuccess(); err != nil {
		t.Fatal(err)
	}

	// Active slot should still be A
	slot, _ := env.ActiveSlot()
	if slot != "A" {
		t.Errorf("expected active slot A, got %s", slot)
	}

	// Boot success should be true
	success, _ := env.BootSuccess()
	if !success {
		t.Error("expected boot_success=true after MarkBootSuccess")
	}

	// Counter should be reset to 3
	counter, _ := env.BootCounter()
	if counter != 3 {
		t.Errorf("expected counter=3 after MarkBootSuccess, got %d", counter)
	}

	// [all] boot_partition should be 2 (slot A, making it permanent)
	data, _ := os.ReadFile(filepath.Join(dir, "autoboot.txt"))
	if !strings.Contains(string(data), "boot_partition=2") {
		t.Error("expected [all] boot_partition=2 after MarkBootSuccess")
	}
}

// TestRPiActivateSlot verifies slot activation updates tryboot and status.
func TestRPiActivateSlot(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, true)

	env := NewRPi(dir)
	if err := env.ActivateSlot("B"); err != nil {
		t.Fatal(err)
	}

	// [tryboot] should now point to partition 3 (slot B)
	data, _ := os.ReadFile(filepath.Join(dir, "autoboot.txt"))
	content := string(data)
	// Find [tryboot] section and check boot_partition
	idx := strings.Index(content, "[tryboot]")
	if idx < 0 {
		t.Fatal("missing [tryboot] section")
	}
	trybootSection := content[idx:]
	if !strings.Contains(trybootSection, "boot_partition=3") {
		t.Errorf("expected [tryboot] boot_partition=3, got: %s", trybootSection)
	}

	// Status should be reset
	success, _ := env.BootSuccess()
	if success {
		t.Error("expected boot_success=false after ActivateSlot")
	}
	counter, _ := env.BootCounter()
	if counter != 3 {
		t.Errorf("expected counter=3, got %d", counter)
	}
}

// TestRPiActivateInvalidSlot verifies invalid slot is rejected.
func TestRPiActivateInvalidSlot(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, false)

	env := NewRPi(dir)
	if err := env.ActivateSlot("C"); err == nil {
		t.Fatal("expected error for invalid slot")
	}
}

// TestRPiForceRollback verifies rollback swaps the active slot.
func TestRPiForceRollback(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, true)

	env := NewRPi(dir)
	if err := env.ForceRollback(); err != nil {
		t.Fatal(err)
	}

	// [all] should now point to partition 3 (slot B)
	slot, _ := env.ActiveSlot()
	if slot != "B" {
		t.Errorf("expected B after rollback from A, got %s", slot)
	}

	// Success should be false
	success, _ := env.BootSuccess()
	if success {
		t.Error("expected boot_success=false after ForceRollback")
	}
}

// TestRPiSlotCycling verifies A->B->A slot switching works.
func TestRPiSlotCycling(t *testing.T) {
	dir := t.TempDir()
	createTestAutobootFiles(t, dir, 2, 3, 3, true)

	env := NewRPi(dir)

	// Rollback A -> B
	if err := env.ForceRollback(); err != nil {
		t.Fatal(err)
	}
	slot, _ := env.ActiveSlot()
	if slot != "B" {
		t.Fatalf("expected B, got %s", slot)
	}

	// Rollback B -> A
	if err := env.ForceRollback(); err != nil {
		t.Fatal(err)
	}
	slot, _ = env.ActiveSlot()
	if slot != "A" {
		t.Fatalf("expected A, got %s", slot)
	}
}
|
||||||
|
|
||||||
|
// TestInterfaceCompliance verifies both implementations satisfy BootEnv.
// Both fixtures encode the same logical state (slot A active, counter 3,
// success=false) and must report it identically through the interface.
func TestInterfaceCompliance(t *testing.T) {
	dir := t.TempDir()
	grubPath := createTestGrubenv(t, dir, map[string]string{
		"active_slot":  "A",
		"boot_counter": "3",
		"boot_success": "0",
	})

	rpiDir := t.TempDir()
	createTestAutobootFiles(t, rpiDir, 2, 3, 3, false)

	impls := map[string]BootEnv{
		"grub": NewGRUB(grubPath),
		"rpi":  NewRPi(rpiDir),
	}

	for name, env := range impls {
		t.Run(name, func(t *testing.T) {
			slot, err := env.ActiveSlot()
			if err != nil {
				t.Fatalf("ActiveSlot: %v", err)
			}
			if slot != "A" {
				t.Errorf("ActiveSlot: expected A, got %s", slot)
			}

			passive, err := env.PassiveSlot()
			if err != nil {
				t.Fatalf("PassiveSlot: %v", err)
			}
			if passive != "B" {
				t.Errorf("PassiveSlot: expected B, got %s", passive)
			}

			counter, err := env.BootCounter()
			if err != nil {
				t.Fatalf("BootCounter: %v", err)
			}
			if counter != 3 {
				t.Errorf("BootCounter: expected 3, got %d", counter)
			}

			success, err := env.BootSuccess()
			if err != nil {
				t.Fatalf("BootSuccess: %v", err)
			}
			if success {
				t.Error("BootSuccess: expected false")
			}
		})
	}
}
|
||||||
|
|
||||||
|
// createTestAutobootFiles is a helper that writes both autoboot.txt and boot-status.
|
||||||
|
func createTestAutobootFiles(t *testing.T, dir string, allPart, trybootPart, counter int, success bool) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
autoboot := "[all]\ntryboot_a_b=1\nboot_partition=" + strconv.Itoa(allPart) + "\n"
|
||||||
|
autoboot += "[tryboot]\nboot_partition=" + strconv.Itoa(trybootPart) + "\n"
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "autoboot.txt"), []byte(autoboot), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
successVal := "0"
|
||||||
|
if success {
|
||||||
|
successVal = "1"
|
||||||
|
}
|
||||||
|
status := "boot_counter=" + strconv.Itoa(counter) + "\nboot_success=" + successVal + "\n"
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "boot-status"), []byte(status), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
23
update/pkg/bootenv/grub.go
Normal file
23
update/pkg/bootenv/grub.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package bootenv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/grubenv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GRUBEnv implements BootEnv using GRUB environment variables.
|
||||||
|
type GRUBEnv struct {
|
||||||
|
env *grubenv.Env
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewGRUB creates a new GRUB-based BootEnv.
|
||||||
|
func NewGRUB(path string) BootEnv {
|
||||||
|
return &GRUBEnv{env: grubenv.New(path)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *GRUBEnv) ActiveSlot() (string, error) { return g.env.ActiveSlot() }
|
||||||
|
func (g *GRUBEnv) PassiveSlot() (string, error) { return g.env.PassiveSlot() }
|
||||||
|
func (g *GRUBEnv) BootCounter() (int, error) { return g.env.BootCounter() }
|
||||||
|
func (g *GRUBEnv) BootSuccess() (bool, error) { return g.env.BootSuccess() }
|
||||||
|
func (g *GRUBEnv) MarkBootSuccess() error { return g.env.MarkBootSuccess() }
|
||||||
|
func (g *GRUBEnv) ActivateSlot(slot string) error { return g.env.ActivateSlot(slot) }
|
||||||
|
func (g *GRUBEnv) ForceRollback() error { return g.env.ForceRollback() }
|
||||||
267
update/pkg/bootenv/rpi.go
Normal file
267
update/pkg/bootenv/rpi.go
Normal file
@@ -0,0 +1,267 @@
|
|||||||
|
package bootenv
|
||||||
|
|
||||||
|
import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
)
|
||||||
|
|
||||||
|
const (
	// RPi partition numbers: slot A = partition 2, slot B = partition 3.
	rpiSlotAPartition = 2
	rpiSlotBPartition = 3

	// defaultBootCounter is the counter value written whenever the status
	// file is reset, and the value assumed when no status file exists.
	defaultBootCounter = 3
)

// RPiEnv implements BootEnv using Raspberry Pi firmware autoboot.txt.
// Slot state lives in two files: autoboot.txt ([all]/[tryboot] sections)
// and a "boot-status" key=value sidecar for the counter and success flag.
type RPiEnv struct {
	autobootPath string // path to autoboot.txt
	statusPath   string // path to boot-status file
}

// NewRPi creates a new RPi-based BootEnv.
// dir is the directory containing autoboot.txt (typically the boot control
// partition mount point).
func NewRPi(dir string) BootEnv {
	return &RPiEnv{
		autobootPath: filepath.Join(dir, "autoboot.txt"),
		statusPath:   filepath.Join(dir, "boot-status"),
	}
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) ActiveSlot() (string, error) {
|
||||||
|
partNum, err := r.readAllBootPartition()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("reading active slot: %w", err)
|
||||||
|
}
|
||||||
|
return partNumToSlot(partNum)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) PassiveSlot() (string, error) {
|
||||||
|
active, err := r.ActiveSlot()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if active == SlotA {
|
||||||
|
return SlotB, nil
|
||||||
|
}
|
||||||
|
return SlotA, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) BootCounter() (int, error) {
|
||||||
|
status, err := r.readStatus()
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
val, ok := status["boot_counter"]
|
||||||
|
if !ok {
|
||||||
|
return defaultBootCounter, nil
|
||||||
|
}
|
||||||
|
n, err := strconv.Atoi(val)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("invalid boot_counter %q: %w", val, err)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) BootSuccess() (bool, error) {
|
||||||
|
status, err := r.readStatus()
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return status["boot_success"] == "1", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) MarkBootSuccess() error {
|
||||||
|
// Make the current slot permanent by updating [all] boot_partition
|
||||||
|
active, err := r.ActiveSlot()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marking boot success: %w", err)
|
||||||
|
}
|
||||||
|
partNum := slotToPartNum(active)
|
||||||
|
if err := r.writeAllBootPartition(partNum); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return r.writeStatus(defaultBootCounter, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) ActivateSlot(slot string) error {
|
||||||
|
if slot != SlotA && slot != SlotB {
|
||||||
|
return fmt.Errorf("invalid slot: %q (must be A or B)", slot)
|
||||||
|
}
|
||||||
|
partNum := slotToPartNum(slot)
|
||||||
|
// Update [tryboot] to point to the new slot
|
||||||
|
if err := r.writeTrybootPartition(partNum); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return r.writeStatus(defaultBootCounter, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RPiEnv) ForceRollback() error {
|
||||||
|
passive, err := r.PassiveSlot()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Swap the [all] boot_partition to the other slot
|
||||||
|
partNum := slotToPartNum(passive)
|
||||||
|
if err := r.writeAllBootPartition(partNum); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := r.writeTrybootPartition(partNum); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return r.writeStatus(defaultBootCounter, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readAllBootPartition reads the boot_partition value from the [all] section.
|
||||||
|
func (r *RPiEnv) readAllBootPartition() (int, error) {
|
||||||
|
sections, err := r.parseAutoboot()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
val, ok := sections["all"]["boot_partition"]
|
||||||
|
if !ok {
|
||||||
|
return 0, fmt.Errorf("boot_partition not found in [all] section")
|
||||||
|
}
|
||||||
|
return strconv.Atoi(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeAllBootPartition updates the [all] boot_partition value.
|
||||||
|
func (r *RPiEnv) writeAllBootPartition(partNum int) error {
|
||||||
|
sections, err := r.parseAutoboot()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if sections["all"] == nil {
|
||||||
|
sections["all"] = make(map[string]string)
|
||||||
|
}
|
||||||
|
sections["all"]["boot_partition"] = strconv.Itoa(partNum)
|
||||||
|
return r.writeAutoboot(sections)
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeTrybootPartition updates the [tryboot] boot_partition value.
|
||||||
|
func (r *RPiEnv) writeTrybootPartition(partNum int) error {
|
||||||
|
sections, err := r.parseAutoboot()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if sections["tryboot"] == nil {
|
||||||
|
sections["tryboot"] = make(map[string]string)
|
||||||
|
}
|
||||||
|
sections["tryboot"]["boot_partition"] = strconv.Itoa(partNum)
|
||||||
|
return r.writeAutoboot(sections)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseAutoboot reads autoboot.txt into a map of section -> key=value pairs.
|
||||||
|
func (r *RPiEnv) parseAutoboot() (map[string]map[string]string, error) {
|
||||||
|
data, err := os.ReadFile(r.autobootPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("reading autoboot.txt: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sections := make(map[string]map[string]string)
|
||||||
|
currentSection := ""
|
||||||
|
|
||||||
|
for _, line := range strings.Split(string(data), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
|
||||||
|
currentSection = line[1 : len(line)-1]
|
||||||
|
if sections[currentSection] == nil {
|
||||||
|
sections[currentSection] = make(map[string]string)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(line, "=", 2)
|
||||||
|
if len(parts) == 2 && currentSection != "" {
|
||||||
|
sections[currentSection][strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sections, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeAutoboot writes sections back to autoboot.txt.
|
||||||
|
// Section order: [all] first, then [tryboot].
|
||||||
|
func (r *RPiEnv) writeAutoboot(sections map[string]map[string]string) error {
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
// Write [all] section first
|
||||||
|
if all, ok := sections["all"]; ok {
|
||||||
|
sb.WriteString("[all]\n")
|
||||||
|
for k, v := range all {
|
||||||
|
sb.WriteString(k + "=" + v + "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write [tryboot] section
|
||||||
|
if tryboot, ok := sections["tryboot"]; ok {
|
||||||
|
sb.WriteString("[tryboot]\n")
|
||||||
|
for k, v := range tryboot {
|
||||||
|
sb.WriteString(k + "=" + v + "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.WriteFile(r.autobootPath, []byte(sb.String()), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readStatus reads the boot-status key=value file.
|
||||||
|
func (r *RPiEnv) readStatus() (map[string]string, error) {
|
||||||
|
data, err := os.ReadFile(r.statusPath)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
// Return defaults if status file doesn't exist yet
|
||||||
|
return map[string]string{
|
||||||
|
"boot_counter": strconv.Itoa(defaultBootCounter),
|
||||||
|
"boot_success": "0",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("reading boot-status: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status := make(map[string]string)
|
||||||
|
for _, line := range strings.Split(string(data), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(line, "=", 2)
|
||||||
|
if len(parts) == 2 {
|
||||||
|
status[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return status, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeStatus writes boot_counter and boot_success to the status file.
|
||||||
|
func (r *RPiEnv) writeStatus(counter int, success bool) error {
|
||||||
|
successVal := "0"
|
||||||
|
if success {
|
||||||
|
successVal = "1"
|
||||||
|
}
|
||||||
|
content := fmt.Sprintf("boot_counter=%d\nboot_success=%s\n", counter, successVal)
|
||||||
|
return os.WriteFile(r.statusPath, []byte(content), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
func partNumToSlot(partNum int) (string, error) {
|
||||||
|
switch partNum {
|
||||||
|
case rpiSlotAPartition:
|
||||||
|
return SlotA, nil
|
||||||
|
case rpiSlotBPartition:
|
||||||
|
return SlotB, nil
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unknown partition number %d (expected %d or %d)", partNum, rpiSlotAPartition, rpiSlotBPartition)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func slotToPartNum(slot string) int {
|
||||||
|
if slot == SlotB {
|
||||||
|
return rpiSlotBPartition
|
||||||
|
}
|
||||||
|
return rpiSlotAPartition
|
||||||
|
}
|
||||||
105
update/pkg/config/config.go
Normal file
105
update/pkg/config/config.go
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
// Package config parses /etc/kubesolo/update.conf — the persistent
|
||||||
|
// configuration for the update agent. Each line is "key = value"; blank
|
||||||
|
// lines and "#"-prefixed comments are ignored. Unknown keys are tolerated
|
||||||
|
// (forward compatibility).
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
//
|
||||||
|
// # Where to look for updates
|
||||||
|
// server = https://updates.kubesolo.example.com
|
||||||
|
// channel = stable
|
||||||
|
//
|
||||||
|
// # Only apply between 03:00 and 05:00 local time
|
||||||
|
// maintenance_window = 03:00-05:00
|
||||||
|
//
|
||||||
|
// pubkey = /etc/kubesolo/update-pubkey.hex
|
||||||
|
//
|
||||||
|
// The file is populated on first boot by cloud-init (see the cloud-init
|
||||||
|
// updates: block) and can be hand-edited afterwards.
|
||||||
|
package config
|
||||||
|
|
||||||
|
import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)
|
||||||
|
|
||||||
|
// DefaultPath is where update.conf lives on a live system.
const DefaultPath = "/etc/kubesolo/update.conf"

// Config holds the parsed update.conf values. Empty fields mean "not set" —
// the caller's defaults apply.
type Config struct {
	// Server is the update server base URL ("server" key).
	Server string
	// Channel is the release channel name, e.g. "stable" ("channel" key).
	Channel string
	// MaintenanceWindow is the raw window string, e.g. "03:00-05:00";
	// interpretation is left to the caller ("maintenance_window" key).
	MaintenanceWindow string
	// PubKey is the path to the Ed25519 public key used for update
	// signature verification ("pubkey" key).
	PubKey string
	// HealthcheckURL is an optional URL the healthcheck command will GET;
	// 200 = pass, anything else = fail.
	HealthcheckURL string
	// AutoRollbackAfter is the number of consecutive post-boot healthcheck
	// failures after which the agent will call Rollback automatically.
	// 0 = disabled (default).
	AutoRollbackAfter int
}
|
||||||
|
|
||||||
|
// Load reads and parses update.conf. A missing file returns an empty Config
|
||||||
|
// (not an error) — fresh systems before cloud-init has run.
|
||||||
|
func Load(path string) (*Config, error) {
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return &Config{}, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("open %s: %w", path, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
c := &Config{}
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
lineNo := 0
|
||||||
|
for scanner.Scan() {
|
||||||
|
lineNo++
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" || strings.HasPrefix(line, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
eq := strings.IndexByte(line, '=')
|
||||||
|
if eq < 0 {
|
||||||
|
return nil, fmt.Errorf("%s:%d: missing '=' in line: %q", path, lineNo, line)
|
||||||
|
}
|
||||||
|
key := strings.TrimSpace(line[:eq])
|
||||||
|
value := strings.TrimSpace(line[eq+1:])
|
||||||
|
switch key {
|
||||||
|
case "server":
|
||||||
|
c.Server = value
|
||||||
|
case "channel":
|
||||||
|
c.Channel = value
|
||||||
|
case "maintenance_window":
|
||||||
|
c.MaintenanceWindow = value
|
||||||
|
case "pubkey":
|
||||||
|
c.PubKey = value
|
||||||
|
case "healthcheck_url":
|
||||||
|
c.HealthcheckURL = value
|
||||||
|
case "auto_rollback_after":
|
||||||
|
// Parse a small integer. Non-numeric values are silently
|
||||||
|
// ignored (forward compat); zero disables the feature.
|
||||||
|
n := 0
|
||||||
|
for _, ch := range value {
|
||||||
|
if ch >= '0' && ch <= '9' {
|
||||||
|
n = n*10 + int(ch-'0')
|
||||||
|
} else {
|
||||||
|
n = 0
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.AutoRollbackAfter = n
|
||||||
|
}
|
||||||
|
// Unknown keys are silently ignored for forward compatibility.
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return nil, fmt.Errorf("read %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
117
update/pkg/config/config_test.go
Normal file
117
update/pkg/config/config_test.go
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writeConf writes content to a fresh update.conf inside a per-test temp
// directory and returns its path. Fails the test on any seed error.
func writeConf(t *testing.T, content string) string {
	t.Helper()
	path := filepath.Join(t.TempDir(), "update.conf")
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("seed: %v", err)
	}
	return path
}

// A missing config file must yield an empty (all-defaults) Config and no
// error — matches fresh systems before cloud-init has written the file.
func TestLoadMissingReturnsEmptyConfig(t *testing.T) {
	c, err := Load(filepath.Join(t.TempDir(), "does-not-exist.conf"))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if c == nil {
		t.Fatal("Load returned nil config")
	}
	if c.Server != "" || c.Channel != "" || c.MaintenanceWindow != "" || c.PubKey != "" {
		t.Errorf("expected empty config, got %+v", c)
	}
}

// All four core keys round-trip through Load.
func TestLoadAllFields(t *testing.T) {
	path := writeConf(t, `# comment line
server = https://updates.example.com
channel = stable
maintenance_window = 03:00-05:00
pubkey = /etc/kubesolo/pub.hex
`)
	c, err := Load(path)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	if c.Server != "https://updates.example.com" {
		t.Errorf("server: got %q", c.Server)
	}
	if c.Channel != "stable" {
		t.Errorf("channel: got %q", c.Channel)
	}
	if c.MaintenanceWindow != "03:00-05:00" {
		t.Errorf("maintenance_window: got %q", c.MaintenanceWindow)
	}
	if c.PubKey != "/etc/kubesolo/pub.hex" {
		t.Errorf("pubkey: got %q", c.PubKey)
	}
}

func TestLoadIgnoresUnknownKeys(t *testing.T) {
	// Unknown keys must not be an error — supports forward-compat config
	// fields added by newer agent versions.
	path := writeConf(t, `server = https://x
future_field = whatever
channel = beta
`)
	c, err := Load(path)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	if c.Server != "https://x" {
		t.Errorf("server: got %q", c.Server)
	}
	if c.Channel != "beta" {
		t.Errorf("channel: got %q", c.Channel)
	}
}

// Keys and values may carry surrounding whitespace; Load trims both sides.
func TestLoadStripsWhitespace(t *testing.T) {
	path := writeConf(t, " server = https://example \n channel=stable\n")
	c, err := Load(path)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	if c.Server != "https://example" {
		t.Errorf("server: got %q (whitespace not stripped?)", c.Server)
	}
	if c.Channel != "stable" {
		t.Errorf("channel: got %q", c.Channel)
	}
}

// Blank lines and "#" comment lines must be skipped entirely.
func TestLoadIgnoresBlankAndCommentLines(t *testing.T) {
	path := writeConf(t, `
# this is a comment

server = https://example
# indented comment
channel = stable

`)
	c, err := Load(path)
	if err != nil {
		t.Fatalf("load: %v", err)
	}
	if c.Server != "https://example" {
		t.Errorf("server: got %q", c.Server)
	}
}

func TestLoadRejectsMissingEquals(t *testing.T) {
	// "noEqualsHere" with no '=' is a syntax error worth surfacing — likely
	// indicates a corrupted config file.
	path := writeConf(t, `server = https://example
noEqualsHere
`)
	_, err := Load(path)
	if err == nil {
		t.Error("expected error on malformed line, got nil")
	}
}
|
||||||
60
update/pkg/config/version.go
Normal file
60
update/pkg/config/version.go
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CompareVersions orders two semver-ish version strings.
//
// Inputs may carry a leading "v" and a pre-release/build suffix ("-rc1",
// "+meta"), both of which are ignored; absent components default to 0, so
// "v1" == "1.0.0". The result is -1 when a < b, 0 when equal, +1 when
// a > b. An error is returned when either string cannot be parsed at all.
//
// Used by apply.go to enforce MinCompatibleVersion. Pre-release handling is
// deliberately naive — "v1.2.3-rc1" compares equal to "v1.2.3". Production
// releases are expected never to ship with a suffix; dev releases are the
// consumer's responsibility.
func CompareVersions(a, b string) (int, error) {
	va, err := parseVersion(a)
	if err != nil {
		return 0, fmt.Errorf("parse %q: %w", a, err)
	}
	vb, err := parseVersion(b)
	if err != nil {
		return 0, fmt.Errorf("parse %q: %w", b, err)
	}
	for i := range va {
		switch {
		case va[i] < vb[i]:
			return -1, nil
		case va[i] > vb[i]:
			return 1, nil
		}
	}
	return 0, nil
}

// parseVersion splits "v1.2.3[-suffix]" into its [major, minor, patch]
// numeric components. Missing components stay 0; non-numeric or negative
// components are errors.
func parseVersion(s string) ([3]int, error) {
	var v [3]int
	s = strings.TrimPrefix(strings.TrimSpace(s), "v")
	// Cut off any pre-release/build suffix: "1.2.3-rc1" -> "1.2.3".
	if i := strings.IndexAny(s, "-+"); i >= 0 {
		s = s[:i]
	}
	for i, part := range strings.SplitN(s, ".", 3) {
		n, err := strconv.Atoi(part)
		if err != nil {
			return v, fmt.Errorf("component %q not numeric", part)
		}
		if n < 0 {
			return v, fmt.Errorf("component %d negative", n)
		}
		v[i] = n
	}
	return v, nil
}
|
||||||
46
update/pkg/config/version_test.go
Normal file
46
update/pkg/config/version_test.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// TestCompareVersions covers ordering, 'v'-prefix tolerance, pre-release
// suffix handling, and missing-component defaulting in one table.
func TestCompareVersions(t *testing.T) {
	tests := []struct {
		a, b string
		want int
	}{
		{"v1.0.0", "v1.0.0", 0},
		{"1.0.0", "v1.0.0", 0}, // 'v' prefix optional
		{"v1.0.0", "v1.0.1", -1},
		{"v1.0.1", "v1.0.0", 1},
		{"v1.1.0", "v1.0.99", 1},
		{"v2.0.0", "v1.99.99", 1},
		{"v0.3.0-dev", "v0.3.0", 0}, // pre-release suffix ignored
		{"v0.2.5", "v0.3.0", -1},
		{"v0.3.0", "v0.2.999", 1},
		{"v1.2", "v1.2.0", 0}, // missing component defaults to 0
		{"v1", "v1.0.0", 0},
	}
	for _, tt := range tests {
		got, err := CompareVersions(tt.a, tt.b)
		if err != nil {
			t.Errorf("CompareVersions(%q, %q): %v", tt.a, tt.b, err)
			continue
		}
		if got != tt.want {
			t.Errorf("CompareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want)
		}
	}
}

// Unparseable version strings must be rejected with an error rather than
// silently compared as zero.
func TestCompareVersionsRejectsGarbage(t *testing.T) {
	bad := []string{
		"not-a-version",
		"v.1.2",
		"vabc",
		"",
	}
	for _, s := range bad {
		if _, err := CompareVersions(s, "v1.0.0"); err == nil {
			t.Errorf("CompareVersions(%q, ...) accepted, want error", s)
		}
	}
}
|
||||||
95
update/pkg/config/window.go
Normal file
95
update/pkg/config/window.go
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Window is a parsed maintenance-window expression. Times are minutes since
// midnight in the local timezone. When End < Start, the window wraps
// midnight (e.g. 23:00-01:00 means 23:00 today through 01:00 tomorrow).
//
// The zero value (Start == End == 0) means "always allowed" — used for
// the empty-string-meaning-no-window case.
type Window struct {
	Start int // minutes since midnight, [0, 1440)
	End   int // minutes since midnight, [0, 1440)

	// alwaysOpen distinguishes "no constraint" from the degenerate literal
	// 00:00-00:00 window (a zero-length same-instant window). Only
	// ParseWindow("") sets it.
	alwaysOpen bool
}

// AlwaysOpen reports whether this window imposes no constraint (it was
// parsed from the empty string).
func (w Window) AlwaysOpen() bool { return w.alwaysOpen }

// ParseWindow parses "HH:MM-HH:MM" into a Window. Empty input yields an
// AlwaysOpen window (no constraint). Surrounding whitespace is tolerated.
func ParseWindow(s string) (Window, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return Window{alwaysOpen: true}, nil
	}
	startStr, endStr, ok := strings.Cut(s, "-")
	if !ok {
		return Window{}, fmt.Errorf("maintenance window %q: expected HH:MM-HH:MM", s)
	}
	start, err := parseHHMM(strings.TrimSpace(startStr))
	if err != nil {
		return Window{}, fmt.Errorf("maintenance window %q: start: %w", s, err)
	}
	end, err := parseHHMM(strings.TrimSpace(endStr))
	if err != nil {
		return Window{}, fmt.Errorf("maintenance window %q: end: %w", s, err)
	}
	return Window{Start: start, End: end}, nil
}

// parseHHMM converts a single "HH:MM" token into minutes since midnight,
// rejecting out-of-range hours (>23) and minutes (>59).
func parseHHMM(s string) (int, error) {
	hh, mm, ok := strings.Cut(s, ":")
	if !ok {
		return 0, fmt.Errorf("%q: expected HH:MM", s)
	}
	h, err := strconv.Atoi(hh)
	if err != nil || h < 0 || h > 23 {
		return 0, fmt.Errorf("%q: invalid hour", s)
	}
	m, err := strconv.Atoi(mm)
	if err != nil || m < 0 || m > 59 {
		return 0, fmt.Errorf("%q: invalid minute", s)
	}
	return h*60 + m, nil
}

// Contains reports whether the given local time falls inside this window.
// AlwaysOpen windows contain every time.
func (w Window) Contains(t time.Time) bool {
	if w.alwaysOpen {
		return true
	}
	minute := t.Hour()*60 + t.Minute()
	switch {
	case w.Start == w.End:
		// Degenerate zero-length window: never matches.
		return false
	case w.Start < w.End:
		// Same-day window: [Start, End)
		return minute >= w.Start && minute < w.End
	default:
		// Wraps midnight: [Start, 1440) ∪ [0, End)
		return minute >= w.Start || minute < w.End
	}
}

// String renders the window in HH:MM-HH:MM form for display; an AlwaysOpen
// window renders as "always".
func (w Window) String() string {
	if w.alwaysOpen {
		return "always"
	}
	return fmt.Sprintf("%02d:%02d-%02d:%02d",
		w.Start/60, w.Start%60, w.End/60, w.End%60)
}
|
||||||
120
update/pkg/config/window_test.go
Normal file
120
update/pkg/config/window_test.go
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// at builds a time at hour:min on a fixed date — only the clock fields
// matter to Window.Contains.
func at(hour, min int) time.Time {
	return time.Date(2026, 1, 1, hour, min, 0, 0, time.UTC)
}

// Empty input must parse into an AlwaysOpen window that contains any time.
func TestParseWindowEmpty(t *testing.T) {
	w, err := ParseWindow("")
	if err != nil {
		t.Fatalf("empty window: %v", err)
	}
	if !w.AlwaysOpen() {
		t.Error("empty input should produce AlwaysOpen window")
	}
	if !w.Contains(at(3, 0)) {
		t.Error("AlwaysOpen window should contain any time")
	}
	if !w.Contains(at(23, 59)) {
		t.Error("AlwaysOpen window should contain end-of-day")
	}
}

// A non-wrapping window is [start, end): start inclusive, end exclusive.
func TestParseWindowSameDay(t *testing.T) {
	w, err := ParseWindow("03:00-05:00")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	tests := []struct {
		hour, min int
		want      bool
	}{
		{2, 59, false}, // just before
		{3, 0, true},   // start (inclusive)
		{4, 30, true},  // middle
		{4, 59, true},  // just before end
		{5, 0, false},  // end (exclusive)
		{15, 0, false}, // far outside
	}
	for _, tt := range tests {
		got := w.Contains(at(tt.hour, tt.min))
		if got != tt.want {
			t.Errorf("Contains(%02d:%02d) = %v, want %v", tt.hour, tt.min, got, tt.want)
		}
	}
}

// End < Start wraps midnight: 23:00 tonight through 01:00 tomorrow.
func TestParseWindowWrappingMidnight(t *testing.T) {
	w, err := ParseWindow("23:00-01:00")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	tests := []struct {
		hour, min int
		want      bool
	}{
		{22, 59, false}, // just before
		{23, 0, true},   // start (inclusive)
		{23, 30, true},  // night-before
		{0, 0, true},    // midnight
		{0, 30, true},   // early morning
		{0, 59, true},   // just before end
		{1, 0, false},   // end (exclusive)
		{12, 0, false},  // far outside (noon)
	}
	for _, tt := range tests {
		got := w.Contains(at(tt.hour, tt.min))
		if got != tt.want {
			t.Errorf("Contains(%02d:%02d) wrapping = %v, want %v", tt.hour, tt.min, got, tt.want)
		}
	}
}

func TestParseWindowDegenerateZeroLength(t *testing.T) {
	// 05:00-05:00 is a zero-length window — should never match. Different
	// from "always" (empty string).
	w, err := ParseWindow("05:00-05:00")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	if w.AlwaysOpen() {
		t.Error("05:00-05:00 must not be AlwaysOpen")
	}
	if w.Contains(at(5, 0)) {
		t.Error("zero-length window must not contain its own boundary")
	}
}

// Malformed expressions must be rejected with an error, never silently
// coerced into a window.
func TestParseWindowRejectsBadInput(t *testing.T) {
	bad := []string{
		"notatime",
		"03:00",        // no end
		"03:00-",       // empty end
		"03:00-05",     // missing minutes
		"24:00-05:00",  // hour out of range
		"03:60-05:00",  // minute out of range
		"abc:00-05:00", // non-numeric
	}
	for _, s := range bad {
		_, err := ParseWindow(s)
		if err == nil {
			t.Errorf("ParseWindow(%q) accepted, want error", s)
		}
	}
}

// String must round-trip a parsed window and render AlwaysOpen as "always".
func TestWindowString(t *testing.T) {
	w, _ := ParseWindow("03:05-05:45")
	if w.String() != "03:05-05:45" {
		t.Errorf("String = %q, want 03:05-05:45", w.String())
	}
	always, _ := ParseWindow("")
	if always.String() != "always" {
		t.Errorf("AlwaysOpen.String = %q, want 'always'", always.String())
	}
}
|
||||||
125
update/pkg/health/extended.go
Normal file
125
update/pkg/health/extended.go
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
package health
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// kubeSystemSettleSeconds is how long all kube-system pods must hold a
// Running phase before we consider the cluster genuinely up. Catches the
// "pod just started, will crash-loop in 5s" case.
const kubeSystemSettleSeconds = 30

// CheckKubeSystemReady verifies that every pod in the kube-system namespace
// is in Running phase and has been Running for at least settle. Returns
// true only when all pods pass; any failure mode (missing kubeconfig,
// kubectl error, malformed output, pod too young) conservatively reports
// false. settle defaults to 30s when zero.
//
// NOTE(review): the previous doc claimed a "(ready, error)" return, but the
// signature returns only bool — errors are folded into a false result.
func (c *Checker) CheckKubeSystemReady(settle time.Duration) bool {
	if settle == 0 {
		settle = kubeSystemSettleSeconds * time.Second
	}
	// No kubeconfig — can't query the cluster; report not-ready.
	if _, err := os.Stat(c.kubeconfigPath); err != nil {
		return false
	}
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// jsonpath emits one line per pod: <phase>|<startTime>
	cmd := exec.CommandContext(ctx, "kubectl",
		"--kubeconfig", c.kubeconfigPath,
		"get", "pods", "-n", "kube-system",
		"-o", `jsonpath={range .items[*]}{.status.phase}|{.status.startTime}{"\n"}{end}`,
	)
	out, err := cmd.Output()
	if err != nil {
		return false
	}
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) == 0 || lines[0] == "" {
		// No pods reported. Conservatively treat as not-ready: kube-system
		// is expected to host at least CoreDNS + pause.
		return false
	}
	now := time.Now()
	for _, line := range lines {
		parts := strings.SplitN(line, "|", 2)
		phase := strings.TrimSpace(parts[0])
		if phase != "Running" {
			return false
		}
		// Missing startTime field — malformed output; fail closed.
		if len(parts) < 2 {
			return false
		}
		start, perr := time.Parse(time.RFC3339, strings.TrimSpace(parts[1]))
		if perr != nil {
			return false
		}
		// Pod must have held Running for the full settle period.
		if now.Sub(start) < settle {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// CheckProbeURL issues a GET against url and reports whether the response
// status was exactly 200. An empty url disables the check and reports
// (true, nil); transport-level failures come back as errors.
func CheckProbeURL(url string) (bool, error) {
	if url == "" {
		// The check is opt-in; nothing configured means nothing to fail.
		return true, nil
	}
	client := http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false, fmt.Errorf("probe URL %s: %w", url, err)
	}
	defer resp.Body.Close()
	ok := resp.StatusCode == http.StatusOK
	return ok, nil
}
|
||||||
|
|
||||||
|
// CheckDiskWritable writes a small file under dataDir, fsyncs, reads it back,
// and removes it. Confirms the data partition is mounted read-write and the
// underlying disk is responsive. Empty dataDir defaults to /var/lib/kubesolo.
func CheckDiskWritable(dataDir string) (bool, error) {
	if dataDir == "" {
		dataDir = "/var/lib/kubesolo"
	}
	if _, err := os.Stat(dataDir); err != nil {
		// Data partition not mounted? That's catastrophic but we shouldn't
		// claim the disk is fine.
		return false, fmt.Errorf("dataDir %s: %w", dataDir, err)
	}

	probePath := filepath.Join(dataDir, ".update-probe")
	payload := "kubesolo-os healthcheck probe"

	file, err := os.Create(probePath)
	if err != nil {
		return false, fmt.Errorf("create probe: %w", err)
	}
	// Remove the probe on every exit path, including partial writes.
	defer os.Remove(probePath)

	// Write + fsync; on failure, close before returning so the descriptor
	// is not leaked.
	if _, err := file.Write([]byte(payload)); err != nil {
		file.Close()
		return false, fmt.Errorf("write probe: %w", err)
	}
	if err := file.Sync(); err != nil {
		file.Close()
		return false, fmt.Errorf("fsync probe: %w", err)
	}
	if err := file.Close(); err != nil {
		return false, fmt.Errorf("close probe: %w", err)
	}

	// Read-back verifies the data actually landed on a responsive disk.
	readBack, err := os.ReadFile(probePath)
	if err != nil {
		return false, fmt.Errorf("read probe: %w", err)
	}
	if string(readBack) != payload {
		return false, fmt.Errorf("probe content mismatch: got %q", readBack)
	}
	return true, nil
}
|
||||||
77
update/pkg/health/extended_test.go
Normal file
77
update/pkg/health/extended_test.go
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
package health
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// An empty probe URL disables the check, so it must report ok with no error.
func TestCheckProbeURLEmptyAlwaysPasses(t *testing.T) {
	ok, err := CheckProbeURL("")
	if err != nil {
		t.Fatalf("CheckProbeURL(\"\"): %v", err)
	}
	if !ok {
		t.Error("empty probe URL should return ok=true (check disabled)")
	}
}

// A 200 response must report ok=true.
func TestCheckProbeURL200(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()
	ok, err := CheckProbeURL(srv.URL)
	if err != nil {
		t.Fatalf("CheckProbeURL: %v", err)
	}
	if !ok {
		t.Error("expected ok=true on 200")
	}
}

// Any non-200 status (here 503) must report ok=false without an error —
// the server answered, it just isn't healthy.
func TestCheckProbeURLNon200(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	defer srv.Close()
	ok, err := CheckProbeURL(srv.URL)
	if err != nil {
		t.Fatalf("CheckProbeURL: %v", err)
	}
	if ok {
		t.Error("expected ok=false on 503")
	}
}

// A connection failure must surface as an error, not a quiet ok=false.
func TestCheckProbeURLNetworkError(t *testing.T) {
	// Port 1 is reserved (tcpmux) and never bound by Linux defaults.
	_, err := CheckProbeURL("http://127.0.0.1:1")
	if err == nil {
		t.Error("expected error for unreachable URL, got nil")
	}
}

// A writable directory passes, and the probe file must be cleaned up.
func TestCheckDiskWritableHappyPath(t *testing.T) {
	dir := t.TempDir()
	ok, err := CheckDiskWritable(dir)
	if err != nil {
		t.Fatalf("CheckDiskWritable: %v", err)
	}
	if !ok {
		t.Error("expected ok=true on writable temp dir")
	}
	// Probe file should have been cleaned up.
	if _, err := os.Stat(filepath.Join(dir, ".update-probe")); !os.IsNotExist(err) {
		t.Errorf("probe file not cleaned up: stat err=%v", err)
	}
}

// A missing data dir must be reported as an error, not silently passed.
func TestCheckDiskWritableMissingDir(t *testing.T) {
	_, err := CheckDiskWritable("/this/path/does/not/exist")
	if err == nil {
		t.Error("expected error for missing dataDir, got nil")
	}
}
|
||||||
@@ -24,15 +24,20 @@ import (
|
|||||||
|
|
||||||
// Status represents the result of a health check.
|
// Status represents the result of a health check.
|
||||||
type Status struct {
|
type Status struct {
|
||||||
Containerd bool
|
Containerd bool
|
||||||
APIServer bool
|
APIServer bool
|
||||||
NodeReady bool
|
NodeReady bool
|
||||||
Message string
|
KubeSystemReady bool // optional — true unless KubeSystemSettle is non-zero
|
||||||
|
ProbeURL bool // optional — true unless ProbeURL is set
|
||||||
|
DiskWritable bool // optional — true unless DataDir is set
|
||||||
|
Message string
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsHealthy returns true if all checks passed.
|
// IsHealthy returns true if all required checks passed. Optional checks
|
||||||
|
// default to true when not configured, so they don't block the result.
|
||||||
func (s *Status) IsHealthy() bool {
|
func (s *Status) IsHealthy() bool {
|
||||||
return s.Containerd && s.APIServer && s.NodeReady
|
return s.Containerd && s.APIServer && s.NodeReady &&
|
||||||
|
s.KubeSystemReady && s.ProbeURL && s.DiskWritable
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checker performs health checks against the local KubeSolo instance.
|
// Checker performs health checks against the local KubeSolo instance.
|
||||||
@@ -40,6 +45,11 @@ type Checker struct {
|
|||||||
kubeconfigPath string
|
kubeconfigPath string
|
||||||
apiServerAddr string
|
apiServerAddr string
|
||||||
timeout time.Duration
|
timeout time.Duration
|
||||||
|
|
||||||
|
// Optional gates. Zero values disable the check (it reports true).
|
||||||
|
KubeSystemSettle time.Duration
|
||||||
|
ProbeURL string
|
||||||
|
DataDir string
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewChecker creates a health checker.
|
// NewChecker creates a health checker.
|
||||||
@@ -149,12 +159,37 @@ func (c *Checker) CheckNodeReady() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunAll performs all health checks and returns the combined status.
|
// RunAll performs all health checks and returns the combined status.
|
||||||
|
//
|
||||||
|
// Optional checks (kube-system settle, user probe URL, disk writability) are
|
||||||
|
// only run if the corresponding Checker fields are set; otherwise they
|
||||||
|
// report true so as not to block the result.
|
||||||
func (c *Checker) RunAll() *Status {
|
func (c *Checker) RunAll() *Status {
|
||||||
return &Status{
|
s := &Status{
|
||||||
Containerd: c.CheckContainerd(),
|
Containerd: c.CheckContainerd(),
|
||||||
APIServer: c.CheckAPIServer(),
|
APIServer: c.CheckAPIServer(),
|
||||||
NodeReady: c.CheckNodeReady(),
|
NodeReady: c.CheckNodeReady(),
|
||||||
|
KubeSystemReady: true,
|
||||||
|
ProbeURL: true,
|
||||||
|
DiskWritable: true,
|
||||||
}
|
}
|
||||||
|
if c.KubeSystemSettle > 0 {
|
||||||
|
s.KubeSystemReady = c.CheckKubeSystemReady(c.KubeSystemSettle)
|
||||||
|
}
|
||||||
|
if c.ProbeURL != "" {
|
||||||
|
ok, err := CheckProbeURL(c.ProbeURL)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("probe URL check failed", "url", c.ProbeURL, "error", err)
|
||||||
|
}
|
||||||
|
s.ProbeURL = ok
|
||||||
|
}
|
||||||
|
if c.DataDir != "" {
|
||||||
|
ok, err := CheckDiskWritable(c.DataDir)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("disk writability check failed", "dir", c.DataDir, "error", err)
|
||||||
|
}
|
||||||
|
s.DiskWritable = ok
|
||||||
|
}
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForHealthy polls health checks until all pass or timeout expires.
|
// WaitForHealthy polls health checks until all pass or timeout expires.
|
||||||
|
|||||||
@@ -6,36 +6,42 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestStatusIsHealthy(t *testing.T) {
|
func TestStatusIsHealthy(t *testing.T) {
|
||||||
|
// Helper for the new 6-field Status: all-true except the named one.
|
||||||
|
allBut := func(field string) Status {
|
||||||
|
s := Status{
|
||||||
|
Containerd: true, APIServer: true, NodeReady: true,
|
||||||
|
KubeSystemReady: true, ProbeURL: true, DiskWritable: true,
|
||||||
|
}
|
||||||
|
switch field {
|
||||||
|
case "Containerd":
|
||||||
|
s.Containerd = false
|
||||||
|
case "APIServer":
|
||||||
|
s.APIServer = false
|
||||||
|
case "NodeReady":
|
||||||
|
s.NodeReady = false
|
||||||
|
case "KubeSystemReady":
|
||||||
|
s.KubeSystemReady = false
|
||||||
|
case "ProbeURL":
|
||||||
|
s.ProbeURL = false
|
||||||
|
case "DiskWritable":
|
||||||
|
s.DiskWritable = false
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
status Status
|
status Status
|
||||||
wantHealth bool
|
wantHealth bool
|
||||||
}{
|
}{
|
||||||
{
|
{"all healthy", allBut(""), true},
|
||||||
name: "all healthy",
|
{"containerd down", allBut("Containerd"), false},
|
||||||
status: Status{Containerd: true, APIServer: true, NodeReady: true},
|
{"apiserver down", allBut("APIServer"), false},
|
||||||
wantHealth: true,
|
{"node not ready", allBut("NodeReady"), false},
|
||||||
},
|
{"kube-system not ready", allBut("KubeSystemReady"), false},
|
||||||
{
|
{"probe URL failed", allBut("ProbeURL"), false},
|
||||||
name: "containerd down",
|
{"disk not writable", allBut("DiskWritable"), false},
|
||||||
status: Status{Containerd: false, APIServer: true, NodeReady: true},
|
{"all down", Status{}, false},
|
||||||
wantHealth: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "apiserver down",
|
|
||||||
status: Status{Containerd: true, APIServer: false, NodeReady: true},
|
|
||||||
wantHealth: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "node not ready",
|
|
||||||
status: Status{Containerd: true, APIServer: true, NodeReady: false},
|
|
||||||
wantHealth: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "all down",
|
|
||||||
status: Status{Containerd: false, APIServer: false, NodeReady: false},
|
|
||||||
wantHealth: false,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
|
|||||||
51
update/pkg/health/preflight.go
Normal file
51
update/pkg/health/preflight.go
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
package health
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NodeBlockLabel is the well-known label that workload authors set on the
|
||||||
|
// local node to defer an OS update. When present and "true", apply refuses.
|
||||||
|
const NodeBlockLabel = "updates.kubesolo.io/block"
|
||||||
|
|
||||||
|
// CheckNodeBlocked returns (blocked, error). blocked==true means the local
|
||||||
|
// node carries the updates.kubesolo.io/block=true label and the caller should
|
||||||
|
// refuse the update.
|
||||||
|
//
|
||||||
|
// If the kubeconfig is not available (offline / pre-boot / air-gap), this
|
||||||
|
// returns (false, nil) — silently allowing the update. That's the safe
|
||||||
|
// behaviour for the air-gap case where the node may not be reachable from
|
||||||
|
// the agent's perspective.
|
||||||
|
func CheckNodeBlocked(kubeconfigPath string) (bool, error) {
|
||||||
|
if kubeconfigPath == "" {
|
||||||
|
kubeconfigPath = "/var/lib/kubesolo/pki/admin/admin.kubeconfig"
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(kubeconfigPath); err != nil {
|
||||||
|
// No kubeconfig — assume air-gap / pre-K8s. Don't block updates.
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query the node label via kubectl. We don't know the node name a
|
||||||
|
// priori, so we use --kubeconfig on the local admin config and ask for
|
||||||
|
// "the only node" (KubeSolo is single-node by design).
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, "kubectl",
|
||||||
|
"--kubeconfig", kubeconfigPath,
|
||||||
|
"get", "node",
|
||||||
|
"-o", `jsonpath={.items[0].metadata.labels.updates\.kubesolo\.io/block}`)
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
// API unreachable or no nodes — treat as not blocked (analogous to
|
||||||
|
// the kubeconfig-missing case). We still surface the error so the
|
||||||
|
// caller can decide to log it.
|
||||||
|
return false, fmt.Errorf("query node label: %w", err)
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(string(out)) == "true", nil
|
||||||
|
}
|
||||||
@@ -35,6 +35,24 @@ type UpdateMetadata struct {
|
|||||||
MetadataSigURL string `json:"metadata_sig_url,omitempty"`
|
MetadataSigURL string `json:"metadata_sig_url,omitempty"`
|
||||||
ReleaseNotes string `json:"release_notes,omitempty"`
|
ReleaseNotes string `json:"release_notes,omitempty"`
|
||||||
ReleaseDate string `json:"release_date,omitempty"`
|
ReleaseDate string `json:"release_date,omitempty"`
|
||||||
|
|
||||||
|
// Channel labels this artifact ("stable", "beta", "edge", ...). The agent
|
||||||
|
// refuses metadata whose channel doesn't match the locally-configured
|
||||||
|
// one. Empty in metadata means "no channel constraint, accept anything".
|
||||||
|
Channel string `json:"channel,omitempty"`
|
||||||
|
|
||||||
|
// MinCompatibleVersion is the lowest version that can upgrade to this
|
||||||
|
// one. The agent refuses to apply if the currently-running version is
|
||||||
|
// below this. Used for stepping-stone migrations (e.g. 0.2.x -> 0.3.x
|
||||||
|
// requires 0.2.5+ to land the state-file format first). Empty means
|
||||||
|
// "any source version OK".
|
||||||
|
MinCompatibleVersion string `json:"min_compatible_version,omitempty"`
|
||||||
|
|
||||||
|
// Architecture restricts this artifact to a specific GOARCH ("amd64",
|
||||||
|
// "arm64"). Empty means the artifact is arch-agnostic — which is rare
|
||||||
|
// since the kernel + initramfs are arch-specific; this should normally
|
||||||
|
// be populated by the build pipeline.
|
||||||
|
Architecture string `json:"architecture,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// StagedImage represents downloaded and verified update files.
|
// StagedImage represents downloaded and verified update files.
|
||||||
|
|||||||
@@ -11,6 +11,9 @@
|
|||||||
// kubesolo_os_update_last_check_timestamp_seconds unix timestamp (gauge)
|
// kubesolo_os_update_last_check_timestamp_seconds unix timestamp (gauge)
|
||||||
// kubesolo_os_memory_total_bytes total RAM (gauge)
|
// kubesolo_os_memory_total_bytes total RAM (gauge)
|
||||||
// kubesolo_os_memory_available_bytes available RAM (gauge)
|
// kubesolo_os_memory_available_bytes available RAM (gauge)
|
||||||
|
// kubesolo_update_phase{phase} 1 for current phase, 0 for others
|
||||||
|
// kubesolo_update_attempts_total counter — attempts at current ToVersion
|
||||||
|
// kubesolo_update_last_attempt_timestamp_seconds unix timestamp of last state update
|
||||||
//
|
//
|
||||||
// This is a zero-dependency implementation — no Prometheus client library needed.
|
// This is a zero-dependency implementation — no Prometheus client library needed.
|
||||||
// It serves metrics in the Prometheus text exposition format.
|
// It serves metrics in the Prometheus text exposition format.
|
||||||
@@ -25,11 +28,14 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Server is a lightweight Prometheus metrics HTTP server.
|
// Server is a lightweight Prometheus metrics HTTP server.
|
||||||
type Server struct {
|
type Server struct {
|
||||||
grubenvPath string
|
grubenvPath string
|
||||||
|
statePath string
|
||||||
listenAddr string
|
listenAddr string
|
||||||
startTime time.Time
|
startTime time.Time
|
||||||
|
|
||||||
@@ -47,6 +53,27 @@ func NewServer(listenAddr, grubenvPath string) *Server {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetStatePath sets the location of the update state.json file. If empty or
|
||||||
|
// unset, state-derived metrics are emitted with the Idle defaults.
|
||||||
|
func (s *Server) SetStatePath(p string) {
|
||||||
|
s.statePath = p
|
||||||
|
}
|
||||||
|
|
||||||
|
// allPhases lists every Phase value we emit as a kubesolo_update_phase
|
||||||
|
// time-series, so consumers see all label values (with value 0 for non-current
|
||||||
|
// phases). Mirror of validPhases in pkg/state.
|
||||||
|
var allPhases = []state.Phase{
|
||||||
|
state.PhaseIdle,
|
||||||
|
state.PhaseChecking,
|
||||||
|
state.PhaseDownloading,
|
||||||
|
state.PhaseStaged,
|
||||||
|
state.PhaseActivated,
|
||||||
|
state.PhaseVerifying,
|
||||||
|
state.PhaseSuccess,
|
||||||
|
state.PhaseRolledBack,
|
||||||
|
state.PhaseFailed,
|
||||||
|
}
|
||||||
|
|
||||||
// SetUpdateAvailable records whether an update is available.
|
// SetUpdateAvailable records whether an update is available.
|
||||||
func (s *Server) SetUpdateAvailable(available bool) {
|
func (s *Server) SetUpdateAvailable(available bool) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
@@ -125,9 +152,49 @@ func (s *Server) handleMetrics(w http.ResponseWriter, r *http.Request) {
|
|||||||
sb.WriteString("# TYPE kubesolo_os_memory_available_bytes gauge\n")
|
sb.WriteString("# TYPE kubesolo_os_memory_available_bytes gauge\n")
|
||||||
sb.WriteString(fmt.Sprintf("kubesolo_os_memory_available_bytes %d\n", memAvail))
|
sb.WriteString(fmt.Sprintf("kubesolo_os_memory_available_bytes %d\n", memAvail))
|
||||||
|
|
||||||
|
// Update lifecycle (from state.json)
|
||||||
|
s.writeUpdateStateMetrics(&sb)
|
||||||
|
|
||||||
fmt.Fprint(w, sb.String())
|
fmt.Fprint(w, sb.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writeUpdateStateMetrics appends update-lifecycle metrics derived from the
|
||||||
|
// state.json file. If the file is missing or unreadable, emits the Idle
|
||||||
|
// defaults so the metric series exists at all times.
|
||||||
|
func (s *Server) writeUpdateStateMetrics(sb *strings.Builder) {
|
||||||
|
current := state.PhaseIdle
|
||||||
|
var attempts int
|
||||||
|
var lastTS float64
|
||||||
|
|
||||||
|
if s.statePath != "" {
|
||||||
|
if st, err := state.Load(s.statePath); err == nil && st != nil {
|
||||||
|
current = st.Phase
|
||||||
|
attempts = st.AttemptCount
|
||||||
|
if !st.UpdatedAt.IsZero() {
|
||||||
|
lastTS = float64(st.UpdatedAt.Unix())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString("# HELP kubesolo_update_phase Current update lifecycle phase (1 for active, 0 otherwise).\n")
|
||||||
|
sb.WriteString("# TYPE kubesolo_update_phase gauge\n")
|
||||||
|
for _, p := range allPhases {
|
||||||
|
v := 0
|
||||||
|
if p == current {
|
||||||
|
v = 1
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("kubesolo_update_phase{phase=%q} %d\n", string(p), v))
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString("# HELP kubesolo_update_attempts_total Number of update attempts at the current target version.\n")
|
||||||
|
sb.WriteString("# TYPE kubesolo_update_attempts_total counter\n")
|
||||||
|
sb.WriteString(fmt.Sprintf("kubesolo_update_attempts_total %d\n", attempts))
|
||||||
|
|
||||||
|
sb.WriteString("# HELP kubesolo_update_last_attempt_timestamp_seconds Unix timestamp of the last state transition.\n")
|
||||||
|
sb.WriteString("# TYPE kubesolo_update_last_attempt_timestamp_seconds gauge\n")
|
||||||
|
sb.WriteString(fmt.Sprintf("kubesolo_update_last_attempt_timestamp_seconds %.0f\n", lastTS))
|
||||||
|
}
|
||||||
|
|
||||||
// readGrubenvVar reads a single variable from grubenv using simple file parse.
|
// readGrubenvVar reads a single variable from grubenv using simple file parse.
|
||||||
func (s *Server) readGrubenvVar(key string) string {
|
func (s *Server) readGrubenvVar(key string) string {
|
||||||
data, err := os.ReadFile(s.grubenvPath)
|
data, err := os.ReadFile(s.grubenvPath)
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/state"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNewServer(t *testing.T) {
|
func TestNewServer(t *testing.T) {
|
||||||
@@ -247,6 +249,86 @@ func TestSafeInt(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUpdateStateMetricsAbsentStateFile(t *testing.T) {
|
||||||
|
// No state path set — should emit Idle defaults so the metric series
|
||||||
|
// exists from first boot.
|
||||||
|
s := NewServer(":9100", "/tmp/nonexistent")
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
s.handleMetrics(w, req)
|
||||||
|
|
||||||
|
body, _ := io.ReadAll(w.Result().Body)
|
||||||
|
output := string(body)
|
||||||
|
|
||||||
|
if !strings.Contains(output, `kubesolo_update_phase{phase="idle"} 1`) {
|
||||||
|
t.Errorf("expected idle=1 with no state file, got:\n%s", output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(output, `kubesolo_update_phase{phase="checking"} 0`) {
|
||||||
|
t.Errorf("expected checking=0 with no state file, got:\n%s", output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(output, "kubesolo_update_attempts_total 0") {
|
||||||
|
t.Errorf("expected attempts=0 with no state file, got:\n%s", output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateStateMetricsActivePhase(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
statePath := filepath.Join(dir, "state.json")
|
||||||
|
|
||||||
|
st := state.New()
|
||||||
|
if err := st.Transition(statePath, state.PhaseDownloading, "v0.3.0", ""); err != nil {
|
||||||
|
t.Fatalf("seed state: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s := NewServer(":9100", "/tmp/nonexistent")
|
||||||
|
s.SetStatePath(statePath)
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
s.handleMetrics(w, req)
|
||||||
|
|
||||||
|
body, _ := io.ReadAll(w.Result().Body)
|
||||||
|
output := string(body)
|
||||||
|
|
||||||
|
if !strings.Contains(output, `kubesolo_update_phase{phase="downloading"} 1`) {
|
||||||
|
t.Errorf("expected downloading=1, got:\n%s", output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(output, `kubesolo_update_phase{phase="idle"} 0`) {
|
||||||
|
t.Errorf("expected idle=0 when downloading is active, got:\n%s", output)
|
||||||
|
}
|
||||||
|
if !strings.Contains(output, "kubesolo_update_attempts_total 1") {
|
||||||
|
t.Errorf("expected attempts=1 after first Transition, got:\n%s", output)
|
||||||
|
}
|
||||||
|
if strings.Contains(output, "kubesolo_update_last_attempt_timestamp_seconds 0\n") {
|
||||||
|
t.Errorf("expected non-zero timestamp after state write, got:\n%s", output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateStateMetricsAllPhasesEmitted(t *testing.T) {
|
||||||
|
// Every phase value should appear in the output, so dashboards can graph
|
||||||
|
// the series cleanly.
|
||||||
|
s := NewServer(":9100", "/tmp/nonexistent")
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
s.handleMetrics(w, req)
|
||||||
|
|
||||||
|
body, _ := io.ReadAll(w.Result().Body)
|
||||||
|
output := string(body)
|
||||||
|
|
||||||
|
for _, p := range []state.Phase{
|
||||||
|
state.PhaseIdle, state.PhaseChecking, state.PhaseDownloading,
|
||||||
|
state.PhaseStaged, state.PhaseActivated, state.PhaseVerifying,
|
||||||
|
state.PhaseSuccess, state.PhaseRolledBack, state.PhaseFailed,
|
||||||
|
} {
|
||||||
|
needle := `kubesolo_update_phase{phase="` + string(p) + `"}`
|
||||||
|
if !strings.Contains(output, needle) {
|
||||||
|
t.Errorf("phase %q not present in metrics output", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestReadFileString(t *testing.T) {
|
func TestReadFileString(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
|||||||
281
update/pkg/oci/oci.go
Normal file
281
update/pkg/oci/oci.go
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
// Package oci pulls KubeSolo OS update artifacts from an OCI-compliant
|
||||||
|
// container registry (e.g. ghcr.io). It is the registry-native alternative
|
||||||
|
// to the legacy HTTP `latest.json` protocol implemented in pkg/image.
|
||||||
|
//
|
||||||
|
// # Artifact layout
|
||||||
|
//
|
||||||
|
// An update is published as a single OCI artifact under a tag like
|
||||||
|
// `stable` or `v0.3.0`. The tag may point at either:
|
||||||
|
//
|
||||||
|
// - A manifest index (preferred) containing per-architecture manifests.
|
||||||
|
// The agent picks the one matching runtime.GOARCH.
|
||||||
|
// - A single manifest (used for arch-specific tags such as
|
||||||
|
// `v0.3.0-amd64`). The agent verifies architecture against the
|
||||||
|
// manifest's platform annotation before trusting it.
|
||||||
|
//
|
||||||
|
// Each per-architecture manifest carries two layers:
|
||||||
|
//
|
||||||
|
// application/vnd.kubesolo.os.kernel.v1+octet-stream // vmlinuz / Image
|
||||||
|
// application/vnd.kubesolo.os.initramfs.v1+gzip // kubesolo-os.gz
|
||||||
|
//
|
||||||
|
// And these annotations (read into image.UpdateMetadata):
|
||||||
|
//
|
||||||
|
// io.kubesolo.os.version "v0.3.0"
|
||||||
|
// io.kubesolo.os.channel "stable"
|
||||||
|
// io.kubesolo.os.min_compatible_version "v0.2.0"
|
||||||
|
// io.kubesolo.os.architecture "amd64"
|
||||||
|
// io.kubesolo.os.release_notes (optional, short)
|
||||||
|
// io.kubesolo.os.release_date (optional, RFC3339)
|
||||||
|
//
|
||||||
|
// The agent ignores any additional layers, so the same image can also be
|
||||||
|
// shaped as a "scratch" container if the build pipeline finds that convenient
|
||||||
|
// for ecosystem tooling.
|
||||||
|
package oci
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/opencontainers/go-digest"
|
||||||
|
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||||
|
"oras.land/oras-go/v2/content"
|
||||||
|
"oras.land/oras-go/v2/registry/remote"
|
||||||
|
|
||||||
|
"github.com/portainer/kubesolo-os/update/pkg/image"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Media types used on KubeSolo OS update artifacts. Kept here (not in
|
||||||
|
// pkg/image) so the OCI protocol surface is fully self-contained.
|
||||||
|
const (
|
||||||
|
MediaKernel = "application/vnd.kubesolo.os.kernel.v1+octet-stream"
|
||||||
|
MediaInitramfs = "application/vnd.kubesolo.os.initramfs.v1+gzip"
|
||||||
|
|
||||||
|
AnnotVersion = "io.kubesolo.os.version"
|
||||||
|
AnnotChannel = "io.kubesolo.os.channel"
|
||||||
|
AnnotMinVersion = "io.kubesolo.os.min_compatible_version"
|
||||||
|
AnnotArch = "io.kubesolo.os.architecture"
|
||||||
|
AnnotReleaseNote = "io.kubesolo.os.release_notes"
|
||||||
|
AnnotReleaseDate = "io.kubesolo.os.release_date"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client pulls artifacts from a single OCI repository (e.g.
|
||||||
|
// `ghcr.io/portainer/kubesolo-os`).
|
||||||
|
//
|
||||||
|
// Anonymous (public-pull) access is supported out of the box. For private
|
||||||
|
// repositories, configure auth via the underlying remote.Repository.Client
|
||||||
|
// before passing it to Resolve/Pull — that hook isn't surfaced here yet
|
||||||
|
// (deferred until we actually need it for a private fleet).
|
||||||
|
type Client struct {
|
||||||
|
repo *remote.Repository
|
||||||
|
// Arch is the architecture string we match against manifest indexes.
|
||||||
|
// Defaults to runtime.GOARCH; overridable for testing.
|
||||||
|
Arch string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewClient parses a repository reference of the form `host/path` (no tag)
|
||||||
|
// and returns a ready-to-use Client.
|
||||||
|
func NewClient(repoRef string) (*Client, error) {
|
||||||
|
repo, err := remote.NewRepository(repoRef)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid OCI reference %q: %w", repoRef, err)
|
||||||
|
}
|
||||||
|
// remote.NewRepository defaults to HTTPS. PlainHTTP is set per-test
|
||||||
|
// via the WithPlainHTTP option when we hit a httptest.Server.
|
||||||
|
return &Client{repo: repo, Arch: runtime.GOARCH}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithPlainHTTP toggles the underlying registry transport to HTTP. Useful for
|
||||||
|
// httptest-driven unit tests; do not use against production registries.
|
||||||
|
func (c *Client) WithPlainHTTP(plain bool) *Client {
|
||||||
|
c.repo.PlainHTTP = plain
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchMetadata resolves the tag, walks index → manifest if needed, and
|
||||||
|
// returns an image.UpdateMetadata populated from the manifest's annotations.
|
||||||
|
// No blobs are downloaded — this is the cheap "what's available" probe.
|
||||||
|
func (c *Client) FetchMetadata(ctx context.Context, tag string) (*image.UpdateMetadata, error) {
|
||||||
|
manifest, _, err := c.resolveArchManifest(ctx, tag)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return metadataFromAnnotations(manifest.Annotations), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pull resolves the tag, picks the matching-architecture manifest, downloads
|
||||||
|
// the kernel + initramfs layers to `stageDir`, verifies their digests, and
|
||||||
|
// returns a StagedImage compatible with the existing pkg/image consumer.
|
||||||
|
func (c *Client) Pull(ctx context.Context, tag, stageDir string) (*image.StagedImage, *image.UpdateMetadata, error) {
|
||||||
|
manifest, _, err := c.resolveArchManifest(ctx, tag)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(stageDir, 0o755); err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("create stage dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var kernelPath, initramfsPath string
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
switch layer.MediaType {
|
||||||
|
case MediaKernel:
|
||||||
|
kernelPath = filepath.Join(stageDir, "vmlinuz")
|
||||||
|
if err := c.fetchBlobTo(ctx, layer, kernelPath); err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("download kernel: %w", err)
|
||||||
|
}
|
||||||
|
case MediaInitramfs:
|
||||||
|
initramfsPath = filepath.Join(stageDir, "kubesolo-os.gz")
|
||||||
|
if err := c.fetchBlobTo(ctx, layer, initramfsPath); err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("download initramfs: %w", err)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
slog.Debug("oci: skipping unknown layer", "media", layer.MediaType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if kernelPath == "" {
|
||||||
|
return nil, nil, fmt.Errorf("manifest has no %s layer", MediaKernel)
|
||||||
|
}
|
||||||
|
if initramfsPath == "" {
|
||||||
|
return nil, nil, fmt.Errorf("manifest has no %s layer", MediaInitramfs)
|
||||||
|
}
|
||||||
|
|
||||||
|
meta := metadataFromAnnotations(manifest.Annotations)
|
||||||
|
staged := &image.StagedImage{
|
||||||
|
VmlinuzPath: kernelPath,
|
||||||
|
InitramfsPath: initramfsPath,
|
||||||
|
Version: meta.Version,
|
||||||
|
}
|
||||||
|
return staged, meta, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveArchManifest fetches the descriptor at `tag`, walks an index if
|
||||||
|
// present, and returns the platform-specific manifest matching c.Arch.
|
||||||
|
func (c *Client) resolveArchManifest(ctx context.Context, tag string) (*ocispec.Manifest, *ocispec.Descriptor, error) {
|
||||||
|
desc, err := c.repo.Resolve(ctx, tag)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("resolve tag %q: %w", tag, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch desc.MediaType {
|
||||||
|
case ocispec.MediaTypeImageIndex, "application/vnd.docker.distribution.manifest.list.v2+json":
|
||||||
|
index, err := fetchJSON[ocispec.Index](ctx, c.repo, desc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("fetch index: %w", err)
|
||||||
|
}
|
||||||
|
var matched *ocispec.Descriptor
|
||||||
|
for i := range index.Manifests {
|
||||||
|
m := &index.Manifests[i]
|
||||||
|
if m.Platform != nil && m.Platform.Architecture == c.Arch {
|
||||||
|
matched = m
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if matched == nil {
|
||||||
|
return nil, nil, fmt.Errorf("no manifest in index for architecture %q", c.Arch)
|
||||||
|
}
|
||||||
|
manifest, err := fetchJSON[ocispec.Manifest](ctx, c.repo, *matched)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("fetch manifest: %w", err)
|
||||||
|
}
|
||||||
|
return manifest, matched, nil
|
||||||
|
|
||||||
|
case ocispec.MediaTypeImageManifest, "application/vnd.docker.distribution.manifest.v2+json":
|
||||||
|
manifest, err := fetchJSON[ocispec.Manifest](ctx, c.repo, desc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("fetch manifest: %w", err)
|
||||||
|
}
|
||||||
|
// Single-arch tag: if it declares an arch, enforce match.
|
||||||
|
if archAnnot := manifest.Annotations[AnnotArch]; archAnnot != "" && archAnnot != c.Arch {
|
||||||
|
return nil, nil, fmt.Errorf("single-arch manifest is %q, want %q", archAnnot, c.Arch)
|
||||||
|
}
|
||||||
|
return manifest, &desc, nil
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, nil, fmt.Errorf("unsupported media type %q at tag %q", desc.MediaType, tag)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchJSON pulls a small JSON document (manifest or index) and decodes it.
|
||||||
|
func fetchJSON[T any](ctx context.Context, store content.Fetcher, desc ocispec.Descriptor) (*T, error) {
|
||||||
|
rc, err := store.Fetch(ctx, desc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rc.Close()
|
||||||
|
data, err := content.ReadAll(rc, desc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var out T
|
||||||
|
if err := json.Unmarshal(data, &out); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode: %w", err)
|
||||||
|
}
|
||||||
|
return &out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchBlobTo streams a blob to disk and verifies its digest matches.
|
||||||
|
// Cleans up the destination file on any error so we never leave a partial.
|
||||||
|
func (c *Client) fetchBlobTo(ctx context.Context, desc ocispec.Descriptor, dest string) (retErr error) {
|
||||||
|
rc, err := c.repo.Fetch(ctx, desc)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("fetch blob: %w", err)
|
||||||
|
}
|
||||||
|
defer rc.Close()
|
||||||
|
|
||||||
|
f, err := os.Create(dest)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("create %s: %w", dest, err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if cerr := f.Close(); retErr == nil && cerr != nil {
|
||||||
|
retErr = cerr
|
||||||
|
}
|
||||||
|
if retErr != nil {
|
||||||
|
_ = os.Remove(dest)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
verifier := desc.Digest.Algorithm().Hash()
|
||||||
|
mw := io.MultiWriter(f, verifier)
|
||||||
|
n, err := io.Copy(mw, rc)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("stream blob: %w", err)
|
||||||
|
}
|
||||||
|
if desc.Size > 0 && n != desc.Size {
|
||||||
|
return fmt.Errorf("blob size mismatch: got %d, want %d", n, desc.Size)
|
||||||
|
}
|
||||||
|
got := digest.NewDigest(desc.Digest.Algorithm(), verifier)
|
||||||
|
if got != desc.Digest {
|
||||||
|
return fmt.Errorf("blob digest mismatch: got %s, want %s", got, desc.Digest)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// metadataFromAnnotations builds an UpdateMetadata from manifest annotations.
|
||||||
|
// Always returns a non-nil value (missing fields stay empty).
|
||||||
|
func metadataFromAnnotations(a map[string]string) *image.UpdateMetadata {
|
||||||
|
if a == nil {
|
||||||
|
a = map[string]string{}
|
||||||
|
}
|
||||||
|
return &image.UpdateMetadata{
|
||||||
|
Version: a[AnnotVersion],
|
||||||
|
Channel: a[AnnotChannel],
|
||||||
|
MinCompatibleVersion: a[AnnotMinVersion],
|
||||||
|
Architecture: a[AnnotArch],
|
||||||
|
ReleaseNotes: a[AnnotReleaseNote],
|
||||||
|
ReleaseDate: a[AnnotReleaseDate],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrNoManifestForArch is returned from FetchMetadata/Pull when an index has
|
||||||
|
// no entry matching the running architecture. Exposed so callers can
|
||||||
|
// distinguish "registry unreachable" from "this build doesn't ship for us".
|
||||||
|
var ErrNoManifestForArch = errors.New("no manifest in index for runtime architecture")
|
||||||
377
update/pkg/oci/oci_test.go
Normal file
377
update/pkg/oci/oci_test.go
Normal file
@@ -0,0 +1,377 @@
|
|||||||
|
package oci
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/go-digest"
|
||||||
|
specs "github.com/opencontainers/image-spec/specs-go"
|
||||||
|
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeRegistry implements the minimum OCI distribution-spec surface our
|
||||||
|
// Client touches: /v2/ probe, manifest fetch by tag or digest, blob fetch
|
||||||
|
// by digest. Backed by an in-memory blob+manifest store.
|
||||||
|
type fakeRegistry struct {
|
||||||
|
t *testing.T
|
||||||
|
srv *httptest.Server
|
||||||
|
blobs map[digest.Digest][]byte // keyed by digest
|
||||||
|
manifests map[string][]byte // keyed by digest string (raw form)
|
||||||
|
tags map[string]digest.Digest // tag -> manifest digest
|
||||||
|
mediaTypes map[digest.Digest]string // descriptor.MediaType per stored object
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFakeRegistry(t *testing.T) *fakeRegistry {
|
||||||
|
t.Helper()
|
||||||
|
r := &fakeRegistry{
|
||||||
|
t: t,
|
||||||
|
blobs: map[digest.Digest][]byte{},
|
||||||
|
manifests: map[string][]byte{},
|
||||||
|
tags: map[string]digest.Digest{},
|
||||||
|
mediaTypes: map[digest.Digest]string{},
|
||||||
|
}
|
||||||
|
r.srv = httptest.NewServer(http.HandlerFunc(r.handle))
|
||||||
|
t.Cleanup(r.srv.Close)
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *fakeRegistry) putBlob(media string, data []byte) digest.Digest {
|
||||||
|
h := sha256.Sum256(data)
|
||||||
|
d := digest.NewDigestFromBytes(digest.SHA256, h[:])
|
||||||
|
r.blobs[d] = data
|
||||||
|
r.mediaTypes[d] = media
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// putManifest stores a manifest/index document under both its digest and the
|
||||||
|
// given tag, returning the digest the caller can embed in indexes.
|
||||||
|
func (r *fakeRegistry) putManifest(tag string, media string, doc []byte) digest.Digest {
|
||||||
|
h := sha256.Sum256(doc)
|
||||||
|
d := digest.NewDigestFromBytes(digest.SHA256, h[:])
|
||||||
|
r.manifests[d.String()] = doc
|
||||||
|
r.mediaTypes[d] = media
|
||||||
|
if tag != "" {
|
||||||
|
r.tags[tag] = d
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// repoRef returns the "host:port/repo" string for use with NewClient.
|
||||||
|
func (r *fakeRegistry) repoRef() string {
|
||||||
|
u, _ := url.Parse(r.srv.URL)
|
||||||
|
return u.Host + "/test/kubesolo-os"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *fakeRegistry) handle(w http.ResponseWriter, req *http.Request) {
|
||||||
|
// Routes we implement:
|
||||||
|
// GET /v2/ -> 200 "{}"
|
||||||
|
// GET /v2/test/kubesolo-os/manifests/<tag-or-digest> -> manifest
|
||||||
|
// HEAD same -> same headers, no body
|
||||||
|
// GET /v2/test/kubesolo-os/blobs/<digest> -> blob
|
||||||
|
|
||||||
|
path := req.URL.Path
|
||||||
|
if path == "/v2/" || path == "/v2" {
|
||||||
|
w.Header().Set("Docker-Distribution-API-Version", "registry/2.0")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = io.WriteString(w, "{}")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const prefix = "/v2/test/kubesolo-os/"
|
||||||
|
if !strings.HasPrefix(path, prefix) {
|
||||||
|
http.NotFound(w, req)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rest := strings.TrimPrefix(path, prefix)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(rest, "manifests/"):
|
||||||
|
ref := strings.TrimPrefix(rest, "manifests/")
|
||||||
|
var d digest.Digest
|
||||||
|
var data []byte
|
||||||
|
if td, ok := r.tags[ref]; ok {
|
||||||
|
d = td
|
||||||
|
data = r.manifests[d.String()]
|
||||||
|
} else if md, ok := r.manifests[ref]; ok {
|
||||||
|
d = digest.Digest(ref)
|
||||||
|
data = md
|
||||||
|
} else {
|
||||||
|
http.NotFound(w, req)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
media := r.mediaTypes[d]
|
||||||
|
w.Header().Set("Content-Type", media)
|
||||||
|
w.Header().Set("Docker-Content-Digest", d.String())
|
||||||
|
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data)))
|
||||||
|
if req.Method == http.MethodHead {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_, _ = w.Write(data)
|
||||||
|
|
||||||
|
case strings.HasPrefix(rest, "blobs/"):
|
||||||
|
ref := strings.TrimPrefix(rest, "blobs/")
|
||||||
|
d := digest.Digest(ref)
|
||||||
|
blob, ok := r.blobs[d]
|
||||||
|
if !ok {
|
||||||
|
http.NotFound(w, req)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
media := r.mediaTypes[d]
|
||||||
|
if media == "" {
|
||||||
|
media = "application/octet-stream"
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", media)
|
||||||
|
w.Header().Set("Docker-Content-Digest", d.String())
|
||||||
|
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(blob)))
|
||||||
|
if req.Method == http.MethodHead {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_, _ = w.Write(blob)
|
||||||
|
|
||||||
|
default:
|
||||||
|
http.NotFound(w, req)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// seedSingleArchManifest puts kernel+initramfs blobs and a manifest with the
|
||||||
|
// given annotations into the registry, tagged as `tag`.
|
||||||
|
func (r *fakeRegistry) seedSingleArchManifest(t *testing.T, tag string, annot map[string]string) (kernelData, initramfsData []byte) {
|
||||||
|
t.Helper()
|
||||||
|
kernelData = []byte("FAKE-KERNEL-" + tag)
|
||||||
|
initramfsData = []byte("FAKE-INITRAMFS-" + tag)
|
||||||
|
|
||||||
|
kd := r.putBlob(MediaKernel, kernelData)
|
||||||
|
id := r.putBlob(MediaInitramfs, initramfsData)
|
||||||
|
|
||||||
|
// An empty config blob with sha256 of "{}" (the canonical "empty" body
|
||||||
|
// per OCI). We don't actually fetch the config so any valid descriptor
|
||||||
|
// works for the tests, but the digest still has to be syntactically valid.
|
||||||
|
emptyConfigBody := []byte("{}")
|
||||||
|
emptyConfigDigest := r.putBlob("application/vnd.oci.empty.v1+json", emptyConfigBody)
|
||||||
|
|
||||||
|
manifest := ocispec.Manifest{
|
||||||
|
Versioned: specs.Versioned{SchemaVersion: 2},
|
||||||
|
MediaType: ocispec.MediaTypeImageManifest,
|
||||||
|
Config: ocispec.Descriptor{
|
||||||
|
MediaType: "application/vnd.oci.empty.v1+json",
|
||||||
|
Size: int64(len(emptyConfigBody)),
|
||||||
|
Digest: emptyConfigDigest,
|
||||||
|
},
|
||||||
|
Layers: []ocispec.Descriptor{
|
||||||
|
{MediaType: MediaKernel, Digest: kd, Size: int64(len(kernelData))},
|
||||||
|
{MediaType: MediaInitramfs, Digest: id, Size: int64(len(initramfsData))},
|
||||||
|
},
|
||||||
|
Annotations: annot,
|
||||||
|
}
|
||||||
|
manifestBytes, err := json.Marshal(manifest)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal manifest: %v", err)
|
||||||
|
}
|
||||||
|
r.putManifest(tag, ocispec.MediaTypeImageManifest, manifestBytes)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// seedIndex creates a manifest index pointing at per-arch manifests created
|
||||||
|
// via seedSingleArchManifest with arch-suffixed tags, then publishes the
|
||||||
|
// index under `tag`.
|
||||||
|
func (r *fakeRegistry) seedIndex(t *testing.T, tag string, perArchAnnots map[string]map[string]string) {
|
||||||
|
t.Helper()
|
||||||
|
var descriptors []ocispec.Descriptor
|
||||||
|
for arch, annot := range perArchAnnots {
|
||||||
|
// Reuse seedSingleArchManifest but under an internal arch-suffixed tag
|
||||||
|
archTag := tag + "-" + arch
|
||||||
|
r.seedSingleArchManifest(t, archTag, annot)
|
||||||
|
d := r.tags[archTag]
|
||||||
|
descriptors = append(descriptors, ocispec.Descriptor{
|
||||||
|
MediaType: ocispec.MediaTypeImageManifest,
|
||||||
|
Digest: d,
|
||||||
|
Size: int64(len(r.manifests[d.String()])),
|
||||||
|
Platform: &ocispec.Platform{Architecture: arch, OS: "linux"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
index := ocispec.Index{
|
||||||
|
Versioned: specs.Versioned{SchemaVersion: 2},
|
||||||
|
MediaType: ocispec.MediaTypeImageIndex,
|
||||||
|
Manifests: descriptors,
|
||||||
|
}
|
||||||
|
indexBytes, _ := json.Marshal(index)
|
||||||
|
r.putManifest(tag, ocispec.MediaTypeImageIndex, indexBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestFetchMetadataSingleArchManifest(t *testing.T) {
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
reg.seedSingleArchManifest(t, "v0.3.0", map[string]string{
|
||||||
|
AnnotVersion: "v0.3.0",
|
||||||
|
AnnotChannel: "stable",
|
||||||
|
AnnotArch: "amd64",
|
||||||
|
})
|
||||||
|
|
||||||
|
c, err := NewClient(reg.repoRef())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewClient: %v", err)
|
||||||
|
}
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = "amd64"
|
||||||
|
|
||||||
|
meta, err := c.FetchMetadata(context.Background(), "v0.3.0")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FetchMetadata: %v", err)
|
||||||
|
}
|
||||||
|
if meta.Version != "v0.3.0" {
|
||||||
|
t.Errorf("version: got %q, want v0.3.0", meta.Version)
|
||||||
|
}
|
||||||
|
if meta.Channel != "stable" {
|
||||||
|
t.Errorf("channel: got %q", meta.Channel)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFetchMetadataIndexSelectsArch(t *testing.T) {
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
reg.seedIndex(t, "stable", map[string]map[string]string{
|
||||||
|
"amd64": {AnnotVersion: "v0.3.0", AnnotChannel: "stable", AnnotArch: "amd64"},
|
||||||
|
"arm64": {AnnotVersion: "v0.3.0", AnnotChannel: "stable", AnnotArch: "arm64"},
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, arch := range []string{"amd64", "arm64"} {
|
||||||
|
t.Run(arch, func(t *testing.T) {
|
||||||
|
c, err := NewClient(reg.repoRef())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewClient: %v", err)
|
||||||
|
}
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = arch
|
||||||
|
|
||||||
|
meta, err := c.FetchMetadata(context.Background(), "stable")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FetchMetadata: %v", err)
|
||||||
|
}
|
||||||
|
if meta.Architecture != arch {
|
||||||
|
t.Errorf("arch annotation: got %q, want %q", meta.Architecture, arch)
|
||||||
|
}
|
||||||
|
if meta.Version != "v0.3.0" {
|
||||||
|
t.Errorf("version: got %q, want v0.3.0", meta.Version)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFetchMetadataIndexMissingArchErrors(t *testing.T) {
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
reg.seedIndex(t, "stable", map[string]map[string]string{
|
||||||
|
"amd64": {AnnotVersion: "v0.3.0", AnnotArch: "amd64"},
|
||||||
|
})
|
||||||
|
|
||||||
|
c, _ := NewClient(reg.repoRef())
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = "arm64" // not in the index
|
||||||
|
|
||||||
|
_, err := c.FetchMetadata(context.Background(), "stable")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for missing arch, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "arm64") {
|
||||||
|
t.Errorf("expected error mentioning arm64, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFetchMetadataSingleArchManifestRejectsCrossArch(t *testing.T) {
|
||||||
|
// If the manifest declares an arch via annotation and it doesn't match
|
||||||
|
// our runtime, Pull should refuse — defense in depth on top of the
|
||||||
|
// channel/version gates in cmd/apply.go.
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
reg.seedSingleArchManifest(t, "v0.3.0-arm64", map[string]string{
|
||||||
|
AnnotArch: "arm64",
|
||||||
|
})
|
||||||
|
|
||||||
|
c, _ := NewClient(reg.repoRef())
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = "amd64"
|
||||||
|
|
||||||
|
_, err := c.FetchMetadata(context.Background(), "v0.3.0-arm64")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error pulling cross-arch single-arch manifest, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPullDownloadsBlobsAndVerifiesDigest(t *testing.T) {
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
kernelData, initramfsData := reg.seedSingleArchManifest(t, "v0.3.0",
|
||||||
|
map[string]string{AnnotVersion: "v0.3.0", AnnotArch: "amd64"})
|
||||||
|
|
||||||
|
c, _ := NewClient(reg.repoRef())
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = "amd64"
|
||||||
|
|
||||||
|
stageDir := filepath.Join(t.TempDir(), "stage")
|
||||||
|
staged, meta, err := c.Pull(context.Background(), "v0.3.0", stageDir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Pull: %v", err)
|
||||||
|
}
|
||||||
|
if meta.Version != "v0.3.0" {
|
||||||
|
t.Errorf("meta version: got %q", meta.Version)
|
||||||
|
}
|
||||||
|
if staged.Version != "v0.3.0" {
|
||||||
|
t.Errorf("staged version: got %q", staged.Version)
|
||||||
|
}
|
||||||
|
|
||||||
|
gotKernel, err := os.ReadFile(staged.VmlinuzPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read kernel: %v", err)
|
||||||
|
}
|
||||||
|
if string(gotKernel) != string(kernelData) {
|
||||||
|
t.Errorf("kernel mismatch:\n got %q\nwant %q", gotKernel, kernelData)
|
||||||
|
}
|
||||||
|
gotInit, err := os.ReadFile(staged.InitramfsPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read initramfs: %v", err)
|
||||||
|
}
|
||||||
|
if string(gotInit) != string(initramfsData) {
|
||||||
|
t.Errorf("initramfs mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPullRejectsTamperedBlob(t *testing.T) {
|
||||||
|
// Mutate the kernel blob after it's been digested into the manifest.
|
||||||
|
// Pull should refuse with a digest mismatch.
|
||||||
|
reg := newFakeRegistry(t)
|
||||||
|
_, _ = reg.seedSingleArchManifest(t, "v0.3.0",
|
||||||
|
map[string]string{AnnotVersion: "v0.3.0", AnnotArch: "amd64"})
|
||||||
|
|
||||||
|
// Corrupt every stored kernel blob in the registry by replacing its body.
|
||||||
|
for d, m := range reg.mediaTypes {
|
||||||
|
if m == MediaKernel {
|
||||||
|
reg.blobs[d] = []byte("TAMPERED-KERNEL-WRONG-LENGTH-AND-DIGEST")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c, _ := NewClient(reg.repoRef())
|
||||||
|
c.WithPlainHTTP(true)
|
||||||
|
c.Arch = "amd64"
|
||||||
|
|
||||||
|
_, _, err := c.Pull(context.Background(), "v0.3.0", filepath.Join(t.TempDir(), "stage"))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected digest mismatch error on tampered blob, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "mismatch") {
|
||||||
|
t.Errorf("expected mismatch in error, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewClientRejectsGarbageReference(t *testing.T) {
|
||||||
|
_, err := NewClient("not a valid reference")
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error on bad reference, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
34
update/pkg/partition/freespace.go
Normal file
34
update/pkg/partition/freespace.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package partition
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FreeBytes returns the number of free bytes available on the filesystem
// containing `path`. Uses statfs(2); path must exist and be readable.
func FreeBytes(path string) (uint64, error) {
	var fs syscall.Statfs_t
	if statErr := syscall.Statfs(path, &fs); statErr != nil {
		return 0, fmt.Errorf("statfs %s: %w", path, statErr)
	}
	// Bavail counts the blocks available to unprivileged users (what `df`
	// shows); multiplying by the block size yields bytes.
	//nolint:unconvert // Bavail is uint64 on most platforms but int64 on darwin/freebsd
	return uint64(fs.Bavail) * uint64(fs.Bsize), nil
}
|
||||||
|
|
||||||
|
// HasFreeSpaceFor reports whether `path`'s filesystem has at least `wantBytes`
|
||||||
|
// of free space, with `headroomPct` reserved (e.g. 10 = require 110% of want).
|
||||||
|
// Returns the available bytes alongside, so callers can render a useful error.
|
||||||
|
func HasFreeSpaceFor(path string, wantBytes int64, headroomPct int) (avail uint64, ok bool, err error) {
|
||||||
|
avail, err = FreeBytes(path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, false, err
|
||||||
|
}
|
||||||
|
if wantBytes < 0 {
|
||||||
|
return avail, false, fmt.Errorf("invalid wantBytes %d", wantBytes)
|
||||||
|
}
|
||||||
|
required := uint64(wantBytes) * uint64(100+headroomPct) / 100
|
||||||
|
return avail, avail >= required, nil
|
||||||
|
}
|
||||||
44
update/pkg/partition/freespace_test.go
Normal file
44
update/pkg/partition/freespace_test.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package partition
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFreeBytesReturnsNonZeroOnTempDir(t *testing.T) {
|
||||||
|
b, err := FreeBytes(t.TempDir())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FreeBytes: %v", err)
|
||||||
|
}
|
||||||
|
// On any sane test runner the temp filesystem has more than 1 KiB free.
|
||||||
|
if b < 1024 {
|
||||||
|
t.Errorf("FreeBytes = %d, want > 1024 on /tmp", b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFreeBytesNonExistentPath(t *testing.T) {
|
||||||
|
_, err := FreeBytes("/this/path/does/not/exist/at/all")
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for missing path, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHasFreeSpaceForRejectsHugeRequest(t *testing.T) {
|
||||||
|
// Request 1 PiB with 10% headroom on /tmp — no test runner has that
|
||||||
|
// much free, so this should consistently report not-enough.
|
||||||
|
avail, ok, err := HasFreeSpaceFor(t.TempDir(), 1<<50, 10)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("HasFreeSpaceFor: %v", err)
|
||||||
|
}
|
||||||
|
if ok {
|
||||||
|
t.Errorf("expected insufficient space for 1PiB, got avail=%d ok=true", avail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHasFreeSpaceForAcceptsSmallRequest(t *testing.T) {
|
||||||
|
// 1 KiB with 10% headroom = 1.1 KiB. Any temp dir has this.
|
||||||
|
_, ok, err := HasFreeSpaceFor(t.TempDir(), 1024, 10)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("HasFreeSpaceFor: %v", err)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
t.Error("expected sufficient space for 1KiB on /tmp")
|
||||||
|
}
|
||||||
|
}
|
||||||
206
update/pkg/state/state.go
Normal file
206
update/pkg/state/state.go
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
// Package state tracks the lifecycle of an OS update on disk.
|
||||||
|
//
|
||||||
|
// The state file (default /var/lib/kubesolo/update/state.json) records which
|
||||||
|
// phase the agent is in, what versions are involved, when the attempt started,
|
||||||
|
// any error from the last operation, and how many attempts have been made.
|
||||||
|
// Updates are atomic via tmp+rename, so a crash mid-write doesn't corrupt the
|
||||||
|
// state.
|
||||||
|
//
|
||||||
|
// Consumers:
|
||||||
|
// - cmd/check, cmd/apply, cmd/activate, cmd/healthcheck, cmd/rollback —
|
||||||
|
// transition the phase as they enter / leave their operations.
|
||||||
|
// - cmd/status --json — emits the raw state for orchestration tooling.
|
||||||
|
// - pkg/metrics — reads the state at scrape time to expose phase and
|
||||||
|
// attempt-count gauges.
|
||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultPath is where state.json lives on a live system. The directory is on
// the persistent data partition so the file survives A/B slot switches.
const DefaultPath = "/var/lib/kubesolo/update/state.json"

// Phase represents the current step in the update lifecycle.
//
// Terminal phases (Success, RolledBack, Failed) describe the outcome of the
// most recent attempt; transient phases (Checking, Downloading, Staged,
// Activated, Verifying) describe in-progress work. Idle means no update has
// been attempted yet, or the previous attempt has been acknowledged.
type Phase string

const (
	// PhaseIdle — no update in progress.
	PhaseIdle Phase = "idle"
	// PhaseChecking — querying the update server for new versions.
	PhaseChecking Phase = "checking"
	// PhaseDownloading — pulling artifacts from the server.
	PhaseDownloading Phase = "downloading"
	// PhaseStaged — artifacts written to the passive partition; not yet active.
	PhaseStaged Phase = "staged"
	// PhaseActivated — passive slot promoted; next boot will use the new version.
	PhaseActivated Phase = "activated"
	// PhaseVerifying — post-boot healthcheck in progress on the new version.
	PhaseVerifying Phase = "verifying"
	// PhaseSuccess — last attempt completed and verified.
	PhaseSuccess Phase = "success"
	// PhaseRolledBack — last attempt failed verification; reverted to prior slot.
	PhaseRolledBack Phase = "rolled_back"
	// PhaseFailed — last attempt failed before reaching activation (download,
	// checksum, signature, etc.). System still on the original slot.
	PhaseFailed Phase = "failed"
)

// validPhases lists every legal Phase value. Anything not in this set is
// rejected by Save() to catch typos; see UpdateState.Save.
var validPhases = map[Phase]struct{}{
	PhaseIdle:        {},
	PhaseChecking:    {},
	PhaseDownloading: {},
	PhaseStaged:      {},
	PhaseActivated:   {},
	PhaseVerifying:   {},
	PhaseSuccess:     {},
	PhaseRolledBack:  {},
	PhaseFailed:      {},
}
|
||||||
|
|
||||||
|
// UpdateState is the on-disk representation. Fields use JSON tags so the
// file format is forward-compatible (extra fields ignored, missing fields
// default to their zero values).
type UpdateState struct {
	// Phase is the current lifecycle position.
	Phase Phase `json:"phase"`
	// FromVersion is the version the system was running before the attempt.
	// Empty when no attempt has run. Set once via SetFromVersion.
	FromVersion string `json:"from_version,omitempty"`
	// ToVersion is the version the attempt is targeting.
	// Empty when no attempt has run.
	ToVersion string `json:"to_version,omitempty"`
	// StartedAt is when the current attempt entered a non-Idle phase.
	StartedAt time.Time `json:"started_at,omitempty"`
	// UpdatedAt is the last time the file was written. Always set on Save().
	UpdatedAt time.Time `json:"updated_at"`
	// LastError carries the most recent operation error, populated when
	// transitioning to PhaseFailed or PhaseRolledBack. Cleared on Success/Idle.
	LastError string `json:"last_error,omitempty"`
	// AttemptCount counts attempts at the current ToVersion. Reset when
	// ToVersion changes; incremented when a transition leaves Idle (see
	// Transition for the exact rules).
	AttemptCount int `json:"attempt_count"`

	// HealthCheckFailures counts consecutive post-Activated healthcheck
	// failures. Reset to 0 on a successful healthcheck or after a rollback.
	// Used by `kubesolo-update healthcheck --auto-rollback-after N` to
	// trigger automatic recovery on a wedged new boot.
	HealthCheckFailures int `json:"health_check_failures,omitempty"`
}
|
||||||
|
|
||||||
|
// New returns a fresh Idle state with UpdatedAt set to now.
|
||||||
|
func New() *UpdateState {
|
||||||
|
return &UpdateState{
|
||||||
|
Phase: PhaseIdle,
|
||||||
|
UpdatedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load reads the state from disk. If the file does not exist, returns a fresh
|
||||||
|
// Idle state — this is the normal first-run case, not an error.
|
||||||
|
func Load(path string) (*UpdateState, error) {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return New(), nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("read state %s: %w", path, err)
|
||||||
|
}
|
||||||
|
var s UpdateState
|
||||||
|
if err := json.Unmarshal(data, &s); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse state %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return &s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save writes the state to disk atomically (tmp file + rename), so an
|
||||||
|
// interrupted write never leaves a partial file at `path`.
|
||||||
|
func (s *UpdateState) Save(path string) error {
|
||||||
|
if _, ok := validPhases[s.Phase]; !ok {
|
||||||
|
return fmt.Errorf("invalid phase %q", s.Phase)
|
||||||
|
}
|
||||||
|
s.UpdatedAt = time.Now().UTC()
|
||||||
|
|
||||||
|
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||||
|
return fmt.Errorf("creating state dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.MarshalIndent(s, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal state: %w", err)
|
||||||
|
}
|
||||||
|
data = append(data, '\n')
|
||||||
|
|
||||||
|
tmp := path + ".tmp"
|
||||||
|
if err := os.WriteFile(tmp, data, 0o644); err != nil {
|
||||||
|
return fmt.Errorf("write tmp state: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.Rename(tmp, path); err != nil {
|
||||||
|
_ = os.Remove(tmp)
|
||||||
|
return fmt.Errorf("rename state: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transition moves the state to phase `next` and persists it via Save.
//
// Version handling: if `toVersion` is non-empty and differs from the current
// ToVersion, ToVersion is updated and AttemptCount is reset to 0. The count
// is then bumped to 1 only if this same transition also leaves Idle; a
// mid-flight retarget keeps it at 0 until the next fresh attempt.
// (The previous comment claimed "reset to 1" — the code resets to 0.)
//
// StartedAt is recorded, and AttemptCount incremented, exactly when moving
// out of Idle into any non-Idle phase.
//
// LastError handling: set when `next` is Failed or RolledBack AND errMsg is
// non-empty (an empty errMsg preserves the previous error); cleared when
// `next` is Success or Idle; left untouched for every other phase.
func (s *UpdateState) Transition(path string, next Phase, toVersion, errMsg string) error {
	now := time.Now().UTC()

	// Retargeting to a different version starts a fresh attempt series.
	if toVersion != "" && toVersion != s.ToVersion {
		s.ToVersion = toVersion
		s.AttemptCount = 0
	}

	// First non-Idle phase of an attempt: record start time and bump count.
	if s.Phase == PhaseIdle && next != PhaseIdle {
		s.StartedAt = now
		s.AttemptCount++
	}

	s.Phase = next
	switch next {
	case PhaseFailed, PhaseRolledBack:
		if errMsg != "" {
			s.LastError = errMsg
		}
	case PhaseSuccess, PhaseIdle:
		s.LastError = ""
	}

	return s.Save(path)
}
|
||||||
|
|
||||||
|
// RecordError marks the state as failed with the given error and saves.
|
||||||
|
// Convenience wrapper around Transition for the most common failure path.
|
||||||
|
func (s *UpdateState) RecordError(path string, err error) error {
|
||||||
|
msg := ""
|
||||||
|
if err != nil {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
|
return s.Transition(path, PhaseFailed, "", msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetFromVersion records the version the system was running when an attempt
|
||||||
|
// started. Idempotent; only takes effect when From is empty.
|
||||||
|
func (s *UpdateState) SetFromVersion(v string) {
|
||||||
|
if s.FromVersion == "" {
|
||||||
|
s.FromVersion = v
|
||||||
|
}
|
||||||
|
}
|
||||||
197
update/pkg/state/state_test.go
Normal file
197
update/pkg/state/state_test.go
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// statePath returns a per-test state file path inside t.TempDir().
func statePath(t *testing.T) string {
	t.Helper()
	dir := t.TempDir()
	return filepath.Join(dir, "state.json")
}
|
||||||
|
|
||||||
|
func TestLoadMissingReturnsIdle(t *testing.T) {
|
||||||
|
s, err := Load(filepath.Join(t.TempDir(), "does-not-exist.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error loading missing state: %v", err)
|
||||||
|
}
|
||||||
|
if s.Phase != PhaseIdle {
|
||||||
|
t.Errorf("missing file: phase=%q, want %q", s.Phase, PhaseIdle)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSaveLoadRoundTrip(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
in := &UpdateState{
|
||||||
|
Phase: PhaseStaged,
|
||||||
|
FromVersion: "v0.2.0",
|
||||||
|
ToVersion: "v0.3.0",
|
||||||
|
AttemptCount: 1,
|
||||||
|
}
|
||||||
|
if err := in.Save(path); err != nil {
|
||||||
|
t.Fatalf("save: %v", err)
|
||||||
|
}
|
||||||
|
out, err := Load(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("load: %v", err)
|
||||||
|
}
|
||||||
|
if out.Phase != in.Phase {
|
||||||
|
t.Errorf("phase: got %q, want %q", out.Phase, in.Phase)
|
||||||
|
}
|
||||||
|
if out.FromVersion != in.FromVersion {
|
||||||
|
t.Errorf("from_version: got %q, want %q", out.FromVersion, in.FromVersion)
|
||||||
|
}
|
||||||
|
if out.ToVersion != in.ToVersion {
|
||||||
|
t.Errorf("to_version: got %q, want %q", out.ToVersion, in.ToVersion)
|
||||||
|
}
|
||||||
|
if out.AttemptCount != in.AttemptCount {
|
||||||
|
t.Errorf("attempt_count: got %d, want %d", out.AttemptCount, in.AttemptCount)
|
||||||
|
}
|
||||||
|
if out.UpdatedAt.IsZero() {
|
||||||
|
t.Error("UpdatedAt should be set by Save")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSaveRejectsInvalidPhase(t *testing.T) {
|
||||||
|
s := &UpdateState{Phase: Phase("bogus")}
|
||||||
|
err := s.Save(statePath(t))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error saving invalid phase, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSaveIsAtomic(t *testing.T) {
|
||||||
|
// After Save, the .tmp file should NOT exist — confirming we renamed it.
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
if err := s.Save(path); err != nil {
|
||||||
|
t.Fatalf("save: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
|
||||||
|
t.Errorf("tmp file still present after Save: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSaveCreatesDirectory(t *testing.T) {
|
||||||
|
// State directory may not exist yet (first-ever boot). Save() should mkdir.
|
||||||
|
dir := filepath.Join(t.TempDir(), "fresh", "subdir")
|
||||||
|
path := filepath.Join(dir, "state.json")
|
||||||
|
if err := New().Save(path); err != nil {
|
||||||
|
t.Fatalf("save into nonexistent dir: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(path); err != nil {
|
||||||
|
t.Errorf("state file not present after Save: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTransitionIdleToChecking(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
if err := s.Transition(path, PhaseChecking, "v0.3.0", ""); err != nil {
|
||||||
|
t.Fatalf("transition: %v", err)
|
||||||
|
}
|
||||||
|
if s.Phase != PhaseChecking {
|
||||||
|
t.Errorf("phase: got %q, want %q", s.Phase, PhaseChecking)
|
||||||
|
}
|
||||||
|
if s.ToVersion != "v0.3.0" {
|
||||||
|
t.Errorf("to_version: got %q, want v0.3.0", s.ToVersion)
|
||||||
|
}
|
||||||
|
if s.AttemptCount != 1 {
|
||||||
|
t.Errorf("attempt_count: got %d, want 1 (first attempt after Idle)", s.AttemptCount)
|
||||||
|
}
|
||||||
|
if s.StartedAt.IsZero() {
|
||||||
|
t.Error("StartedAt should be set when leaving Idle")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTransitionRetainsAttemptCountWithinAttempt(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
_ = s.Transition(path, PhaseChecking, "v0.3.0", "")
|
||||||
|
_ = s.Transition(path, PhaseDownloading, "v0.3.0", "")
|
||||||
|
_ = s.Transition(path, PhaseStaged, "v0.3.0", "")
|
||||||
|
if s.AttemptCount != 1 {
|
||||||
|
t.Errorf("attempt_count after staying on same version: got %d, want 1", s.AttemptCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTransitionResetsAttemptCountOnNewVersion(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
_ = s.Transition(path, PhaseChecking, "v0.3.0", "")
|
||||||
|
// Now an attempt at a NEW version starts. AttemptCount should reset.
|
||||||
|
_ = s.Transition(path, PhaseChecking, "v0.4.0", "")
|
||||||
|
if s.ToVersion != "v0.4.0" {
|
||||||
|
t.Errorf("to_version: got %q, want v0.4.0", s.ToVersion)
|
||||||
|
}
|
||||||
|
if s.AttemptCount != 0 {
|
||||||
|
t.Errorf("attempt_count after new ToVersion: got %d, want 0 (reset)", s.AttemptCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTransitionFailedRecordsError(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
_ = s.Transition(path, PhaseDownloading, "v0.3.0", "")
|
||||||
|
_ = s.Transition(path, PhaseFailed, "v0.3.0", "checksum mismatch")
|
||||||
|
if s.Phase != PhaseFailed {
|
||||||
|
t.Errorf("phase: got %q, want %q", s.Phase, PhaseFailed)
|
||||||
|
}
|
||||||
|
if s.LastError != "checksum mismatch" {
|
||||||
|
t.Errorf("last_error: got %q, want %q", s.LastError, "checksum mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTransitionSuccessClearsError(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
_ = s.Transition(path, PhaseFailed, "v0.3.0", "boom")
|
||||||
|
if s.LastError == "" {
|
||||||
|
t.Fatal("setup: LastError should be non-empty before success")
|
||||||
|
}
|
||||||
|
_ = s.Transition(path, PhaseSuccess, "v0.3.0", "")
|
||||||
|
if s.LastError != "" {
|
||||||
|
t.Errorf("last_error after success: got %q, want empty", s.LastError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordError(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
s := New()
|
||||||
|
if err := s.RecordError(path, errors.New("network down")); err != nil {
|
||||||
|
t.Fatalf("RecordError: %v", err)
|
||||||
|
}
|
||||||
|
if s.Phase != PhaseFailed {
|
||||||
|
t.Errorf("phase: got %q, want %q", s.Phase, PhaseFailed)
|
||||||
|
}
|
||||||
|
if s.LastError != "network down" {
|
||||||
|
t.Errorf("last_error: got %q, want %q", s.LastError, "network down")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSetFromVersionIdempotent(t *testing.T) {
|
||||||
|
s := New()
|
||||||
|
s.SetFromVersion("v0.2.0")
|
||||||
|
if s.FromVersion != "v0.2.0" {
|
||||||
|
t.Errorf("from_version: got %q, want v0.2.0", s.FromVersion)
|
||||||
|
}
|
||||||
|
// Second call should not overwrite.
|
||||||
|
s.SetFromVersion("v0.1.0")
|
||||||
|
if s.FromVersion != "v0.2.0" {
|
||||||
|
t.Errorf("from_version after second SetFromVersion: got %q, want v0.2.0 (immutable)", s.FromVersion)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadHandlesGarbageFile(t *testing.T) {
|
||||||
|
path := statePath(t)
|
||||||
|
if err := os.WriteFile(path, []byte("not json"), 0o644); err != nil {
|
||||||
|
t.Fatalf("seed: %v", err)
|
||||||
|
}
|
||||||
|
_, err := Load(path)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error loading garbage, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user