// Package metrics exposes a lightweight Prometheus-compatible metrics endpoint // for KubeSolo OS system and update status. // // Metrics exposed: // // kubesolo_os_info{version, active_slot} 1 (gauge, labels identify the OS) // kubesolo_os_boot_success 1 or 0 (gauge) // kubesolo_os_boot_counter 0-3 (gauge) // kubesolo_os_uptime_seconds float (gauge) // kubesolo_os_update_available 1 or 0 (gauge) // kubesolo_os_update_last_check_timestamp_seconds unix timestamp (gauge) // kubesolo_os_memory_total_bytes total RAM (gauge) // kubesolo_os_memory_available_bytes available RAM (gauge) // // This is a zero-dependency implementation — no Prometheus client library needed. // It serves metrics in the Prometheus text exposition format. package metrics import ( "fmt" "log/slog" "net/http" "os" "strconv" "strings" "sync" "time" ) // Server is a lightweight Prometheus metrics HTTP server. type Server struct { grubenvPath string listenAddr string startTime time.Time mu sync.Mutex updateAvailable int lastCheckTime float64 } // NewServer creates a new metrics server. func NewServer(listenAddr, grubenvPath string) *Server { return &Server{ grubenvPath: grubenvPath, listenAddr: listenAddr, startTime: time.Now(), } } // SetUpdateAvailable records whether an update is available. func (s *Server) SetUpdateAvailable(available bool) { s.mu.Lock() defer s.mu.Unlock() if available { s.updateAvailable = 1 } else { s.updateAvailable = 0 } s.lastCheckTime = float64(time.Now().Unix()) } // ListenAndServe starts the metrics HTTP server. 
func (s *Server) ListenAndServe() error { mux := http.NewServeMux() mux.HandleFunc("/metrics", s.handleMetrics) mux.HandleFunc("/healthz", s.handleHealthz) slog.Info("starting metrics server", "addr", s.listenAddr) return http.ListenAndServe(s.listenAddr, mux) } func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) fmt.Fprint(w, "ok\n") } func (s *Server) handleMetrics(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") var sb strings.Builder // OS info version := readFileString("/etc/kubesolo-os-version") activeSlot := s.readGrubenvVar("active_slot") sb.WriteString("# HELP kubesolo_os_info KubeSolo OS version and slot info.\n") sb.WriteString("# TYPE kubesolo_os_info gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_info{version=%q,active_slot=%q} 1\n", version, activeSlot)) // Boot status bootSuccess := s.readGrubenvVar("boot_success") bootCounter := s.readGrubenvVar("boot_counter") sb.WriteString("# HELP kubesolo_os_boot_success Whether the current boot was marked successful.\n") sb.WriteString("# TYPE kubesolo_os_boot_success gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_boot_success %s\n", safeInt(bootSuccess, "0"))) sb.WriteString("# HELP kubesolo_os_boot_counter Remaining boot attempts before rollback.\n") sb.WriteString("# TYPE kubesolo_os_boot_counter gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_boot_counter %s\n", safeInt(bootCounter, "0"))) // Uptime uptime := time.Since(s.startTime).Seconds() sb.WriteString("# HELP kubesolo_os_uptime_seconds Time since the metrics server started.\n") sb.WriteString("# TYPE kubesolo_os_uptime_seconds gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_uptime_seconds %.1f\n", uptime)) // Update status s.mu.Lock() updateAvail := s.updateAvailable lastCheck := s.lastCheckTime s.mu.Unlock() sb.WriteString("# HELP kubesolo_os_update_available Whether an OS update is available.\n") sb.WriteString("# TYPE 
kubesolo_os_update_available gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_update_available %d\n", updateAvail)) sb.WriteString("# HELP kubesolo_os_update_last_check_timestamp_seconds Unix timestamp of last update check.\n") sb.WriteString("# TYPE kubesolo_os_update_last_check_timestamp_seconds gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_update_last_check_timestamp_seconds %.0f\n", lastCheck)) // Memory memTotal, memAvail := readMemInfo() sb.WriteString("# HELP kubesolo_os_memory_total_bytes Total system memory in bytes.\n") sb.WriteString("# TYPE kubesolo_os_memory_total_bytes gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_memory_total_bytes %d\n", memTotal)) sb.WriteString("# HELP kubesolo_os_memory_available_bytes Available system memory in bytes.\n") sb.WriteString("# TYPE kubesolo_os_memory_available_bytes gauge\n") sb.WriteString(fmt.Sprintf("kubesolo_os_memory_available_bytes %d\n", memAvail)) fmt.Fprint(w, sb.String()) } // readGrubenvVar reads a single variable from grubenv using simple file parse. func (s *Server) readGrubenvVar(key string) string { data, err := os.ReadFile(s.grubenvPath) if err != nil { return "" } for _, line := range strings.Split(string(data), "\n") { parts := strings.SplitN(line, "=", 2) if len(parts) == 2 && strings.TrimSpace(parts[0]) == key { return strings.TrimSpace(parts[1]) } } return "" } // readFileString reads a file and returns trimmed content. func readFileString(path string) string { data, err := os.ReadFile(path) if err != nil { return "unknown" } return strings.TrimSpace(string(data)) } // readMemInfo parses /proc/meminfo for total and available memory. 
//
// Both results are in bytes. They are zero when the file cannot be read, and a
// value stays zero when its entry is missing or not a valid integer.
func readMemInfo() (total, available int64) {
	raw, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		return 0, 0
	}

	// /proc/meminfo values are in kB
	const bytesPerKB = 1024

	for _, entry := range strings.Split(string(raw), "\n") {
		cols := strings.Fields(entry)
		if len(cols) < 2 {
			continue
		}

		// Pick the destination first; lines for other keys are skipped.
		var dst *int64
		switch cols[0] {
		case "MemTotal:":
			dst = &total
		case "MemAvailable:":
			dst = &available
		default:
			continue
		}

		kb, parseErr := strconv.ParseInt(cols[1], 10, 64)
		if parseErr != nil {
			continue
		}
		*dst = kb * bytesPerKB
	}

	return total, available
}

// safeInt returns s unchanged when it parses as a base-10 integer, and def
// otherwise.
func safeInt(s, def string) string {
	_, err := strconv.Atoi(s)
	if err == nil {
		return s
	}
	return def
}