commit f8ef695f20ee44ff28b4d62b4a8d4d056ba874d7 Author: Adolfo Delorenzo Date: Sat Jan 21 17:41:53 2023 -0600 first commit diff --git a/all-in-one.yaml b/all-in-one.yaml new file mode 100644 index 0000000..2ec290f --- /dev/null +++ b/all-in-one.yaml @@ -0,0 +1,829 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - 
list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: default + namespace: monitoring +--- +apiVersion: v1 +data: + config.yml: |- + global: + templates: + - '/etc/alertmanager/*.tmpl' + route: + receiver: alert-emailer + group_by: ['alertname', 'priority'] + group_wait: 10s + repeat_interval: 30m + routes: + - receiver: slack_demo + # Send severity=slack alerts to slack. 
+ match: + severity: slack + group_wait: 10s + repeat_interval: 1m + + receivers: + - name: alert-emailer + email_configs: + - to: demo@devopscube.com + send_resolved: false + from: from-email@email.com + smarthost: smtp.eample.com:25 + require_tls: false + - name: slack_demo + slack_configs: + - api_url: https://hooks.slack.com/services/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sz4Qk + channel: '#devopscube-demo' +kind: ConfigMap +metadata: + name: alertmanager-config + namespace: monitoring +--- +apiVersion: v1 +data: + default.tmpl: | + {{ define "__alertmanager" }}AlertManager{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__description" }}{{ end }} + {{ define "__text_alert_list" }}{{ range . }}Labels: + {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Annotations: + {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Source: {{ .GeneratorURL }} + {{ end }}{{ end }} + {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} + {{ define "slack.default.pretext" }}{{ end }} + {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "slack.default.iconemoji" }}{{ end }} + {{ define "slack.default.iconurl" }}{{ end }} + {{ define "slack.default.text" }}{{ end }} + {{ define "hipchat.default.from" }}{{ template "__alertmanager" . 
}}{{ end }} + {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} + {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 -}} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{- end }} + {{ if gt (len .Alerts.Resolved) 0 -}} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{- end }} + {{- end }} + {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }} + {{ define "email.default.html" }} + + + + + + + {{ template "__subject" . }} + + + + + + + + +
+
+ + + + + + + +
+ {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} + {{ .Name }}={{ .Value }} + {{ end }} +
+ + + + + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + {{ range .Alerts.Firing }} + + + + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + + + + {{ end }} + {{ range .Alerts.Resolved }} + + + + {{ end }} +
+ View in {{ template "__alertmanager" . }} +
+ [{{ .Alerts.Firing | len }}] Firing +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+
+ [{{ .Alerts.Resolved | len }}] Resolved +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+ + + {{ end }} + {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 }} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{ end }} + {{ end }} + {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }} + slack.tmpl: | + {{ define "slack.devops.text" }} + {{range .Alerts}}{{.Annotations.DESCRIPTION}} + {{end}} + {{ end }} +kind: ConfigMap +metadata: + creationTimestamp: null + name: alertmanager-templates + namespace: monitoring +--- +apiVersion: v1 +data: + prometheus.yaml: |- + { + "apiVersion": 1, + "datasources": [ + { + "access":"proxy", + "editable": true, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus-service.monitoring.svc:8080", + "version": 1 + } + ] + } +kind: ConfigMap +metadata: + name: grafana-datasources + namespace: monitoring +--- +apiVersion: v1 +data: + prometheus.rules: |- + groups: + - name: devopscube demo alert + rules: + - alert: High Pod Memory + expr: sum(container_memory_usage_bytes) > 1 + for: 1m + labels: + severity: slack + annotations: + summary: High Memory Usage + prometheus.yml: "global:\n scrape_interval: 5s\n evaluation_interval: 5s\nrule_files:\n + \ - /etc/prometheus/prometheus.rules\nalerting:\n alertmanagers:\n - scheme: + http\n static_configs:\n - targets:\n - \"alertmanager.monitoring.svc:9093\"\n\nscrape_configs:\n + \ - job_name: 'node-exporter'\n kubernetes_sd_configs:\n - role: endpoints\n + \ relabel_configs:\n - source_labels: [__meta_kubernetes_endpoints_name]\n + \ regex: 'node-exporter'\n action: keep\n \n - job_name: 'kubernetes-apiservers'\n\n + \ kubernetes_sd_configs:\n - role: endpoints\n scheme: https\n\n tls_config:\n + \ ca_file: 
/var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n bearer_token_file: + /var/run/secrets/kubernetes.io/serviceaccount/token\n\n relabel_configs:\n + \ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, + __meta_kubernetes_endpoint_port_name]\n action: keep\n regex: default;kubernetes;https\n\n + \ - job_name: 'kubernetes-nodes'\n\n scheme: https\n\n tls_config:\n ca_file: + /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token\n\n + \ kubernetes_sd_configs:\n - role: node\n\n relabel_configs:\n - action: + labelmap\n regex: __meta_kubernetes_node_label_(.+)\n - target_label: + __address__\n replacement: kubernetes.default.svc:443\n - source_labels: + [__meta_kubernetes_node_name]\n regex: (.+)\n target_label: __metrics_path__\n + \ replacement: /api/v1/nodes/${1}/proxy/metrics \n \n - job_name: 'kubernetes-pods'\n\n + \ kubernetes_sd_configs:\n - role: pod\n\n relabel_configs:\n - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape]\n action: keep\n + \ regex: true\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]\n + \ action: replace\n target_label: __metrics_path__\n regex: (.+)\n + \ - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]\n + \ action: replace\n regex: ([^:]+)(?::\\d+)?;(\\d+)\n replacement: + $1:$2\n target_label: __address__\n - action: labelmap\n regex: __meta_kubernetes_pod_label_(.+)\n + \ - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: + kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: + replace\n target_label: kubernetes_pod_name\n \n - job_name: 'kube-state-metrics'\n + \ static_configs:\n - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']\n\n + \ - job_name: 'kubernetes-cadvisor'\n\n scheme: https\n\n tls_config:\n + \ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n 
bearer_token_file: + /var/run/secrets/kubernetes.io/serviceaccount/token\n\n kubernetes_sd_configs:\n + \ - role: node\n\n relabel_configs:\n - action: labelmap\n regex: + __meta_kubernetes_node_label_(.+)\n - target_label: __address__\n replacement: + kubernetes.default.svc:443\n - source_labels: [__meta_kubernetes_node_name]\n + \ regex: (.+)\n target_label: __metrics_path__\n replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor\n + \ \n - job_name: 'kubernetes-service-endpoints'\n\n kubernetes_sd_configs:\n + \ - role: endpoints\n\n relabel_configs:\n - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]\n + \ action: keep\n regex: true\n - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]\n + \ action: replace\n target_label: __scheme__\n regex: (https?)\n + \ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]\n + \ action: replace\n target_label: __metrics_path__\n regex: (.+)\n + \ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]\n + \ action: replace\n target_label: __address__\n regex: ([^:]+)(?::\\d+)?;(\\d+)\n + \ replacement: $1:$2\n - action: labelmap\n regex: __meta_kubernetes_service_label_(.+)\n + \ - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: + kubernetes_namespace\n - source_labels: [__meta_kubernetes_service_name]\n + \ action: replace\n target_label: kubernetes_name" +kind: ConfigMap +metadata: + labels: + name: prometheus-server-conf + name: prometheus-server-conf + namespace: monitoring +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + 
app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9093" + prometheus.io/scrape: "true" + name: alertmanager + namespace: monitoring +spec: + ports: + - nodePort: 31000 + port: 9093 + targetPort: 9093 + selector: + app: alertmanager + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "3000" + prometheus.io/scrape: "true" + name: grafana + namespace: monitoring +spec: + ports: + - nodePort: 32000 + port: 3000 + targetPort: 3000 + selector: + app: grafana + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9100" + prometheus.io/scrape: "true" + name: node-exporter + namespace: monitoring +spec: + ports: + - name: node-exporter + port: 9100 + protocol: TCP + targetPort: 9100 + selector: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9090" + prometheus.io/scrape: "true" + name: prometheus-service + namespace: monitoring +spec: + ports: + - nodePort: 30000 + port: 8080 + targetPort: 9090 + selector: + app: prometheus-server + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + spec: + automountServiceAccountToken: true + containers: + - image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.3.0 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - 
containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 65534 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alertmanager + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: alertmanager + template: + metadata: + labels: + app: alertmanager + name: alertmanager + spec: + containers: + - args: + - --config.file=/etc/alertmanager/config.yml + - --storage.path=/alertmanager + image: prom/alertmanager:latest + name: alertmanager + ports: + - containerPort: 9093 + name: alertmanager + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 500m + memory: 500M + volumeMounts: + - mountPath: /etc/alertmanager + name: config-volume + - mountPath: /etc/alertmanager-templates + name: templates-volume + - mountPath: /alertmanager + name: alertmanager + volumes: + - configMap: + name: alertmanager-config + name: config-volume + - configMap: + name: alertmanager-templates + name: templates-volume + - emptyDir: {} + name: alertmanager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + name: grafana + spec: + containers: + - image: grafana/grafana:latest + name: grafana + ports: + - containerPort: 3000 + name: grafana + resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 500m + memory: 500M + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + volumes: + - emptyDir: {} + name: grafana-storage + - configMap: + defaultMode: 420 + name: 
grafana-datasources + name: grafana-datasources +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: prometheus-server + name: prometheus-deployment + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-server + template: + metadata: + labels: + app: prometheus-server + spec: + containers: + - args: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus/ + image: prom/prometheus + name: prometheus + ports: + - containerPort: 9090 + volumeMounts: + - mountPath: /etc/prometheus/ + name: prometheus-config-volume + - mountPath: /prometheus/ + name: prometheus-storage-volume + volumes: + - configMap: + defaultMode: 420 + name: prometheus-server-conf + name: prometheus-config-volume + - emptyDir: {} + name: prometheus-storage-volume +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + name: node-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + spec: + containers: + - args: + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --no-collector.wifi + - --no-collector.hwmon + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^(veth.*)$ + image: prom/node-exporter + name: node-exporter + ports: + - containerPort: 9100 + protocol: TCP + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/sys + mountPropagation: HostToContainer + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + volumes: + - hostPath: + path: /sys + name: sys + - hostPath: + 
path: / + name: root +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: monitoring +spec: + ingressClassName: nginx + rules: + - host: grafana.box.oe74.net + http: + paths: + - backend: + service: + name: grafana + port: + number: 32000 + path: / + pathType: Prefix diff --git a/compiled.yaml b/compiled.yaml new file mode 100644 index 0000000..2ec290f --- /dev/null +++ b/compiled.yaml @@ -0,0 +1,829 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - 
volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: default + namespace: monitoring +--- +apiVersion: v1 +data: + config.yml: |- + global: + templates: + - '/etc/alertmanager/*.tmpl' + route: + receiver: alert-emailer + group_by: ['alertname', 'priority'] + group_wait: 10s + repeat_interval: 30m + routes: + - receiver: slack_demo + # Send severity=slack alerts to slack. 
+ match: + severity: slack + group_wait: 10s + repeat_interval: 1m + + receivers: + - name: alert-emailer + email_configs: + - to: demo@devopscube.com + send_resolved: false + from: from-email@email.com + smarthost: smtp.eample.com:25 + require_tls: false + - name: slack_demo + slack_configs: + - api_url: https://hooks.slack.com/services/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sz4Qk + channel: '#devopscube-demo' +kind: ConfigMap +metadata: + name: alertmanager-config + namespace: monitoring +--- +apiVersion: v1 +data: + default.tmpl: | + {{ define "__alertmanager" }}AlertManager{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__description" }}{{ end }} + {{ define "__text_alert_list" }}{{ range . }}Labels: + {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Annotations: + {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Source: {{ .GeneratorURL }} + {{ end }}{{ end }} + {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} + {{ define "slack.default.pretext" }}{{ end }} + {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "slack.default.iconemoji" }}{{ end }} + {{ define "slack.default.iconurl" }}{{ end }} + {{ define "slack.default.text" }}{{ end }} + {{ define "hipchat.default.from" }}{{ template "__alertmanager" . 
}}{{ end }} + {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} + {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 -}} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{- end }} + {{ if gt (len .Alerts.Resolved) 0 -}} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{- end }} + {{- end }} + {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }} + {{ define "email.default.html" }} + + + + + + + {{ template "__subject" . }} + + + + + + + + +
+
+ + + + + + + +
+ {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} + {{ .Name }}={{ .Value }} + {{ end }} +
+ + + + + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + {{ range .Alerts.Firing }} + + + + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + + + + {{ end }} + {{ range .Alerts.Resolved }} + + + + {{ end }} +
+ View in {{ template "__alertmanager" . }} +
+ [{{ .Alerts.Firing | len }}] Firing +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+
+ [{{ .Alerts.Resolved | len }}] Resolved +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+ + + {{ end }} + {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 }} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{ end }} + {{ end }} + {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }} + slack.tmpl: | + {{ define "slack.devops.text" }} + {{range .Alerts}}{{.Annotations.DESCRIPTION}} + {{end}} + {{ end }} +kind: ConfigMap +metadata: + creationTimestamp: null + name: alertmanager-templates + namespace: monitoring +--- +apiVersion: v1 +data: + prometheus.yaml: |- + { + "apiVersion": 1, + "datasources": [ + { + "access":"proxy", + "editable": true, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus-service.monitoring.svc:8080", + "version": 1 + } + ] + } +kind: ConfigMap +metadata: + name: grafana-datasources + namespace: monitoring +--- +apiVersion: v1 +data: + prometheus.rules: |- + groups: + - name: devopscube demo alert + rules: + - alert: High Pod Memory + expr: sum(container_memory_usage_bytes) > 1 + for: 1m + labels: + severity: slack + annotations: + summary: High Memory Usage + prometheus.yml: "global:\n scrape_interval: 5s\n evaluation_interval: 5s\nrule_files:\n + \ - /etc/prometheus/prometheus.rules\nalerting:\n alertmanagers:\n - scheme: + http\n static_configs:\n - targets:\n - \"alertmanager.monitoring.svc:9093\"\n\nscrape_configs:\n + \ - job_name: 'node-exporter'\n kubernetes_sd_configs:\n - role: endpoints\n + \ relabel_configs:\n - source_labels: [__meta_kubernetes_endpoints_name]\n + \ regex: 'node-exporter'\n action: keep\n \n - job_name: 'kubernetes-apiservers'\n\n + \ kubernetes_sd_configs:\n - role: endpoints\n scheme: https\n\n tls_config:\n + \ ca_file: 
/var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n bearer_token_file: + /var/run/secrets/kubernetes.io/serviceaccount/token\n\n relabel_configs:\n + \ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, + __meta_kubernetes_endpoint_port_name]\n action: keep\n regex: default;kubernetes;https\n\n + \ - job_name: 'kubernetes-nodes'\n\n scheme: https\n\n tls_config:\n ca_file: + /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token\n\n + \ kubernetes_sd_configs:\n - role: node\n\n relabel_configs:\n - action: + labelmap\n regex: __meta_kubernetes_node_label_(.+)\n - target_label: + __address__\n replacement: kubernetes.default.svc:443\n - source_labels: + [__meta_kubernetes_node_name]\n regex: (.+)\n target_label: __metrics_path__\n + \ replacement: /api/v1/nodes/${1}/proxy/metrics \n \n - job_name: 'kubernetes-pods'\n\n + \ kubernetes_sd_configs:\n - role: pod\n\n relabel_configs:\n - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape]\n action: keep\n + \ regex: true\n - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]\n + \ action: replace\n target_label: __metrics_path__\n regex: (.+)\n + \ - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]\n + \ action: replace\n regex: ([^:]+)(?::\\d+)?;(\\d+)\n replacement: + $1:$2\n target_label: __address__\n - action: labelmap\n regex: __meta_kubernetes_pod_label_(.+)\n + \ - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: + kubernetes_namespace\n - source_labels: [__meta_kubernetes_pod_name]\n action: + replace\n target_label: kubernetes_pod_name\n \n - job_name: 'kube-state-metrics'\n + \ static_configs:\n - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']\n\n + \ - job_name: 'kubernetes-cadvisor'\n\n scheme: https\n\n tls_config:\n + \ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n 
bearer_token_file: + /var/run/secrets/kubernetes.io/serviceaccount/token\n\n kubernetes_sd_configs:\n + \ - role: node\n\n relabel_configs:\n - action: labelmap\n regex: + __meta_kubernetes_node_label_(.+)\n - target_label: __address__\n replacement: + kubernetes.default.svc:443\n - source_labels: [__meta_kubernetes_node_name]\n + \ regex: (.+)\n target_label: __metrics_path__\n replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor\n + \ \n - job_name: 'kubernetes-service-endpoints'\n\n kubernetes_sd_configs:\n + \ - role: endpoints\n\n relabel_configs:\n - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]\n + \ action: keep\n regex: true\n - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]\n + \ action: replace\n target_label: __scheme__\n regex: (https?)\n + \ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]\n + \ action: replace\n target_label: __metrics_path__\n regex: (.+)\n + \ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]\n + \ action: replace\n target_label: __address__\n regex: ([^:]+)(?::\\d+)?;(\\d+)\n + \ replacement: $1:$2\n - action: labelmap\n regex: __meta_kubernetes_service_label_(.+)\n + \ - source_labels: [__meta_kubernetes_namespace]\n action: replace\n target_label: + kubernetes_namespace\n - source_labels: [__meta_kubernetes_service_name]\n + \ action: replace\n target_label: kubernetes_name" +kind: ConfigMap +metadata: + labels: + name: prometheus-server-conf + name: prometheus-server-conf + namespace: monitoring +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + 
app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9093" + prometheus.io/scrape: "true" + name: alertmanager + namespace: monitoring +spec: + ports: + - nodePort: 31000 + port: 9093 + targetPort: 9093 + selector: + app: alertmanager + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "3000" + prometheus.io/scrape: "true" + name: grafana + namespace: monitoring +spec: + ports: + - nodePort: 32000 + port: 3000 + targetPort: 3000 + selector: + app: grafana + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9100" + prometheus.io/scrape: "true" + name: node-exporter + namespace: monitoring +spec: + ports: + - name: node-exporter + port: 9100 + protocol: TCP + targetPort: 9100 + selector: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/port: "9090" + prometheus.io/scrape: "true" + name: prometheus-service + namespace: monitoring +spec: + ports: + - nodePort: 30000 + port: 8080 + targetPort: 9090 + selector: + app: prometheus-server + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + spec: + automountServiceAccountToken: true + containers: + - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.3.0 # k8s.gcr.io is frozen; registry.k8s.io is its replacement (same path and tag) + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - 
containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 65534 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alertmanager + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: alertmanager + template: + metadata: + labels: + app: alertmanager + name: alertmanager + spec: + containers: + - args: + - --config.file=/etc/alertmanager/config.yml + - --storage.path=/alertmanager + image: prom/alertmanager:latest + name: alertmanager + ports: + - containerPort: 9093 + name: alertmanager + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 500m + memory: 500M + volumeMounts: + - mountPath: /etc/alertmanager + name: config-volume + - mountPath: /etc/alertmanager-templates + name: templates-volume + - mountPath: /alertmanager + name: alertmanager + volumes: + - configMap: + name: alertmanager-config + name: config-volume + - configMap: + name: alertmanager-templates + name: templates-volume + - emptyDir: {} + name: alertmanager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + name: grafana + spec: + containers: + - image: grafana/grafana:latest + name: grafana + ports: + - containerPort: 3000 + name: grafana + resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 500m + memory: 500M + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + volumes: + - emptyDir: {} + name: grafana-storage + - configMap: + defaultMode: 420 + name: 
grafana-datasources + name: grafana-datasources +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: prometheus-server + name: prometheus-deployment + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-server + template: + metadata: + labels: + app: prometheus-server + spec: + containers: + - args: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus/ + image: prom/prometheus + name: prometheus + ports: + - containerPort: 9090 + volumeMounts: + - mountPath: /etc/prometheus/ + name: prometheus-config-volume + - mountPath: /prometheus/ + name: prometheus-storage-volume + volumes: + - configMap: + defaultMode: 420 + name: prometheus-server-conf + name: prometheus-config-volume + - emptyDir: {} + name: prometheus-storage-volume +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + name: node-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + spec: + containers: + - args: + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --no-collector.wifi + - --no-collector.hwmon + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^(veth.*)$ + image: prom/node-exporter + name: node-exporter + ports: + - containerPort: 9100 + protocol: TCP + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/sys + mountPropagation: HostToContainer + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + volumes: + - hostPath: + path: /sys + name: sys + - hostPath: + 
path: / + name: root +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: monitoring +spec: + ingressClassName: nginx + rules: + - host: grafana.box.oe74.net + http: + paths: + - backend: + service: + name: grafana + port: + number: 3000 # Service port of grafana; 32000 is its nodePort and is not a valid Ingress backend port + path: / + pathType: Prefix diff --git a/conf-files/alertmanage.yaml b/conf-files/alertmanage.yaml new file mode 100644 index 0000000..8181362 --- /dev/null +++ b/conf-files/alertmanage.yaml @@ -0,0 +1,282 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: alertmanager-config + namespace: monitoring +data: + config.yml: |- + global: + templates: + - '/etc/alertmanager-templates/*.tmpl' # directory where the alertmanager-templates ConfigMap is mounted + route: + receiver: alert-emailer + group_by: ['alertname', 'priority'] + group_wait: 10s + repeat_interval: 30m + routes: + - receiver: slack_demo + # Send severity=slack alerts to slack. + match: + severity: slack + group_wait: 10s + repeat_interval: 1m + + receivers: + - name: alert-emailer + email_configs: + - to: demo@devopscube.com + send_resolved: false + from: from-email@email.com + smarthost: smtp.example.com:25 + require_tls: false + - name: slack_demo + slack_configs: + - api_url: https://hooks.slack.com/services/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sz4Qk # NOTE(review): webhook URL is a credential; rotate it and keep it out of version control + channel: '#devopscube-demo' + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + creationTimestamp: null + name: alertmanager-templates + namespace: monitoring +data: + default.tmpl: | + {{ define "__alertmanager" }}AlertManager{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__description" }}{{ end }} + {{ define "__text_alert_list" }}{{ range . 
}}Labels: + {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Annotations: + {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Source: {{ .GeneratorURL }} + {{ end }}{{ end }} + {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} + {{ define "slack.default.pretext" }}{{ end }} + {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "slack.default.iconemoji" }}{{ end }} + {{ define "slack.default.iconurl" }}{{ end }} + {{ define "slack.default.text" }}{{ end }} + {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} + {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 -}} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{- end }} + {{ if gt (len .Alerts.Resolved) 0 -}} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{- end }} + {{- end }} + {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . 
}}{{ end }} + {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }} + {{ define "email.default.html" }} + + + + + + + {{ template "__subject" . }} + + + + + + + + +
+
+ + + + + + + +
+ {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} + {{ .Name }}={{ .Value }} + {{ end }} +
+ + + + + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + {{ range .Alerts.Firing }} + + + + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + {{ if gt (len .Alerts.Firing) 0 }} + + + + {{ end }} + + + + {{ end }} + {{ range .Alerts.Resolved }} + + + + {{ end }} +
+ View in {{ template "__alertmanager" . }} +
+ [{{ .Alerts.Firing | len }}] Firing +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+
+ [{{ .Alerts.Resolved | len }}] Resolved +
+ Labels
+ {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + {{ if gt (len .Annotations) 0 }}Annotations
{{ end }} + {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}
{{ end }} + Source
+
+
+
+
+ + + {{ end }} + {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 }} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{ end }} + {{ if gt (len .Alerts.Resolved) 0 }} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{ end }} + {{ end }} + {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }} + slack.tmpl: | + {{ define "slack.devops.text" }} + {{range .Alerts}}{{.Annotations.DESCRIPTION}} + {{end}} + {{ end }} + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alertmanager + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: alertmanager + template: + metadata: + name: alertmanager + labels: + app: alertmanager + spec: + containers: + - name: alertmanager + image: prom/alertmanager:latest + args: + - "--config.file=/etc/alertmanager/config.yml" + - "--storage.path=/alertmanager" + ports: + - name: alertmanager + containerPort: 9093 + resources: + requests: + cpu: 500m + memory: 500M + limits: + cpu: 1 + memory: 1Gi + volumeMounts: + - name: config-volume + mountPath: /etc/alertmanager + - name: templates-volume + mountPath: /etc/alertmanager-templates + - name: alertmanager + mountPath: /alertmanager + volumes: + - name: config-volume + configMap: + name: alertmanager-config + - name: templates-volume + configMap: + name: alertmanager-templates + - name: alertmanager + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + namespace: monitoring + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9093' +spec: + selector: + app: alertmanager + type: NodePort + ports: + - port: 9093 + targetPort: 9093 + nodePort: 31000 diff --git a/conf-files/compiled.yaml b/conf-files/compiled.yaml new file mode 100644 index 0000000..e69de29 diff --git 
a/conf-files/grafana.yaml b/conf-files/grafana.yaml new file mode 100644 index 0000000..fdbc2db --- /dev/null +++ b/conf-files/grafana.yaml @@ -0,0 +1,103 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources + namespace: monitoring +data: + prometheus.yaml: |- + { + "apiVersion": 1, + "datasources": [ + { + "access":"proxy", + "editable": true, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus-service.monitoring.svc:8080", + "version": 1 + } + ] + } + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + name: grafana + labels: + app: grafana + spec: + containers: + - name: grafana + image: grafana/grafana:latest + ports: + - name: grafana + containerPort: 3000 + resources: + limits: + memory: "1Gi" + cpu: "1000m" + requests: + memory: 500M + cpu: "500m" + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + volumes: + - name: grafana-storage + emptyDir: {} + - name: grafana-datasources + configMap: + defaultMode: 420 + name: grafana-datasources + +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: monitoring + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '3000' +spec: + selector: + app: grafana + type: NodePort + ports: + - port: 3000 + targetPort: 3000 + nodePort: 32000 + +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: monitoring +spec: + rules: + - host: grafana.box.oe74.net + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: grafana + port: + number: 3000 # must match the Service's port (3000); 32000 is the nodePort and is not a valid Ingress backend port + ingressClassName: nginx diff --git a/conf-files/kube-state-metrics.yaml b/conf-files/kube-state-metrics.yaml new file mode 100644 index 0000000..958bb62 --- /dev/null +++ 
b/conf-files/kube-state-metrics.yaml @@ -0,0 +1,212 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - 
watch + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + spec: + automountServiceAccountToken: true + containers: + - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.3.0 # k8s.gcr.io is frozen; registry.k8s.io is its replacement (same path and tag) + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 65534 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics + +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.3.0 + name: kube-state-metrics + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + app.kubernetes.io/name: kube-state-metrics diff 
--git a/conf-files/kustomization.yaml b/conf-files/kustomization.yaml new file mode 100644 index 0000000..28a0ff2 --- /dev/null +++ b/conf-files/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - alertmanage.yaml + - grafana.yaml + - kube-state-metrics.yaml + - node-exporter.yaml + - prometheus.yaml + diff --git a/conf-files/node-exporter.yaml b/conf-files/node-exporter.yaml new file mode 100644 index 0000000..eee4162 --- /dev/null +++ b/conf-files/node-exporter.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + name: node-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + spec: + containers: + - args: + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --no-collector.wifi + - --no-collector.hwmon + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^(veth.*)$ + name: node-exporter + image: prom/node-exporter + ports: + - containerPort: 9100 + protocol: TCP + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/sys + mountPropagation: HostToContainer + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + volumes: + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root + + +--- +kind: Service +apiVersion: v1 +metadata: + name: node-exporter + namespace: monitoring + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9100' +spec: + selector: + app.kubernetes.io/component: exporter + 
app.kubernetes.io/name: node-exporter + ports: + - name: node-exporter + protocol: TCP + port: 9100 + targetPort: 9100 diff --git a/conf-files/prometheus.yaml b/conf-files/prometheus.yaml new file mode 100644 index 0000000..1ae0d39 --- /dev/null +++ b/conf-files/prometheus.yaml @@ -0,0 +1,254 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: +- apiGroups: [""] + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: + - networking.k8s.io # Ingress is served from this group; the extensions group stopped serving it in Kubernetes 1.22 + resources: + - ingresses + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics"] + verbs: ["get"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: default + namespace: monitoring + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-server-conf + labels: + name: prometheus-server-conf + namespace: monitoring +data: + prometheus.rules: |- + groups: + - name: devopscube demo alert + rules: + - alert: High Pod Memory + expr: sum(container_memory_usage_bytes) > 1 + for: 1m + labels: + severity: slack + annotations: + summary: High Memory Usage + prometheus.yml: |- + global: + scrape_interval: 5s + evaluation_interval: 5s + rule_files: + - /etc/prometheus/prometheus.rules + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "alertmanager.monitoring.svc:9093" + + scrape_configs: + - job_name: 'node-exporter' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_endpoints_name] + regex: 'node-exporter' + action: keep + + - job_name: 'kubernetes-apiservers' + + kubernetes_sd_configs: + - role: endpoints + scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: 
/var/run/secrets/kubernetes.io/serviceaccount/token + + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + - job_name: 'kubernetes-nodes' + + scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + - job_name: 'kubernetes-pods' + + kubernetes_sd_configs: + - role: pod + + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + + - job_name: 'kubernetes-cadvisor' + + scheme: https + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: 
labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + + - job_name: 'kubernetes-service-endpoints' + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-deployment + namespace: monitoring + labels: + app: prometheus-server +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-server + template: + metadata: + labels: + app: prometheus-server + spec: + containers: + - name: prometheus + image: prom/prometheus + args: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus/" + ports: + - containerPort: 9090 + volumeMounts: + - name: prometheus-config-volume + mountPath: /etc/prometheus/ + - name: prometheus-storage-volume + mountPath: /prometheus/ + volumes: + - name: prometheus-config-volume + configMap: + defaultMode: 420 + name: prometheus-server-conf + + - name: 
prometheus-storage-volume + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus-service + namespace: monitoring + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9090' + +spec: + selector: + app: prometheus-server + type: NodePort + ports: + - port: 8080 + targetPort: 9090 + nodePort: 30000