From 03dbc0ac5c6f6db7207b4f6d67e9acc0e91bc59a Mon Sep 17 00:00:00 2001
From: Vojtech Mares
Date: Thu, 1 Sep 2022 18:02:32 +0200
Subject: [PATCH] feat(cluster-components): add monitoring kube-prometheus-stack + node-problem-detector

Add an Argo CD Application that deploys the umbrella chart in
cluster-components/monitoring (kube-prometheus-stack 39.11.0 and
node-problem-detector 2.2.3) into the monitoring namespace.

Grafana admin credentials are read from the pre-provisioned Secret
"grafana-admin" instead of being stored in values.yaml.

---
 apps/monitoring.yaml                      |  18 +++
 cluster-components/monitoring/Chart.lock  |   9 ++
 cluster-components/monitoring/Chart.yaml  |  14 +++
 cluster-components/monitoring/values.yaml | 150 ++++++++++++++++++++++
 4 files changed, 191 insertions(+)
 create mode 100644 apps/monitoring.yaml
 create mode 100644 cluster-components/monitoring/Chart.lock
 create mode 100644 cluster-components/monitoring/Chart.yaml
 create mode 100644 cluster-components/monitoring/values.yaml

diff --git a/apps/monitoring.yaml b/apps/monitoring.yaml
new file mode 100644
index 0000000..fdfbefc
--- /dev/null
+++ b/apps/monitoring.yaml
@@ -0,0 +1,18 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: monitoring
+  namespace: argocd
+spec:
+  destination:
+    namespace: monitoring
+    server: https://kubernetes.default.svc
+  project: default
+  source:
+    path: cluster-components/monitoring
+    repoURL: https://gitlab.mareshq.com/gitops/mareshq/bee.git
+    targetRevision: HEAD
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
diff --git a/cluster-components/monitoring/Chart.lock b/cluster-components/monitoring/Chart.lock
new file mode 100644
index 0000000..de79f2e
--- /dev/null
+++ b/cluster-components/monitoring/Chart.lock
@@ -0,0 +1,9 @@
+dependencies:
+- name: kube-prometheus-stack
+  repository: https://prometheus-community.github.io/helm-charts
+  version: 39.11.0
+- name: node-problem-detector
+  repository: https://charts.deliveryhero.io/
+  version: 2.2.3
+digest: sha256:abe14ddd49cb69425b09824ee126a66bf214a8119fb7966cf793fb025a910700
+generated: "2022-09-01T17:59:11.925486+02:00"
diff --git a/cluster-components/monitoring/Chart.yaml b/cluster-components/monitoring/Chart.yaml
new file mode 100644
index 0000000..043e04c
--- /dev/null
+++ b/cluster-components/monitoring/Chart.yaml
@@ -0,0 +1,14 @@
+apiVersion: v2
+name: prometheus-stack
+description: A Helm chart for Kubernetes
+type: application
+version: 0.1.0
+appVersion: "1.0"
+
+dependencies:
+- name: kube-prometheus-stack
+  version: 39.11.0
+  repository: https://prometheus-community.github.io/helm-charts
+- name: node-problem-detector
+  version: 2.2.3
+  repository: https://charts.deliveryhero.io/
diff --git a/cluster-components/monitoring/values.yaml b/cluster-components/monitoring/values.yaml
new file mode 100644
index 0000000..9992915
--- /dev/null
+++ b/cluster-components/monitoring/values.yaml
@@ -0,0 +1,150 @@
+kube-prometheus-stack:
+  prometheus:
+    fullnameOverride: prometheus
+
+    prometheusSpec:
+      serviceMonitorSelectorNilUsesHelmValues: false
+      podMonitorSelectorNilUsesHelmValues: false
+      ruleSelectorNilUsesHelmValues: false
+
+      storageSpec:
+        volumeClaimTemplate:
+          spec:
+            storageClassName: longhorn
+            accessModes: ["ReadWriteOnce"]
+            resources:
+              requests:
+                storage: 20Gi
+
+    ingress:
+      enabled: true
+      pathType: Prefix
+      annotations:
+        cert-manager.io/cluster-issuer: letsencrypt-prod
+        # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth"
+        # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri"
+      hosts:
+        - prometheus.cloud.mareshq.com
+      paths:
+        - /
+      tls:
+        - hosts:
+            - prometheus.cloud.mareshq.com
+          secretName: prometheus-tls
+
+    serviceMonitor:
+      selfMonitor: true
+
+  grafana:
+    fullnameOverride: grafana
+
+    defaultDashboardsEnabled: true
+
+    # Admin credentials come from a Secret that must already exist in the
+    # release namespace; never commit the password itself to Git.
+    admin:
+      existingSecret: grafana-admin
+      userKey: admin-user
+      passwordKey: admin-password
+
+    ingress:
+      enabled: true
+      pathType: Prefix
+      annotations:
+        cert-manager.io/cluster-issuer: letsencrypt-prod
+      hosts:
+        - grafana.cloud.mareshq.com
+      paths:
+        - /
+      tls:
+        - hosts:
+            - grafana.cloud.mareshq.com
+          secretName: grafana-tls
+
+    serviceMonitor:
+      selfMonitor: true
+
+    # grafana.ini:
+    #   server:
+    #     root_url: https://grafana.cloud.mareshq.com
+    #   auth.gitlab:
+    #     enabled: true
+    #     allow_sign_up: true
+    #     client_id: bbe5e1c7a6bf81a6725b1fe7f5c64c96e9bc697bebd24d5e41b2cd623d38917b
+    #     client_secret: $__file{/etc/secrets/auth_gitlab/client_secret}
+    #     scopes: read_api
+    #     auth_url: https://gitlab.mareshq.com/oauth/authorize
+    #     token_url: https://gitlab.mareshq.com/oauth/token
+    #     api_url: https://gitlab.mareshq.com/api/v4
+    #     allowed_groups: mareshq
+    #   auth.basic:
+    #     enabled: false
+    #   auth:
+    #     disable_login_form: false
+
+    persistence:
+      enabled: false
+      storageClassName: longhorn
+
+  alertmanager:
+    fullnameOverride: alertmanager
+
+    ingress:
+      enabled: true
+      pathType: Prefix
+      annotations:
+        cert-manager.io/cluster-issuer: letsencrypt-prod
+        # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth"
+        # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri"
+      hosts:
+        - alertmanager.cloud.mareshq.com
+      paths:
+        - /
+      tls:
+        - hosts:
+            - alertmanager.cloud.mareshq.com
+          secretName: alertmanager-tls
+
+    serviceMonitor:
+      selfMonitor: true
+
+  kubeApiServer:
+    enabled: true
+
+  kubelet:
+    enabled: true
+
+  kubeControllerManager:
+    enabled: false
+
+  coreDns:
+    enabled: true
+
+  kubeScheduler:
+    enabled: true
+
+  kubeProxy:
+    enabled: true
+
+  kubeStateMetrics:
+    enabled: true
+
+  nodeExporter:
+    enabled: true
+
+  prometheusOperator:
+    serviceMonitor:
+      selfMonitor: true
+
+  nameOverride: prom
+
+  defaultRules:
+    create: false
+
+node-problem-detector:
+  metrics:
+    enabled: true
+    serviceMonitor:
+      enabled: true
+    prometheusRule:
+      enabled: true