From c8a3f251bf9f6d8663bc7b4797e15f937e1866d9 Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 18:35:44 +0100 Subject: [PATCH 01/10] refactor(grafana): change name of prometheus and loki datasource --- clusters/bee/argocd-apps/grafana-ops.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/bee/argocd-apps/grafana-ops.yaml b/clusters/bee/argocd-apps/grafana-ops.yaml index 4007d25..1cbc14b 100644 --- a/clusters/bee/argocd-apps/grafana-ops.yaml +++ b/clusters/bee/argocd-apps/grafana-ops.yaml @@ -49,12 +49,12 @@ spec: datasources.yaml: apiVersion: 1 datasources: - - name: Cluster Prometheus + - name: In-Cluster Prometheus type: prometheus url: http://monitoring-prometheus.monitoring:9090 access: proxy isDefault: true - - name: Cluster Loki + - name: In-Cluster Loki type: loki url: http://loki-gateway.logging:80 access: proxy From 4423c54b007ddce6291961601511dbad30fbea1e Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 19:04:14 +0100 Subject: [PATCH 02/10] feat(bee): add monitoring-ops: prometheus & alertmanager --- clusters/bee/argocd-apps/monitoring-ops.yaml | 83 ++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 clusters/bee/argocd-apps/monitoring-ops.yaml diff --git a/clusters/bee/argocd-apps/monitoring-ops.yaml b/clusters/bee/argocd-apps/monitoring-ops.yaml new file mode 100644 index 0000000..cecfbbf --- /dev/null +++ b/clusters/bee/argocd-apps/monitoring-ops.yaml @@ -0,0 +1,83 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: monitoring-ops + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + namespace: monitoring-ops + server: https://kubernetes.default.svc + project: default + source: + repoURL: https://prometheus-community.github.io/helm-charts + chart: kube-prometheus-stack + targetRevision: "44.3.1" + helm: + releaseName: monitoring-ops + values: | + fullnameOverride: ops + + defaultRules: + create: 
false + + prometheus: + podDisruptionBudget: + enabled: true + minAvailable: 1 + maxUnavailable: "" + + prometheusSpec: + logFormat: json + retention: 5d + replicas: 3 + + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: hcloud-volumes + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi + selector: {} + + alertmanager: + podDisruptionBudget: + enabled: true + minAvailable: 1 + maxUnavailable: "" + + alertmanagerSpec: + logFormat: json + replicas: 3 + + retention: 168h + + storage: + volumeClaimTemplate: + spec: + storageClassName: hcloud-volumes + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi + selector: {} + + grafana: + enabled: false + + kubeStateMetrics: + enabled: false + + nodeExporter: + enabled: false + + syncPolicy: + automated: + prune: true + selfHeal: true + + syncOptions: + - CreateNamespace=true From 025a9a0f79168688765390d8ddf1e8d478425c54 Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 19:13:39 +0100 Subject: [PATCH 03/10] refactor(bee): disable crds, prometheus-operator; chart resource naming --- clusters/bee/argocd-apps/monitoring-ops.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clusters/bee/argocd-apps/monitoring-ops.yaml b/clusters/bee/argocd-apps/monitoring-ops.yaml index cecfbbf..42f81c5 100644 --- a/clusters/bee/argocd-apps/monitoring-ops.yaml +++ b/clusters/bee/argocd-apps/monitoring-ops.yaml @@ -16,7 +16,9 @@ spec: targetRevision: "44.3.1" helm: releaseName: monitoring-ops + skipCrds: true values: | + cleanPrometheusOperatorObjectNames: true fullnameOverride: ops defaultRules: @@ -65,6 +67,9 @@ spec: storage: 50Gi selector: {} + prometheusOperator: + enabled: false + grafana: enabled: false From 44e5d4ca102e9b7c36cb33a00c64dbcdcbcd6da5 Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 19:19:09 +0100 Subject: [PATCH 04/10] refactor(monitoring-ops): remove pvc selector --- clusters/bee/argocd-apps/monitoring-ops.yaml | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/clusters/bee/argocd-apps/monitoring-ops.yaml b/clusters/bee/argocd-apps/monitoring-ops.yaml index 42f81c5..23643cb 100644 --- a/clusters/bee/argocd-apps/monitoring-ops.yaml +++ b/clusters/bee/argocd-apps/monitoring-ops.yaml @@ -43,7 +43,6 @@ spec: resources: requests: storage: 50Gi - selector: {} alertmanager: podDisruptionBudget: @@ -65,7 +64,6 @@ spec: resources: requests: storage: 50Gi - selector: {} prometheusOperator: enabled: false From 339b9053b2223af24bb65df6a87542d2020de2e2 Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 19:40:36 +0100 Subject: [PATCH 05/10] feat(bee): add datasources --- clusters/bee/argocd-apps/grafana-ops.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clusters/bee/argocd-apps/grafana-ops.yaml b/clusters/bee/argocd-apps/grafana-ops.yaml index 1cbc14b..77413fc 100644 --- a/clusters/bee/argocd-apps/grafana-ops.yaml +++ b/clusters/bee/argocd-apps/grafana-ops.yaml @@ -49,11 +49,19 @@ spec: datasources.yaml: apiVersion: 1 datasources: + - name: Prometheus + type: prometheus + url: http://ops-prometheus.monitoring-ops:9090 + access: proxy + isDefault: true + - name: AlertManager + type: alertmanager + url: http://ops-alertmanager.monitoring-ops:9093 + access: proxy - name: In-Cluster Prometheus type: prometheus url: http://monitoring-prometheus.monitoring:9090 access: proxy - isDefault: true - name: In-Cluster Loki type: loki url: http://loki-gateway.logging:80 From 9aa80c21d97e844a5438f34b9cb3962e4df43a0d Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 19:44:13 +0100 Subject: [PATCH 06/10] chore(bee): bump grafana ops chart to v6.50.6 --- clusters/bee/argocd-apps/grafana-ops.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/bee/argocd-apps/grafana-ops.yaml b/clusters/bee/argocd-apps/grafana-ops.yaml index 77413fc..7c0f9f1 100644 --- a/clusters/bee/argocd-apps/grafana-ops.yaml +++ 
b/clusters/bee/argocd-apps/grafana-ops.yaml @@ -13,7 +13,7 @@ spec: source: repoURL: https://grafana.github.io/helm-charts chart: grafana - targetRevision: "6.50.0" + targetRevision: "6.50.6" helm: releaseName: grafana-ops values: | From 15fc85fafb900106a4f34364d1489ff17c9f5382 Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Feb 2023 21:14:01 +0100 Subject: [PATCH 07/10] refactor(bee): grafana deployment strategy to `Recreate` Some downtime is OK for Grafana, since I am using PVC with access mode `ReadWriteOnce` which is a blocker for a rolling update deployment strategy --- clusters/bee/argocd-apps/grafana-ops.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clusters/bee/argocd-apps/grafana-ops.yaml b/clusters/bee/argocd-apps/grafana-ops.yaml index 7c0f9f1..492b9b4 100644 --- a/clusters/bee/argocd-apps/grafana-ops.yaml +++ b/clusters/bee/argocd-apps/grafana-ops.yaml @@ -17,6 +17,9 @@ spec: helm: releaseName: grafana-ops values: | + deploymentStrategy: + type: Recreate + ingress: enabled: true From 20f82ef641467f05ba624f8d3536ff07526309ea Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Mar 2023 11:58:15 +0100 Subject: [PATCH 08/10] refactor(grafana-ops): remove alertmanager datasource --- clusters/bee/argocd-apps/grafana-ops.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clusters/bee/argocd-apps/grafana-ops.yaml b/clusters/bee/argocd-apps/grafana-ops.yaml index 492b9b4..103f3c8 100644 --- a/clusters/bee/argocd-apps/grafana-ops.yaml +++ b/clusters/bee/argocd-apps/grafana-ops.yaml @@ -57,10 +57,6 @@ spec: url: http://ops-prometheus.monitoring-ops:9090 access: proxy isDefault: true - - name: AlertManager - type: alertmanager - url: http://ops-alertmanager.monitoring-ops:9093 - access: proxy - name: In-Cluster Prometheus type: prometheus url: http://monitoring-prometheus.monitoring:9090 From 506e7814b31c0ae19077f62a4d8f90f7c1c83c0f Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Mar 2023 11:59:58 +0100 Subject: [PATCH 
09/10] refactor(monitoring-ops): scale down prometheus to two instances --- clusters/bee/argocd-apps/monitoring-ops.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/bee/argocd-apps/monitoring-ops.yaml b/clusters/bee/argocd-apps/monitoring-ops.yaml index 23643cb..4503d80 100644 --- a/clusters/bee/argocd-apps/monitoring-ops.yaml +++ b/clusters/bee/argocd-apps/monitoring-ops.yaml @@ -33,7 +33,7 @@ spec: prometheusSpec: logFormat: json retention: 5d - replicas: 3 + replicas: 2 storageSpec: volumeClaimTemplate: From 43d0f8375394fa3ed6c078b9e4a2fdfa1069e31d Mon Sep 17 00:00:00 2001 From: Vojtech Mares Date: Thu, 2 Mar 2023 12:00:32 +0100 Subject: [PATCH 10/10] refactor(monitoring-ops): disable alertmanager --- clusters/bee/argocd-apps/monitoring-ops.yaml | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/clusters/bee/argocd-apps/monitoring-ops.yaml b/clusters/bee/argocd-apps/monitoring-ops.yaml index 4503d80..695814b 100644 --- a/clusters/bee/argocd-apps/monitoring-ops.yaml +++ b/clusters/bee/argocd-apps/monitoring-ops.yaml @@ -45,25 +45,7 @@ spec: storage: 50Gi alertmanager: - podDisruptionBudget: - enabled: true - minAvailable: 1 - maxUnavailable: "" - - alertmanagerSpec: - logFormat: json - replicas: 3 - - retention: 168h - - storage: - volumeClaimTemplate: - spec: - storageClassName: hcloud-volumes - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 50Gi + enabled: false prometheusOperator: enabled: false