1
0
Fork 0

feat(cluster-components): add monitoring

kube-prometheus-stack + node-problem-detector
This commit is contained in:
Vojtěch Mareš 2022-09-01 18:02:32 +02:00
parent 18e8d23a36
commit 03dbc0ac5c
Signed by: vojtech.mares
GPG key ID: C6827B976F17240D
4 changed files with 186 additions and 0 deletions

18
apps/monitoring.yaml Normal file
View file

@ -0,0 +1,18 @@
# Argo CD Application that deploys the chart stored at
# cluster-components/monitoring into the "monitoring" namespace of the
# local cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  # Previously "cert-manager" (copy-paste leftover). Application names must
  # be unique within the argocd namespace, so reusing that name would clash
  # with the real cert-manager Application.
  name: monitoring
  namespace: argocd
spec:
  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc
  project: default
  source:
    path: cluster-components/monitoring
    repoURL: https://gitlab.mareshq.com/gitops/mareshq/bee.git
    targetRevision: HEAD
  syncPolicy:
    # Automated sync: prune removes resources that disappeared from Git,
    # selfHeal reverts manual drift in the cluster.
    automated:
      prune: true
      selfHeal: true

View file

@ -0,0 +1,9 @@
# Dependency lock data (the digest/generated keys suggest a Helm Chart.lock).
# It is produced by tooling; regenerate it with `helm dependency update`
# rather than editing versions by hand.
dependencies:
  - name: kube-prometheus-stack
    repository: https://prometheus-community.github.io/helm-charts
    version: 39.11.0
  - name: node-problem-detector
    repository: https://charts.deliveryhero.io/
    version: 2.2.3
digest: sha256:abe14ddd49cb69425b09824ee126a66bf214a8119fb7966cf793fb025a910700
generated: "2022-09-01T17:59:11.925486+02:00"

View file

@ -0,0 +1,14 @@
# Helm chart metadata. The chart declares two upstream charts as
# dependencies; their pinned versions must stay in step with the lock file.
apiVersion: v2
name: prometheus-stack
description: A Helm chart for Kubernetes
type: application
version: 0.1.0
appVersion: "1.0"

dependencies:
  # Prometheus Operator, Prometheus, Alertmanager and Grafana bundle.
  - name: kube-prometheus-stack
    repository: https://prometheus-community.github.io/helm-charts
    version: 39.11.0
  # Node-level problem reporting.
  - name: node-problem-detector
    repository: https://charts.deliveryhero.io/
    version: 2.2.3

View file

@ -0,0 +1,145 @@
# Values passed to the kube-prometheus-stack dependency.
# NOTE(review): nesting was reconstructed from the chart's values schema;
# confirm the placement of nameOverride/defaultRules against the real file.
kube-prometheus-stack:
  prometheus:
    fullnameOverride: prometheus
    prometheusSpec:
      # false => the selectors are not restricted to objects labelled by this
      # Helm release, so monitors/rules from other charts are picked up too.
      serviceMonitorSelectorNilUsesHelmValues: false
      podMonitorSelectorNilUsesHelmValues: false
      ruleSelectorNilUsesHelmValues: false
      # Persistent storage for the Prometheus data volume.
      storageSpec:
        volumeClaimTemplate:
          spec:
            storageClassName: longhorn
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 20Gi
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        cert-manager.io/cluster-issuer: letsencrypt-prod
        # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth"
        # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri"
      hosts:
        - prometheus.cloud.mareshq.com
      paths:
        - /
      tls:
        - hosts:
            - prometheus.cloud.mareshq.com
          secretName: prometheus-tls
    serviceMonitor:
      selfMonitor: true

  grafana:
    fullnameOverride: grafana
    defaultDashboardsEnabled: true
    # The admin password used to be committed here in plaintext. Credentials
    # are now read from a pre-existing Secret in the release namespace, which
    # must be created out of band (e.g. SealedSecret / ExternalSecret) and
    # contain the two keys below. The previously committed password remains
    # in Git history and should be rotated.
    admin:
      existingSecret: grafana-admin-credentials
      userKey: admin-user
      passwordKey: admin-password
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        cert-manager.io/cluster-issuer: letsencrypt-prod
      hosts:
        - grafana.cloud.mareshq.com
      paths:
        - /
      tls:
        - hosts:
            - grafana.cloud.mareshq.com
          secretName: grafana-tls
    serviceMonitor:
      selfMonitor: true
    # Disabled GitLab OAuth login. The OAuth client credentials were removed
    # from this comment; supply them from a Secret when enabling this block
    # and rotate the ones that were previously committed.
    # grafana.ini:
    #   server:
    #     root_url: https://grafana.cloud.mareshq.com
    #   auth.gitlab:
    #     enabled: true
    #     allow_sign_up: true
    #     client_id: <gitlab-oauth-client-id>
    #     client_secret: <gitlab-oauth-client-secret, never commit>
    #     scopes: read_api
    #     auth_url: https://gitlab.mareshq.com/oauth/authorize
    #     token_url: https://gitlab.mareshq.com/oauth/token
    #     api_url: https://gitlab.mareshq.com/api/v4
    #     allowed_groups: mareshq
    #   auth.basic:
    #     enabled: false
    #   auth:
    #     disable_login_form: false
    persistence:
      enabled: false
      storageClassName: longhorn

  alertmanager:
    fullnameOverride: alertmanager
    ingress:
      enabled: true
      pathType: Prefix
      annotations:
        cert-manager.io/cluster-issuer: letsencrypt-prod
        # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth"
        # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri"
      hosts:
        - alertmanager.cloud.mareshq.com
      paths:
        - /
      tls:
        - hosts:
            - alertmanager.cloud.mareshq.com
          secretName: alertmanager-tls
    serviceMonitor:
      selfMonitor: true

  # Per-component scrape targets; only the controller manager is disabled.
  kubeApiServer:
    enabled: true
  kubelet:
    enabled: true
  kubeControllerManager:
    enabled: false
  coreDns:
    enabled: true
  kubeScheduler:
    enabled: true
  kubeProxy:
    enabled: true
  kubeStateMetrics:
    enabled: true
  nodeExporter:
    enabled: true

  prometheusOperator:
    serviceMonitor:
      selfMonitor: true

  nameOverride: prom
  # The chart's bundled alerting/recording rules are not created.
  defaultRules:
    create: false
# Values passed to the node-problem-detector dependency: metrics are switched
# on together with the ServiceMonitor and PrometheusRule objects.
node-problem-detector:
  metrics:
    enabled: true
    serviceMonitor:
      enabled: true
    prometheusRule:
      enabled: true