feat(cluster-components): add monitoring
kube-prometheus-stack + node-problem-detector
This commit is contained in:
		
							parent
							
								
									18e8d23a36
								
							
						
					
					
						commit
						03dbc0ac5c
					
				
					 4 changed files with 186 additions and 0 deletions
				
			
		
							
								
								
									
										18
									
								
								apps/monitoring.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								apps/monitoring.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| # Argo CD Application that deploys the monitoring umbrella chart found at | ||||
| # cluster-components/monitoring into the "monitoring" namespace of the local cluster. | ||||
| apiVersion: argoproj.io/v1alpha1 | ||||
| kind: Application | ||||
| metadata: | ||||
|   # Previously "cert-manager" (copy-paste leftover). Application names are unique | ||||
|   # per namespace, so that name would collide with any Application that is | ||||
|   # actually called cert-manager in the argocd namespace. | ||||
|   name: monitoring | ||||
|   namespace: argocd | ||||
| spec: | ||||
|   destination: | ||||
|     # NOTE(review): nothing in this manifest creates the "monitoring" namespace; | ||||
|     # add syncPolicy.syncOptions [CreateNamespace=true] unless it is provisioned elsewhere. | ||||
|     namespace: monitoring | ||||
|     server: https://kubernetes.default.svc | ||||
|   project: default | ||||
|   source: | ||||
|     path: cluster-components/monitoring | ||||
|     repoURL: https://gitlab.mareshq.com/gitops/mareshq/bee.git | ||||
|     targetRevision: HEAD | ||||
|   syncPolicy: | ||||
|     # Fully automated sync: remove resources deleted from Git and revert manual drift. | ||||
|     automated: | ||||
|       prune: true | ||||
|       selfHeal: true | ||||
							
								
								
									
										9
									
								
								cluster-components/monitoring/Chart.lock
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								cluster-components/monitoring/Chart.lock
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,9 @@ | |||
| dependencies: | ||||
| - name: kube-prometheus-stack | ||||
|   repository: https://prometheus-community.github.io/helm-charts | ||||
|   version: 39.11.0 | ||||
| - name: node-problem-detector | ||||
|   repository: https://charts.deliveryhero.io/ | ||||
|   version: 2.2.3 | ||||
| digest: sha256:abe14ddd49cb69425b09824ee126a66bf214a8119fb7966cf793fb025a910700 | ||||
| generated: "2022-09-01T17:59:11.925486+02:00" | ||||
							
								
								
									
										14
									
								
								cluster-components/monitoring/Chart.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								cluster-components/monitoring/Chart.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,14 @@ | |||
| # Umbrella chart: ships no templates of its own in this commit, it only pins | ||||
| # the two upstream charts listed under `dependencies`. | ||||
| apiVersion: v2 | ||||
| type: application | ||||
| name: prometheus-stack | ||||
| description: A Helm chart for Kubernetes | ||||
| # Version of this wrapper chart and the (nominal) application version. | ||||
| version: 0.1.0 | ||||
| appVersion: "1.0" | ||||
| # Keep these pins in step with Chart.lock. | ||||
| dependencies: | ||||
|   - name: kube-prometheus-stack | ||||
|     repository: https://prometheus-community.github.io/helm-charts | ||||
|     version: 39.11.0 | ||||
|   - name: node-problem-detector | ||||
|     repository: https://charts.deliveryhero.io/ | ||||
|     version: 2.2.3 | ||||
							
								
								
									
										145
									
								
								cluster-components/monitoring/values.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								cluster-components/monitoring/values.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,145 @@ | |||
| kube-prometheus-stack: | ||||
|   # Prometheus server settings handed to the kube-prometheus-stack dependency. | ||||
|   prometheus: | ||||
|     fullnameOverride: prometheus | ||||
| 
 | ||||
|     prometheusSpec: | ||||
|       # NOTE(review): per the chart's values reference, `false` is expected to stop | ||||
|       # the chart from restricting these selectors to its own release labels, so | ||||
|       # monitors/rules from other releases are picked up too - confirm for 39.11.0. | ||||
|       serviceMonitorSelectorNilUsesHelmValues: false | ||||
|       podMonitorSelectorNilUsesHelmValues: false | ||||
|       ruleSelectorNilUsesHelmValues: false | ||||
| 
 | ||||
|       # Persistent storage: one 20Gi ReadWriteOnce claim on the "longhorn" storage class. | ||||
|       storageSpec: | ||||
|         volumeClaimTemplate: | ||||
|           spec: | ||||
|             storageClassName: longhorn | ||||
|             accessModes: ["ReadWriteOnce"] | ||||
|             resources: | ||||
|               requests: | ||||
|                 storage: 20Gi | ||||
| 
 | ||||
|     # Ingress for prometheus.cloud.mareshq.com with a cert-manager issued certificate. | ||||
|     # NOTE(review): the oauth2 auth annotations below are commented out, so this | ||||
|     # ingress carries no authentication annotations - confirm the UI is not | ||||
|     # reachable unauthenticated before exposing it. | ||||
|     ingress: | ||||
|       enabled: true | ||||
|       pathType: Prefix | ||||
|       annotations: | ||||
|         cert-manager.io/cluster-issuer: letsencrypt-prod | ||||
|         # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth" | ||||
|         # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri" | ||||
|       hosts: | ||||
|         - prometheus.cloud.mareshq.com | ||||
|       paths: | ||||
|         - / | ||||
|       tls: | ||||
|         - hosts: | ||||
|           - prometheus.cloud.mareshq.com | ||||
|           secretName: prometheus-tls | ||||
| 
 | ||||
|     # Have Prometheus scrape its own metrics. | ||||
|     serviceMonitor: | ||||
|       selfMonitor: true | ||||
| 
 | ||||
|   # Grafana settings handed to the Grafana subchart of kube-prometheus-stack. | ||||
|   grafana: | ||||
|     fullnameOverride: grafana | ||||
| 
 | ||||
|     defaultDashboardsEnabled: true | ||||
| 
 | ||||
|     # SECURITY: the admin password used to be committed here in plaintext. | ||||
|     # Credentials are now read from a pre-created Secret in the release namespace | ||||
|     # (keys below). The previously committed password is in Git history and must | ||||
|     # be rotated. Create the Secret out-of-band (e.g. sealed/external secret) | ||||
|     # before syncing, otherwise the Grafana pod cannot start. | ||||
|     admin: | ||||
|       existingSecret: grafana-admin-credentials | ||||
|       userKey: admin-user | ||||
|       passwordKey: admin-password | ||||
| 
 | ||||
|     # Ingress for grafana.cloud.mareshq.com with a cert-manager issued certificate. | ||||
|     ingress: | ||||
|       enabled: true | ||||
|       pathType: Prefix | ||||
|       annotations: | ||||
|         cert-manager.io/cluster-issuer: letsencrypt-prod | ||||
|       hosts: | ||||
|         - grafana.cloud.mareshq.com | ||||
|       paths: | ||||
|         - / | ||||
|       tls: | ||||
|         - hosts: | ||||
|           - grafana.cloud.mareshq.com | ||||
|           secretName: grafana-tls | ||||
| 
 | ||||
|     serviceMonitor: | ||||
|       selfMonitor: true | ||||
| 
 | ||||
|     # Disabled GitLab OAuth login, kept for reference. | ||||
|     # SECURITY: the client secret that used to be written here was redacted; it | ||||
|     # is in Git history and should be rotated. When enabling this block, inject | ||||
|     # the secret from a Kubernetes Secret instead of writing it into values. | ||||
|     # grafana.ini: | ||||
|     #   server: | ||||
|     #     root_url: https://grafana.cloud.mareshq.com | ||||
|     #   auth.gitlab: | ||||
|     #     enabled: true | ||||
|     #     allow_sign_up: true | ||||
|     #     client_id: bbe5e1c7a6bf81a6725b1fe7f5c64c96e9bc697bebd24d5e41b2cd623d38917b | ||||
|     #     client_secret: <redacted - load from a Secret> | ||||
|     #     scopes: read_api | ||||
|     #     auth_url: https://gitlab.mareshq.com/oauth/authorize | ||||
|     #     token_url: https://gitlab.mareshq.com/oauth/token | ||||
|     #     api_url: https://gitlab.mareshq.com/api/v4 | ||||
|     #     allowed_groups: mareshq | ||||
|     #   auth.basic: | ||||
|     #     enabled: false | ||||
|     #   auth: | ||||
|     #     disable_login_form: false | ||||
| 
 | ||||
|     # No persistent volume; storageClassName only matters once persistence is enabled. | ||||
|     persistence: | ||||
|       enabled: false | ||||
|       storageClassName: longhorn | ||||
| 
 | ||||
|   # Alertmanager settings handed to the kube-prometheus-stack dependency. | ||||
|   alertmanager: | ||||
|     fullnameOverride: alertmanager | ||||
| 
 | ||||
|     # Ingress for alertmanager.cloud.mareshq.com with a cert-manager issued certificate. | ||||
|     # NOTE(review): the oauth2 auth annotations are commented out - confirm the UI | ||||
|     # is not reachable unauthenticated before exposing it. | ||||
|     ingress: | ||||
|       enabled: true | ||||
|       pathType: Prefix | ||||
|       annotations: | ||||
|         cert-manager.io/cluster-issuer: letsencrypt-prod | ||||
|         # nginx.ingress.kubernetes.io/auth-url: "https://auth.cloud.mareshq.com/oauth2/auth" | ||||
|         # nginx.ingress.kubernetes.io/auth-signin: "https://auth.cloud.mareshq.com/oauth2/start?rd=$scheme://$host$request_uri" | ||||
|       hosts: | ||||
|         - alertmanager.cloud.mareshq.com | ||||
|       paths: | ||||
|         - / | ||||
|       tls: | ||||
|         - hosts: | ||||
|           - alertmanager.cloud.mareshq.com | ||||
|           secretName: alertmanager-tls | ||||
| 
 | ||||
|     serviceMonitor: | ||||
|       selfMonitor: true | ||||
| 
 | ||||
|   # The keys below are top-level kube-prometheus-stack values. They used to be | ||||
|   # indented one level deeper (as children of `alertmanager`), where the chart | ||||
|   # does not read them, so none of these settings took effect. | ||||
|   # NOTE(review): un-nesting activates them (e.g. defaultRules.create=false and | ||||
|   # nameOverride=prom) - review the rendered diff before the first sync. | ||||
|   kubeApiServer: | ||||
|     enabled: true | ||||
| 
 | ||||
|   kubelet: | ||||
|     enabled: true | ||||
| 
 | ||||
|   kubeControllerManager: | ||||
|     enabled: false | ||||
| 
 | ||||
|   coreDns: | ||||
|     enabled: true | ||||
| 
 | ||||
|   kubeScheduler: | ||||
|     enabled: true | ||||
| 
 | ||||
|   kubeProxy: | ||||
|     enabled: true | ||||
| 
 | ||||
|   kubeStateMetrics: | ||||
|     enabled: true | ||||
| 
 | ||||
|   nodeExporter: | ||||
|     enabled: true | ||||
| 
 | ||||
|   prometheusOperator: | ||||
|     serviceMonitor: | ||||
|       selfMonitor: true | ||||
| 
 | ||||
|   nameOverride: prom | ||||
| 
 | ||||
|   defaultRules: | ||||
|     create: false | ||||
| 
 | ||||
| # Values for the node-problem-detector dependency: turn on its metrics | ||||
| # settings together with the ServiceMonitor and PrometheusRule toggles. | ||||
| node-problem-detector: | ||||
|   metrics: | ||||
|     enabled: true | ||||
|     serviceMonitor: {enabled: true} | ||||
|     prometheusRule: {enabled: true} | ||||
		Reference in a new issue