Compare commits
	
		
			10 commits
		
	
	
		
			1ed3171954
			...
			d410d564f3
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| d410d564f3 | |||
| 2eed379533 | |||
| 4577bacd0f | |||
| 12525660cd | |||
| 88e41eb5cc | |||
| 3122d80ee1 | |||
| 005fe949d6 | |||
| 9ed52183d5 | |||
| f38e6d5a3c | |||
| be21c1c298 | 
					 10 changed files with 345 additions and 0 deletions
				
			
		
							
								
								
									
										46
									
								
								appsets/node-problem-detector.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								appsets/node-problem-detector.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,46 @@ | |||
| # ApplicationSet that deploys the node-problem-detector Helm chart, producing | ||||
| # one Argo CD Application per entry in the list generator below. | ||||
| apiVersion: argoproj.io/v1alpha1 | ||||
| kind: ApplicationSet | ||||
| metadata: | ||||
|   # Named after the chart this set deploys (was "universal-cert-manager", | ||||
|   # which does not match anything else in this manifest). | ||||
|   name: universal-node-problem-detector | ||||
|   annotations: | ||||
|     argocd.argoproj.io/sync-wave: "3" | ||||
| spec: | ||||
|   generators: | ||||
|     # Each element supplies the {{cluster}} and {{url}} values used in the | ||||
|     # template. | ||||
|     - list: | ||||
|         elements: | ||||
|           - cluster: cherry | ||||
|             url: https://kubernetes.default.svc | ||||
|           - cluster: lychee | ||||
|             url: https://172.16.152.1:6443 | ||||
|   syncPolicy: | ||||
|     applicationsSync: sync | ||||
|   template: | ||||
|     metadata: | ||||
|       name: "{{cluster}}-node-problem-detector" | ||||
|       annotations: | ||||
|         argocd.argoproj.io/sync-wave: "5" | ||||
|     spec: | ||||
|       project: monitoring | ||||
|       syncPolicy: | ||||
|         automated: | ||||
|           prune: true | ||||
|           selfHeal: true | ||||
|         syncOptions: | ||||
|           - CreateNamespace=true | ||||
|       source: | ||||
|         chart: node-problem-detector | ||||
|         repoURL: https://charts.deliveryhero.io/ | ||||
|         targetRevision: 2.3.10 | ||||
|         helm: | ||||
|           releaseName: node-problem-detector | ||||
|           valuesObject: | ||||
|             # Expose metrics and create the ServiceMonitor / PrometheusRule | ||||
|             # objects provided by the chart. | ||||
|             metrics: | ||||
|               enabled: true | ||||
|               serviceMonitor: | ||||
|                 enabled: true | ||||
|               prometheusRule: | ||||
|                 enabled: true | ||||
| 
 | ||||
|       destination: | ||||
|         server: "{{url}}" | ||||
|         namespace: monitoring | ||||
							
								
								
									
										103
									
								
								appsets/prometheus.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								appsets/prometheus.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,103 @@ | |||
| # ApplicationSet that deploys the kube-prometheus-stack Helm chart, producing | ||||
| # one Argo CD Application per entry in the list generator below (currently | ||||
| # only the "lychee" cluster). | ||||
| apiVersion: argoproj.io/v1alpha1 | ||||
| kind: ApplicationSet | ||||
| metadata: | ||||
|   name: universal-prometheus | ||||
|   annotations: | ||||
|     argocd.argoproj.io/sync-wave: "2" | ||||
| spec: | ||||
|   generators: | ||||
|     # Each element supplies the {{cluster}} and {{url}} values used in the | ||||
|     # template. | ||||
|     - list: | ||||
|         elements: | ||||
|           - cluster: lychee | ||||
|             url: https://172.16.152.1:6443 | ||||
|   # NOTE(review): presumably these two settings keep generated Applications | ||||
|   # and their resources in place if this ApplicationSet is removed - confirm | ||||
|   # against the Argo CD ApplicationSet documentation. | ||||
|   syncPolicy: | ||||
|     applicationsSync: create-update | ||||
|     preserveResourcesOnDeletion: true | ||||
|   template: | ||||
|     metadata: | ||||
|       name: "{{cluster}}-prometheus" | ||||
|       annotations: | ||||
|         argocd.argoproj.io/sync-wave: "5" | ||||
|     spec: | ||||
|       project: monitoring | ||||
|       syncPolicy: | ||||
|         automated: | ||||
|           prune: true | ||||
|           selfHeal: true | ||||
|         syncOptions: | ||||
|           - CreateNamespace=true | ||||
|           - ServerSideApply=true | ||||
|       source: | ||||
|         chart: kube-prometheus-stack | ||||
|         repoURL: https://prometheus-community.github.io/helm-charts | ||||
|         targetRevision: 51.2.0 | ||||
|         helm: | ||||
|           releaseName: prometheus | ||||
|           # Helm values are given as a multi-line string; the {{cluster}} | ||||
|           # placeholders inside it build the per-cluster ingress host names. | ||||
|           values: | | ||||
|             fullnameOverride: "prometheus" | ||||
| 
 | ||||
|             prometheus: | ||||
|               ingress: | ||||
|                 enabled: true | ||||
|                 ingressClassName: nginx | ||||
|                 annotations: | ||||
|                   cert-manager.io/cluster-issuer: letsencrypt-prod | ||||
|                 hosts: | ||||
|                   - prometheus.{{cluster}}.k8s.vxm.cz | ||||
|                 paths: | ||||
|                   - / | ||||
|                 pathType: Prefix | ||||
|                 tls: | ||||
|                   - hosts: | ||||
|                     - prometheus.{{cluster}}.k8s.vxm.cz | ||||
|                     secretName: prometheus-ingress-tls | ||||
| 
 | ||||
|               prometheusSpec: | ||||
|                 serviceMonitorSelectorNilUsesHelmValues: false | ||||
|                 podMonitorSelectorNilUsesHelmValues: false | ||||
|                 ruleSelectorNilUsesHelmValues: false | ||||
| 
 | ||||
|                 storageSpec: | ||||
|                   volumeClaimTemplate: | ||||
|                     spec: | ||||
|                       storageClassName: hcloud-volumes | ||||
|                       accessModes: ["ReadWriteOnce"] | ||||
|                       resources: | ||||
|                         requests: | ||||
|                           storage: 20Gi | ||||
| 
 | ||||
|               serviceMonitor: | ||||
|                 selfMonitor: true | ||||
| 
 | ||||
|             grafana: | ||||
|               enabled: false | ||||
| 
 | ||||
|             alertmanager: | ||||
|               enabled: true | ||||
|               ingress: | ||||
|                 enabled: true | ||||
|                 ingressClassName: nginx | ||||
|                 annotations: | ||||
|                   cert-manager.io/cluster-issuer: letsencrypt-prod | ||||
|                 hosts: | ||||
|                   - alertmanager.{{cluster}}.k8s.vxm.cz | ||||
|                 paths: | ||||
|                   - / | ||||
|                 pathType: Prefix | ||||
|                 tls: | ||||
|                   - hosts: | ||||
|                       - alertmanager.{{cluster}}.k8s.vxm.cz | ||||
|                     secretName: alertmanager-ingress-tls | ||||
| 
 | ||||
|               alertmanagerSpec: | ||||
|                 resources: | ||||
|                   limits: | ||||
|                     cpu: 250m | ||||
|                     memory: 256Mi | ||||
|                   requests: | ||||
|                     cpu: 100m | ||||
|                     memory: 256Mi | ||||
| 
 | ||||
|       destination: | ||||
|         server: "{{url}}" | ||||
|         namespace: monitoring | ||||
							
								
								
									
										38
									
								
								clusters/cherry/apps/blackbox-exporter.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								clusters/cherry/apps/blackbox-exporter.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,38 @@ | |||
| # Argo CD Application that installs the prometheus-blackbox-exporter Helm | ||||
| # chart into the "monitoring" namespace of the cluster Argo CD runs in | ||||
| # (https://kubernetes.default.svc). | ||||
| apiVersion: argoproj.io/v1alpha1 | ||||
| kind: Application | ||||
| metadata: | ||||
|   name: cherry-blackbox-exporter | ||||
|   namespace: argocd | ||||
|   # NOTE(review): presumably makes Argo CD delete the deployed resources when | ||||
|   # this Application is deleted - confirm. | ||||
|   finalizers: | ||||
|     - resources-finalizer.argocd.argoproj.io | ||||
| spec: | ||||
|   project: monitoring | ||||
|   syncPolicy: | ||||
|     automated: | ||||
|       prune: true | ||||
|       selfHeal: true | ||||
|     syncOptions: | ||||
|       - CreateNamespace=true | ||||
|   destination: | ||||
|     server: https://kubernetes.default.svc | ||||
|     namespace: monitoring | ||||
|   source: | ||||
|     chart: prometheus-blackbox-exporter | ||||
|     repoURL: https://prometheus-community.github.io/helm-charts | ||||
|     targetRevision: 8.4.0 | ||||
|     helm: | ||||
|       releaseName: blackbox-exporter | ||||
|       # Chart values: resource requests/limits for the exporter and a | ||||
|       # ServiceMonitor for the exporter itself. | ||||
|       valuesObject: | ||||
|         releaseLabel: true | ||||
|         pspEnabled: false | ||||
|         resources: | ||||
|           limits: | ||||
|             cpu: 100m | ||||
|             memory: 300Mi | ||||
|           requests: | ||||
|             cpu: 50m | ||||
|             memory: 50Mi | ||||
| 
 | ||||
|         serviceMonitor: | ||||
|           selfMonitor: | ||||
|             enabled: true | ||||
							
								
								
									
										26
									
								
								clusters/cherry/apps/hcloud-cloud-controller-manager.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								clusters/cherry/apps/hcloud-cloud-controller-manager.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| # Argo CD Application that installs the hcloud-cloud-controller-manager Helm | ||||
| # chart into kube-system of the cluster Argo CD runs in | ||||
| # (https://kubernetes.default.svc). | ||||
| apiVersion: argoproj.io/v1alpha1 | ||||
| kind: Application | ||||
| metadata: | ||||
|   name: cherry-hcloud-cloud-controller-manager | ||||
|   namespace: argocd | ||||
| spec: | ||||
|   project: infrastructure | ||||
|   syncPolicy: | ||||
|     automated: | ||||
|       prune: true | ||||
|       selfHeal: true | ||||
|   destination: | ||||
|     server: https://kubernetes.default.svc | ||||
|     namespace: kube-system | ||||
|   source: | ||||
|     chart: hcloud-cloud-controller-manager | ||||
|     repoURL: https://charts.hetzner.cloud | ||||
|     targetRevision: 1.18.0 | ||||
|     helm: | ||||
|       releaseName: hcloud-cloud-controller-manager | ||||
|       # Points the chart at the "token" key of the "hcloud-token" secret. | ||||
|       # NOTE(review): controller.hcloudToken.existingSecret looks like the | ||||
|       # hcloud-csi chart's values layout; verify that this chart version reads | ||||
|       # these keys (it may expect env.HCLOUD_TOKEN.valueFrom.secretKeyRef), | ||||
|       # otherwise the setting is silently ignored. | ||||
|       valuesObject: | ||||
|         controller: | ||||
|           hcloudToken: | ||||
|             existingSecret: | ||||
|               name: hcloud-token | ||||
|               key: token | ||||
|  | @ -55,6 +55,10 @@ spec: | |||
|             type: ClusterIP | ||||
|           hostPort: | ||||
|             enabled: true | ||||
|           metrics: | ||||
|             enabled: true | ||||
|             serviceMonitor: | ||||
|               enabled: true | ||||
|           config: | ||||
|             allow-snippet-annotations: "true" | ||||
|             generate-request-id: "true" | ||||
|  |  | |||
|  | @ -107,6 +107,34 @@ spec: | |||
|                     requests: | ||||
|                       storage: 50Gi | ||||
| 
 | ||||
|             additionalScrapeConfigs: | ||||
|               - job_name: "node-exporter" | ||||
|                 metrics_path: "/metrics" | ||||
|                 static_configs: | ||||
|                   - targets: | ||||
|                       - 172.16.1.1:9100 | ||||
|                       - 172.16.1.2:9100 | ||||
|                       - maple.vxm.cz:9100 | ||||
|                       - alder.vxm.cz:9100 | ||||
| 
 | ||||
|               # See: https://developers.mattermost.com/blog/cloud-monitoring/ | ||||
|               - job_name: "federate" | ||||
|                 scrape_interval: 15s | ||||
| 
 | ||||
|                 honor_labels: true | ||||
|                 metrics_path: "/federate" | ||||
| 
 | ||||
|                 params: | ||||
|                   'match[]': | ||||
|                     - '{job="prometheus"}' | ||||
|                     - '{__name__=~"job:.*"}' | ||||
| 
 | ||||
|                 static_configs: | ||||
|                   - targets: | ||||
|                     - prometheus.lychee.k8s.vxm.cz. | ||||
|                     labels: | ||||
|                       clusterID: lychee | ||||
| 
 | ||||
|           serviceMonitor: | ||||
|             selfMonitor: true | ||||
| 
 | ||||
|  |  | |||
|  | @ -3,6 +3,8 @@ kind: Application | |||
| metadata: | ||||
|   name: uptime-kuma | ||||
|   namespace: argocd | ||||
|   finalizers: | ||||
|     - resources-finalizer.argocd.argoproj.io | ||||
| spec: | ||||
|   project: monitoring | ||||
|   syncPolicy: | ||||
|  |  | |||
							
								
								
									
										33
									
								
								clusters/cherry/manifests/vault/ca.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								clusters/cherry/manifests/vault/ca.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| # Private CA chain for Vault server certificates, in three steps: | ||||
| #   1. "selfsigned" Issuer bootstraps the chain. | ||||
| #   2. "server-selfsigned-ca" Certificate is a CA certificate signed by it and | ||||
| #      stored in the "server-ca" secret. | ||||
| #   3. "server-ca-issuer" Issuer signs certificates with that secret. | ||||
| apiVersion: cert-manager.io/v1 | ||||
| kind: Issuer | ||||
| metadata: | ||||
|   name: selfsigned | ||||
| spec: | ||||
|   selfSigned: {} | ||||
| --- | ||||
| apiVersion: cert-manager.io/v1 | ||||
| kind: Certificate | ||||
| metadata: | ||||
|   name: server-selfsigned-ca | ||||
| spec: | ||||
|   isCA: true | ||||
|   commonName: Vault Server CA | ||||
|   secretName: server-ca | ||||
|   duration: 87660h # 10 years | ||||
|   privateKey: | ||||
|     algorithm: RSA | ||||
|     size: 4096 | ||||
|   issuerRef: | ||||
|     name: selfsigned | ||||
|     kind: Issuer | ||||
|     group: cert-manager.io | ||||
|   # NOTE(review): presumably adds a combined key+certificate PEM entry to the | ||||
|   # secret; may depend on a cert-manager feature gate - confirm. | ||||
|   additionalOutputFormats: | ||||
|     - type: CombinedPEM | ||||
| --- | ||||
| apiVersion: cert-manager.io/v1 | ||||
| kind: Issuer | ||||
| metadata: | ||||
|   name: server-ca-issuer | ||||
| spec: | ||||
|   # Uses the CA key pair written by the "server-selfsigned-ca" Certificate. | ||||
|   ca: | ||||
|     secretName: server-ca | ||||
							
								
								
									
										21
									
								
								clusters/cherry/manifests/vault/certificate.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								clusters/cherry/manifests/vault/certificate.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | |||
| # Short-lived (24h) server certificate for the vault-0..vault-2 pods, stored in | ||||
| # the "server-tls" secret and issued by "server-ca-issuer". | ||||
| apiVersion: cert-manager.io/v1 | ||||
| kind: Certificate | ||||
| metadata: | ||||
|   name: server | ||||
| spec: | ||||
|   secretName: server-tls | ||||
|   duration: 24h | ||||
|   renewBefore: 144m # 10% of 24h | ||||
|   # Every pod is listed in three forms: short name, with the "vault" | ||||
|   # namespace suffix, and with the ".vault.svc" suffix. | ||||
|   dnsNames: | ||||
|     - vault-0.vault-internal | ||||
|     - vault-1.vault-internal | ||||
|     - vault-2.vault-internal | ||||
|     - vault-0.vault-internal.vault | ||||
|     - vault-1.vault-internal.vault | ||||
|     - vault-2.vault-internal.vault | ||||
|     - vault-0.vault-internal.vault.svc | ||||
|     - vault-1.vault-internal.vault.svc | ||||
|     - vault-2.vault-internal.vault.svc | ||||
|   # NOTE(review): no kind/group given; presumably resolves to a namespaced | ||||
|   # Issuer with this name - confirm. | ||||
|   issuerRef: | ||||
|     name: server-ca-issuer | ||||
|   commonName: Server Certificate | ||||
							
								
								
									
										44
									
								
								vault-unseal.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										44
									
								
								vault-unseal.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,44 @@ | |||
| #!/usr/bin/env bash | ||||
| # | ||||
| # Restart all Vault pods in the "vault" namespace and unseal each of them. | ||||
| # | ||||
| # Usage: [UNSEAL_KEYS="key1 key2 key3"] ./vault-unseal.sh | ||||
| # | ||||
| # UNSEAL_KEYS is a space-separated list of unseal keys. It can be supplied | ||||
| # through the environment so that real keys do not have to be stored in this | ||||
| # file; the placeholder below is only the fallback. | ||||
| # NOTE: every key is still passed to "vault operator unseal" as an argument. | ||||
| # | ||||
| # Exits non-zero when the pods cannot be listed or deleted, or when any | ||||
| # unseal command fails. | ||||
| 
 | ||||
| UNSEAL_KEYS="${UNSEAL_KEYS:-xxx yyy zzz}" | ||||
| 
 | ||||
| if ! VAULT_PODS=$(kubectl get pods -n vault -l app.kubernetes.io/name=vault -o jsonpath='{.items[*].metadata.name}'); then | ||||
|   echo "Failed to list Vault pods." >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| 
 | ||||
| # Split the space-separated lists into arrays so each element can be quoted. | ||||
| read -r -a vault_pods <<< "$VAULT_PODS" | ||||
| read -r -a unseal_keys <<< "$UNSEAL_KEYS" | ||||
| 
 | ||||
| if (( ${#vault_pods[@]} == 0 )); then | ||||
|   echo "No Vault pods found in namespace vault." >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| 
 | ||||
| echo "" | ||||
| echo "Unsealing Vault..." | ||||
| echo "" | ||||
| 
 | ||||
| echo "Deleting existing pods..." | ||||
| echo "" | ||||
| 
 | ||||
| if ! kubectl delete pods -n vault "${vault_pods[@]}" > /dev/null; then | ||||
|   echo "Failed to delete Vault pods." >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| 
 | ||||
| echo "Waiting for new pods to become ready..." | ||||
| echo "" | ||||
| 
 | ||||
| # Best effort: a timeout here only warns and the unseal is still attempted. | ||||
| # NOTE(review): sealed Vault pods may never report Ready, depending on the | ||||
| # readiness probe in use - confirm. | ||||
| if ! kubectl wait --for=condition=ready --timeout=180s pods -n vault -l app.kubernetes.io/name=vault > /dev/null; then | ||||
|   echo "Warning: pods did not report ready; trying to unseal anyway." >&2 | ||||
| fi | ||||
| 
 | ||||
| echo "Unsealing Vault pods..." | ||||
| echo "" | ||||
| 
 | ||||
| # The pod names collected before the restart are reused here. | ||||
| # NOTE(review): assumes recreated pods keep their names - confirm. | ||||
| failed=0 | ||||
| 
 | ||||
| for pod in "${vault_pods[@]}"; do | ||||
|   echo "Unsealing $pod..." | ||||
|   pod_ok=1 | ||||
| 
 | ||||
|   for key in "${unseal_keys[@]}"; do | ||||
|     # No -it: the command reads nothing from stdin and needs no terminal. | ||||
|     if ! kubectl exec -n vault "$pod" -- vault operator unseal "$key" > /dev/null; then | ||||
|       pod_ok=0 | ||||
|     fi | ||||
|     sleep 1 | ||||
|   done | ||||
| 
 | ||||
|   if (( pod_ok )); then | ||||
|     echo "$pod unsealed!" | ||||
|   else | ||||
|     echo "Failed to unseal $pod." >&2 | ||||
|     failed=1 | ||||
|   fi | ||||
|   echo "" | ||||
| done | ||||
| 
 | ||||
| if (( failed )); then | ||||
|   echo "Vault could not be fully unsealed." >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| 
 | ||||
| echo "Vault unsealed!" | ||||
| echo "" | ||||
| echo "" | ||||
| echo "" | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
		Reference in a new issue