Compare commits
10 commits
1ed3171954
...
d410d564f3
| Author | SHA1 | Date | |
|---|---|---|---|
| d410d564f3 | |||
| 2eed379533 | |||
| 4577bacd0f | |||
| 12525660cd | |||
| 88e41eb5cc | |||
| 3122d80ee1 | |||
| 005fe949d6 | |||
| 9ed52183d5 | |||
| f38e6d5a3c | |||
| be21c1c298 |
10 changed files with 345 additions and 0 deletions
46
appsets/node-problem-detector.yaml
Normal file
46
appsets/node-problem-detector.yaml
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# ApplicationSet that renders one Argo CD Application per listed cluster; each
# Application installs the node-problem-detector Helm chart into the
# `monitoring` namespace of that cluster.
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  # Named after the chart this set deploys. The previous value,
  # `universal-cert-manager`, was a copy-paste leftover that did not match
  # this file. NOTE(review): if an ApplicationSet with the old name was
  # already applied from this file, remove it after the rename.
  name: universal-node-problem-detector
  annotations:
    argocd.argoproj.io/sync-wave: "3"
spec:
  generators:
    # Static list generator: `cluster` and `url` are substituted into the
    # template below.
    - list:
        elements:
          - cluster: cherry
            url: https://kubernetes.default.svc
          - cluster: lychee
            url: https://172.16.152.1:6443
  syncPolicy:
    applicationsSync: sync
  template:
    metadata:
      name: "{{cluster}}-node-problem-detector"
      annotations:
        argocd.argoproj.io/sync-wave: "5"
    spec:
      project: monitoring
      syncPolicy:
        automated:
          prune: true
          selfHeal: true
        syncOptions:
          - CreateNamespace=true
      source:
        chart: node-problem-detector
        repoURL: https://charts.deliveryhero.io/
        targetRevision: 2.3.10
        helm:
          releaseName: node-problem-detector
          valuesObject:
            # NOTE(review): nesting of serviceMonitor/prometheusRule under
            # `metrics` is reconstructed from the chart's values layout;
            # confirm against the committed file.
            metrics:
              enabled: true
              serviceMonitor:
                enabled: true
              prometheusRule:
                enabled: true
      destination:
        server: "{{url}}"
        namespace: monitoring
|
||||||
103
appsets/prometheus.yaml
Normal file
103
appsets/prometheus.yaml
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
---
# ApplicationSet that renders one kube-prometheus-stack Application per listed
# cluster (only `lychee` is listed here).
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
  name: universal-prometheus
  annotations:
    argocd.argoproj.io/sync-wave: '2'
spec:
  syncPolicy:
    applicationsSync: create-update
    preserveResourcesOnDeletion: true
  generators:
    # `cluster` and `url` feed the {{cluster}} / {{url}} placeholders below.
    - list:
        elements:
          - cluster: lychee
            url: https://172.16.152.1:6443
  template:
    metadata:
      name: '{{cluster}}-prometheus'
      annotations:
        argocd.argoproj.io/sync-wave: '5'
    spec:
      project: monitoring
      destination:
        server: '{{url}}'
        namespace: monitoring
      syncPolicy:
        automated:
          prune: true
          selfHeal: true
        syncOptions:
          - CreateNamespace=true
          - ServerSideApply=true
      source:
        chart: kube-prometheus-stack
        repoURL: https://prometheus-community.github.io/helm-charts
        targetRevision: 51.2.0
        helm:
          releaseName: prometheus
          # Helm values are passed as a literal string; {{cluster}} inside it
          # is filled in by the ApplicationSet template.
          values: |
            fullnameOverride: "prometheus"

            prometheus:
              ingress:
                enabled: true
                ingressClassName: nginx
                annotations:
                  cert-manager.io/cluster-issuer: letsencrypt-prod
                hosts:
                  - prometheus.{{cluster}}.k8s.vxm.cz
                paths:
                  - /
                pathType: Prefix
                tls:
                  - hosts:
                      - prometheus.{{cluster}}.k8s.vxm.cz
                    secretName: prometheus-ingress-tls

              prometheusSpec:
                serviceMonitorSelectorNilUsesHelmValues: false
                podMonitorSelectorNilUsesHelmValues: false
                ruleSelectorNilUsesHelmValues: false

                storageSpec:
                  volumeClaimTemplate:
                    spec:
                      storageClassName: hcloud-volumes
                      accessModes: ["ReadWriteOnce"]
                      resources:
                        requests:
                          storage: 20Gi

              serviceMonitor:
                selfMonitor: true

            grafana:
              enabled: false

            alertmanager:
              enabled: true
              ingress:
                enabled: true
                ingressClassName: nginx
                annotations:
                  cert-manager.io/cluster-issuer: letsencrypt-prod
                hosts:
                  - alertmanager.{{cluster}}.k8s.vxm.cz
                paths:
                  - /
                pathType: Prefix
                tls:
                  - hosts:
                      - alertmanager.{{cluster}}.k8s.vxm.cz
                    secretName: alertmanager-ingress-tls

              alertmanagerSpec:
                resources:
                  limits:
                    cpu: 250m
                    memory: 256Mi
                  requests:
                    cpu: 100m
                    memory: 256Mi
|
||||||
38
clusters/cherry/apps/blackbox-exporter.yaml
Normal file
38
clusters/cherry/apps/blackbox-exporter.yaml
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
---
# Argo CD Application that installs the prometheus-blackbox-exporter Helm chart
# into the `monitoring` namespace via https://kubernetes.default.svc.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: cherry-blackbox-exporter
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: monitoring
  source:
    repoURL: https://prometheus-community.github.io/helm-charts
    chart: prometheus-blackbox-exporter
    targetRevision: 8.4.0
    helm:
      releaseName: blackbox-exporter
      valuesObject:
        releaseLabel: true
        pspEnabled: false
        resources:
          requests:
            cpu: 50m
            memory: 50Mi
          limits:
            cpu: 100m
            memory: 300Mi

        serviceMonitor:
          selfMonitor:
            enabled: true
  destination:
    server: https://kubernetes.default.svc
    namespace: monitoring
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
|
||||||
26
clusters/cherry/apps/hcloud-cloud-controller-manager.yaml
Normal file
26
clusters/cherry/apps/hcloud-cloud-controller-manager.yaml
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
---
# Argo CD Application that installs the hcloud-cloud-controller-manager Helm
# chart into `kube-system` via https://kubernetes.default.svc.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: cherry-hcloud-cloud-controller-manager
  namespace: argocd
spec:
  project: infrastructure
  source:
    repoURL: https://charts.hetzner.cloud
    chart: hcloud-cloud-controller-manager
    targetRevision: 1.18.0
    helm:
      releaseName: hcloud-cloud-controller-manager
      valuesObject:
        # NOTE(review): `controller.hcloudToken.existingSecret` looks like the
        # values layout of the hcloud CSI driver chart; confirm that the
        # cloud-controller-manager chart actually reads this key — TODO verify.
        controller:
          hcloudToken:
            existingSecret:
              name: hcloud-token
              key: token
  destination:
    server: https://kubernetes.default.svc
    namespace: kube-system
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
|
||||||
|
|
@ -55,6 +55,10 @@ spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
hostPort:
|
hostPort:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
metrics:
|
||||||
|
enabled: true
|
||||||
|
serviceMonitor:
|
||||||
|
enabled: true
|
||||||
config:
|
config:
|
||||||
allow-snippet-annotations: "true"
|
allow-snippet-annotations: "true"
|
||||||
generate-request-id: "true"
|
generate-request-id: "true"
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,34 @@ spec:
|
||||||
requests:
|
requests:
|
||||||
storage: 50Gi
|
storage: 50Gi
|
||||||
|
|
||||||
|
additionalScrapeConfigs:
|
||||||
|
- job_name: "node-exporter"
|
||||||
|
metrics_path: "/metrics"
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- 172.16.1.1:9100
|
||||||
|
- 172.16.1.2:9100
|
||||||
|
- maple.vxm.cz:9100
|
||||||
|
- alder.vxm.cz:9100
|
||||||
|
|
||||||
|
# See: https://developers.mattermost.com/blog/cloud-monitoring/
|
||||||
|
- job_name: "federate"
|
||||||
|
scrape_interval: 15s
|
||||||
|
|
||||||
|
honor_labels: true
|
||||||
|
metrics_path: "/federate"
|
||||||
|
|
||||||
|
params:
|
||||||
|
'match[]':
|
||||||
|
- '{job="prometheus"}'
|
||||||
|
- '{__name__=~"job:.*"}'
|
||||||
|
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- prometheus.lychee.k8s.vxm.cz.
|
||||||
|
labels:
|
||||||
|
clusterID: lychee
|
||||||
|
|
||||||
serviceMonitor:
|
serviceMonitor:
|
||||||
selfMonitor: true
|
selfMonitor: true
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ kind: Application
|
||||||
metadata:
|
metadata:
|
||||||
name: uptime-kuma
|
name: uptime-kuma
|
||||||
namespace: argocd
|
namespace: argocd
|
||||||
|
finalizers:
|
||||||
|
- resources-finalizer.argocd.argoproj.io
|
||||||
spec:
|
spec:
|
||||||
project: monitoring
|
project: monitoring
|
||||||
syncPolicy:
|
syncPolicy:
|
||||||
|
|
|
||||||
33
clusters/cherry/manifests/vault/ca.yaml
Normal file
33
clusters/cherry/manifests/vault/ca.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
---
# Self-signed issuer; referenced only by the CA certificate below.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: selfsigned
spec:
  selfSigned: {}
---
# CA certificate issued by `selfsigned`; its key pair is stored in the
# `server-ca` secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: server-selfsigned-ca
spec:
  secretName: server-ca
  commonName: Vault Server CA
  isCA: true
  duration: 87660h  # 10 years
  issuerRef:
    group: cert-manager.io
    kind: Issuer
    name: selfsigned
  privateKey:
    algorithm: RSA
    size: 4096
  additionalOutputFormats:
    - type: CombinedPEM
---
# CA issuer backed by the `server-ca` secret produced by the certificate above.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: server-ca-issuer
spec:
  ca:
    secretName: server-ca
|
||||||
21
clusters/cherry/manifests/vault/certificate.yaml
Normal file
21
clusters/cherry/manifests/vault/certificate.yaml
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
---
# Short-lived server certificate issued by `server-ca-issuer` and stored in the
# `server-tls` secret; covers the three vault-N.vault-internal host names in
# their short, namespace-qualified and `.svc` forms.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: server
spec:
  commonName: Server Certificate
  secretName: server-tls
  issuerRef:
    name: server-ca-issuer
  duration: 24h
  renewBefore: 144m  # 10% of 24h
  dnsNames:
    - vault-0.vault-internal
    - vault-1.vault-internal
    - vault-2.vault-internal
    - vault-0.vault-internal.vault
    - vault-1.vault-internal.vault
    - vault-2.vault-internal.vault
    - vault-0.vault-internal.vault.svc
    - vault-1.vault-internal.vault.svc
    - vault-2.vault-internal.vault.svc
|
||||||
44
vault-unseal.sh
Executable file
44
vault-unseal.sh
Executable file
|
|
@ -0,0 +1,44 @@
|
||||||
|
#!/usr/bin/env bash
#
# Restart every Vault pod in the `vault` namespace and submit each unseal key
# to each pod with `vault operator unseal`.
#
# Environment:
#   UNSEAL_KEYS  Whitespace-separated unseal keys. Defaults to the placeholder
#                values so real keys never have to be committed to this file.
#
# Exit status: 0 when every step succeeded, 1 when no pods were found, the
# pods could not be deleted, or any unseal command failed.

UNSEAL_KEYS="${UNSEAL_KEYS:-xxx yyy zzz}"

# Split on any whitespace into an array so later expansions can be quoted.
# `read -d ''` returns non-zero at end of input even on success, hence `|| true`.
read -r -d '' -a unseal_keys <<< "$UNSEAL_KEYS" || true

# Pod names are captured before the restart and reused afterwards.
# NOTE(review): this assumes recreated pods keep their names (e.g. they are
# managed by a StatefulSet) — confirm.
read -r -d '' -a vault_pods <<< "$(kubectl get pods -n vault -l app.kubernetes.io/name=vault -o jsonpath='{.items[*].metadata.name}')" || true

if [ "${#vault_pods[@]}" -eq 0 ]; then
  echo "No Vault pods found in namespace 'vault'; nothing to unseal." >&2
  exit 1
fi

echo ""
echo "Unsealing Vault..."
echo ""

echo "Deleting existing pods..."
echo ""

if ! kubectl delete pods -n vault "${vault_pods[@]}" > /dev/null; then
  echo "Failed to delete the Vault pods; aborting." >&2
  exit 1
fi

echo "Waiting for new pods to become ready..."
echo ""

# A failed or timed-out wait is reported but does not abort the run, matching
# the original best-effort behaviour.
# NOTE(review): sealed Vault pods may never report Ready, in which case this
# always waits the full timeout — confirm against the deployment's probes.
if ! kubectl wait --for=condition=ready --timeout=180s pods -n vault -l app.kubernetes.io/name=vault > /dev/null; then
  echo "Warning: pods did not become ready within 180s; attempting unseal anyway." >&2
fi

echo "Unsealing Vault pods..."
echo ""

failed=0

for pod in "${vault_pods[@]}"
do
  echo "Unsealing $pod..."

  pod_ok=1
  for key in "${unseal_keys[@]}"
  do
    # No -i/-t: the command needs no stdin and its output is discarded, and
    # requesting a TTY fails or warns when the script runs non-interactively.
    if ! kubectl exec -n vault "$pod" -- vault operator unseal "$key" > /dev/null; then
      pod_ok=0
    fi
    sleep 1
  done

  if [ "$pod_ok" -eq 1 ]; then
    echo "$pod unsealed!"
  else
    echo "One or more unseal commands failed for $pod." >&2
    failed=1
  fi
  echo ""
done

if [ "$failed" -ne 0 ]; then
  echo "Vault unseal finished with errors." >&2
  exit 1
fi

echo "Vault unsealed!"
echo ""
echo ""
echo ""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference in a new issue