diff --git a/.github/workflows/pr-tests.yaml b/.github/workflows/pr-tests.yaml index e28aeda..a0ea5db 100644 --- a/.github/workflows/pr-tests.yaml +++ b/.github/workflows/pr-tests.yaml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - env: [dev] # TODO: add prod once clusters/prod/ has apps + env: [dev, prod] steps: - uses: actions/checkout@v4 @@ -34,7 +34,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - env: [dev] # TODO: add prod once clusters/prod/ has apps + env: [dev, prod] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 530aafe..baaaee6 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ GitOps repository that contains all the related configuration to manage the Pilo ## App-of-Apps Pattern -This repo uses ArgoCD's app-of-apps pattern: a root Application (`root-app.yaml`) deploys all child Applications, each defined under `clusters/dev/apps/<app>/`. +This repo uses ArgoCD's app-of-apps pattern: a root Application (`root-app.yaml`) deploys all child Applications, each defined under `clusters/<env>/apps/<app>/`. + +> **Environments:** Replace `<env>` with `dev` or `prod` throughout. Domains (`<domain>`): dev -> `dev.hdc.ebrains.eu`, prod -> `hdc.ebrains.eu`. 
### Sync-Wave Order @@ -18,15 +20,15 @@ This repo uses ArgoCD's app-of-apps pattern: a root Application (`root-app.yaml` | 3 | registry-secrets | ExternalSecrets for docker-registry-secret | | 3 | greenroom-storage | RWX PVC for upload/download (greenroom ns, nfs-client) | | 3 | core-storage | RWX PVC for upload/download (core ns, nfs-client) | -| 3 | arc-controller | GitHub Actions Runner Controller (arc-systems ns) | -| 4 | arc-runners-public | Self-hosted GH runners for PilotDataPlatform org (DinD, arc-runners ns) | +| 3 | arc-controller | GitHub Actions Runner Controller (arc-systems ns, dev only) | +| 4 | arc-runners-public | Self-hosted GH runners for PilotDataPlatform org (DinD, arc-runners ns, dev only) | | 4 | postgresql | Main DB (utility ns) | | 4 | keycloak-postgresql | Keycloak DB | | 5 | redis | | | 5 | kafka | Broker + Zookeeper + Connect | | 5 | elasticsearch | ES 7.17.3 (utility ns) | -| 5 | mailhog | SMTP sink for dev (no auth, no ingress) | -| 5 | minio | Object storage, S3 API ingress at `object.dev.hdc.ebrains.eu` | +| 5 | mailhog | SMTP sink (dev only, no auth, no ingress) | +| 5 | minio | Object storage, S3 API ingress at `object.<domain>` (dev: `object.dev.hdc.ebrains.eu`) | | 5 | message-bus-greenroom | RabbitMQ (greenroom ns) | | 6 | keycloak | | | 7 | auth | | @@ -34,7 +36,7 @@ This repo uses ArgoCD's app-of-apps pattern: a root Application (`root-app.yaml` | 8 | project | | | 8 | dataset | Dataset management (S3, metadata) | | 8 | dataops | Data operations (lineage, file ops) | -| 8 | notification | Email notifications (uses MailHog SMTP) | +| 8 | notification | Email notifications (dev: MailHog SMTP; prod: real SMTP) | | 8 | approval | Copy request workflows | | 8 | kong-postgresql | Kong DB (split from kong for PreSync hook) | | 8 | queue-consumer | Queue consumer (greenroom ns) | @@ -59,7 +61,7 @@ This repo uses ArgoCD's app-of-apps pattern: a root Application (`root-app.yaml` ### Workbench (Per-Project ApplicationSets) -Workbench services are deployed per project namespace 
(`project-{name}`) using ArgoCD ApplicationSets with a git file generator. Each project is defined in `clusters/dev/workbench/projects/{name}.yaml`: +Workbench services are deployed per project namespace (`project-{name}`) using ArgoCD ApplicationSets with a git file generator. Each project is defined in `clusters/<env>/workbench/projects/{name}.yaml`: ```yaml name: myproject @@ -69,13 +71,13 @@ Adding a project file triggers ApplicationSets to create per-project instances o | Service | Chart | Components | |---------|-------|------------| -| Guacamole | `clusters/dev/workbench/guacamole-stack/` | guacd + guacamole + PostgreSQL (md5 auth) | +| Guacamole | `clusters/<env>/workbench/guacamole-stack/` | guacd + guacamole + PostgreSQL (md5 auth) | -The `projects/` directory is shared across all workbench ApplicationSets — future services (Superset, JupyterHub) will read from the same catalog. +The `projects/` directory is shared across all workbench ApplicationSets within an environment — future services (Superset, JupyterHub) will read from the same catalog. #### Adding a new project -1. Create `clusters/dev/workbench/projects/<name>.yaml` with `name: <name>` +1. Create `clusters/<env>/workbench/projects/<name>.yaml` with `name: <name>` 2. Ensure prerequisites exist: - Vault secret: `vault kv put secret/guacamole pg-password=$(openssl rand -hex 24)` - Keycloak client: `guacamole-<name>` (managed in Terraform) @@ -106,8 +108,9 @@ After ArgoCD deploys Vault, these manual steps are required once per cluster. 
# Initialize - outputs 5 unseal keys + root token kubectl exec -it vault-0 -n vault -- vault operator init -# Store keys securely (dev cluster uses gopass), e.g: -# gopass ebrains-dev/hdc/ovh/vault-unseal-keys +# Store keys securely in gopass: +# dev: gopass ebrains-dev/hdc/ovh/vault-unseal-keys +# prod: gopass ebrains/hdc/ovh/vault-unseal-keys # Unseal (repeat 3x with different keys) kubectl exec -it vault-0 -n vault -- vault operator unseal @@ -196,6 +199,14 @@ vault kv put secret/minio \ | `secret/docker-registry/ovh` | username, password | registry-secrets | | `secret/github-runner` | github_app_id, github_app_installation_id, github_app_private_key | arc-runners-public | +### Docker Registry (`secret/docker-registry/ovh`) + +```bash +vault kv put secret/docker-registry/ovh \ + username='<username>' \ + password='<password>' +``` + To add or update a service password: `vault kv patch secret/postgresql <service>-user-password=<password>` ## Platform Architecture (WIP) @@ -234,17 +245,17 @@ HDC splits workloads across namespaces by trust boundary and function: ### Version Management -All image tags and chart dependency versions are centralized in [`clusters/dev/versions.yaml`](clusters/dev/versions.yaml). +All image tags and chart dependency versions are centralized in `clusters/<env>/versions.yaml`. ```bash # 1. Edit versions.yaml (image tag or chart version) -vim clusters/dev/versions.yaml +vim clusters/<env>/versions.yaml # 2. For chart version changes, propagate to Chart.yaml files -make sync-versions +make ENV=<env> sync-versions # 3. Validate -make test +make ENV=<env> test # 4. Commit both versions.yaml and any updated Chart.yaml files ``` @@ -253,19 +264,19 @@ Image tags are consumed as a Helm valueFile — ArgoCD deep-merges `registry.yam ### Registry Switching -The repo supports multiple container registries (OVH, EBRAINS). The active registry is set in `clusters/dev/registry.yaml`. +The repo supports multiple container registries (OVH, EBRAINS). The active registry is set in `clusters/<env>/registry.yaml`. 
```bash -make which-registry # show current registry -make switch-registry TO=ovh # switch to OVH registry -make switch-registry TO=ebrains # switch to EBRAINS registry +make ENV=<env> which-registry # show current registry +make ENV=<env> switch-registry TO=ovh # switch to OVH registry +make ENV=<env> switch-registry TO=ebrains # switch to EBRAINS registry ``` This updates `registry.yaml` and rewrites hardcoded registry URLs in app `values.yaml` files. ### Validation -Run `make test` before committing. It runs all checks: +Run `make ENV=<env> test` before committing. It runs all checks: | Test | What it catches | |------|----------------| diff --git a/clusters/prod/apps/cert-manager/Chart.yaml b/clusters/prod/apps/cert-manager/Chart.yaml new file mode 100644 index 0000000..4db31d2 --- /dev/null +++ b/clusters/prod/apps/cert-manager/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v2 +name: cert-manager +version: 0.1.0 +dependencies: + - name: cert-manager-jetstack + alias: cert-manager + version: v1.19.2 + repository: https://pilotdataplatform.github.io/helm-charts/ diff --git a/clusters/prod/apps/cert-manager/application.yaml b/clusters/prod/apps/cert-manager/application.yaml new file mode 100644 index 0000000..f051cc6 --- /dev/null +++ b/clusters/prod/apps/cert-manager/application.yaml @@ -0,0 +1,25 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cert-manager + namespace: argocd + annotations: + argocd.argoproj.io/sync-wave: "0" +spec: + project: default + source: + repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git + path: clusters/prod/apps/cert-manager + targetRevision: main + helm: + valueFiles: + - values.yaml + destination: + server: https://kubernetes.default.svc + namespace: cert-manager + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/clusters/prod/apps/cert-manager/templates/cluster-issuers.yaml b/clusters/prod/apps/cert-manager/templates/cluster-issuers.yaml new file 
mode 100644
index 0000000..c9fefcc
--- /dev/null
+++ b/clusters/prod/apps/cert-manager/templates/cluster-issuers.yaml
@@ -0,0 +1,18 @@
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: letsencrypt-prod
+  annotations:
+    # Apply after the cert-manager controller/webhook from this same chart
+    # are healthy, otherwise the first sync can fail on webhook admission
+    argocd.argoproj.io/sync-wave: "1"
+spec:
+  acme:
+    server: https://acme-v02.api.letsencrypt.org/directory
+    email: devops@indocresearch.org
+    privateKeySecretRef:
+      name: letsencrypt-prod-account-key
+    solvers:
+      - http01:
+          ingress:
+            ingressClassName: nginx
diff --git a/clusters/prod/apps/cert-manager/values.yaml b/clusters/prod/apps/cert-manager/values.yaml
new file mode 100644
index 0000000..7c6bc41
--- /dev/null
+++ b/clusters/prod/apps/cert-manager/values.yaml
@@ -0,0 +1,30 @@
+cert-manager:
+  crds:
+    enabled: true
+    keep: true
+
+  resources:
+    requests:
+      cpu: 10m
+      memory: 32Mi
+    limits:
+      cpu: 100m
+      memory: 128Mi
+
+  webhook:
+    resources:
+      requests:
+        cpu: 10m
+        memory: 32Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
+
+  cainjector:
+    resources:
+      requests:
+        cpu: 10m
+        memory: 32Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
diff --git a/clusters/prod/apps/core-storage/Chart.yaml b/clusters/prod/apps/core-storage/Chart.yaml
new file mode 100644
index 0000000..5b3d0d6
--- /dev/null
+++ b/clusters/prod/apps/core-storage/Chart.yaml
@@ -0,0 +1,3 @@
+apiVersion: v2
+name: core-storage
+version: 0.1.0
diff --git a/clusters/prod/apps/core-storage/application.yaml b/clusters/prod/apps/core-storage/application.yaml
new file mode 100644
index 0000000..c6c2807
--- /dev/null
+++ b/clusters/prod/apps/core-storage/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: core-storage
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "3"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    targetRevision: main
+    path: clusters/prod/apps/core-storage
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: core
+
syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/core-storage/templates/pvc.yaml b/clusters/prod/apps/core-storage/templates/pvc.yaml
new file mode 100644
index 0000000..89fbb79
--- /dev/null
+++ b/clusters/prod/apps/core-storage/templates/pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: core-storage
+spec:
+  accessModes:
+    - ReadWriteMany
+  storageClassName: {{ .Values.storage.storageClassName }}
+  resources:
+    requests:
+      storage: {{ .Values.storage.size }}
diff --git a/clusters/prod/apps/core-storage/values.yaml b/clusters/prod/apps/core-storage/values.yaml
new file mode 100644
index 0000000..ea1383f
--- /dev/null
+++ b/clusters/prod/apps/core-storage/values.yaml
@@ -0,0 +1,3 @@
+storage:
+  size: 10Gi  # rendered into the PVC's resources.requests.storage (templates/pvc.yaml)
+  storageClassName: nfs-client  # rendered into the PVC's spec.storageClassName
diff --git a/clusters/prod/apps/external-secrets/Chart.yaml b/clusters/prod/apps/external-secrets/Chart.yaml
new file mode 100644
index 0000000..3200a55
--- /dev/null
+++ b/clusters/prod/apps/external-secrets/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: external-secrets
+version: 0.1.0
+dependencies:
+  - name: external-secrets
+    version: "1.2.1"
+    repository: https://pilotdataplatform.github.io/helm-charts/
diff --git a/clusters/prod/apps/external-secrets/application.yaml b/clusters/prod/apps/external-secrets/application.yaml
new file mode 100644
index 0000000..8f63b7b
--- /dev/null
+++ b/clusters/prod/apps/external-secrets/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: external-secrets
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "2"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    path: clusters/prod/apps/external-secrets
+    targetRevision: main
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+
namespace: external-secrets
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/external-secrets/templates/cluster-secret-store.yaml b/clusters/prod/apps/external-secrets/templates/cluster-secret-store.yaml
new file mode 100644
index 0000000..60df2f8
--- /dev/null
+++ b/clusters/prod/apps/external-secrets/templates/cluster-secret-store.yaml
@@ -0,0 +1,19 @@
+apiVersion: external-secrets.io/v1
+kind: ClusterSecretStore
+metadata:
+  name: vault
+  annotations:
+    argocd.argoproj.io/sync-wave: "1"  # later wave than the chart's un-annotated resources
+spec:
+  provider:
+    vault:
+      server: "http://vault.vault.svc:8200"
+      path: "secret"  # same mount as the `vault kv put secret/...` commands in README.md
+      version: "v2"
+      auth:
+        kubernetes:
+          mountPath: "kubernetes"
+          role: "external-secrets"
+          serviceAccountRef:
+            name: "external-secrets"
+            namespace: "external-secrets"
diff --git a/clusters/prod/apps/external-secrets/values.yaml b/clusters/prod/apps/external-secrets/values.yaml
new file mode 100644
index 0000000..8f0cb89
--- /dev/null
+++ b/clusters/prod/apps/external-secrets/values.yaml
@@ -0,0 +1,29 @@
+external-secrets:
+  resources:
+    requests:
+      cpu: 10m
+      memory: 32Mi
+    limits:
+      cpu: 100m
+      memory: 128Mi
+
+  webhook:
+    resources:
+      requests:
+        cpu: 10m
+        memory: 32Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
+
+  certController:
+    startupProbe:
+      enabled: true
+      useReadinessProbePort: true
+    resources:
+      requests:
+        cpu: 10m
+        memory: 32Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
diff --git a/clusters/prod/apps/greenroom-storage/Chart.yaml b/clusters/prod/apps/greenroom-storage/Chart.yaml
new file mode 100644
index 0000000..e3e389b
--- /dev/null
+++ b/clusters/prod/apps/greenroom-storage/Chart.yaml
@@ -0,0 +1,3 @@
+apiVersion: v2
+name: greenroom-storage
+version: 0.1.0
diff --git a/clusters/prod/apps/greenroom-storage/application.yaml b/clusters/prod/apps/greenroom-storage/application.yaml
new file mode 100644
index 0000000..6fa0b2c
--- /dev/null
+++
b/clusters/prod/apps/greenroom-storage/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: greenroom-storage
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "3"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    targetRevision: main
+    path: clusters/prod/apps/greenroom-storage
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: greenroom
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/greenroom-storage/templates/pvc.yaml b/clusters/prod/apps/greenroom-storage/templates/pvc.yaml
new file mode 100644
index 0000000..b2b8a74
--- /dev/null
+++ b/clusters/prod/apps/greenroom-storage/templates/pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: greenroom-storage
+spec:
+  accessModes:
+    - ReadWriteMany
+  storageClassName: {{ .Values.storage.storageClassName }}
+  resources:
+    requests:
+      storage: {{ .Values.storage.size }}
diff --git a/clusters/prod/apps/greenroom-storage/values.yaml b/clusters/prod/apps/greenroom-storage/values.yaml
new file mode 100644
index 0000000..ea1383f
--- /dev/null
+++ b/clusters/prod/apps/greenroom-storage/values.yaml
@@ -0,0 +1,3 @@
+storage:
+  size: 10Gi  # rendered into the PVC's resources.requests.storage (templates/pvc.yaml)
+  storageClassName: nfs-client  # rendered into the PVC's spec.storageClassName
diff --git a/clusters/prod/apps/ingress-nginx/Chart.yaml b/clusters/prod/apps/ingress-nginx/Chart.yaml
new file mode 100644
index 0000000..2a15784
--- /dev/null
+++ b/clusters/prod/apps/ingress-nginx/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: ingress-nginx
+version: 0.1.0
+dependencies:
+  - name: ingress-nginx
+    version: 4.14.1
+    repository: https://pilotdataplatform.github.io/helm-charts/
diff --git a/clusters/prod/apps/ingress-nginx/application.yaml b/clusters/prod/apps/ingress-nginx/application.yaml
new file mode 100644
index 0000000..322769c
--- /dev/null
+++ b/clusters/prod/apps/ingress-nginx/application.yaml
@@ -0,0 +1,25 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: ingress-nginx
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "1"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    path: clusters/prod/apps/ingress-nginx
+    targetRevision: main
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: ingress-nginx
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/clusters/prod/apps/ingress-nginx/values.yaml b/clusters/prod/apps/ingress-nginx/values.yaml
new file mode 100644
index 0000000..e65fca9
--- /dev/null
+++ b/clusters/prod/apps/ingress-nginx/values.yaml
@@ -0,0 +1,36 @@
+ingress-nginx:
+  controller:
+    addHeaders:
+      X-Frame-Options: "SAMEORIGIN"
+      X-Content-Type-Options: "nosniff"
+      Referrer-Policy: "strict-origin-when-cross-origin"
+      Permissions-Policy: "geolocation=(), microphone=(), camera=()"
+    kind: Deployment
+    replicaCount: 2
+    service:
+      type: NodePort
+      nodePorts:
+        http: 30080
+        https: 30443
+    config:
+      server-tokens: "false"
+      use-forwarded-headers: "true"
+      compute-full-forwarded-for: "true"
+      # Trusted proxy subnet (prod private network)
+      proxy-real-ip-cidr: "10.0.1.0/24"
+    ingressClassResource:
+      name: nginx  # same class name the letsencrypt-prod ClusterIssuer's http01 solver uses
+      enabled: true
+      default: true
+      controllerValue: k8s.io/ingress-nginx
+    resources:
+      requests:
+        cpu: 100m
+        memory: 90Mi
+      limits:
+        cpu: 500m
+        memory: 256Mi
+    nodeSelector:
+      kubernetes.io/os: linux
+    admissionWebhooks:
+      enabled: true
diff --git a/clusters/prod/apps/nfs-provisioner/Chart.yaml b/clusters/prod/apps/nfs-provisioner/Chart.yaml
new file mode 100644
index 0000000..70d0890
--- /dev/null
+++ b/clusters/prod/apps/nfs-provisioner/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: nfs-provisioner
+version: 0.1.0
+dependencies:
+  - name: nfs-subdir-external-provisioner
+    version: "4.0.18"
+    repository: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
diff --git a/clusters/prod/apps/nfs-provisioner/application.yaml b/clusters/prod/apps/nfs-provisioner/application.yaml
new file mode 100644
index 0000000..1a9ca96
--- /dev/null
+++ b/clusters/prod/apps/nfs-provisioner/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: nfs-provisioner
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "3"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    path: clusters/prod/apps/nfs-provisioner
+    targetRevision: main
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: nfs-provisioner
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/nfs-provisioner/values.yaml b/clusters/prod/apps/nfs-provisioner/values.yaml
new file mode 100644
index 0000000..81647f6
--- /dev/null
+++ b/clusters/prod/apps/nfs-provisioner/values.yaml
@@ -0,0 +1,29 @@
+nfs-subdir-external-provisioner:
+  fullnameOverride: nfs-provisioner
+
+  image:
+    repository: n47w5524.c1.de1.container-registry.ovh.net/hdc-services-external/sig-storage/nfs-subdir-external-provisioner
+    tag: v4.0.2
+
+  imagePullSecrets:
+    - name: docker-registry-secret  # created in this namespace by the registry-secrets app
+
+  nfs:
+    server: 10.0.1.163
+    path: /nfs/export
+
+  storageClass:
+    name: nfs-client  # class requested by the core-storage and greenroom-storage PVCs
+    create: true
+    onDelete: "delete"  # NOTE(review): presumably removes the NFS subdir when a PVC is deleted; confirm this is wanted for prod data
+    reclaimPolicy: Delete  # NOTE(review): the storage Applications sync with prune: true; consider Retain for prod
+    accessModes: ReadWriteOnce
+    volumeBindingMode: Immediate
+
+  resources:
+    requests:
+      cpu: 10m
+      memory: 32Mi
+    limits:
+      cpu: 100m
+      memory: 128Mi
diff --git a/clusters/prod/apps/registry-secrets/Chart.yaml b/clusters/prod/apps/registry-secrets/Chart.yaml
new file mode 100644
index 0000000..b614956
--- /dev/null
+++
b/clusters/prod/apps/registry-secrets/Chart.yaml
@@ -0,0 +1,3 @@
+apiVersion: v2
+name: registry-secrets
+version: 0.1.0
diff --git a/clusters/prod/apps/registry-secrets/application.yaml b/clusters/prod/apps/registry-secrets/application.yaml
new file mode 100644
index 0000000..295f4f9
--- /dev/null
+++ b/clusters/prod/apps/registry-secrets/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: registry-secrets
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "3"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    targetRevision: main
+    path: clusters/prod/apps/registry-secrets
+    helm:
+      valueFiles:
+        - ../../registry.yaml  # clusters/prod/registry.yaml: global.imageRegistry + registryVaultPath
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: utility
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/registry-secrets/templates/docker-registry-secret.yaml b/clusters/prod/apps/registry-secrets/templates/docker-registry-secret.yaml
new file mode 100644
index 0000000..3646641
--- /dev/null
+++ b/clusters/prod/apps/registry-secrets/templates/docker-registry-secret.yaml
@@ -0,0 +1,39 @@
+{{- range $ns := list "utility" "keycloak" "redis" "minio" "greenroom" "core" "nfs-provisioner" }}
+---
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: docker-registry-secret
+  namespace: {{ $ns }}
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault  # the ClusterSecretStore defined by the external-secrets app
+  target:
+    name: docker-registry-secret
+    template:
+      type: kubernetes.io/dockerconfigjson
+      data:
+        .dockerconfigjson: '{"auths":{"{{ $.Values.global.imageRegistry }}":{"username":"{{`{{ .username }}`}}","password":"{{`{{ .password }}`}}","auth":"{{`{{ printf "%s:%s" .username .password | b64enc }}`}}"}}}'
+  data:
+    - secretKey: username
+      remoteRef:
+        key: {{ $.Values.registryVaultPath }}
+
property: username
+    - secretKey: password
+      remoteRef:
+        key: {{ $.Values.registryVaultPath }}
+        property: password
+{{- end }}
+{{- /* Patch default SA with imagePullSecrets in namespaces where Jobs need private registry access */ -}}
+{{- range $ns := list "greenroom" "utility" }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: default
+  namespace: {{ $ns }}
+imagePullSecrets:
+  - name: docker-registry-secret
+{{- end }}
diff --git a/clusters/prod/apps/vault/Chart.yaml b/clusters/prod/apps/vault/Chart.yaml
new file mode 100644
index 0000000..928cd7f
--- /dev/null
+++ b/clusters/prod/apps/vault/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: vault
+version: 0.1.0
+dependencies:
+  - name: vault
+    version: 0.31.0
+    repository: https://pilotdataplatform.github.io/helm-charts/
diff --git a/clusters/prod/apps/vault/application.yaml b/clusters/prod/apps/vault/application.yaml
new file mode 100644
index 0000000..e640450
--- /dev/null
+++ b/clusters/prod/apps/vault/application.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: vault
+  namespace: argocd
+  annotations:
+    argocd.argoproj.io/sync-wave: "3"
+spec:
+  project: default
+  source:
+    repoURL: https://github.com/PilotDataPlatform/pilot-hdc-platform-gitops.git
+    path: clusters/prod/apps/vault
+    targetRevision: main
+    helm:
+      valueFiles:
+        - values.yaml
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: vault
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
diff --git a/clusters/prod/apps/vault/templates/backup-cronjob.yaml b/clusters/prod/apps/vault/templates/backup-cronjob.yaml
new file mode 100644
index 0000000..bde377a
--- /dev/null
+++ b/clusters/prod/apps/vault/templates/backup-cronjob.yaml
@@ -0,0 +1,87 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: vault-backup
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 2Gi
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: vault-backup
+spec:
+  schedule: "0 2 * * *"
+  concurrencyPolicy: Forbid
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          restartPolicy: OnFailure
+          affinity:
+            podAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchLabels:
+                      app.kubernetes.io/name: vault
+                      component: server
+                  topologyKey: kubernetes.io/hostname
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 100
+            runAsGroup: 1000
+            fsGroup: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          containers:
+            - name: backup
+              image: alpine:3.19
+              resources:
+                requests:
+                  memory: 32Mi
+                  cpu: 10m
+                limits:
+                  memory: 128Mi
+                  cpu: 100m
+              securityContext:
+                allowPrivilegeEscalation: false
+                readOnlyRootFilesystem: true
+                capabilities:
+                  drop:
+                    - ALL
+              command:
+                - /bin/sh
+                - -c
+                - |
+                  set -e
+                  TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+                  BACKUP_DIR=/backup/${TIMESTAMP}
+                  # Stage the copy in a hidden directory: the retention `ls` below skips
+                  # dot-entries, so a failed or retried run (restartPolicy: OnFailure)
+                  # can never push a good backup out of the last-7 window
+                  STAGING_DIR=/backup/.incomplete-${TIMESTAMP}
+                  rm -rf /backup/.incomplete-*
+                  mkdir -p ${STAGING_DIR}
+                  cp -a /vault/data/* ${STAGING_DIR}/
+                  mv ${STAGING_DIR} ${BACKUP_DIR}
+                  # Keep only last 7 backups
+                  cd /backup && ls -t | tail -n +8 | xargs rm -rf || true
+                  echo "Backup completed: ${BACKUP_DIR}"
+              volumeMounts:
+                - name: vault-data
+                  mountPath: /vault/data
+                  readOnly: true
+                - name: vault-backup
+                  mountPath: /backup
+          volumes:
+            - name: vault-data
+              persistentVolumeClaim:
+                claimName: data-vault-0
+            - name: vault-backup
+              persistentVolumeClaim:
+                claimName: vault-backup
diff --git a/clusters/prod/apps/vault/values.yaml b/clusters/prod/apps/vault/values.yaml
new file mode 100644
index 0000000..061a12a
--- /dev/null
+++ b/clusters/prod/apps/vault/values.yaml
@@ -0,0 +1,21 @@
+vault:
+  injector:
+    resources:
+      requests:
+        memory: 64Mi
+        cpu: 50m
+      limits:
+        memory: 128Mi
+        cpu: 250m
+
+  server:
+    resources:
+      requests:
+        memory: 128Mi
+        cpu: 100m
+      limits:
+        memory: 256Mi
+        cpu: 500m
+
+    dataStorage:
+      size: 1Gi
diff --git a/clusters/prod/registry-ebrains.yaml b/clusters/prod/registry-ebrains.yaml
new file mode 100644
index
0000000..281c7a0
--- /dev/null
+++ b/clusters/prod/registry-ebrains.yaml
@@ -0,0 +1,3 @@
+global:
+  imageRegistry: docker-registry.ebrains.eu
+registryVaultPath: secret/data/docker-registry/ebrains
diff --git a/clusters/prod/registry-ovh.yaml b/clusters/prod/registry-ovh.yaml
new file mode 100644
index 0000000..5833eb8
--- /dev/null
+++ b/clusters/prod/registry-ovh.yaml
@@ -0,0 +1,3 @@
+global:
+  imageRegistry: n47w5524.c1.de1.container-registry.ovh.net
+registryVaultPath: secret/data/docker-registry/ovh
diff --git a/clusters/prod/registry.yaml b/clusters/prod/registry.yaml
new file mode 100644
index 0000000..5833eb8
--- /dev/null
+++ b/clusters/prod/registry.yaml
@@ -0,0 +1,3 @@
+global:
+  imageRegistry: n47w5524.c1.de1.container-registry.ovh.net
+registryVaultPath: secret/data/docker-registry/ovh
diff --git a/clusters/prod/versions.yaml b/clusters/prod/versions.yaml
new file mode 100644
index 0000000..6035130
--- /dev/null
+++ b/clusters/prod/versions.yaml
@@ -0,0 +1,108 @@
+# Centralized version definitions for all prod apps
+# Image tags are consumed as Helm valueFiles
+# Chart versions are synced to Chart.yaml via: make ENV=prod sync-versions
+
+# -- HDC service image tags (auto-bumped by CI) --
+auth-service:
+  image:
+    tag: "2.2.38"
+metadata-service:
+  image:
+    tag: "2.2.13"
+project-service:
+  image:
+    tag: "2.3.9"
+dataops-service:
+  image:
+    tag: "2.5.8"
+notification-service:
+  image:
+    tag: "2.2.10"
+pipelinewatch-service:
+  image:
+    tag: "pipelinewatch-2.2.7"
+upload-service:
+  image:
+    tag: "upload-2.2.19"
+download-service:
+  image:
+    tag: "download-2.2.21"
+approval-service:
+  image:
+    tag: "2.2.9"
+metadata-event-handler:
+  image:
+    tag: "metadata_event_handler-1.0.8"
+search-service:
+  image:
+    tag: "2.2.10"
+bff-cli-service:
+  image:
+    tag: "bff-cli-2.2.15"
+workspace-service:
+  image:
+    tag: "2.2.7"
+portal:
+  image:
+    tag: "1.7.5-hdc-ovh-prod"
+# BFF and dataset use base-chart-hdc — tags managed in their values.yaml by CI (update_gitops_versions.yml)
+ #
-- Infrastructure image tags (manually managed) --
+postgresql:
+  image:
+    tag: "16.3.0-932ab18-pgcron"
+keycloak:
+  image:
+    tag: "20.0.5-debian-11-r4"
+# Note: keycloak init container alpine:3.21 is hardcoded in keycloak/values.yaml
+
+kong:
+  image:
+    tag: "latest"  # NOTE(review): floating tag in prod; pin to an immutable version so syncs are reproducible
+xwiki:
+  image:
+    tag: "16.6.0-postgres-tomcat"
+minio:
+  image:
+    tag: "2022.12.12-debian-11-r9"
+# -- Chart dependency versions --
+# Synced to Chart.yaml via: make ENV=prod sync-versions
+charts:
+  argo-cd: "9.1.7"
+  auth-service: "1.0.9"
+  base-chart-hdc: "1.0.1"
+  metadata-service: "1.0.0"
+  project-service: "0.2.1"
+  dataops-service: "0.2.1"
+  cert-manager-jetstack: "v1.19.2"
+  external-secrets: "1.2.1"
+  ingress-nginx: "4.14.1"
+  keycloak: "13.2.0"
+  postgresql: "15.5.17"
+  rabbitmq: "10.1.12"
+  redis: "16.11.2"
+  kafka: "20.0.3"
+  elasticsearch: "17.9.29"
+  kong: "9.1.8"
+  mailhog: "5.0.7"
+  notification-service: "0.3.2"
+  approval-service: "0.3.1"
+  queue-service: "0.3.0"  # consumer + producer
+  queue-service-socketio: "0.4.1"  # socketio only (same chart, different version)
+  minio: "11.7.13"
+  portal: "2.1.2"
+  vault: "0.31.0"
+  nfs-subdir-external-provisioner: "4.0.18"
+  pipelinewatch-service: "0.4.2"
+  upload-service: "0.3.1"
+  download-service: "1.0.6"
+  metadata-event-handler: "0.1.1"
+  search-service: "0.2.2"
+  base-chart: "0.1.0"
+  bff-cli-service: "0.1.0"
+  workspace-service: "0.2.0"
+  xwiki: "0.3.0"
+  guacamole-postgresql: "15.5.17"
+  jupyterhub: "3.3.8"
+  gha-runner-scale-set-controller: "0.8.2"
+  gha-runner-scale-set: "0.8.2"