From 1c0ab80326cce72d01a965e296095e70415e4c40 Mon Sep 17 00:00:00 2001 From: Riley Louvier Date: Mon, 15 Dec 2025 12:54:31 -0700 Subject: [PATCH] Add readme Q/A and HPA in Actions --- .github/workflows/dev_dispatch.yml | 45 ++++++++++++++++++++++++++---- README.md | 44 ++++++++++++++++++++++++++--- 2 files changed, 79 insertions(+), 10 deletions(-) diff --git a/.github/workflows/dev_dispatch.yml b/.github/workflows/dev_dispatch.yml index b39c2b0..596fa81 100644 --- a/.github/workflows/dev_dispatch.yml +++ b/.github/workflows/dev_dispatch.yml @@ -32,8 +32,34 @@ on: required: true default: "512Mi" + push_to_dockerhub: + description: "Push built image to dockerhub" + required: false + default: "false" + + hpa_enabled: + description: "Enable HPA" + required: false + default: "false" + + hpa_min_replicas: + description: "HPA min replicas" + required: false + default: "1" + + hpa_max_replicas: + description: "HPA max replicas" + required: false + default: "3" + + hpa_cpu_target_percentage: + description: "HPA CPU target percentage" + required: false + default: "60" + + jobs: - deploy: + deploy-docker-k8s: runs-on: ubuntu-latest steps: @@ -56,13 +82,18 @@ jobs: # APP_NAME=${{ github.event.inputs.name }} APP_NAME=app-${{ github.actor }}-${{ github.event.inputs.name }}-${{ github.run_number }} CONTAINER_NAME=${{ github.event.inputs.container_name }} - IMAGE=${{ github.event.inputs.base_image }} CPU_REQUEST=${{ github.event.inputs.cpu }} MEMORY_REQUEST=${{ github.event.inputs.memory }} CPU_LIMIT=${{ github.event.inputs.cpu }} MEMORY_LIMIT=${{ github.event.inputs.memory }} + # hpa stuff + HPA_ENABLED=${{ github.event.inputs.hpa_enabled }} + HPA_MIN_REPLICAS=${{ github.event.inputs.hpa_min_replicas }} + HPA_MAX_REPLICAS=${{ github.event.inputs.hpa_max_replicas }} + HPA_CPU_TARGET_PERCENTAGE=${{ github.event.inputs.hpa_cpu_target_percentage }} + SERVICE_PORT=80 TARGET_PORT=8080 EOF @@ -72,15 +103,17 @@ jobs: envsubst < ops/deploy.tpl.yaml > ops/deploy.yaml envsubst < 
ops/service.tpl.yaml > ops/service.yaml + if [ "${HPA_ENABLED}" = "true" ]; then + envsubst < ops/hpa.tpl.yaml > ops/hpa.yaml + fi + - name: Validate Kubernetes for Deployment # Of course, this would not be a dry run if I had a real cluster to deploy to. uses: docker://ghcr.io/yannh/kubeconform:latest with: - args: "-strict -ignore-missing-schemas ops/deploy.yaml ops/service.yaml" + args: "-strict -ignore-missing-schemas ops/*.yaml" - name: Upload rendered manifests as build artifacts uses: actions/upload-artifact@v4 with: name: k8s-manifests-${{ github.event.inputs.name }} - path: | - ops/deploy.yaml - ops/service.yaml + path: ops/*.yaml diff --git a/README.md b/README.md index f6fb2d5..27fbc0c 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,9 @@ The objective of this repo is to create an automated DevSecOps styled repo for u - Image must contain python2, python3, and R runtimes. - Image must be k8s compatabile. - Automated Github Actions to build,scan,and push image to dockerhub. -- Scan for CVEs and remedy them. As much as it is incredibly simple to use `docker scout` here, it's probably more efficient to use Trivy imo. It would practically be a no brainer decision in automated pipelines/GH Actions. -- I included a bad version of python 'requests' in my requirements.txt to show a critical vuln. +- Scan for CVEs and remedy them. As much as it is incredibly simple to use `docker scout` here, it's probably more efficient to use Trivy imo. It would practically be a no brainer decision in automated pipelines/GH Actions via Trivy and dependabot PRs. +- I included a bad version of python 'requests' in my requirements.txt to show a HIGH vuln. [CVE-2018-18074](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-18074) +- Repo must offer automated code reporting and fixing. 
## Docker Overview This image uses the following on base ubuntu22.04: @@ -26,21 +27,31 @@ This image uses the following on base ubuntu22.04: Right now with no cache, the image builds locally in about 35s according to docker buildkit. Obviously, if I was not using shared git runners and was in enterprise Github Org - the runners may indeed be much faster using self-hosted runners. -I generally find myself leaning on the Actions Runner controller [helm chart](https://artifacthub.io/packages/helm/actions-runner-controller/actions-runner-controller "helm chart") for increased build times on the dedicated runners in the Action itself. +I generally find myself leaning on the Actions Runner controller [helm chart](https://artifacthub.io/packages/helm/actions-runner-controller/actions-runner-controller "helm chart") for faster build times on the dedicated runners in the Action's runner itself. At some level with this challenge, there is a few limitations not having access to - - A real production grade k8s cluster - Enterprise Github Org(Github Security SARIF report posting only works in Enterprise Orgs within private repos). It'd be nice to use Trivy to post to this. - Some kind of ALB, ingress route setup,etc publicly exposable endpoint for the Service that goes to the Deploy. (the challenge specifically asked for me to touch on this). +- Probably Argo to deploy things from Helm charts +## Why did you leave our GPU requirement out? +TBH - it's poor practice without a real templating system like Helm/Kustomize/ETC(chef's choice here really,whatever templating you may be using.) to put in the GPU requirement. You could indeed make a completely separate template file for GPU nodes, but for simplicity's sake - I left it out and wanted to explain myself. I wanted to show some Github Actions purism on purpose here. + + +aka - 'Here is what I can do in a private k8s + docker msvc repo with automated sec. 
using nothing but Github Actions' :) + +You could of course use `nvidia.com/gpu: "0"` and define them as 0,but depending on what you are using I wouldn't want to +present this as a GPU workload when it is not. In any sense, now that I think about it. +Obviously nodeselectors + taints/tolerations could be used too(probably), but again it sort of lands on "hacky" by definition. ## Minikube setup **This can be run locally with minikube for testing purposes, and to verify the k8s comptability and run forever pod. Had to do it from minikube for this demo,imo**. - Please follow the approrpirate minikube install for your OS from [the official source](https://minikube.sigs.k8s.io/docs/start/) -- Enable metrics-server via `minikube addons enable metrics-server` +- Enable metrics-server via `minikube addons enable metrics-server` - This is for our HPA step later. - Load the image with `minikube image load sadminriley/python-test` - Verify you've loaded the image locally if needed with the following cmds: ``` @@ -73,4 +84,29 @@ python-swish-r-hpa Deployment/python-swish-r-deploy cpu: 2%/50% 1 ``` +HPA is for scaling based on _pod_ metrics, and those metrics are provided by metrics-server. + +### How do we scale resources based on events rather than resources? +- I mostly go for KEDA in this situation. Remember, HPA is for scaling based on your pods metrics reported usage and only scales replicasets(this is a k8s managed resource from our Deployments defined replica count). KEDA scales based on external events and works really well with HPA from my experience. + + +Some reasons to scale with KEDA: +- Cron schedules +- Custom external metrics + + +### How do we scale nodes though?! +On EKS??! We're always going to handle nodes with Karpenter nodepools. It's an absolute must have in most EKS clusters, in my opinion. 
The money saving measures combined with KEDA are just plain awesome(that's not really opinion as much as well known now) +...Or just cluster-autoscaler for incredibly basic clusters + + +### Observability/Logging +Most real enterprise orgs are going to be using Datadog(my personal favorite), Newrelic, or the prometheus/grafana stack we see in pure open source k8s. The native prometheus alertmanager can also be used in that last one. + + +### Bringing data to Memory? +I feel like you would probably default to a shared memory volume of some kind here - but to be honest, I'd love to hear +the actual answer to this. +## Room for improvement +- Lots probably! Out of respect for everyone's time(yours and mine both), I wanted to keep this mostly simple.