From 48ac3a18aeee1d2e98c49b0211f91159735e4771 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Tue, 4 Feb 2025 01:30:34 +0000 Subject: [PATCH 1/2] Automated scoring update --- codebundles/aws-eks-health/runbook.robot | 6 +- codebundles/aws-eks-health/sli.robot | 2 +- codebundles/aws-eks-node-reboot/runbook.robot | 2 +- .../runbook.robot | 2 +- .../aws-elasticache-redis-health/sli.robot | 2 +- codebundles/aws-lambda-health/runbook.robot | 6 +- codebundles/aws-lambda-health/sli.robot | 2 +- codebundles/azure-aks-triage/runbook.robot | 4 +- codebundles/azure-aks-triage/sli.robot | 2 +- .../azure-appgateway-health/runbook.robot | 2 +- codebundles/azure-appgateway-health/sli.robot | 4 +- .../azure-appservice-triage/runbook.robot | 4 +- codebundles/azure-appservice-triage/sli.robot | 4 +- codebundles/cli-test/runbook.robot | 6 +- codebundles/cmd-test/runbook.robot | 8 +- .../runbook.robot | 6 +- codebundles/curl-http-ok/runbook.robot | 2 +- codebundles/curl-http-ok/sli.robot | 2 +- .../gcloud-log-inspection/runbook.robot | 2 +- codebundles/gcloud-node-preempt/runbook.robot | 2 +- codebundles/gcloud-node-preempt/sli.robot | 2 +- codebundles/gcp-bucket-health/sli.robot | 2 +- .../gcp-cloud-function-health/runbook.robot | 4 +- .../runbook.robot | 2 +- .../gh-actions-artifact-analysis/sli.robot | 2 +- .../k8s-app-troubleshoot/runbook.robot | 4 +- codebundles/k8s-app-troubleshoot/sli.robot | 2 +- .../k8s-artifactory-health/runbook.robot | 2 +- .../k8s-certmanager-healthcheck/sli.robot | 2 +- codebundles/k8s-chaos-flux/runbook.robot | 10 +- codebundles/k8s-chaos-workload/runbook.robot | 6 +- .../k8s-cluster-node-health/runbook.robot | 2 +- codebundles/k8s-cluster-node-health/sli.robot | 2 +- .../k8s-daemonset-healthcheck/runbook.robot | 2 +- .../k8s-deployment-healthcheck/runbook.robot | 10 +- codebundles/k8s-deployment-ops/runbook.robot | 2 +- .../k8s-flux-suspend-namespace/runbook.robot | 4 +- .../runbook.robot | 2 +- .../k8s-fluxcd-reconcile/runbook.robot | 2 +- 
.../k8s-gitops-gh-remediate/runbook.robot | 2 +- .../k8s-ingress-gce-healthcheck/runbook.robot | 12 +- .../k8s-jenkins-healthcheck/runbook.robot | 4 +- codebundles/k8s-kubectl-cmd/runbook.robot | 2 +- codebundles/k8s-kubectl-cmd/sli.robot | 2 +- .../k8s-labeledpods-healthcheck/sli.robot | 2 +- .../k8s-loki-healthcheck/runbook.robot | 4 +- .../k8s-namespace-healthcheck/sli.robot | 8 +- codebundles/k8s-otelcollector/runbook.robot | 2 +- .../k8s-podresources-health/runbook.robot | 4 +- .../k8s-postgres-healthcheck/sli.robot | 4 +- .../k8s-prometheus-healthcheck/runbook.robot | 10 +- codebundles/k8s-pvc-healthcheck/sli.robot | 2 +- .../k8s-redis-healthcheck/runbook.robot | 2 +- .../k8s-restart-resource/runbook.robot | 2 +- .../k8s-statefulset-healthcheck/runbook.robot | 8 +- .../k8s-tail-logs-dynamic/runbook.robot | 2 +- .../k8s-vault-healthcheck/runbook.robot | 16 +- .../runbook.robot | 2 +- codebundles/test-issue/runbook.robot | 2 +- task_analysis.json | 2173 +++++++++++++++++ 60 files changed, 2286 insertions(+), 113 deletions(-) create mode 100644 task_analysis.json diff --git a/codebundles/aws-eks-health/runbook.robot b/codebundles/aws-eks-health/runbook.robot index 1f0464335..d27c8a65f 100644 --- a/codebundles/aws-eks-health/runbook.robot +++ b/codebundles/aws-eks-health/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Check EKS Fargate Cluster Health Status +Check EKS Fargate Cluster Health Status in AWS Region `${AWS_REGION}` [Documentation] This script checks the health status of an Amazon EKS Fargate cluster. 
[Tags] EKS Fargate Cluster Health AWS Kubernetes Pods Nodes ${process}= RW.CLI.Run Bash File check_eks_fargate_cluster_health_status.sh @@ -35,7 +35,7 @@ Check EKS Fargate Cluster Health Status END RW.Core.Add Pre To Report ${process.stdout} -Check EKS Cluster Health Status +Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}` [Documentation] This script checks the health status of an Amazon EKS cluster. [Tags] EKS Cluster Health AWS Kubernetes Pods Nodes ${process}= RW.CLI.Run Bash File check_eks_cluster_health.sh @@ -54,7 +54,7 @@ Check EKS Cluster Health Status END RW.Core.Add Pre To Report ${process.stdout} -List EKS Cluster Metrics +Monitor EKS Cluster Health in AWS Region `${AWS_REGION}` [Documentation] This bash script is designed to monitor the health and status of an Amazon EKS cluster. [Tags] AWS EKS Fargate Bash Script Node Health ${process}= RW.CLI.Run Bash File list_eks_fargate_metrics.sh diff --git a/codebundles/aws-eks-health/sli.robot b/codebundles/aws-eks-health/sli.robot index 50270e931..7b382553c 100644 --- a/codebundles/aws-eks-health/sli.robot +++ b/codebundles/aws-eks-health/sli.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Check EKS Cluster Health Status +Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}` [Documentation] This script checks the health status of an Amazon EKS cluster. 
[Tags] EKS Cluster Health AWS Kubernetes Pods Nodes ${process}= RW.CLI.Run Bash File check_eks_cluster_health.sh diff --git a/codebundles/aws-eks-node-reboot/runbook.robot b/codebundles/aws-eks-node-reboot/runbook.robot index 11d0ff806..b9c788b05 100644 --- a/codebundles/aws-eks-node-reboot/runbook.robot +++ b/codebundles/aws-eks-node-reboot/runbook.robot @@ -11,7 +11,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check EKS Nodegroup Status +Check EKS Nodegroup Status in `${EKS_CLUSTER_NAME}` [Documentation] Performs a check on a given cluster's nodegroup, raising an issue if the status of the nodegroup is not healthy. [Tags] aws eks node group status ${node_state}= RW.CLI.Run Cli diff --git a/codebundles/aws-elasticache-redis-health/runbook.robot b/codebundles/aws-elasticache-redis-health/runbook.robot index f5dacc702..698df36ae 100644 --- a/codebundles/aws-elasticache-redis-health/runbook.robot +++ b/codebundles/aws-elasticache-redis-health/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Scan AWS Elasticache Redis Status +Scan AWS Elasticache Redis Status in AWS Region `${AWS_REGION}` [Documentation] Checks the high level metrics and status of the elasticache redis instances in the region. [Tags] AWS Elasticache configuration endpoint configuration ${process}= RW.CLI.Run Bash File analyze_aws_elasticache_redis_metrics.sh diff --git a/codebundles/aws-elasticache-redis-health/sli.robot b/codebundles/aws-elasticache-redis-health/sli.robot index 3ddd928b4..6e2071568 100644 --- a/codebundles/aws-elasticache-redis-health/sli.robot +++ b/codebundles/aws-elasticache-redis-health/sli.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Scan ElastiCaches +Scan ElastiCaches in AWS Region `${AWS_REGION}` [Documentation] Performs a broad health scan of all Elasticache instances in the region. 
[Tags] bash script AWS Elasticache Health ${process}= RW.CLI.Run Bash File redis_status_scan.sh diff --git a/codebundles/aws-lambda-health/runbook.robot b/codebundles/aws-lambda-health/runbook.robot index 6c9a7cdfe..b8e002d6b 100644 --- a/codebundles/aws-lambda-health/runbook.robot +++ b/codebundles/aws-lambda-health/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -List Lambda Versions and Runtimes +List Lambda Versions and Runtimes in AWS Region `${AWS_REGION}` [Documentation] This script is designed to list all the versions and runtimes of a specified AWS Lambda function. [Tags] AWS Lambda Versions Runtimes ${process}= RW.CLI.Run Bash File list_lambda_runtimes.sh @@ -26,7 +26,7 @@ List Lambda Versions and Runtimes ... secret__AWS_ROLE_ARN=${AWS_ROLE_ARN} RW.Core.Add Pre To Report ${process.stdout} -Analyze AWS Lambda Invocation Errors +Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}` [Documentation] This bash script is designed to analyze AWS Lambda Invocation Errors for a specified function within a specified region. [Tags] AWS Lambda Error Analysis Invocation Errors CloudWatch Logs ${process}= RW.CLI.Run Bash File analyze_lambda_invocation_errors.sh @@ -45,7 +45,7 @@ Analyze AWS Lambda Invocation Errors ... details=${process.stdout} END -Monitor AWS Lambda Performance Metrics +Monitor AWS Lambda Performance Metrics in AWS Region `${AWS_REGION}` [Documentation] This script is a bash utility for AWS Lambda functions the lists their notable metrics. 
[Tags] AWS Lambda CloudWatch Logs Metrics ${process}= RW.CLI.Run Bash File monitor_aws_lambda_performance_metrics.sh diff --git a/codebundles/aws-lambda-health/sli.robot b/codebundles/aws-lambda-health/sli.robot index 125a69872..e73f16831 100644 --- a/codebundles/aws-lambda-health/sli.robot +++ b/codebundles/aws-lambda-health/sli.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Analyze AWS Lambda Invocation Errors +Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}` [Documentation] This bash script is designed to analyze AWS Lambda Invocation Errors for a specified function within a specified region. [Tags] AWS Lambda Error Analysis Invocation Errors CloudWatch Logs ${process}= RW.CLI.Run Bash File analyze_lambda_invocation_errors.sh diff --git a/codebundles/azure-aks-triage/runbook.robot b/codebundles/azure-aks-triage/runbook.robot index 89fa5c302..cbb63a913 100644 --- a/codebundles/azure-aks-triage/runbook.robot +++ b/codebundles/azure-aks-triage/runbook.robot @@ -13,7 +13,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check for Resource Health Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` +Check for Overutilization and Networking Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch a list of issues that might affect the AKS cluster [Tags] aks config ${resource_health}= RW.CLI.Run Bash File @@ -82,7 +82,7 @@ Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${ ... 
details=${item["details"]} END END -Check Network Configuration of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` +Check Network Configuration of AKS Cluster `${{AKS_CLUSTER}}` In Resource Group `${{AZ_RESOURCE_GROUP}}` [Documentation] Fetch the network configuration, generating resource URLs and basic recommendations [Tags] AKS config network route firewall ${network}= RW.CLI.Run Bash File diff --git a/codebundles/azure-aks-triage/sli.robot b/codebundles/azure-aks-triage/sli.robot index 85bbf1f91..36e35b8e0 100644 --- a/codebundles/azure-aks-triage/sli.robot +++ b/codebundles/azure-aks-triage/sli.robot @@ -77,7 +77,7 @@ Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${ ${aks_config_score}= Evaluate 1 if len(@{issue_list["issues"]}) == 0 else 0 Set Global Variable ${aks_config_score} -Generate AKS Cluster Health Score +Calculate AKS Cluster Health Score for AKS Cluster `${AKS_CLUSTER}` in Azure Resource Group `${AZ_RESOURCE_GROUP}` ${aks_cluster_health_score}= Evaluate (${aks_resource_score} + ${aks_activities_score} + ${aks_config_score}) / 3 ${health_score}= Convert to Number ${aks_cluster_health_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/azure-appgateway-health/runbook.robot b/codebundles/azure-appgateway-health/runbook.robot index bafbeee7f..3954b3114 100644 --- a/codebundles/azure-appgateway-health/runbook.robot +++ b/codebundles/azure-appgateway-health/runbook.robot @@ -51,7 +51,7 @@ Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NA ... details=${issue_list} ... next_steps=Please escalate to the Azure service owner to enable provider Microsoft.ResourceHealth. 
END -Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` +Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` in Subscription `${AZURE_RESOURCE_SUBSCRIPTION_ID}` [Documentation] Fetch the details and health of the application gateway configuration [Tags] appservice logs tail ${config_health}= RW.CLI.Run Bash File diff --git a/codebundles/azure-appgateway-health/sli.robot b/codebundles/azure-appgateway-health/sli.robot index df7c6138b..e07c701c0 100644 --- a/codebundles/azure-appgateway-health/sli.robot +++ b/codebundles/azure-appgateway-health/sli.robot @@ -36,7 +36,7 @@ Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NA END Set Global Variable ${appgw_resource_score} -Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` +Fetch AKS Cluster Config in Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the config of the AKS cluster in azure [Tags] AKS config ${config}= RW.CLI.Run Bash File @@ -75,7 +75,7 @@ Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resou Set Global Variable ${appgw_backend_score} -Generate Application Gateway Health Score +Generate Application Gateway Health Score for `${APP_GATEWAY_NAME}` in Azure Subscription `${AZURE_RESOURCE_SUBSCRIPTION_ID}` ${appgw_health_score}= Evaluate (${appgw_resource_score} + ${appgw_config_score} + ${appgw_backend_score}) / 3 ${health_score}= Convert to Number ${appgw_health_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/azure-appservice-triage/runbook.robot b/codebundles/azure-appservice-triage/runbook.robot index 1c39d7017..56ee53544 100644 --- a/codebundles/azure-appservice-triage/runbook.robot +++ b/codebundles/azure-appservice-triage/runbook.robot @@ -53,7 +53,7 @@ Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In 
END -Check App Service `${APP_SERVICE_NAME}` Health Check Metrics In Resource Group `${AZ_RESOURCE_GROUP}` +Check App Service `${APP_SERVICE_NAME}` Health in Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Checks the health status of a appservice workload. [Tags] ${health_check_metric}= RW.CLI.Run Bash File @@ -172,7 +172,7 @@ Check Configuration Health of App Service `${APP_SERVICE_NAME}` In Resource Grou END END -Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` +Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch deployment health of the App Service [Tags] appservice deployment ${deployment_health}= RW.CLI.Run Bash File diff --git a/codebundles/azure-appservice-triage/sli.robot b/codebundles/azure-appservice-triage/sli.robot index 1506482ed..616db7c6e 100644 --- a/codebundles/azure-appservice-triage/sli.robot +++ b/codebundles/azure-appservice-triage/sli.robot @@ -89,7 +89,7 @@ Check App Service `${APP_SERVICE_NAME}` Configuration Health In Resource Group ` END END END -Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` +Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch deployment health of the App Service [Tags] appservice deployment ${deployment_health}= RW.CLI.Run Bash File @@ -140,7 +140,7 @@ Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOU END END -Generate App Service Health Score +Generate App Service Health Score for `${APP_SERVICE_NAME}` in resource group `${AZ_RESOURCE_GROUP}` ${app_service_health_score}= Evaluate (${appservice_resource_score} + ${app_service_health_check_score} + ${app_service_config_score} + ${app_service_activities_score} + ${app_service_deployment_score}) / 5 ${health_score}= Convert to Number ${app_service_health_score} 2 RW.Core.Push Metric 
${health_score} diff --git a/codebundles/cli-test/runbook.robot b/codebundles/cli-test/runbook.robot index 78e6c9f7a..9268b28e1 100644 --- a/codebundles/cli-test/runbook.robot +++ b/codebundles/cli-test/runbook.robot @@ -31,7 +31,7 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Run CLI and Parse Output For Issues +Run CLI and Parse Output For Issues in `${NAMESPACE}` namespace and `${CONTEXT}` context [Documentation] Fetch some output from the cluster in varying forms and run tests against it [Tags] Stdout Test Output Pods ${rsp}= RW.CLI.Run Cli @@ -77,7 +77,7 @@ Run CLI and Parse Output For Issues ... total_container_restarts__raise_issue_if_lt=1 RW.Core.Add Pre To Report Found issues after parsing the output of: kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} -ojson -Exec Test +Exec Test in Pod `$${NAMESPACE}` [Documentation] Used to verify that running CLI commands in remote workloads works [Tags] Remote Exec Command Tags Workload Pod ${df}= RW.CLI.Run Cli @@ -95,7 +95,7 @@ Exec Test ... optional_namespace=${NAMESPACE} ... 
optional_context=${CONTEXT} -Local Process Test +Local Process Test for '$${NAMESPACE}' [Documentation] Verifies that commands can be run locally directly within the runner [Tags] Commands Direct Process Local # run local 'ls' diff --git a/codebundles/cmd-test/runbook.robot b/codebundles/cmd-test/runbook.robot index 961afcc47..df16a83d0 100644 --- a/codebundles/cmd-test/runbook.robot +++ b/codebundles/cmd-test/runbook.robot @@ -13,7 +13,7 @@ Suite Setup Suite Initialization *** Tasks *** -Run CLI Command +Run CLI Command in `${NAMESPACE}` namespace [Documentation] Runs a bare CLI command and captures the stderr and stdout for the report [Tags] stdout test output pods ${rsp}= RW.CLI.Run Cli @@ -23,17 +23,17 @@ Run CLI Command RW.Core.Add Pre To Report Command Stdout:\n${rsp.stdout} RW.Core.Add Pre To Report Command Stderr:\n${rsp.stderr} -Run Bash File +Run Bash File in `${NAMESPACE}`/${CONTEXT}/path/to/script.sh [Documentation] Runs a bash file to verify script passthrough works [Tags] file script - ${rsp}= RW.CLI.Run Bash File + ${rsp}= RW.CLI.Run Bash File in `${NAMESPACE}`/${CONTEXT}/path/to/script.sh ... bash_file=getdeploys.sh ... secret_file__kubeconfig=${kubeconfig} ... 
env=${env} RW.Core.Add Pre To Report Command Stdout:\n${rsp.stdout} RW.Core.Add Pre To Report Command Stderr:\n${rsp.stderr} -Log Suggestion +Generate Next Step Suggestion and Log in `NAMESPACE` [Documentation] Generate a next step suggestion, format it, and log it ${next_steps}= RW.NextSteps.Suggest Bind Mount ${next_steps}= RW.NextSteps.Format ${next_steps} diff --git a/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot b/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot index 4a0b05532..87258c5f2 100644 --- a/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot +++ b/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold +Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold in GCP Project `$${GCP_PROJECT_ID}` [Documentation] Fetches HTTP Error metrics for the Kong ingress host and service from GMP and performs an inspection on the results. If there are currently any results with more than the defined HTTP error threshold, their route and service names will be surfaced for further troubleshooting. [Tags] curl http ingress errors metrics kong gmp ${gmp_rsp}= RW.CLI.Run Cli @@ -48,7 +48,7 @@ Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold RW.Core.Add Pre To Report HTTP Error Violation & Details:\n${gmp_rsp.stdout} RW.Core.Add Pre To Report GMP Json Data:\n${gmp_json.stdout} -Check If Kong Ingress HTTP Request Latency Violates Threshold +Check If Kong Ingress HTTP Request Latency Violates Threshold for Upstream `$${INGRESS_UPSTREAM}` [Documentation] Fetches metrics for the Kong ingress 99th percentile request latency from GMP and performs an inspection on the results. If there are currently any results with more than the defined request latency threshold, their route and service names will be surfaced for further troubleshooting. 
[Tags] curl request ingress latency http kong gmp ${gmp_rsp}= RW.CLI.Run Cli @@ -79,7 +79,7 @@ Check If Kong Ingress HTTP Request Latency Violates Threshold RW.Core.Add Pre To Report Commands Used: ${history} RW.Core.Add Pre To Report HTTP Request Latency Within Acceptable Parameters:\n${gmp_rsp.stdout} -Check If Kong Ingress Controller Reports Upstream Errors +Check If Kong Ingress Controller Reports Upstream Errors in GCP Project `$${GCP_PROJECT_ID}` [Documentation] Fetches metrics for the Kong ingress controller related to upstream healthchecks or dns errors. [Tags] curl request ingress upstream healthcheck dns errrors http kong gmp ${gmp_healthchecks_off_rsp}= RW.CLI.Run Cli diff --git a/codebundles/curl-http-ok/runbook.robot b/codebundles/curl-http-ok/runbook.robot index 9d33e0b53..29f617890 100644 --- a/codebundles/curl-http-ok/runbook.robot +++ b/codebundles/curl-http-ok/runbook.robot @@ -13,7 +13,7 @@ Suite Setup Suite Initialization *** Tasks *** -Checking HTTP URL Is Available And Timely +Check HTTP URL Availability and Timeliness for `${URL}` [Documentation] Use cURL to validate the http response [Tags] curl http ingress latency errors ${curl_rsp}= RW.CLI.Run Cli diff --git a/codebundles/curl-http-ok/sli.robot b/codebundles/curl-http-ok/sli.robot index 0e8f27f9e..dd8194094 100644 --- a/codebundles/curl-http-ok/sli.robot +++ b/codebundles/curl-http-ok/sli.robot @@ -33,7 +33,7 @@ Suite Initialization ... default=200 ... 
example=200 *** Tasks *** -Checking HTTP URL Is Available And Timely +Validate HTTP URL Availability and Timeliness for ${URL} [Documentation] Use cURL to validate the http response [Tags] cURL HTTP Ingress Latency Errors ${curl_rsp}= RW.CLI.Run Cli diff --git a/codebundles/gcloud-log-inspection/runbook.robot b/codebundles/gcloud-log-inspection/runbook.robot index f1d52c391..cb9d335b8 100644 --- a/codebundles/gcloud-log-inspection/runbook.robot +++ b/codebundles/gcloud-log-inspection/runbook.robot @@ -51,7 +51,7 @@ Suite Initialization Set Suite Variable ${env} {"CLOUDSDK_CORE_PROJECT":"${GCP_PROJECT_ID}","GOOGLE_APPLICATION_CREDENTIALS":"./${gcp_credentials_json.key}","PATH":"$PATH:${OS_PATH}"} *** Tasks *** -Inspect GCP Logs For Common Errors +Inspect GCP Logs For Common Errors in GCP Project `${GCP_PROJECT_ID}` [Tags] Logs Query Gcloud GCP Errors Common [Documentation] Fetches logs from a Google Cloud Project and filters for a count of common error messages. ${cmd} Set Variable gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && gcloud logging read "severity>=${SEVERITY}${ADD_FILTERS}" --freshness=120m --limit=50 --format=json diff --git a/codebundles/gcloud-node-preempt/runbook.robot b/codebundles/gcloud-node-preempt/runbook.robot index 4531ce941..7a6324ab8 100644 --- a/codebundles/gcloud-node-preempt/runbook.robot +++ b/codebundles/gcloud-node-preempt/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -List all nodes in an active prempt operation for GCP Project `${GCP_PROJECT_ID}` +List all nodes in an active preempt operation for GCP Project `${GCP_PROJECT_ID}` within the last `${AGE}` hours [Documentation] Fetches all nodes that have been preempted within the defined time interval. 
[Tags] stdout gcloud node preempt gcp ${gcp_project_id} ${preempt_node_list}= RW.CLI.Run Cli diff --git a/codebundles/gcloud-node-preempt/sli.robot b/codebundles/gcloud-node-preempt/sli.robot index 4e917a4f4..006842f35 100644 --- a/codebundles/gcloud-node-preempt/sli.robot +++ b/codebundles/gcloud-node-preempt/sli.robot @@ -36,7 +36,7 @@ Suite Initialization *** Tasks *** -Count the number of nodes in active prempt operation +Count the number of nodes in active preempt operation in project `${GCP_PROJECT_ID}` [Documentation] Counts all nodes that have been preempted within the defined time interval. [Tags] Stdout gcloud node preempt gcp ${preempt_node_list}= RW.CLI.Run Cli diff --git a/codebundles/gcp-bucket-health/sli.robot b/codebundles/gcp-bucket-health/sli.robot index 31965991f..bf8f44548 100644 --- a/codebundles/gcp-bucket-health/sli.robot +++ b/codebundles/gcp-bucket-health/sli.robot @@ -60,7 +60,7 @@ Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}` ${bucket_ops_rate_score}= Evaluate 1 if int(${buckets_over_ops_threshold.stdout}) == 0 else 0 Set Global Variable ${bucket_ops_rate_score} -Generate Bucket Score +Generate Bucket Score in Project `$${PROJECT_IDS}` ${bucket_health_score}= Evaluate (${buckets_over_utilization} + ${public_bucket_score} + ${bucket_ops_rate_score}) / 3 ${health_score}= Convert to Number ${bucket_health_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/gcp-cloud-function-health/runbook.robot b/codebundles/gcp-cloud-function-health/runbook.robot index 3914d980b..5bcdf5296 100644 --- a/codebundles/gcp-cloud-function-health/runbook.robot +++ b/codebundles/gcp-cloud-function-health/runbook.robot @@ -14,7 +14,7 @@ Library Collections Suite Setup Suite Initialization *** Tasks *** -List Unhealhy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` +List Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches a list of GCP Cloud Functions that are not healthy. 
[Tags] gcloud function gcp ${GCP_PROJECT_ID} # This command is cheat-sheet friendly @@ -67,7 +67,7 @@ List Unhealhy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` RW.Core.Add Pre To Report Failed GCP Functions Table:\n${unhealthy_cloud_function_list_simple_output.stdout} RW.Core.Add Pre To Report Commands Used:\n${history} -Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` +Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` in `us-central1` Region [Documentation] Fetches GCP logs related to unhealthy Cloud Functions within the last 14 days [Tags] gcloud function gcp ${GCP_PROJECT_ID} # This command is cheat-sheet friendly diff --git a/codebundles/gh-actions-artifact-analysis/runbook.robot b/codebundles/gh-actions-artifact-analysis/runbook.robot index 6699299cd..d68ccdd71 100644 --- a/codebundles/gh-actions-artifact-analysis/runbook.robot +++ b/codebundles/gh-actions-artifact-analysis/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` +Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` using command `${ANALYSIS_COMMAND}` [Documentation] Check GitHub workflow status and analyze artifact with a user provided command. 
[Tags] github workflow actions artifact report ${ESCAPED_ANALYSIS_COMMAND}= RW.CLI.Escape Bash Command ${ANALYSIS_COMMAND} diff --git a/codebundles/gh-actions-artifact-analysis/sli.robot b/codebundles/gh-actions-artifact-analysis/sli.robot index e7f2e42bc..40445c15a 100644 --- a/codebundles/gh-actions-artifact-analysis/sli.robot +++ b/codebundles/gh-actions-artifact-analysis/sli.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push metric +Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push `${METRIC}` metric [Documentation] Check GitHub workflow status, run a user provided analysis command, and push the metric. The analysis command should result in a single metric. [Tags] github workflow actions artifact report ${ESCAPED_ANALYSIS_COMMAND}= RW.CLI.Escape Bash Command ${ANALYSIS_COMMAND} diff --git a/codebundles/k8s-app-troubleshoot/runbook.robot b/codebundles/k8s-app-troubleshoot/runbook.robot index 00a34e5ea..caad00e70 100644 --- a/codebundles/k8s-app-troubleshoot/runbook.robot +++ b/codebundles/k8s-app-troubleshoot/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** -Get `${CONTAINER_NAME}` Application Logs +Get `${CONTAINER_NAME}` Application Logs from Workload `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` [Documentation] Collects the last approximately 300 lines of logs from the workload [Tags] resource application workload logs state ${container_name} ${workload_name} ${logs}= RW.CLI.Run Cli @@ -41,7 +41,7 @@ Scan `${CONTAINER_NAME}` Application For Misconfigured Environment RW.Core.Add Pre To Report Stdout:\n\n${script_run.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Tail `${CONTAINER_NAME}` Application Logs For Stacktraces +Tail `${CONTAINER_NAME}` Application Logs For Stacktraces in Workload `${WORKLOAD_NAME}` [Documentation] Performs an inspection on 
container logs for exceptions/stacktraces, parsing them and attempts to find relevant source code information [Tags] ... application diff --git a/codebundles/k8s-app-troubleshoot/sli.robot b/codebundles/k8s-app-troubleshoot/sli.robot index c31414369..63db9d469 100644 --- a/codebundles/k8s-app-troubleshoot/sli.robot +++ b/codebundles/k8s-app-troubleshoot/sli.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** -Measure Application Exceptions +Measure Application Exceptions in `${NAMESPACE}` [Documentation] Examines recent logs for exceptions, providing a count of them. [Tags] resource application workload logs state exceptions errors ${cmd}= Set Variable diff --git a/codebundles/k8s-artifactory-health/runbook.robot b/codebundles/k8s-artifactory-health/runbook.robot index 5eec19164..cc584fbca 100644 --- a/codebundles/k8s-artifactory-health/runbook.robot +++ b/codebundles/k8s-artifactory-health/runbook.robot @@ -66,7 +66,7 @@ Suite Initialization *** Tasks *** -Check Artifactory Liveness and Readiness Endpoints +Check Artifactory Liveness and Readiness Endpoints in `NAMESPACE` [Documentation] Runs a set of exec commands internally in the Artifactory workloads to curl the system health endpoints. [Tags] Pods Statefulset Artifactory Health System Curl API OK HTTP # these endpoints dont respect json type headers diff --git a/codebundles/k8s-certmanager-healthcheck/sli.robot b/codebundles/k8s-certmanager-healthcheck/sli.robot index 206ead570..fba53f5ec 100644 --- a/codebundles/k8s-certmanager-healthcheck/sli.robot +++ b/codebundles/k8s-certmanager-healthcheck/sli.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Count Unready and Expired Certificates +Count Unready and Expired Certificates in Namespace `${NAMESPACE}` [Documentation] Adds together the count of unready and expired certificates. A healthy SLI value is 0. 
[Tags] certificate status count health certmanager cert diff --git a/codebundles/k8s-chaos-flux/runbook.robot b/codebundles/k8s-chaos-flux/runbook.robot index 902185e43..9b8036fdf 100644 --- a/codebundles/k8s-chaos-flux/runbook.robot +++ b/codebundles/k8s-chaos-flux/runbook.robot @@ -104,7 +104,7 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Suspend the Flux Resource Reconciliation +Suspend the Flux Resource Reconciliation for ${FLUX_RESOURCE_NAME} in namespace ${FLUX_RESOURCE_NAMESPACE} [Documentation] Suspends a flux resource so that it can be manipulated for chaos purposes. [Tags] Chaos Flux Kubernetes Resource Suspend ${suspend_flux_resource}= RW.CLI.Run Cli @@ -114,7 +114,7 @@ Suspend the Flux Resource Reconciliation ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used:\n${history} -Find Random FluxCD Workload as Chaos Target +Select Random FluxCD Workload for Chaos Target in Namespace `${FLUX_RESOURCE_NAMESPACE}` [Documentation] Inspects the Flux resource and randomly selects a deployment to tickle. Tehe. Only runs if RANDOMIZE = Yes. [Tags] Chaos Flux Kubernetes Resource Random IF "${RANDOMIZE}" == "Yes" @@ -138,7 +138,7 @@ Find Random FluxCD Workload as Chaos Target Set Suite Variable ${TARGET_RESOURCE} deployment/${deployment_name} END -Execute Chaos Command +Execute Chaos Command on ${TARGET_RESOURCE} in Namespace ${TARGET_NAMESPACE} [Documentation] Run the desired chaos command within a targeted resource [Tags] Chaos Flux Kubernetes Resource Kill OOM FOR ${index} IN RANGE ${CHAOS_COMMAND_LOOP} @@ -150,7 +150,7 @@ Execute Chaos Command ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used:\n${history} -Execute Additional Chaos Command +Execute Additional Chaos Command on ${FLUX_RESOURCE_TYPE} '${FLUX_RESOURCE_NAME}' in namespace '${FLUX_RESOURCE_NAMESPACE}' [Documentation] Run the additional command as input, verbatim. 
[Tags] Chaos Flux Kubernetes Resource ${run_additional_command}= RW.CLI.Run Cli @@ -160,7 +160,7 @@ Execute Additional Chaos Command ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used:\n${history} -Resume Flux Resource Reconciliation +Resume Flux Resource Reconciliation in `${TARGET_NAMESPACE}` [Documentation] Resumes Flux reconciliation on desired resource. [Tags] Chaos Flux Kubernetes Resource Resume ${resume_flux}= RW.CLI.Run Cli diff --git a/codebundles/k8s-chaos-workload/runbook.robot b/codebundles/k8s-chaos-workload/runbook.robot index 02f53cf98..ad83cff31 100644 --- a/codebundles/k8s-chaos-workload/runbook.robot +++ b/codebundles/k8s-chaos-workload/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Test `${WORKLOAD_NAME}` High Availability +Test `${WORKLOAD_NAME}` High Availability in Namespace `${NAMESPACE}` [Documentation] Kills a pod under this workload to test high availability. [Tags] Kubernetes StatefulSet Deployments Pods Highly Available ${process}= RW.CLI.Run Bash File @@ -34,7 +34,7 @@ OOMKill `${WORKLOAD_NAME}` Pod ... secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Mangle Service Selector For `${WORKLOAD_NAME}` +Mangle Service Selector For `${WORKLOAD_NAME}` in `${NAMESPACE}` [Documentation] Breaks a service's label selector to cause a network disruption [Tags] Kubernetes networking Services Selector ${process}= RW.CLI.Run Bash File @@ -43,7 +43,7 @@ Mangle Service Selector For `${WORKLOAD_NAME}` ... 
secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Mangle Service Port For `${WORKLOAD_NAME}` +Mangle Service Port For `${WORKLOAD_NAME}` in `${NAMESPACE}` [Documentation] Changes a service's port to cause a network disruption [Tags] Kubernetes networking Services Port ${process}= RW.CLI.Run Bash File diff --git a/codebundles/k8s-cluster-node-health/runbook.robot b/codebundles/k8s-cluster-node-health/runbook.robot index cbb0aa048..854bc709e 100644 --- a/codebundles/k8s-cluster-node-health/runbook.robot +++ b/codebundles/k8s-cluster-node-health/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check for Node Restarts in Cluster `${CONTEXT}` +Check for Node Restarts in Cluster `${CONTEXT}` within Interval `${INTERVAL}` [Documentation] Identify nodes that are starting and stopping within the time interval. [Tags] cluster preempt spot reboot utilization saturation exhaustion starvation ${node_restart_details}= RW.CLI.Run Bash File diff --git a/codebundles/k8s-cluster-node-health/sli.robot b/codebundles/k8s-cluster-node-health/sli.robot index 6d1e4b262..6c81a6c0a 100644 --- a/codebundles/k8s-cluster-node-health/sli.robot +++ b/codebundles/k8s-cluster-node-health/sli.robot @@ -28,7 +28,7 @@ Check for Node Restarts in Cluster `${CONTEXT}` ${event_score}= Evaluate 1 if ${events} == 0 else 0 Set Global Variable ${event_score} -Generate Namspace Score +Generate Namespace Score in Kubernetes Cluster `$${CONTEXT}` ${cluster_node_score}= Evaluate (${event_score} / 1) ${health_score}= Convert to Number ${cluster_node_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/k8s-daemonset-healthcheck/runbook.robot b/codebundles/k8s-daemonset-healthcheck/runbook.robot index d6a0fdefa..697fb6dc9 100644 --- a/codebundles/k8s-daemonset-healthcheck/runbook.robot +++ b/codebundles/k8s-daemonset-healthcheck/runbook.robot @@ -27,7 +27,7 @@ Get DaemonSet Logs for `${DAEMONSET_NAME}` and Add to Report RW.Core.Add Pre 
To Report ${logs.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Get Related Daemonset `${DAEMONSET_NAME}` Events +Get Related Daemonset `${DAEMONSET_NAME}` Events in Namespace `${NAMESPACE}` [Documentation] Fetches events related to the daemonset workload in the namespace. [Tags] events workloads errors warnings get daemonset csi ${events}= RW.CLI.Run Cli diff --git a/codebundles/k8s-deployment-healthcheck/runbook.robot b/codebundles/k8s-deployment-healthcheck/runbook.robot index 9193f0795..0b851db74 100644 --- a/codebundles/k8s-deployment-healthcheck/runbook.robot +++ b/codebundles/k8s-deployment-healthcheck/runbook.robot @@ -61,7 +61,7 @@ Check Deployment Log For Issues with `${DEPLOYMENT_NAME}` ... Recent logs from Deployment ${DEPLOYMENT_NAME} in Namespace ${NAMESPACE}:\n\n${logs.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Deployments Logs for `${DEPLOYMENT_NAME}` and Add to Report +Fetch Deployments Logs for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` and Add to Report [Documentation] Fetches logs from running pods and adds content to the report [Tags] ... kubernetes @@ -118,7 +118,7 @@ Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` RW.Core.Add Pre To Report Liveness probe testing results:\n\n${liveness_probe_health.stdout} RW.Core.Add Pre To Report Commands Used: ${liveness_probe_health.cmd} -Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` +Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Validates if a readiness probe has possible misconfigurations [Tags] ... 
readiness @@ -229,7 +229,7 @@ Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}` RW.Core.Add Pre To Report ${events.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Get Deployment Workload Details For `${DEPLOYMENT_NAME}` and Add to Report +Fetch Deployment Workload Details For `${DEPLOYMENT_NAME}` [Documentation] Fetches the current state of the deployment for future review in the report. [Tags] deployment details manifest info ${DEPLOYMENT_NAME} ${deployment}= RW.CLI.Run Cli @@ -242,7 +242,7 @@ Get Deployment Workload Details For `${DEPLOYMENT_NAME}` and Add to Report RW.Core.Add Pre To Report Snapshot of deployment state:\n\n${deployment.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` +Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` [Documentation] Pulls the replica information for a given deployment and checks if it's highly available ... , if the replica counts are the expected / healthy values, and raises issues if it is not progressing ... and is missing pods. @@ -297,7 +297,7 @@ Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` ... title= Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` is not configured to be highly available. ... reproduce_hint=View Commands Used in Report Output ... details=Deployment `${DEPLOYMENT_NAME}` is only configured to have a single pod:\n`${deployment_status}` - ... next_steps=Get Deployment Workload Details For `${DEPLOYMENT_NAME}` and Add to Report\nAdjust Deployment `${DEPLOYMENT_NAME}` spec.replicas to be greater than 1. + ... next_steps=Fetch Deployment Workload Details For `${DEPLOYMENT_NAME}`\nAdjust Deployment `${DEPLOYMENT_NAME}` spec.replicas to be greater than 1. 
END RW.Core.Add Pre To Report Deployment State:\n${deployment_replicas.stdout} ${history}= RW.CLI.Pop Shell History diff --git a/codebundles/k8s-deployment-ops/runbook.robot b/codebundles/k8s-deployment-ops/runbook.robot index 5124dd783..eee3d26ef 100644 --- a/codebundles/k8s-deployment-ops/runbook.robot +++ b/codebundles/k8s-deployment-ops/runbook.robot @@ -161,7 +161,7 @@ Rollback Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` to Previous ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Scale Down Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` +Halt Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Stops all running pods in a deployment to immediately halt a failing or runaway service. [Tags] ... log diff --git a/codebundles/k8s-flux-suspend-namespace/runbook.robot b/codebundles/k8s-flux-suspend-namespace/runbook.robot index fa80eb80b..0f0e6980f 100644 --- a/codebundles/k8s-flux-suspend-namespace/runbook.robot +++ b/codebundles/k8s-flux-suspend-namespace/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Flux Suspend Namespace ${NAMESPACE} +Flux Suspend Namespace `${NAMESPACE}` [Documentation] Applies a flux suspend to the spec of all flux objects reconciling in a given namespace. [Tags] Kubernetes Namespace Flux Suspend ${process}= RW.CLI.Run Bash File suspend_namespace.sh @@ -24,7 +24,7 @@ Flux Suspend Namespace ${NAMESPACE} ... secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Unsuspend Flux for Namespace ${NAMESPACE} +Unsuspend Flux for Namespace `${NAMESPACE}` [Documentation] Unsuspends any suspended flux objects in a given namespace, allowing reconciliation to resume. 
[Tags] Kubernetes Namespace Flux Unsuspend ${process}= RW.CLI.Run Bash File unsuspend_namespace.sh diff --git a/codebundles/k8s-fluxcd-kustomization-health/runbook.robot b/codebundles/k8s-fluxcd-kustomization-health/runbook.robot index 98147cf8f..d2db1831d 100644 --- a/codebundles/k8s-fluxcd-kustomization-health/runbook.robot +++ b/codebundles/k8s-fluxcd-kustomization-health/runbook.robot @@ -26,7 +26,7 @@ List all available Kustomization objects in Namespace `${NAMESPACE}` RW.Core.Add Pre To Report Kustomizations available: \n ${kustomizations.stdout} RW.Core.Add Pre To Report Commands Used:\n${history} -Get details for unready Kustomizations in Namespace `${NAMESPACE}` +List Unready Kustomizations in Namespace `${NAMESPACE}` [Documentation] List all Kustomizations that are not found in a ready state in namespace ${NAMESPACE} [Tags] FluxCD Kustomization Versions ${NAMESPACE} ${kustomizations_not_ready}= RW.CLI.Run Cli diff --git a/codebundles/k8s-fluxcd-reconcile/runbook.robot b/codebundles/k8s-fluxcd-reconcile/runbook.robot index b0b2b5859..5a40ebb69 100644 --- a/codebundles/k8s-fluxcd-reconcile/runbook.robot +++ b/codebundles/k8s-fluxcd-reconcile/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Health Check Flux Reconciliation +Health Check Flux Reconciliation in Kubernetes Namespace `${FLUX_NAMESPACE}` [Documentation] Fetches reconciliation logs for flux and creates a report for them. 
[Tags] Kubernetes Namespace Flux ${process}= RW.CLI.Run Bash File flux_reconcile_report.sh diff --git a/codebundles/k8s-gitops-gh-remediate/runbook.robot b/codebundles/k8s-gitops-gh-remediate/runbook.robot index 1b785e2d6..9079736f8 100644 --- a/codebundles/k8s-gitops-gh-remediate/runbook.robot +++ b/codebundles/k8s-gitops-gh-remediate/runbook.robot @@ -54,7 +54,7 @@ Remediate Readiness and Liveness Probe GitOps Manifests in Namespace `${NAMESPAC RW.Core.Add Pre To Report Readiness probe testing results:\n\n${probe_health.stdout} RW.Core.Add Pre To Report Commands Used: ${probe_health.cmd} -Increase ResourceQuota for Namespace `${NAMESPACE}` +Increase ResourceQuota Limit for Namespace `${NAMESPACE}` in GitHub GitOps Repository [Documentation] Looks for a resourcequota object in the namespace and increases it if applicable, and if it is managed in a GitHub GitOps repository [Tags] resourcequota quota namespace remediate github gitops ${NAMESPACE} ${quota_usage}= RW.CLI.Run Bash File diff --git a/codebundles/k8s-ingress-gce-healthcheck/runbook.robot b/codebundles/k8s-ingress-gce-healthcheck/runbook.robot index a6db74016..82a1b6ded 100644 --- a/codebundles/k8s-ingress-gce-healthcheck/runbook.robot +++ b/codebundles/k8s-ingress-gce-healthcheck/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Search For GCE Ingress Warnings in GKE +Search For GCE Ingress Warnings in GKE Context `${CONTEXT}` [Documentation] Find warning events related to GCE Ingress and services objects [Tags] service ingress endpoint health ingress-gce gke ${event_warnings}= RW.CLI.Run Cli @@ -31,13 +31,13 @@ Search For GCE Ingress Warnings in GKE ... set_issue_actual=Ingress and service objects have warnings in namespace `${NAMESPACE}` for ingress `${INGRESS}` ... set_issue_title=Unhealthy GCE ingress or service objects found in namespace `${NAMESPACE}` for ingress `${INGRESS}` ... 
set_issue_details=The following warning events were found:\n\n${event_warnings.stdout}\n\n - ... set_issue_next_steps=Validate GCP HTTP Load Balancer Configurations for ${INGRESS} + ... set_issue_next_steps=Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID}` for ${INGRESS} ... _line__raise_issue_if_contains=Warning ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report GCE Ingress warnings for ${NAMESPACE}:\n\n${event_warnings.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Identify Unhealthy GCE HTTP Ingress Backends +Identify Unhealthy GCE HTTP Ingress Backends in GKE Namespace `$${NAMESPACE}` [Documentation] Checks the backend annotations on the ingress object to determine if they are not regstered as healthy [Tags] service ingress endpoint health ingress-gce gke ${unhealthy_backends}= RW.CLI.Run Cli @@ -61,7 +61,7 @@ Identify Unhealthy GCE HTTP Ingress Backends ... GCE unhealthy backends in `${NAMESPACE}` for ingress `${INGRESS}`:\n\n${unhealthy_backends.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Validate GCP HTTP Load Balancer Configurations +Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID}` [Documentation] Extract GCP HTTP Load Balancer components from ingress annotations and check health of each object [Tags] service ingress endpoint health backends urlmap gce ${gce_config_objects}= RW.CLI.Run Bash File @@ -90,7 +90,7 @@ Validate GCP HTTP Load Balancer Configurations RW.Core.Add Pre To Report Ingress object summary for ingress: `${INGRESS}` in namespace: `${NAMESPACE}`:\n\n${gce_config_objects.stdout} -Fetch Network Error Logs from GCP Operations Manager for Ingress Backends +Fetch Network Error Logs from GCP Operations Manager for Ingress Backends in GCP Project `$${GCP_PROJECT_ID}` [Documentation] Fetch logs from the last 1d that are specific to the HTTP Load Balancer within the last 60 minutes [Tags] service ingress endpoint health ${network_error_logs}= 
RW.CLI.Run Cli @@ -115,7 +115,7 @@ Fetch Network Error Logs from GCP Operations Manager for Ingress Backends RW.Core.Add Pre To Report Network error logs possibly related to Ingress ${INGRESS}:\n\n${network_error_logs.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Review GCP Operations Logging Dashboard +Review GCP Operations Logging Dashboard in GCP project `$${GCP_PROJECT_ID}` [Documentation] Create urls that will help users obtain logs from the GCP Dashboard [Tags] service ingress endpoint health logging http loadbalancer ${loadbalancer_log_url}= RW.CLI.Run CLI diff --git a/codebundles/k8s-jenkins-healthcheck/runbook.robot b/codebundles/k8s-jenkins-healthcheck/runbook.robot index 402d0aede..16679085f 100644 --- a/codebundles/k8s-jenkins-healthcheck/runbook.robot +++ b/codebundles/k8s-jenkins-healthcheck/runbook.robot @@ -67,7 +67,7 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Query The Jenkins Kubernetes Workload HTTP Endpoint +Query The Jenkins Kubernetes Workload HTTP Endpoint in Kubernetes StatefulSet `${STATEFULSET_NAME}` [Documentation] Performs a curl within the jenkins statefulset kubernetes workload to determine if the pod is up and healthy, and can serve requests. [Tags] HTTP Curl Web Code OK Available Jenkins HTTP Endpoint API ${rsp}= RW.CLI.Run Cli @@ -97,7 +97,7 @@ Query The Jenkins Kubernetes Workload HTTP Endpoint RW.Core.Add Pre To Report Commands Used: ${history} -Query For Stuck Jenkins Jobs +Query For Stuck Jenkins Jobs in Kubernetes Statefulset Workload `$${STATEFULSET_NAME}` [Documentation] Performs a curl within the jenkins statefulset kubernetes workload to check for stuck jobs in the jenkins piepline queue. 
[Tags] HTTP Curl Web Code OK Available Queue Stuck Jobs Jenkins ${rsp}= RW.CLI.Run Cli diff --git a/codebundles/k8s-kubectl-cmd/runbook.robot b/codebundles/k8s-kubectl-cmd/runbook.robot index f0f307356..9956a57ec 100644 --- a/codebundles/k8s-kubectl-cmd/runbook.robot +++ b/codebundles/k8s-kubectl-cmd/runbook.robot @@ -12,7 +12,7 @@ Suite Setup Suite Initialization *** Tasks *** -Run User Provided Kubectl Command +Run User Provided Kubectl Command on `${KUBECTL_COMMAND}` in Kubernetes Cluster [Documentation] Runs a user provided kubectl command and adds the output to the report. [Tags] kubectl cli ${rsp}= RW.CLI.Run Cli diff --git a/codebundles/k8s-kubectl-cmd/sli.robot b/codebundles/k8s-kubectl-cmd/sli.robot index 4bc86c631..f66fdc1bd 100644 --- a/codebundles/k8s-kubectl-cmd/sli.robot +++ b/codebundles/k8s-kubectl-cmd/sli.robot @@ -12,7 +12,7 @@ Suite Setup Suite Initialization *** Tasks *** -Run User Provided Kubectl Command +Run User Provided Kubectl Command in Kubernetes Cluster `$${KUBECTL_CLUSTER}` [Documentation] Runs a user provided kubectl command and pushes the metric as an SLI [Tags] kubectl cli metric sli ${rsp}= RW.CLI.Run Cli diff --git a/codebundles/k8s-labeledpods-healthcheck/sli.robot b/codebundles/k8s-labeledpods-healthcheck/sli.robot index be64894ad..b32419634 100644 --- a/codebundles/k8s-labeledpods-healthcheck/sli.robot +++ b/codebundles/k8s-labeledpods-healthcheck/sli.robot @@ -46,7 +46,7 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Measure Number of Running Pods with Label +Measure Number of Running Pods with Label in `${NAMESPACE}` [Documentation] Counts the number of running pods with the configured labels. 
[Tags] Pods Containers Running Status Count Health ${running_pods}= RW.CLI.Run Cli diff --git a/codebundles/k8s-loki-healthcheck/runbook.robot b/codebundles/k8s-loki-healthcheck/runbook.robot index 870023d0b..97cda9cbe 100644 --- a/codebundles/k8s-loki-healthcheck/runbook.robot +++ b/codebundles/k8s-loki-healthcheck/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check Loki Ring API +Check Loki Ring API for Unhealthy Shards in Kubernetes Cluster `$${NAMESPACE}` [Documentation] Request and inspect the state of the Loki hash rings for non-active (potentially unhealthy) shards. # TODO: extend to dedicated script for parsing complex ring output/state ${rsp}= RW.CLI.Run Cli @@ -36,7 +36,7 @@ Check Loki Ring API ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Check Loki API Ready +Check Loki API Ready in Kubernetes Cluster `${NAMESPACE}` [Documentation] Pings the internal Loki API to check it's ready. ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get pods -l app.kubernetes.io/component=single-binary -o=jsonpath='{.items[0].metadata.name}') -- wget -q --header="Accept: application/json" -O - http://localhost:3100/ready diff --git a/codebundles/k8s-namespace-healthcheck/sli.robot b/codebundles/k8s-namespace-healthcheck/sli.robot index 0de56af16..d2505c6a5 100644 --- a/codebundles/k8s-namespace-healthcheck/sli.robot +++ b/codebundles/k8s-namespace-healthcheck/sli.robot @@ -68,7 +68,7 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Get Event Count and Score +Get Error Event Count within ${EVENT_AGE} and calculate Score [Documentation] Captures error events and counts them within a configurable timeframe. 
[Tags] Event Count Warning ${error_events}= RW.CLI.Run Cli @@ -85,7 +85,7 @@ Get Event Count and Score ${event_score}= Evaluate 1 if ${error_event_count.stdout} <= ${EVENT_THRESHOLD} else 0 Set Global Variable ${event_score} -Get Container Restarts and Score +Get Container Restarts and Score in Namespace `${NAMESPACE}` [Documentation] Counts the total sum of container restarts within a timeframe and determines if they're beyond a threshold. [Tags] Restarts Pods Containers Count Status ${pods}= RW.CLI.Run Cli @@ -103,7 +103,7 @@ Get Container Restarts and Score ${container_restart_score}= Evaluate 1 if ${container_restarts_sum.stdout} <= ${CONTAINER_RESTART_THRESHOLD} else 0 Set Global Variable ${container_restart_score} -Get NotReady Pods +Get NotReady Pods in `${NAMESPACE}` [Documentation] Fetches a count of unready pods. [Tags] Pods Status Phase Ready Unready Running ${unreadypods_results}= RW.CLI.Run Cli @@ -114,7 +114,7 @@ Get NotReady Pods ${pods_notready_score}= Evaluate 1 if ${unreadypods_results.stdout} == 0 else 0 Set Global Variable ${pods_notready_score} -Generate Namspace Score +Generate Namespace Score in `${NAMESPACE}` ${namespace_health_score}= Evaluate (${event_score} + ${container_restart_score} + ${pods_notready_score}) / 3 ${health_score}= Convert to Number ${namespace_health_score} 2 RW.Core.Push Metric ${health_score} \ No newline at end of file diff --git a/codebundles/k8s-otelcollector/runbook.robot b/codebundles/k8s-otelcollector/runbook.robot index a078e0608..850edd08f 100644 --- a/codebundles/k8s-otelcollector/runbook.robot +++ b/codebundles/k8s-otelcollector/runbook.robot @@ -53,7 +53,7 @@ Check OpenTelemetry Collector Logs For Errors In Namespace `${NAMESPACE}` END RW.Core.Add Pre To Report ${process.stdout}\n -Scan OpenTelemetry Logs For Dropped Spans In Namespace `${NAMESPACE}` +Query OpenTelemetry Logs For Dropped Spans In Namespace `${NAMESPACE}` [Documentation] Query the collector logs for dropped spans from errors [Tags] otel 
collector metrics errors logs dropped rejected ${process}= RW.CLI.Run Bash File diff --git a/codebundles/k8s-podresources-health/runbook.robot b/codebundles/k8s-podresources-health/runbook.robot index 196538f65..0f22f7342 100644 --- a/codebundles/k8s-podresources-health/runbook.robot +++ b/codebundles/k8s-podresources-health/runbook.robot @@ -72,7 +72,7 @@ Show Pods Without Resource Limit or Resource Requests Set in Namespace `${NAMESP RW.Core.Add Pre To Report ${summary} RW.Core.Add Pre To Report Commands Used:\n${history} -Get Pod Resource Utilization with Top in Namespace `${NAMESPACE}` +Check Pod Resource Utilization with Top in Namespace `${NAMESPACE}` [Documentation] Performs and a top command on list of labeled workloads to check pod resources. [Tags] top resources utilization pods workloads cpu memory allocation labeled ${NAMESPACE} ${pods_top}= RW.CLI.Run Cli @@ -118,7 +118,7 @@ Identify VPA Pod Resource Recommendations in Namespace `${NAMESPACE}` END RW.Core.Add Pre To Report ${vpa_usage.stdout}\n -Identify Resource Constrained Pods In Namespace `${NAMESPACE}` +Identify Overutilized Pods in Namespace `${NAMESPACE}` [Documentation] Scans the namespace for pods that are over utilizing resources or may be experiencing resource problems like oomkills or restarts. [Tags] overutilized resources utilization pods cpu memory allocation ${NAMESPACE} oomkill restarts ${pod_usage_analysis}= RW.CLI.Run Bash File identify_resource_contrained_pods.sh diff --git a/codebundles/k8s-postgres-healthcheck/sli.robot b/codebundles/k8s-postgres-healthcheck/sli.robot index dc2c795aa..9050d66f1 100644 --- a/codebundles/k8s-postgres-healthcheck/sli.robot +++ b/codebundles/k8s-postgres-healthcheck/sli.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Fetch Patroni Database Lag +Check Patroni Database Lag in Namespace `${NAMESPACE}` on Host `${HOSTNAME}` using `patronictl` [Documentation] Identifies the lag using patronictl and raises issues if necessary. 
[Tags] patroni patronictl list cluster health check state postgres ${database_lag_score}= Set Variable 1 @@ -57,7 +57,7 @@ Check Database Backup Status for Cluster `${OBJECT_NAME}` in Namespace `${NAMESP END Set Global Variable ${database_backup_score} -Generate Namspace Score +Generate Namespace Score for Namespace `${NAMESPACE}` ${postgres_health_score}= Evaluate (${database_lag_score} + ${database_backup_score}) / 2 ${health_score}= Convert to Number ${postgres_health_score} 2 RW.Core.Push Metric ${postgres_health_score} diff --git a/codebundles/k8s-prometheus-healthcheck/runbook.robot b/codebundles/k8s-prometheus-healthcheck/runbook.robot index a4c5b93c4..39dce14ea 100644 --- a/codebundles/k8s-prometheus-healthcheck/runbook.robot +++ b/codebundles/k8s-prometheus-healthcheck/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** -Check Prometheus Service Monitors +Check Prometheus Service Monitors in namespace `${NAMESPACE}` [Documentation] Checks the selector mappings of service monitors are valid in the namespace ${sm_report}= RW.CLI.Run Bash File ... bash_file=validate_servicemonitors.sh @@ -39,7 +39,7 @@ Check Prometheus Service Monitors ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Check For Successful Rule Setup +Check For Successful Rule Setup in Kubernetes Namespace `${NAMESPACE}` [Documentation] Inspects operator instance logs for failed rules setup Log To Console Prometheus ${rsp}= RW.CLI.Run Cli @@ -65,7 +65,7 @@ Check For Successful Rule Setup RW.Core.Add Pre To Report Logs Found:\n ${rsp.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Verify Prometheus RBAC Can Access ServiceMonitors +Verify Prometheus RBAC Can Access ServiceMonitors in Namespace `${PROM_NAMESPACE}` [Documentation] Fetch operator rbac and verify it has ServiceMonitors in rbac. ${clusterrole}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get clusterrole/kube-prometheus-stack-operator -ojson @@ -87,7 +87,7 @@ Verify Prometheus RBAC Can Access ServiceMonitors ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Identify Endpoint Scraping Errors +Inspect Prometheus Operator Logs for Scraping Errors in Namespace `${NAMESPACE}` [Documentation] Inspect the prometheus operator logs for scraping errors and raise issues if any found ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} logs $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} get pods -l app.kubernetes.io/name=prometheus -o=jsonpath='{.items[0].metadata.name}') -c prometheus | grep -iP "(scrape.*.error)" || true @@ -112,7 +112,7 @@ Identify Endpoint Scraping Errors RW.Core.Add Pre To Report Logs Found:\n ${rsp.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Check Prometheus API Healthy +Check Prometheus API Healthy in Namespace `${PROM_NAMESPACE}` [Documentation] Ping Prometheus healthy API endpoint for a 200 response code. ${rsp}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} get pods -l app.kubernetes.io/name=prometheus -o=jsonpath='{.items[0].metadata.name}') --container prometheus -- wget -qO- -S 127.0.0.1:9090/-/healthy 2>&1 | grep "HTTP/" | awk '{print $2}' diff --git a/codebundles/k8s-pvc-healthcheck/sli.robot b/codebundles/k8s-pvc-healthcheck/sli.robot index a08c4e3db..93113e388 100644 --- a/codebundles/k8s-pvc-healthcheck/sli.robot +++ b/codebundles/k8s-pvc-healthcheck/sli.robot @@ -73,7 +73,7 @@ Fetch the Storage Utilization for PVC Mounts in Namespace `${NAMESPACE}` ${pvc_utilization_score}= Evaluate 1 if len(@{issue_list}) == 0 else 0 Set Global Variable ${pvc_utilization_score} -Generate Namspace Score +Generate Namespace Score for Namespace `${NAMESPACE}` ${pvc_health_score}= Evaluate (${pvc_utilization_score}) / 1 ${health_score}= Convert to Number ${pvc_health_score} 2 RW.Core.Push Metric ${health_score} \ No newline at end of file diff --git a/codebundles/k8s-redis-healthcheck/runbook.robot b/codebundles/k8s-redis-healthcheck/runbook.robot index 55fd27968..fe21269e3 100644 --- a/codebundles/k8s-redis-healthcheck/runbook.robot +++ b/codebundles/k8s-redis-healthcheck/runbook.robot @@ -38,7 +38,7 @@ Ping `${DEPLOYMENT_NAME}` Redis Workload ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Verify `${DEPLOYMENT_NAME}` Redis Read Write Operation +Verify `${DEPLOYMENT_NAME}` Redis Read Write Operation in Kubernetes [Documentation] Attempts to perform a write and read operation on the redis workload, checking that a key can be set, incremented, and read from. 
[Tags] redis cli increment health check read write ${set_op}= RW.CLI.Run Cli diff --git a/codebundles/k8s-restart-resource/runbook.robot b/codebundles/k8s-restart-resource/runbook.robot index ab2e013ae..ac20073be 100644 --- a/codebundles/k8s-restart-resource/runbook.robot +++ b/codebundles/k8s-restart-resource/runbook.robot @@ -42,7 +42,7 @@ Get Resource Logs with Labels `${LABELS}` ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Restart Resource with Labels `${LABELS}` +Restart Resource with Labels `${LABELS}` in `${CONTEXT}` [Documentation] Restarts the labeled resource in an attempt to get it out of a bad state. [Tags] resource application restart pod kill rollout revision ${resource_name}= RW.CLI.Run Cli diff --git a/codebundles/k8s-statefulset-healthcheck/runbook.robot b/codebundles/k8s-statefulset-healthcheck/runbook.robot index 29c8a126c..d58e12203 100644 --- a/codebundles/k8s-statefulset-healthcheck/runbook.robot +++ b/codebundles/k8s-statefulset-healthcheck/runbook.robot @@ -120,7 +120,7 @@ Troubleshoot StatefulSet Warning Events for `${STATEFULSET_NAME}` RW.Core.Add Pre To Report ${events.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` +Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Parses all events in a namespace within a timeframe and checks for unusual activity, raising issues for any found. 
[Tags] statefulset events info state anomolies count occurences ${statefulset_name} ${recent_anomalies}= RW.CLI.Run Cli @@ -158,7 +158,7 @@ Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` RW.Core.Add To Report ${anomalies_report_output}\n RW.Core.Add Pre To Report Commands Used:\n${history} -Fetch StatefulSet Logs for `${STATEFULSET_NAME}` and Add to Report +Fetch StatefulSet Logs for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` and Add to Report [Documentation] Fetches the last 100 lines of logs for the given statefulset in the namespace. [Tags] fetch log pod container errors inspect trace info ${STATEFULSET_NAME} statefulset ${logs}= RW.CLI.Run Cli @@ -184,7 +184,7 @@ Get Related StatefulSet `${STATEFULSET_NAME}` Events RW.Core.Add Pre To Report ${events.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}` +Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches the current state of the statefulset manifest for inspection. [Tags] statefulset details manifest info ${STATEFULSET_NAME} ${statefulset}= RW.CLI.Run Cli @@ -197,7 +197,7 @@ Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}` RW.Core.Add Pre To Report ${statefulset.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -List StatefulSets with Unhealthy Replica Counts In Namespace `${NAMESPACE}` +List Unhealthy Replica Counts for StatefulSets in Namespace `${NAMESPACE}` [Documentation] Pulls the replica information for a given StatefulSet and checks if it's highly available ... , if the replica counts are the expected / healthy values, and if not, what they should be. 
[Tags] diff --git a/codebundles/k8s-tail-logs-dynamic/runbook.robot b/codebundles/k8s-tail-logs-dynamic/runbook.robot index d2e3c5528..4d458b45f 100644 --- a/codebundles/k8s-tail-logs-dynamic/runbook.robot +++ b/codebundles/k8s-tail-logs-dynamic/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** -Get `${CONTAINER_NAME}` Application Logs +Get `${CONTAINER_NAME}` Application Logs in Namespace `${NAMESPACE}` [Documentation] Collects the last approximately 300 lines of logs from the workload [Tags] resource application workload logs state ${container_name} ${workload_name} ${logs}= RW.CLI.Run Cli diff --git a/codebundles/k8s-vault-healthcheck/runbook.robot b/codebundles/k8s-vault-healthcheck/runbook.robot index 838e3db10..95365c3d7 100644 --- a/codebundles/k8s-vault-healthcheck/runbook.robot +++ b/codebundles/k8s-vault-healthcheck/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Fetch Vault CSI Driver Logs +Fetch Vault CSI Driver Logs in Namespace `${NAMESPACE}` [Documentation] Fetches the last 100 lines of logs for the vault CSI driver. [Tags] fetch log pod container errors inspect trace info vault csi driver ${logs}= RW.CLI.Run Cli @@ -31,7 +31,7 @@ Fetch Vault CSI Driver Logs RW.Core.Add Pre To Report ${found_logs} RW.Core.Add Pre To Report Commands Used: ${history} -Get Vault CSI Driver Warning Events +Get Vault CSI Driver Warning Events in `${NAMESPACE}` [Documentation] Fetches warning-type events related to the vault CSI driver. [Tags] events errors warnings get vault csi driver ${events}= RW.CLI.Run Cli @@ -130,7 +130,7 @@ Check Vault CSI Driver Replicas ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Vault Logs +Fetch Vault Pod Workload Logs in Namespace `${NAMESPACE}` with Labels `${LABELS}` [Documentation] Fetches the last 100 lines of logs for all vault pod workloads in the vault namespace. 
[Tags] fetch log pod container errors inspect trace info statefulset vault ${logs}= RW.CLI.Run Cli @@ -147,7 +147,7 @@ Fetch Vault Logs RW.Core.Add Pre To Report ${found_logs} RW.Core.Add Pre To Report Commands Used: ${history} -Get Related Vault Events +Get Related Vault Events in Namespace `${NAMESPACE}` [Documentation] Fetches all warning-type events related to vault in the vault namespace. [Tags] events workloads errors warnings get statefulset vault ${events}= RW.CLI.Run Cli @@ -164,7 +164,7 @@ Get Related Vault Events RW.Core.Add Pre To Report ${found_events} RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Vault StatefulSet Manifest Details +Fetch Vault StatefulSet Manifest Details in `${NAMESPACE}` [Documentation] Fetches the current state of the vault statefulset manifest for inspection. [Tags] statefulset details manifest info vault ${statefulset}= RW.CLI.Run Cli @@ -177,7 +177,7 @@ Fetch Vault StatefulSet Manifest Details RW.Core.Add Pre To Report ${statefulset.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Vault DaemonSet Manifest Details +Fetch Vault DaemonSet Manifest Details in Namespace `${NAMESPACE}` [Documentation] Fetches the current state of the vault daemonset manifest for inspection. [Tags] statefulset details manifest info vault ${statefulset}= RW.CLI.Run Cli @@ -190,7 +190,7 @@ Fetch Vault DaemonSet Manifest Details RW.Core.Add Pre To Report ${statefulset.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Verify Vault Availability +Verify Vault Availability in Namespace `${NAMESPACE}` and Context `${CONTEXT}` [Documentation] Curls the vault endpoint and checks the HTTP response code. [Tags] http curl vault web code ok available ${rsp}= RW.CLI.Run Cli @@ -207,7 +207,7 @@ Verify Vault Availability ... set_issue_title=The Vault API Responded With An Error State ... set_issue_details=The vault state is init:$init, standby:$standby and sealed:$sealed. Based on "$_stdout".
Check statefulset pod logs and events. Verify or invoke unseal process. -Check Vault StatefulSet Replicas +Check Vault StatefulSet Replicas in Namespace `${NAMESPACE}` [Documentation] Pulls the replica information for the Vault statefulset and checks if it's highly available ... , if the replica counts are the expected / healthy values, and if not, what they should be. [Tags] diff --git a/codebundles/terraform-cloud-workspace-lock-check/runbook.robot b/codebundles/terraform-cloud-workspace-lock-check/runbook.robot index e5596fe81..6880295fc 100644 --- a/codebundles/terraform-cloud-workspace-lock-check/runbook.robot +++ b/codebundles/terraform-cloud-workspace-lock-check/runbook.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Checking whether the Terraform Cloud Workspace is in a locked state +Checking whether the Terraform Cloud Workspace '${TERRAFORM_WORKSPACE_NAME}' is in a locked state [Documentation] Use curl to check whether the Terraform Cloud Workspace is in a locked state [Tags] terraform cloud workspace lock ${curl_rsp}= RW.CLI.Run Cli diff --git a/codebundles/test-issue/runbook.robot b/codebundles/test-issue/runbook.robot index 90b2ecb4c..cf90ef2b0 100644 --- a/codebundles/test-issue/runbook.robot +++ b/codebundles/test-issue/runbook.robot @@ -13,7 +13,7 @@ Suite Setup Suite Initialization *** Tasks *** -Raise Full Issue +Raise Full Issue on `${RESOURCE_TYPE}` in `${SCOPE}` [Documentation] Always raises an issue with full content [Tags] test ${issue}= RW.CLI.Run Cli diff --git a/task_analysis.json b/task_analysis.json new file mode 100644 index 000000000..e8fcdfcde --- /dev/null +++ b/task_analysis.json @@ -0,0 +1,2173 @@ +{ + "task_results": [ + { + "codebundle": "gcloud-log-inspection", + "file": "runbook.robot", + "filepath": "codebundles/gcloud-log-inspection/runbook.robot", + "task": "Inspect GCP Logs For Common Errors", + "score": 3, + "reasoning": "The task title is clear and specific, indicating the action of inspecting GCP logs for
common errors. It is also easily readable by humans. The imported user variables are used to provide specificity by filtering logs from a specific GCP project. The task lacks a specific 'Where' variable; consider using `SEVERITY`.", + "suggested_title": "Inspect GCP Logs For Common Errors in GCP Project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "curl-http-ok", + "file": "runbook.robot", + "filepath": "codebundles/curl-http-ok/runbook.robot", + "task": "Checking HTTP URL Is Available And Timely", + "score": 4, + "reasoning": "The task title is clear and specific, it provides a clear instruction to use cURL to validate the http response. It includes specific tags related to the task. The imported user variables are effectively used in the documentation.", + "suggested_title": "Check HTTP URL Availability and Timeliness for `${URL}`" + }, + { + "codebundle": "curl-http-ok", + "file": "sli.robot", + "filepath": "codebundles/curl-http-ok/sli.robot", + "task": "Checking HTTP URL Is Available And Timely", + "score": 4, + "reasoning": "The task title is clear in its purpose and specific in the method to be used (cURL). It provides a clear directive for what the task entails, but could be more specific by including the 'Where' variable, which in this case would be the imported 'URL' variable.", + "suggested_title": "Validate HTTP URL Availability and Timeliness for ${URL}" + }, + { + "codebundle": "k8s-redis-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-redis-healthcheck/runbook.robot", + "task": "Ping `${DEPLOYMENT_NAME}` Redis Workload", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. 
It includes the 'What' (Redis Workload) and the 'Where' (DEPLOYMENT_NAME) variables in backticks & curly braces.", + "suggested_title": "Ping `${DEPLOYMENT_NAME}` Redis Workload" + }, + { + "codebundle": "k8s-redis-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-redis-healthcheck/runbook.robot", + "task": "Verify `${DEPLOYMENT_NAME}` Redis Read Write Operation", + "score": 4, + "reasoning": "The task title is clear in its purpose, readable, and specific. It includes the resource type (Redis) and the 'Where' variable `${DEPLOYMENT_NAME}` in backticks and curly braces.", + "suggested_title": "Verify `${DEPLOYMENT_NAME}` Redis Read Write Operation in Kubernetes" + }, + { + "codebundle": "terraform-cloud-workspace-lock-check", + "file": "runbook.robot", + "filepath": "codebundles/terraform-cloud-workspace-lock-check/runbook.robot", + "task": "Checking whether the Terraform Cloud Workspace is in a locked state", + "score": 3, + "reasoning": "The task title is clear and specific, mentioning 'Terraform Cloud Workspace' and 'locked state'. It is human-readable and provides a clear action using 'curl'. The only improvement would be to include the specific workspace name in the title. The task lacks a specific 'Where' variable; consider using `TERRAFORM_API_URL`.", + "suggested_title": "Checking whether the Terraform Cloud Workspace '${TERRAFORM_WORKSPACE_NAME}' is in a locked state" + }, + { + "codebundle": "k8s-artifactory-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-artifactory-health/runbook.robot", + "task": "Check Artifactory Liveness and Readiness Endpoints", + "score": 3, + "reasoning": "The task title is clear and specific about checking the liveness and readiness endpoints of Artifactory. It also mentions using curl to perform the check. The tags provide additional context. The imported variables are used effectively in the documentation. 
The task lacks a specific 'Where' variable; consider using `STATEFULSET_NAME`.", + "suggested_title": "Check Artifactory Liveness and Readiness Endpoints in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-otelcollector", + "file": "runbook.robot", + "filepath": "codebundles/k8s-otelcollector/runbook.robot", + "task": "Query Collector Queued Spans in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear and specific in terms of what needs to be done (query collector queued spans) and where (in the namespace). The use of backticks and curly braces for the 'Where' variable adds clarity.", + "suggested_title": "Query Collector Queued Spans in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-otelcollector", + "file": "runbook.robot", + "filepath": "codebundles/k8s-otelcollector/runbook.robot", + "task": "Check OpenTelemetry Collector Logs For Errors In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear and specific. It provides a clear directive to fetch OpenTelemetry Collector logs and check for errors, while also indicating that the search will be in a specific namespace.", + "suggested_title": "Check OpenTelemetry Collector Logs For Errors In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-otelcollector", + "file": "runbook.robot", + "filepath": "codebundles/k8s-otelcollector/runbook.robot", + "task": "Scan OpenTelemetry Logs For Dropped Spans In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear and specific, providing a clear directive to query OpenTelemetry logs for dropped spans in a specific namespace.
The use of backticks and curly braces around the NAMESPACE variable enhances readability and specificity.", + "suggested_title": "Query OpenTelemetry Logs For Dropped Spans In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-podresources-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-podresources-health/runbook.robot", + "task": "Show Pods Without Resource Limit or Resource Requests Set in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It includes both the 'What' (pods without resource limit or resource requests) and the 'Where' (namespace) variables identified by the placeholders and the imported user variable. The tags provide additional context and specificity.", + "suggested_title": "Show Pods Without Resource Limit or Resource Requests Set in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-podresources-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-podresources-health/runbook.robot", + "task": "Get Pod Resource Utilization with Top in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the action (Get Pod Resource Utilization with Top), the resource type (Pod), and the specific scope (Namespace `${NAMESPACE}`). The use of backticks & curly braces for the 'Where' variable adds clarity.", + "suggested_title": "Check Pod Resource Utilization with Top in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-podresources-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-podresources-health/runbook.robot", + "task": "Identify VPA Pod Resource Recommendations in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear in its purpose, mentioning both the 'What' (VPA Pod Resource Recommendations) and the 'Where' (specific Namespace). 
It is also human-readable and specific.", + "suggested_title": "Identify VPA Pod Resource Recommendations in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-podresources-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-podresources-health/runbook.robot", + "task": "Identify Resource Constrained Pods In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title clearly specifies the 'What' (identifying resource constrained pods) and the 'Where' (in the specified namespace). It is human-readable and specific, providing clear guidance on what the task entails.", + "suggested_title": "Identify Overutilized Pods in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "aws-s3-bucket-storage-report", + "file": "runbook.robot", + "filepath": "codebundles/aws-s3-bucket-storage-report/runbook.robot", + "task": "Check AWS S3 Bucket Storage Utilization", + "score": 3, + "reasoning": "The task title is clear, specific, and human-readable. It clearly states the action to be performed, the resource type (AWS S3 Bucket), and the specific scope (specified bucket). The imported variable AWS_REGION is used for specifying the location. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": null + }, + { + "codebundle": "k8s-cluster-node-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-cluster-node-health/runbook.robot", + "task": "Check for Node Restarts in Cluster `${CONTEXT}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the cluster context and the action to be taken. It is also human-readable and would be easy to understand for someone with knowledge of the environment. 
However, it could be improved by including a specific time interval for the node restarts.", + "suggested_title": "Check for Node Restarts in Cluster `${CONTEXT}` within Interval `${INTERVAL}`" + }, + { + "codebundle": "k8s-cluster-node-health", + "file": "sli.robot", + "filepath": "codebundles/k8s-cluster-node-health/sli.robot", + "task": "Check for Node Restarts in Cluster `${CONTEXT}`", + "score": 4, + "reasoning": "The task title is clear in its purpose (checking for node restarts), readable, and specific. It includes the 'What' (Node Restarts) and utilizes the imported 'CONTEXT' variable as the 'Where'.", + "suggested_title": "Check for Node Restarts in Cluster `${CONTEXT}`" + }, + { + "codebundle": "k8s-cluster-node-health", + "file": "sli.robot", + "filepath": "codebundles/k8s-cluster-node-health/sli.robot", + "task": "Generate Namspace Score", + "score": 2, + "reasoning": "The task title is vague and lacks specificity. It does not provide clear instructions or indicate a specific resource type or scope.", + "suggested_title": "Generate Namespace Score in Kubernetes Cluster `${CONTEXT}`" + }, + { + "codebundle": "gh-actions-artifact-analysis", + "file": "runbook.robot", + "filepath": "codebundles/gh-actions-artifact-analysis/runbook.robot", + "task": "Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}`", + "score": 5, + "reasoning": "The title is clear, human-readable, and specific.
It provides a clear instruction to analyze an artifact from a specific GitHub workflow in a specific repository using a user provided command.", + "suggested_title": "Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` using command `${ANALYSIS_COMMAND}`" + }, + { + "codebundle": "gh-actions-artifact-analysis", + "file": "sli.robot", + "filepath": "codebundles/gh-actions-artifact-analysis/sli.robot", + "task": "Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push metric", + "score": 4, + "reasoning": "The task title is clear and specific, it provides a clear set of actions to be performed on a specific artifact from a GitHub workflow in a repository. It includes user-provided analysis command and metric push. However, it could be improved by including the specific metric to be pushed.", + "suggested_title": "Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push `${METRIC}` metric" + }, + { + "codebundle": "k8s-image-check", + "file": "runbook.robot", + "filepath": "codebundles/k8s-image-check/runbook.robot", + "task": "Check Image Rollover Times for Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be performed (Check Image Rollover Times) and the specific scope (Namespace `${NAMESPACE}`). The documentation and tags provide additional context for the task.", + "suggested_title": "Check Image Rollover Times for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-image-check", + "file": "runbook.robot", + "filepath": "codebundles/k8s-image-check/runbook.robot", + "task": "List Images and Tags for Every Container in Running Pods for Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It clearly defines the task of listing images and tags for every container in running pods for a specific namespace. 
The documentation and tags provided further clarify the purpose of the task.", + "suggested_title": "List Images and Tags for Every Container in Running Pods for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-image-check", + "file": "runbook.robot", + "filepath": "codebundles/k8s-image-check/runbook.robot", + "task": "List Images and Tags for Every Container in Failed Pods for Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear and specific in its instruction to list images and tags for every container in failed pods specifically for the namespace. It includes relevant tags for indexing, and the imported 'NAMESPACE' variable is used to specify the scope.", + "suggested_title": "List Images and Tags for Every Container in Failed Pods for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-image-check", + "file": "runbook.robot", + "filepath": "codebundles/k8s-image-check/runbook.robot", + "task": "List ImagePullBackOff Events and Test Path and Tags for Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title clearly outlines the specific actions to be taken, including searching for ImagePullBackOff events and testing the path and tags for a specified namespace. It is human-readable and specific, providing clear guidance on what needs to be done.", + "suggested_title": "List ImagePullBackOff Events and Test Path and Tags for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "cli-test", + "file": "runbook.robot", + "filepath": "codebundles/cli-test/runbook.robot", + "task": "Run CLI and Parse Output For Issues", + "score": 3, + "reasoning": "The title is clear and specific, indicating the action of running a CLI and parsing its output for issues. It also mentions fetching output from a cluster and running tests, which provides context for the task. The imported variables NAMESPACE and CONTEXT provide clarity on the specific scope, as they will be substituted at runtime to specify the 'where' of the task. 
The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Run CLI and Parse Output For Issues in `${NAMESPACE}` namespace and `${CONTEXT}` context" + }, + { + "codebundle": "cli-test", + "file": "runbook.robot", + "filepath": "codebundles/cli-test/runbook.robot", + "task": "Exec Test", + "score": 3, + "reasoning": "The task title is clear and specific, explaining the purpose of the task and what it does. It is also human-readable. The tags provided give additional context to the task. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Exec Test in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "cli-test", + "file": "runbook.robot", + "filepath": "codebundles/cli-test/runbook.robot", + "task": "Local Process Test", + "score": 3, + "reasoning": "The title is clear and specific, indicating that the test is for running commands locally within the runner. It is also readable to a human and includes relevant tags. The imported user variables are not used in the title, however 'NAMESPACE' could potentially be used as a 'Where' variable. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Local Process Test for '${NAMESPACE}'" + }, + { + "codebundle": "curl-gmp-kong-ingress-inspection", + "file": "runbook.robot", + "filepath": "codebundles/curl-gmp-kong-ingress-inspection/runbook.robot", + "task": "Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold", + "score": 3, + "reasoning": "The task title is clear and specific, providing details on what needs to be checked (Kong Ingress HTTP Error Rate) and the threshold for violation. It is also human-readable with no ambiguous language. The imported variables provide context for where the inspection will take place.
The task lacks a specific 'Where' variable; consider using `GCP_PROJECT_ID`.", + "suggested_title": "Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold in GCP Project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "curl-gmp-kong-ingress-inspection", + "file": "runbook.robot", + "filepath": "codebundles/curl-gmp-kong-ingress-inspection/runbook.robot", + "task": "Check If Kong Ingress HTTP Request Latency Violates Threshold", + "score": 3, + "reasoning": "The task title is clear in its purpose, readable by a human, and specific in its scope. It clearly outlines the action, resource type (Kong Ingress), and specifies the threshold to be checked. The imported variable 'INGRESS_UPSTREAM' can be used as the 'Where' variable for further specificity. The task lacks a specific 'Where' variable; consider using `GCP_PROJECT_ID`.", + "suggested_title": "Check If Kong Ingress HTTP Request Latency Violates Threshold for Upstream `${INGRESS_UPSTREAM}`" + }, + { + "codebundle": "curl-gmp-kong-ingress-inspection", + "file": "runbook.robot", + "filepath": "codebundles/curl-gmp-kong-ingress-inspection/runbook.robot", + "task": "Check If Kong Ingress Controller Reports Upstream Errors", + "score": 3, + "reasoning": "The title is clear, specific, and human-readable. It clearly states the task of checking for upstream errors in the Kong ingress controller. It also mentions metrics, healthchecks, and dns errors, providing additional context. The tags and user variables are well-aligned with the task, further adding to the specificity.
The task lacks a specific 'Where' variable; consider using `GCP_PROJECT_ID`.", + "suggested_title": "Check If Kong Ingress Controller Reports Upstream Errors in GCP Project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "Fetch Events for Unhealthy Kubernetes PersistentVolumeClaims in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to 'Fetch Events' and the 'Where' being the namespace variable. It is human-readable and provides a specific scope.", + "suggested_title": "Fetch Events for Unhealthy Kubernetes PersistentVolumeClaims in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "List PersistentVolumeClaims in Terminating State in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (persistentvolumeclaim) and 'Where' (Namespace) variables in backticks and curly braces.", + "suggested_title": "List PersistentVolumeClaims in Terminating State in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "List PersistentVolumes in Terminating State in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It clearly states what needs to be done (List PersistentVolumes in Terminating State) and specifies the scope using the 'Where' variable (Namespace `${NAMESPACE}`).
The documentation and tags also provide additional context and specificity.", + "suggested_title": "List PersistentVolumes in Terminating State in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "List Pods with Attached Volumes and Related PersistentVolume Details in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is specific, clear, and human-readable. It clearly states the 'What' (List Pods with Attached Volumes and Related PersistentVolume Details) and the 'Where' (Namespace ${NAMESPACE}). The documentation provides additional context on what details to collect, making it highly specific.", + "suggested_title": null + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "Fetch the Storage Utilization for PVC Mounts in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing a clear action to fetch storage utilization for PVC mounts in a specific namespace. It is also human-readable and includes relevant tags for categorization. The only improvement would be to include the 'Where' variable in backticks and curly braces.", + "suggested_title": "Fetch the Storage Utilization for PVC Mounts in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/runbook.robot", + "task": "Check for RWO Persistent Volume Node Attachment Issues in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear in its goal of checking for RWO Persistent Volume Node Attachment Issues in a specific namespace. 
It provides specific guidance on what to look for and where to look for it.", + "suggested_title": "Check for RWO Persistent Volume Node Attachment Issues in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/sli.robot", + "task": "Fetch the Storage Utilization for PVC Mounts in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing a clear action to fetch storage utilization for PVC mounts in a specific namespace. It is also human-readable and includes relevant tags for categorization. The only improvement would be to include the 'Where' variable in backticks and curly braces.", + "suggested_title": "Fetch the Storage Utilization for PVC Mounts in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-pvc-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-pvc-healthcheck/sli.robot", + "task": "Generate Namspace Score", + "score": 3, + "reasoning": "The task title is somewhat clear but lacks specificity. It mentions 'Generate Namespace Score' but does not specify the location or resource for this action. It would be clearer with a specific 'Where' variable, such as 'NAMESPACE'.", + "suggested_title": "Generate Namespace Score for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-certmanager-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-certmanager-healthcheck/runbook.robot", + "task": "Get Namespace Certificate Summary for Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. 
It includes the 'What' (Namespace Certificate Summary) and uses the imported variable for 'Where' (NAMESPACE) within backticks & curly braces.", + "suggested_title": "" + }, + { + "codebundle": "k8s-certmanager-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-certmanager-healthcheck/runbook.robot", + "task": "Find Unhealthy Certificates in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (certificates) and 'Where' (namespace) variables. The documentation and tags provide additional context.", + "suggested_title": "Find Unhealthy Certificates in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-certmanager-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-certmanager-healthcheck/runbook.robot", + "task": "Find Failed Certificate Requests and Identify Issues for Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The title is clear, human-readable, and specific. It includes the 'What' (cert-manager certificates) and the 'Where' (specific namespace). The documentation and tags provide additional context for understanding the task.", + "suggested_title": "Find Failed Certificate Requests and Identify Issues for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-certmanager-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-certmanager-healthcheck/sli.robot", + "task": "Count Unready and Expired Certificates", + "score": 3, + "reasoning": "The task title is clear in its purpose, readable, and specific enough with the use of 'cert' and 'certificate' tags. It includes both the 'What' (certificates) and the 'Where' (namespace) variables for Kubernetes, making it specific to a certain scope. 
The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Count Unready and Expired Certificates in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "aws-eks-node-reboot", + "file": "runbook.robot", + "filepath": "codebundles/aws-eks-node-reboot/runbook.robot", + "task": "Check EKS Nodegroup Status", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action to be performed, the resource type (EKS nodegroup), and the where variable 'EKS_CLUSTER_NAME' included as a placeholder. The task lacks a specific 'Where' variable; consider using `AWS_DEFAULT_REGION`.", + "suggested_title": "Check EKS Nodegroup Status in `${EKS_CLUSTER_NAME}`" + }, + { + "codebundle": "azure-acr-image-sync", + "file": "runbook.robot", + "filepath": "codebundles/azure-acr-image-sync/runbook.robot", + "task": "Sync Container Images into Azure Container Registry `${ACR_REGISTRY}`", + "score": 4, + "reasoning": "The title is clear and specific about the task of syncing container images into Azure Container Registry. It includes the 'Where' variable `${ACR_REGISTRY}` in backticks and curly braces, making it easily identifiable.", + "suggested_title": "Sync Container Images into Azure Container Registry `${ACR_REGISTRY}`" + }, + { + "codebundle": "azure-acr-image-sync", + "file": "sli.robot", + "filepath": "codebundles/azure-acr-image-sync/sli.robot", + "task": "Count Outdated Images in Azure Container Registry `${ACR_REGISTRY}`", + "score": 4, + "reasoning": "The task title is clear in its purpose, readable to a human, and specific in counting outdated images in the Azure Container Registry. 
It includes the 'What' (ACR) and provides a specific scope ('${ACR_REGISTRY}') using the imported variable.", + "suggested_title": "Count Outdated Images in Azure Container Registry `${ACR_REGISTRY}`" + }, + { + "codebundle": "aws-lambda-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-lambda-health/runbook.robot", + "task": "List Lambda Versions and Runtimes", + "score": 3, + "reasoning": "The title is clear and specific, indicating the task's purpose to list Lambda versions and runtimes. It is also human-readable and provides a clear understanding of what the script will do. The tags provide additional context, and the imported user variables are clearly mentioned. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "List Lambda Versions and Runtimes in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-lambda-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-lambda-health/runbook.robot", + "task": "Analyze AWS Lambda Invocation Errors", + "score": 3, + "reasoning": "The task title is specific, clear, and human-readable. It provides a clear indication of what the script does and for which function and region it applies. The tags further clarify the purpose of the task. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-lambda-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-lambda-health/runbook.robot", + "task": "Monitor AWS Lambda Performance Metrics", + "score": 3, + "reasoning": "The task title is clear and specific about monitoring AWS Lambda performance metrics. It mentions the resource type (Lambda) and the action to be performed (monitoring performance metrics). The imported user variable AWS_REGION is used for specifying the 'Where' in the title. 
The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Monitor AWS Lambda Performance Metrics in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-lambda-health", + "file": "sli.robot", + "filepath": "codebundles/aws-lambda-health/sli.robot", + "task": "Analyze AWS Lambda Invocation Errors", + "score": 3, + "reasoning": "The task title is specific, clear, and human-readable. It provides a clear indication of what the script does and for which function and region it applies. The tags further clarify the purpose of the task. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Check Readiness Probe Configuration for StatefulSet `${STATEFULSET_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the StatefulSet `${STATEFULSET_NAME}`. It is also human-readable and provides a clear understanding of what the task entails.", + "suggested_title": "Check Readiness Probe Configuration for StatefulSet `${STATEFULSET_NAME}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Check Liveness Probe Configuration for StatefulSet `${STATEFULSET_NAME}`", + "score": 5, + "reasoning": "The title is clear, human-readable, and specific. 
It includes the task of checking liveness probe configuration for a specific StatefulSet, making it both clear on the 'What' and specifying the 'Where' with the STATEFULSET_NAME variable.", + "suggested_title": "Check Liveness Probe Configuration for StatefulSet `${STATEFULSET_NAME}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Troubleshoot StatefulSet Warning Events for `${STATEFULSET_NAME}`", + "score": 4, + "reasoning": "The title is clear in its task to troubleshoot warning events for a specific StatefulSet. It is also human-readable, and the use of backticks and curly braces makes it easy to understand where the specific StatefulSet name will be inserted.", + "suggested_title": "Troubleshoot StatefulSet Warning Events for `${STATEFULSET_NAME}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing a clear action to be performed on a specific resource type (StatefulSet) with the use of the imported variable. It also includes the purpose of the task, which is to check for anomalies in statefulset events.", + "suggested_title": "Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Fetch StatefulSet Logs for `${STATEFULSET_NAME}` and Add to Report", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be taken (fetch logs) and the target resource (StatefulSet). The documentation and tags provide additional context. 
It lacks specific information about the namespace, which could be included in the suggested title.", + "suggested_title": "Fetch StatefulSet Logs for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` and Add to Report" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Get Related StatefulSet `${STATEFULSET_NAME}` Events", + "score": 5, + "reasoning": "The task title is very clear and specific, indicating the action to 'Get Related StatefulSet' events. It is also human-readable and includes the specific resource type 'StatefulSet' and the scope 'in the namespace'. The imported variable '${STATEFULSET_NAME}' is used in a clear and consistent manner.", + "suggested_title": "" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be performed (fetch manifest details) and the resource type (StatefulSet). The use of backticks and curly braces for the STATEFULSET_NAME variable enhances human readability. However, it could benefit from specifying the 'Where' variable in the title, such as fetching manifest details for a specific StatefulSet in a particular namespace.", + "suggested_title": "Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-statefulset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-statefulset-healthcheck/runbook.robot", + "task": "List StatefulSets with Unhealthy Replica Counts In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is specific, clear, and human-readable. 
It provides the 'What' (StatefulSets) and 'Where' (Namespace) variables, making it easy to understand and execute.", + "suggested_title": "List Unhealthy Replica Counts for StatefulSets in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-labeledpods-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-labeledpods-healthcheck/sli.robot", + "task": "Measure Number of Running Pods with Label", + "score": 3, + "reasoning": "The title clearly states the action to be performed (measure), the resource type (running pods), and the specific scope (with label). The documentation provides additional context, and the tags give a clear indication of the task's purpose. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Measure Number of Running Pods with Label in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-serviceaccount-check", + "file": "runbook.robot", + "filepath": "codebundles/k8s-serviceaccount-check/runbook.robot", + "task": "Test Service Account Access to Kubernetes API Server in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It includes the 'What' (Test Service Account Access to Kubernetes API Server) and the 'Where' (Namespace `${NAMESPACE}`). It also includes relevant tags and uses imported variables for clarity.", + "suggested_title": "Test Service Account Access to Kubernetes API Server in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-flux-suspend-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-flux-suspend-namespace/runbook.robot", + "task": "Flux Suspend Namespace ${NAMESPACE}", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. 
It includes the 'What' (Namespace) and the 'Where' (specific scope) variable with the correct format for substitution.", + "suggested_title": "Flux Suspend Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-flux-suspend-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-flux-suspend-namespace/runbook.robot", + "task": "Unsuspend Flux for Namespace ${NAMESPACE}", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific. It clearly describes the action to be taken (unsuspending flux) and specifies the scope by using the 'NAMESPACE' variable. The tags also provide additional context.", + "suggested_title": "Unsuspend Flux for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-jaeger-http-query", + "file": "runbook.robot", + "filepath": "codebundles/k8s-jaeger-http-query/runbook.robot", + "task": "Query Traces in Jaeger for Unhealthy HTTP Response Codes in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The title is clear, human readable, and specific. It clearly defines the task of querying Jaeger for unhealthy HTTP response codes in a specific namespace. The usage of backticks and curly braces around the NAMESPACE variable enhances clarity and specificity.", + "suggested_title": "Query Traces in Jaeger for Unhealthy HTTP Response Codes in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "aws-elasticache-redis-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-elasticache-redis-health/runbook.robot", + "task": "Scan AWS Elasticache Redis Status", + "score": 3, + "reasoning": "The task title is clear and specific, indicating the action of scanning for AWS Elasticache Redis status. It also provides a high-level description of what will be checked. The use of the imported user variable 'AWS_REGION' ensures that the task has a specific scope. 
The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Scan AWS Elasticache Redis Status in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-elasticache-redis-health", + "file": "sli.robot", + "filepath": "codebundles/aws-elasticache-redis-health/sli.robot", + "task": "Scan ElastiCaches", + "score": 3, + "reasoning": "The task title is clear, specific, and human-readable. It includes the action 'Scan', the resource 'ElastiCaches', and the scope 'all Elasticache instances in the region'. The imported variable 'AWS_REGION' is appropriately used as the 'Where' variable. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Scan ElastiCaches in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "gcloud-node-preempt", + "file": "runbook.robot", + "filepath": "codebundles/gcloud-node-preempt/runbook.robot", + "task": "List all nodes in an active prempt operation for GCP Project `${GCP_PROJECT_ID}`", + "score": 4, + "reasoning": "The task title provides a clear and specific instruction to list all nodes that have been preempted within the defined time interval. It includes the 'What' (nodes) and the 'Where' (GCP Project specified by the GCP_PROJECT_ID variable). The title could be improved by specifying the time interval or age of the preempted nodes.", + "suggested_title": "List all nodes in an active preempt operation for GCP Project `${GCP_PROJECT_ID}` within the last `${AGE}` hours" + }, + { + "codebundle": "gcloud-node-preempt", + "file": "sli.robot", + "filepath": "codebundles/gcloud-node-preempt/sli.robot", + "task": "Count the number of nodes in active prempt operation", + "score": 3, + "reasoning": "The title is clear and specific, indicating the task is to count the number of nodes in active preemption. It is also human-readable without jargon. The documentation and tags provide further context and clarity. The imported user variables are used for substitution. 
The 'Where' variable 'GCP_PROJECT_ID' is enclosed in backticks and curly braces. The task lacks a specific 'Where' variable; consider using `GCP_PROJECT_ID`.", + "suggested_title": "Count the number of nodes in active preempt operation in project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "runbook.robot", + "filepath": "codebundles/azure-appgateway-health/runbook.robot", + "task": "Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific, providing both the 'What' (application gateway) and the 'Where' (resource group) variables. It also mentions the documentation and tags to provide context.", + "suggested_title": "Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "runbook.robot", + "filepath": "codebundles/azure-appgateway-health/runbook.robot", + "task": "Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The title is clear and specific, mentioning the resource type 'Application Gateway' and the specific scope in backticks & curly braces. However, it could be more human-readable and detailed.", + "suggested_title": "Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` in Subscription `${AZURE_RESOURCE_SUBSCRIPTION_ID}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "runbook.robot", + "filepath": "codebundles/azure-appgateway-health/runbook.robot", + "task": "Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. 
It includes both the 'What' (application gateway backend pool) and the 'Where' (resource group) variables in a well-structured format. The documentation, tags, and user variables also complement the clarity and specificity of the task.", + "suggested_title": "Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "sli.robot", + "filepath": "codebundles/azure-appgateway-health/sli.robot", + "task": "Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The title is very clear and specific, mentioning the resource type (Application Gateway) and the specific scope (resource group) using the imported variables. It is also human-readable and provides a clear understanding of the task.", + "suggested_title": "Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "sli.robot", + "filepath": "codebundles/azure-appgateway-health/sli.robot", + "task": "Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action of checking the configuration health of an Application Gateway in a specified resource group. 
The title also includes placeholders for user variables, making it ready for runtime substitution.", + "suggested_title": "Fetch AKS Cluster Config in Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "sli.robot", + "filepath": "codebundles/azure-appgateway-health/sli.robot", + "task": "Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. It includes both the 'What' (application gateway backend pool) and the 'Where' (resource group) variables in a well-structured format. The documentation, tags, and user variables also complement the clarity and specificity of the task.", + "suggested_title": "Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appgateway-health", + "file": "sli.robot", + "filepath": "codebundles/azure-appgateway-health/sli.robot", + "task": "Generate Application Gateway Health Score", + "score": 3, + "reasoning": "The title is clear and specific, but could be more human-readable. It specifies the 'What' (Application Gateway) and utilizes imported variables for 'Where' (AZ_RESOURCE_GROUP, APP_GATEWAY_NAME). The task lacks a specific 'Where' variable; consider using `AZ_RESOURCE_GROUP`.", + "suggested_title": "Generate Application Gateway Health Score for `${APP_GATEWAY_NAME}` in Azure Subscription `${AZURE_RESOURCE_SUBSCRIPTION_ID}`" + }, + { + "codebundle": "k8s-kubectl-cmd", + "file": "runbook.robot", + "filepath": "codebundles/k8s-kubectl-cmd/runbook.robot", + "task": "Run User Provided Kubectl Command", + "score": 3, + "reasoning": "The title is clear in its purpose, readable by humans, and specific in its action of running a user provided kubectl command. It also includes the relevant tag 'kubectl' for easy categorization. 
The documentation provides additional clarity on the task's purpose. The task lacks a specific 'Where' variable; consider using `KUBECTL_COMMAND`.", + "suggested_title": "Run User Provided Kubectl Command on `${KUBECTL_COMMAND}` in Kubernetes Cluster" + }, + { + "codebundle": "k8s-kubectl-cmd", + "file": "sli.robot", + "filepath": "codebundles/k8s-kubectl-cmd/sli.robot", + "task": "Run User Provided Kubectl Command", + "score": 3, + "reasoning": "The title is clear and specific about the task of running a user provided kubectl command and pushing the metric as an SLI. It is readable and does not include placeholders. The imported variable 'KUBECTL_COMMAND' is used to specify the user provided kubectl command. The task lacks a specific 'Where' variable; consider using `KUBECTL_COMMAND`.", + "suggested_title": "Run User Provided Kubectl Command in Kubernetes Cluster `$${KUBECTL_CLUSTER}`" + }, + { + "codebundle": "k8s-ingress-gce-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-gce-healthcheck/runbook.robot", + "task": "Search For GCE Ingress Warnings in GKE", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action to be taken (Search For GCE Ingress Warnings) and the specific location to perform the search (in GKE). The imported user variables are not explicitly used in the title, however, 'CONTEXT' can be used as the specific GKE context for clarity. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Search For GCE Ingress Warnings in GKE Context `${CONTEXT}`" + }, + { + "codebundle": "k8s-ingress-gce-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-gce-healthcheck/runbook.robot", + "task": "Identify Unhealthy GCE HTTP Ingress Backends", + "score": 3, + "reasoning": "The title is clear and provides a specific task to identify unhealthy GCE HTTP Ingress Backends. 
It includes relevant tags and specific documentation for clarity. The imported variables are also used in the documentation. The 'Where' variable is provided as 'INGRESS' and is properly formatted with backticks and curly braces. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Identify Unhealthy GCE HTTP Ingress Backends in GKE Namespace `$${NAMESPACE}`" + }, + { + "codebundle": "k8s-ingress-gce-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-gce-healthcheck/runbook.robot", + "task": "Validate GCP HTTP Load Balancer Configurations", + "score": 3, + "reasoning": "The task title is clear, specific, and human-readable. It mentions the resource type (GCP HTTP Load Balancer Configurations) and the specific action to be taken (Validate), as well as the method (Extract from ingress annotations and check health of each object). It includes relevant tags and imported user variables for context. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID}`" + }, + { + "codebundle": "k8s-ingress-gce-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-gce-healthcheck/runbook.robot", + "task": "Fetch Network Error Logs from GCP Operations Manager for Ingress Backends", + "score": 3, + "reasoning": "The title is clear and specific, providing details about the task, the resource type (GCP Operations Manager) and the specific scope (Ingress Backends). The documentation and tags provide additional clarity and specificity. The imported user variables are also clear and relevant. 
The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Fetch Network Error Logs from GCP Operations Manager for Ingress Backends in GCP Project `$${GCP_PROJECT_ID}`" + }, + { + "codebundle": "k8s-ingress-gce-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-gce-healthcheck/runbook.robot", + "task": "Review GCP Operations Logging Dashboard", + "score": 3, + "reasoning": "The title is clear and specific in indicating the task to review GCP Operations Logging Dashboard, providing clarity on the action to be taken. It is human-readable and the use of backticks and curly braces for the 'Where' variable ensures specificity. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Review GCP Operations Logging Dashboard in GCP project `$${GCP_PROJECT_ID}`" + }, + { + "codebundle": "k8s-restart-resource", + "file": "runbook.robot", + "filepath": "codebundles/k8s-restart-resource/runbook.robot", + "task": "Get Current Resource State with Labels `${LABELS}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the action to be performed ('Get'), the resource type ('Current Resource State'), and the specific scope ('with Labels ${LABELS}'). The documentation provides additional context about the purpose of the task.", + "suggested_title": null + }, + { + "codebundle": "k8s-restart-resource", + "file": "runbook.robot", + "filepath": "codebundles/k8s-restart-resource/runbook.robot", + "task": "Get Resource Logs with Labels `${LABELS}`", + "score": 4, + "reasoning": "The task title is clear in its purpose of collecting resource logs with specific labels, and the imported variable ${LABELS} provides specificity. 
The documentation also adds clarity to the task.", + "suggested_title": "Get Resource Logs with Labels `${LABELS}`" + }, + { + "codebundle": "k8s-restart-resource", + "file": "runbook.robot", + "filepath": "codebundles/k8s-restart-resource/runbook.robot", + "task": "Restart Resource with Labels `${LABELS}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be taken (Restart) and the resource type (Resource) with labels specified by the user variable. It is also human-readable and includes information about the purpose of the task. The absence of a specific 'Where' variable is compensated by the use of the imported user variable 'CONTEXT' as a relevant substitute.", + "suggested_title": "Restart Resource with Labels `${LABELS}` in `${CONTEXT}`" + }, + { + "codebundle": "k8s-cluster-resource-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-cluster-resource-health/runbook.robot", + "task": "Identify High Utilization Nodes for Cluster `${CONTEXT}`", + "score": 4, + "reasoning": "The task title is clear and specific in its purpose, and the use of backticks & curly braces for the 'Where' variable adds human readability. The documentation and tags provide additional context.", + "suggested_title": "Identify High Utilization Nodes for Cluster `${CONTEXT}`" + }, + { + "codebundle": "k8s-cluster-resource-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-cluster-resource-health/runbook.robot", + "task": "Identify Pods Causing High Node Utilization in Cluster `${CONTEXT}`", + "score": 5, + "reasoning": "The task title clearly indicates the 'What' (identifying pods causing high node utilization) and includes a specific 'Where' variable in backticks & curly braces (Cluster `${CONTEXT}`). 
It is human-readable and specific.", + "suggested_title": "Identify Pods Causing High Node Utilization in Cluster `${CONTEXT}`" + }, + { + "codebundle": "k8s-cluster-resource-health", + "file": "sli.robot", + "filepath": "codebundles/k8s-cluster-resource-health/sli.robot", + "task": "Identify High Utilization Nodes for Cluster `${CONTEXT}`", + "score": 4, + "reasoning": "The task is clear in its objective to identify high utilization nodes in a specific cluster. It is human-readable and specific in the requirement for CPU or Memory utilization above 90%. The use of backticks and curly braces for the 'Where' variable (CONTEXT) ensures specificity.", + "suggested_title": "Identify High Utilization Nodes for Cluster `${CONTEXT}`" + }, + { + "codebundle": "k8s-prometheus-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-prometheus-healthcheck/runbook.robot", + "task": "Check Prometheus Service Monitors", + "score": 3, + "reasoning": "The task title is clear and specific, mentioning the 'Prometheus Service Monitors' and the specific action to be taken. It is also human-readable. The only improvement would be to include the specific namespace in the title. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Check Prometheus Service Monitors in namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-prometheus-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-prometheus-healthcheck/runbook.robot", + "task": "Check For Successful Rule Setup", + "score": 3, + "reasoning": "The task title is clear and specific in stating the action to 'Check For Successful Rule Setup'. It provides a clear direction for what needs to be done. However, it lacks the specific 'Where' variable, so the most relevant imported variable, 'NAMESPACE', can be used as the 'Where' variable. 
The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Check For Successful Rule Setup in Kubernetes Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-prometheus-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-prometheus-healthcheck/runbook.robot", + "task": "Verify Prometheus RBAC Can Access ServiceMonitors", + "score": 3, + "reasoning": "The title is clear and specific, it mentions the task to verify RBAC for Prometheus accessing ServiceMonitors. The documentation also provides a clear direction on how to approach the task by fetching operator rbac and verifying it has ServiceMonitors in rbac. The imported user variables also provide context for where this verification needs to take place. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Verify Prometheus RBAC Can Access ServiceMonitors in Namespace `${PROM_NAMESPACE}`" + }, + { + "codebundle": "k8s-prometheus-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-prometheus-healthcheck/runbook.robot", + "task": "Identify Endpoint Scraping Errors", + "score": 3, + "reasoning": "The task title is clear and specific, providing a clear instruction to inspect prometheus operator logs for scraping errors and raise issues if any found. It lacks a specific 'Where' variable, but the most relevant imported variable 'NAMESPACE' can be used. 
The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Inspect Prometheus Operator Logs for Scraping Errors in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-prometheus-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-prometheus-healthcheck/runbook.robot", + "task": "Check Prometheus API Healthy", + "score": 3, + "reasoning": "The title is clear and specific, indicating the task is to check the Prometheus healthy API endpoint. It could be more specific if it included the 'Where' variable being used, such as 'PROM_NAMESPACE'. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Check Prometheus API Healthy in Namespace `${PROM_NAMESPACE}`" + }, + { + "codebundle": "k8s-loki-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-loki-healthcheck/runbook.robot", + "task": "Check Loki Ring API", + "score": 3, + "reasoning": "The title is somewhat clear in its purpose but lacks specificity and human readability. It mentions the API to be checked and the type of inspection, but it could benefit from additional details and clarity.", + "suggested_title": "Check Loki Ring API for Unhealthy Shards in Kubernetes Cluster `$${NAMESPACE}`" + }, + { + "codebundle": "k8s-loki-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-loki-healthcheck/runbook.robot", + "task": "Check Loki API Ready", + "score": 3, + "reasoning": "The title is clear and specifies the task of checking Loki API readiness. It is also human-readable although it could be improved with more specific details. 
The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Check Loki API Ready in Kubernetes Cluster `${NAMESPACE}`" + }, + { + "codebundle": "aws-eks-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-eks-health/runbook.robot", + "task": "Check EKS Fargate Cluster Health Status", + "score": 3, + "reasoning": "The title is clear, human-readable, and specific. It specifies the task of checking the health status of an EKS Fargate cluster, which is the 'What', and also includes the AWS region as the 'Where' variable using the imported user variable. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Check EKS Fargate Cluster Health Status in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-eks-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-eks-health/runbook.robot", + "task": "Check EKS Cluster Health Status", + "score": 3, + "reasoning": "The task title is clear and specific, indicating it checks the health status of an Amazon EKS cluster. The documentation and tags provide additional context, and the imported user variable 'AWS_REGION' suggests a specific scope. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-eks-health", + "file": "runbook.robot", + "filepath": "codebundles/aws-eks-health/runbook.robot", + "task": "List EKS Cluster Metrics", + "score": 3, + "reasoning": "The task title is clear in its purpose and specifies the resource type (EKS Cluster) and the Where variable AWS region which is provided as an imported user variable. 
The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Monitor EKS Cluster Health in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "aws-eks-health", + "file": "sli.robot", + "filepath": "codebundles/aws-eks-health/sli.robot", + "task": "Check EKS Cluster Health Status", + "score": 3, + "reasoning": "The task title is clear and specific, indicating it checks the health status of an Amazon EKS cluster. The documentation and tags provide additional context, and the imported user variable 'AWS_REGION' suggests a specific scope. The task lacks a specific 'Where' variable; consider using `AWS_REGION`.", + "suggested_title": "Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}`" + }, + { + "codebundle": "k8s-ingress-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-healthcheck/runbook.robot", + "task": "Fetch Ingress Object Health in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear in stating the task to fetch the health of Ingress objects in a specific namespace. It is human-readable and provides specific details on what data will be fetched. It lacks the specific 'Where' variable, but the imported 'NAMESPACE' variable can be used as the 'Where' in this context.", + "suggested_title": "Fetch Ingress Object Health in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-ingress-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-ingress-healthcheck/runbook.robot", + "task": "Check for Ingress and Service Conflicts in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title provides clear guidance on the specific action to be performed (Check for Ingress and Service Conflicts) and specifies the scope of the action using the `${NAMESPACE}` variable. 
The documentation and tags further clarify the purpose and context of the task.", + "suggested_title": "" + }, + { + "codebundle": "k8s-argocd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-helm-health/runbook.robot", + "task": "Fetch all available ArgoCD Helm releases in namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be performed (fetch) and the resource type (ArgoCD Helm releases). It also includes a specific scope in the form of the namespace `${NAMESPACE}`. The usage of backticks & curly braces for the 'Where' variable adds clarity.", + "suggested_title": "Fetch all available ArgoCD Helm releases in namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-argocd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-helm-health/runbook.robot", + "task": "Fetch Installed ArgoCD Helm release versions in namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action (Fetch) and the target (Installed ArgoCD Helm release versions) in the specified namespace using the provided 'NAMESPACE' variable. It is also human-readable and provides a clear understanding of the task.", + "suggested_title": "Fetch Installed ArgoCD Helm release versions in namespace `${NAMESPACE}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-aks-triage/runbook.robot", + "task": "Check for Resource Health Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be taken ('Check for Resource Health Issues') and specifying the resource type ('AKS Cluster') as well as the location ('Resource Group'). The documentation provides a clear understanding of the task. 
However, it could be improved by including specific health issues to check for.", + "suggested_title": "Check for Overutilization and Networking Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-aks-triage/runbook.robot", + "task": "Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The title provides a clear indication of the task, specifying to check the configuration health of the AKS Cluster in a specific resource group in Azure. The use of backticks and curly braces for the 'Where' variable (${AZ_RESOURCE_GROUP}) adds specificity.", + "suggested_title": "Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-aks-triage/runbook.robot", + "task": "Check Network Configuration of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear and specific, providing the 'What' (AKS Cluster) and 'Where' (Resource Group) variables in backticks and curly braces. It is also human-readable and includes relevant tags for easy categorization.", + "suggested_title": "Check Network Configuration of AKS Cluster `${{AKS_CLUSTER}}` In Resource Group `${{AZ_RESOURCE_GROUP}}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-aks-triage/runbook.robot", + "task": "Fetch Activities for AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The title is clear, readable, and specific. It clearly states the task of fetching activities for a specific AKS cluster in a specific resource group. 
It provides enough context for someone to understand the purpose of the task.", + "suggested_title": "Fetch Activities for AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-aks-triage/sli.robot", + "task": "Check for Resource Health Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It includes the 'What' (AKS cluster) and 'Where' (Resource Group) variables in backticks & curly braces as required. The documentation and tags provide additional context.", + "suggested_title": "Check for Resource Health Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-aks-triage/sli.robot", + "task": "Fetch Activities for AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, easily readable by humans, and very specific. It includes the 'What' (AKS Cluster) and the 'Where' (Resource Group) variables in backticks and curly braces.", + "suggested_title": "Fetch Activities for AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-aks-triage/sli.robot", + "task": "Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the task of checking the configuration health of the AKS cluster in a specific resource group in Azure. 
The use of backticks and curly braces for the 'Where' variable (AKS_CLUSTER and AZ_RESOURCE_GROUP) adds specificity and clarity.", + "suggested_title": "Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-aks-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-aks-triage/sli.robot", + "task": "Generate AKS Cluster Health Score", + "score": 3, + "reasoning": "The task title lacks specificity and does not provide clear details about what exactly the health score is measuring. It also does not specify the 'Where' variable, such as the location or scope of the AKS Cluster.", + "suggested_title": "Calculate AKS Cluster Health Score for AKS Cluster `${AKS_CLUSTER}` in Azure Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "k8s-tail-logs-dynamic", + "file": "runbook.robot", + "filepath": "codebundles/k8s-tail-logs-dynamic/runbook.robot", + "task": "Get `${CONTAINER_NAME}` Application Logs", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the collection of logs from a particular container. It is also human-readable. However, it could be improved by including the 'Where' variable in the title.", + "suggested_title": "Get `${CONTAINER_NAME}` Application Logs in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-tail-logs-dynamic", + "file": "runbook.robot", + "filepath": "codebundles/k8s-tail-logs-dynamic/runbook.robot", + "task": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the task of tailing application logs for stacktraces and mentions parsing them to find relevant source code information. 
The imported user variables are used appropriately and there is a clear 'What' (container logs) and 'Where' (specified by ${CONTAINER_NAME} variable) in the title.", + "suggested_title": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces" + }, + { + "codebundle": "k8s-tail-logs-dynamic", + "file": "sli.robot", + "filepath": "codebundles/k8s-tail-logs-dynamic/sli.robot", + "task": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces", + "score": 4, + "reasoning": "The title is clear and specific about the task of tailing application logs for stacktraces. It includes a variable `${CONTAINER_NAME}` indicating the specific scope, which ensures the task has both a 'What' (resource type) and a 'Where' (specific scope). The documentation provides additional context and clarity.", + "suggested_title": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces" + }, + { + "codebundle": "k8s-daemonset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-daemonset-healthcheck/runbook.robot", + "task": "Get DaemonSet Logs for `${DAEMONSET_NAME}` and Add to Report", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. It includes the 'What' (daemonset) and 'Where' (specific daemonset name in the namespace) variables, and provides a clear instruction to fetch logs and add them to a report.", + "suggested_title": "Get DaemonSet Logs for `${DAEMONSET_NAME}` and Add to Report" + }, + { + "codebundle": "k8s-daemonset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-daemonset-healthcheck/runbook.robot", + "task": "Get Related Daemonset `${DAEMONSET_NAME}` Events", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (daemonset) and the 'Where' (namespace). 
The documentation and tags also provide additional context and specificity.", + "suggested_title": "Get Related Daemonset `${DAEMONSET_NAME}` Events in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-daemonset-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-daemonset-healthcheck/runbook.robot", + "task": "Check Daemonset `${DAEMONSET_NAME}` Replicas", + "score": 5, + "reasoning": "The task title is clear, easily readable, and specific. It clearly states the action 'Check Daemonset Replicas', includes the 'Where' variable `${DAEMONSET_NAME}`, and provides additional context about checking for high availability and expected replica counts.", + "suggested_title": "Check Daemonset `${DAEMONSET_NAME}` Replicas" + }, + { + "codebundle": "k8s-chaos-workload", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-workload/runbook.robot", + "task": "Test `${WORKLOAD_NAME}` High Availability", + "score": 4, + "reasoning": "The task title is clear and specific, it mentions the purpose of the task which is to test the high availability of a specific workload. It also uses the imported variable for the workload name. However, it could be improved by including the 'Where' variable, which in this case would be the 'NAMESPACE' variable.", + "suggested_title": "Test `${WORKLOAD_NAME}` High Availability in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-workload", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-workload/runbook.robot", + "task": "OOMKill `${WORKLOAD_NAME}` Pod", + "score": 4, + "reasoning": "The title is clear and specific about the task, explaining that it will OOMKill a specific pod under a configured workload. It also includes relevant tags for context. 
The use of backticks and curly braces in the title ensures clarity and specificity.", + "suggested_title": "OOMKill `${WORKLOAD_NAME}` Pod" + }, + { + "codebundle": "k8s-chaos-workload", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-workload/runbook.robot", + "task": "Mangle Service Selector For `${WORKLOAD_NAME}`", + "score": 4, + "reasoning": "The title is specific and clear in its purpose, mentioning the manipulation of a service's selector to cause a network disruption. It includes the placeholder for the 'What' variable (${WORKLOAD_NAME}), but could benefit from including the 'Where' variable in backticks & curly braces.", + "suggested_title": "Mangle Service Selector For `${WORKLOAD_NAME}` in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-workload", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-workload/runbook.robot", + "task": "Mangle Service Port For `${WORKLOAD_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, with a defined action and target. It is readable and provides enough context to understand the intended purpose. The imported variable `${WORKLOAD_NAME}` is used appropriately. However, it could be more specific if it included the 'Where' variable, such as NAMESPACE", + "suggested_title": "Mangle Service Port For `${WORKLOAD_NAME}` in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-workload", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-workload/runbook.robot", + "task": "Fill Tmp Directory Of Pod From `${WORKLOAD_NAME}`", + "score": 4, + "reasoning": "The title provides a clear instruction to fill the /tmp directory of a specific pod identified by the WORKLOAD_NAME variable. 
It is human-readable and has a specific scope.", + "suggested_title": "Fill Tmp Directory Of Pod From `${WORKLOAD_NAME}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "List Resources Related to Postgres Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is specific and clear, providing a clear 'What' (Postgres Cluster) and a 'Where' (Namespace) using the imported variable. The documentation provides a simple explanation of the task, and the tags give further context to the task's purpose.", + "suggested_title": "List Resources Related to Postgres Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Get Postgres Pod Logs & Events for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating that the goal is to get Postgres pod logs and events for a particular cluster in a specific namespace. The use of backticks and curly braces for the 'Where' variable (`${NAMESPACE}`) adds clarity and specificity.", + "suggested_title": "Get Postgres Pod Logs & Events for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Get Postgres Pod Resource Utilization for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (Postgres Pod Resource Utilization) and the 'Where' (Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`). 
The use of backticks & curly braces for the 'Where' variable is correct.", + "suggested_title": "Get Postgres Pod Resource Utilization for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Get Running Postgres Configuration for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to 'Get Running Postgres Configuration' and including both the 'What' (Postgres instance) and 'Where' (Cluster and Namespace) variables in backticks & curly braces.", + "suggested_title": "Get Running Postgres Configuration for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Get Patroni Output and Add to Report for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (resource type) which is the cluster `${OBJECT_NAME}` and the 'Where' (specific scope) which is the namespace `${NAMESPACE}`. The documentation and tags provide additional context and clarity.", + "suggested_title": "Get Patroni Output and Add to Report for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Fetch Patroni Database Lag for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is extremely clear, human-readable, and specific. 
It provides a clear action ('Fetch Patroni Database Lag'), specifies the resource type ('Cluster `${OBJECT_NAME}`') and the specific scope ('in Namespace `${NAMESPACE}`). The use of backticks & curly braces for variables adds clarity to the specific scope.", + "suggested_title": "Fetch Patroni Database Lag for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Check Database Backup Status for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action of checking the database backup status for a specific cluster and namespace in Kubernetes. It also includes relevant tags and imported user variables. The only room for improvement would be to include the 'WHERE' variable in backticks and curly braces.", + "suggested_title": "Check Database Backup Status for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/runbook.robot", + "task": "Run DB Queries for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is specific and human-readable, including the 'What' (Cluster) and 'Where' (Namespace) variables in backticks and curly braces.", + "suggested_title": "Run DB Queries for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/sli.robot", + "task": "Fetch Patroni Database Lag", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It accurately describes the action of fetching Patroni database lag using 'patronictl' and mentions raising issues if necessary. 
The tags provide additional context, and the imported user variables will be substituted at runtime. This task has both a 'What' (resource type - Patroni database) and a 'Where' (specific scope) with variables like 'NAMESPACE', 'HOSTNAME', and 'DATABASE_CONTAINER'. The task lacks a specific 'Where' variable; consider using `CONTEXT`.", + "suggested_title": "Check Patroni Database Lag in Namespace `${NAMESPACE}` on Host `${HOSTNAME}` using `patronictl`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/sli.robot", + "task": "Check Database Backup Status for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear and specific, including the 'What' (cluster database backup status) and the 'Where' (the cluster and namespace variables). The documentation provides additional clarity on the task. The tags also help to further specify the task.", + "suggested_title": "Check Database Backup Status for Cluster `${OBJECT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-postgres-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-postgres-healthcheck/sli.robot", + "task": "Generate Namspace Score", + "score": 3, + "reasoning": "The task title 'Generate Namspace Score' is clear and specific in terms of the action to be performed, but it lacks clarity in terms of the specific scope or 'Where' variable. It does not provide a clear indication of the specific namespace for which the score should be generated.", + "suggested_title": "Generate Namespace Score for Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-reconcile", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-reconcile/runbook.robot", + "task": "Health Check Flux Reconciliation", + "score": 3, + "reasoning": "The task title is clear and specific, indicating that it fetches reconciliation logs for flux and creates a report for them. 
The tags provide additional context about the technology involved. The imported user variables are used in a clear manner. The title lacks a specific 'Where' variable, so 'FLUX_NAMESPACE' can be used as the most relevant imported variable as a 'Where' in this case. The task lacks a specific 'Where' variable; consider using `CONTEXT`.", + "suggested_title": "Health Check Flux Reconciliation in Kubernetes Namespace `${FLUX_NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-reconcile", + "file": "sli.robot", + "filepath": "codebundles/k8s-fluxcd-reconcile/sli.robot", + "task": "Health Check Flux Reconciliation", + "score": 3, + "reasoning": "The task title is clear, specific, and human-readable. It includes the 'What' (Flux Reconciliation) and the 'Where' (Flux namespace) variables, making it very specific and well-defined. The task lacks a specific 'Where' variable; consider using `CONTEXT`.", + "suggested_title": "" + }, + { + "codebundle": "curl-gmp-nginx-ingress-inspection", + "file": "runbook.robot", + "filepath": "codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot", + "task": "Fetch Nginx HTTP Errors From GMP for Ingress `${INGRESS_OBJECT_NAME}`", + "score": 5, + "reasoning": "The task title is very clear and specific, mentioning the resource type (Nginx HTTP Errors), the location (GMP for Ingress `${INGRESS_OBJECT_NAME}`), and the action to be taken. 
It is also human-readable and does not require technical knowledge to understand.", + "suggested_title": "Fetch Nginx HTTP Errors From GMP for Ingress `${INGRESS_OBJECT_NAME}`" + }, + { + "codebundle": "curl-gmp-nginx-ingress-inspection", + "file": "runbook.robot", + "filepath": "codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot", + "task": "Find Owner and Service Health for Ingress `${INGRESS_OBJECT_NAME}`", + "score": 5, + "reasoning": "The task title clearly states the specific action to be performed (Find Owner and Service Health), the resource type (Ingress), and includes a placeholder for the specific scope (`${INGRESS_OBJECT_NAME}`). The documentation and tags provide additional clarity and specificity.", + "suggested_title": "Find Owner and Service Health for Ingress `${INGRESS_OBJECT_NAME}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Inspect Warning Events in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing a clear action to be taken ('Inspect Warning Events') and specifying the scope with the 'Where' variable placeholder. The documentation provides additional context and details for the task.", + "suggested_title": "Inspect Warning Events in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Inspect Container Restarts In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title provides a clear and specific instruction to inspect container restarts in the specified namespace. 
It is human-readable and includes the 'What' (container restarts) and the 'Where' (namespace) variables.", + "suggested_title": "Inspect Container Restarts In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Inspect Pending Pods In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is specific in its instruction to inspect pending pods in a particular namespace, making it clear and easily understandable. The use of the imported variable for the namespace ensures human readability and specificity.", + "suggested_title": "Inspect Pending Pods In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Inspect Failed Pods In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear in its purpose, readable, and specific. It includes the 'What' (failed pods) and the 'Where' (namespace) variables. The tags also provide additional context.", + "suggested_title": "Inspect Failed Pods In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Inspect Workload Status Conditions In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is specific and clear in its purpose of inspecting workload status conditions in a particular namespace. 
It includes the 'What' (workload status conditions) and the 'Where' (namespace) variables, leading to a high score.", + "suggested_title": "Inspect Workload Status Conditions In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Get Listing Of Resources In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear about what it wants to accomplish (get listing of resources) and where it should be done (in the namespace). It provides a specific action and location, but could benefit from including the namespace variable in backticks and curly braces.", + "suggested_title": "Get Listing Of Resources In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Check Event Anomalies in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action of checking for event anomalies in a specific namespace. It is also human-readable and provides enough information for a user to understand the purpose of the task.", + "suggested_title": "Check Event Anomalies in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Check Missing or Risky PodDisruptionBudget Policies in Namepace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It clearly defines the 'What' (resource type - PodDisruptionBudget Policies) and the 'Where' (specific scope - Namespace). 
The documentation and tags provide additional context and clarity.", + "suggested_title": "Check Missing or Risky PodDisruptionBudget Policies in Namepace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/runbook.robot", + "task": "Check Resource Quota Utilization in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (resource quotas) and the 'Where' (specific namespace) variables within backticks and curly braces. The documentation and tags also provide clear context for the task.", + "suggested_title": "Check Resource Quota Utilization in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/sli.robot", + "task": "Get Event Count and Score", + "score": 3, + "reasoning": "The task title clearly states the action to be performed (Get Event Count) and specifies the purpose (and Score). The documentation provides clear explanation of what the task does. The tags also provide additional context. The imported user variables are relevant and will be used in the task. Overall, the title is very clear, human-readable, and specific. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Get Error Event Count within ${EVENT_AGE} and calculate Score" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/sli.robot", + "task": "Get Container Restarts and Score", + "score": 3, + "reasoning": "The title is clear in its purpose and provides a specific action - counting container restarts and determining if they're beyond a threshold. It is readable and understandable for a human. The use of imported user variables and tags also adds specificity to the task. 
The only improvement would be to specify the 'Where' variable, using the NAMESPACE variable as a suggestion. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Get Container Restarts and Score in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/sli.robot", + "task": "Get NotReady Pods", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly indicates the purpose of fetching unready pods. The 'What' (resource type) is 'Pods', and the suggested 'Where' variable could be 'NAMESPACE' from the imported variables. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Get NotReady Pods in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-namespace-healthcheck", + "file": "sli.robot", + "filepath": "codebundles/k8s-namespace-healthcheck/sli.robot", + "task": "Generate Namspace Score", + "score": 3, + "reasoning": "The task title is clear and specific, but it lacks a specific 'Where' variable. The 'NAMESPACE' variable can be used as the most relevant 'Where' variable. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Generate Namespace Score in `${NAMESPACE}`" + }, + { + "codebundle": "azure-loadbalancer-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-loadbalancer-triage/runbook.robot", + "task": "Check Activity Logs for Azure Load Balancer `${AZ_LB_NAME}`", + "score": 4, + "reasoning": "The title is clear, human-readable, and specific. It clearly states the task of checking activity logs for a specific Azure Load Balancer using a provided variable. 
The imported 'AZ_LB_NAME' variable provides the necessary 'Where' (specific scope) for the task.", + "suggested_title": "Check Activity Logs for Azure Load Balancer `${AZ_LB_NAME}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Fetch Vault CSI Driver Logs", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It includes the action to be performed ('Fetch'), the resource type ('Vault CSI Driver'), and the specific scope ('Logs'). The documentation and tags provide additional context and details. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Fetch Vault CSI Driver Logs in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Get Vault CSI Driver Warning Events", + "score": 3, + "reasoning": "The task title is clear in specifying the action to be performed (Get Vault CSI Driver Warning Events), it is human-readable, and it provides specific details about the type of events to be fetched. The title includes the 'What' (Vault CSI Driver Warning Events) but lacks a specific 'Where' variable. The most relevant imported variable 'NAMESPACE' can be used as a 'Where' variable in backticks & curly braces. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Get Vault CSI Driver Warning Events in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Check Vault CSI Driver Replicas", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action to be performed, the resource type (replicas), and the specific scope (vault CSI driver daemonset). 
The tags and imported user variables provide additional context. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": null + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Fetch Vault Logs", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action to be performed (Fetch Vault Logs), provides details on what will be fetched (last 100 lines of logs for all vault pod workloads), and specifies the scope (vault namespace). The imported variables 'NAMESPACE' and 'LABELS' are used for specifying the 'What' (namespace) and 'Where' (workloads with specific labels) respectively. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Fetch Vault Pod Workload Logs in Namespace `${NAMESPACE}` with Labels `${LABELS}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Get Related Vault Events", + "score": 3, + "reasoning": "The title is clear, human-readable, and specific. It clearly states the 'What' (vault) and 'Where' (namespace) using the imported variable 'NAMESPACE'. It also defines the purpose of fetching warning-type events related to the vault in the namespace. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Get Related Vault Events in Namespace `$${NAMESPACE}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Fetch Vault StatefulSet Manifest Details", + "score": 3, + "reasoning": "The task title is clear, specific, and human-readable. 
It clearly defines the action to be taken ('Fetch Vault StatefulSet Manifest Details') and provides a context for where the action will be performed ('NAMESPACE'). The documentation and tags provide further clarity on the purpose of the task. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Fetch Vault StatefulSet Manifest Details in `${NAMESPACE}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Fetch Vault DaemonSet Manifest Details", + "score": 3, + "reasoning": "The title is clear, human-readable, and specific. It clearly states the task of fetching details of the vault daemonset manifest for inspection. It includes the 'What' (manifest) and the 'Where' (daemonset) variables. The imported user variables are also well-defined and will provide necessary context for the task. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Fetch Vault DaemonSet Manifest Details in Kubernetes Cluster `${NAMESPACE}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Verify Vault Availability", + "score": 3, + "reasoning": "The title is clear and specific, indicating the task involves verifying the availability of a vault endpoint. It is also human-readable and includes relevant tags. The imported variables are not directly related to the 'Where' aspect, but 'NAMESPACE' and 'CONTEXT' can be used as the 'where' variable in Kubernetes environments. 
The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Verify Vault Availability in Namespace `${NAMESPACE}` and Context `${CONTEXT}`" + }, + { + "codebundle": "k8s-vault-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-vault-healthcheck/runbook.robot", + "task": "Check Vault StatefulSet Replicas", + "score": 3, + "reasoning": "The task title is clear and specific, providing a clear 'What' (Vault StatefulSet Replicas) and also mentions the check should be for highly available and healthy values. It also includes relevant tags and imported user variables. The specific scope is not mentioned in the title but could be substituted with the 'NAMESPACE' imported variable. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Check Vault StatefulSet Replicas in `NAMESPACE`" + }, + { + "codebundle": "k8s-fluxcd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-helm-health/runbook.robot", + "task": "List all available FluxCD Helmreleases in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action (List) and the resource type (FluxCD Helmreleases) in a specific scope (Namespace `${NAMESPACE}`). The documentation and tags provide additional context for clarity. The use of variable `${NAMESPACE}` provides specificity to the scope.", + "suggested_title": "List all available FluxCD Helmreleases in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-helm-health/runbook.robot", + "task": "Fetch Installed FluxCD Helmrelease Versions in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear and specific, indicating the exact task to be performed in the specified namespace. 
It provides details on what information needs to be fetched and where it needs to be fetched from using the NAMESPACE variable.", + "suggested_title": "Fetch Installed FluxCD Helmrelease Versions in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-helm-health/runbook.robot", + "task": "Fetch Mismatched FluxCD HelmRelease Version in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, specific, and human-readable. It provides a clear action to be taken (Fetch Mismatched FluxCD HelmRelease Version) and specifies the exact location for the action (in Namespace `${NAMESPACE}`). The documentation and tags provide additional context and specificity.", + "suggested_title": "Fetch Mismatched FluxCD HelmRelease Version in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-helm-health/runbook.robot", + "task": "Fetch FluxCD HelmRelease Error Messages in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear in its purpose, human-readable, and specific. It includes the 'What' (HelmRelease) and a clear 'Where' scope (namespace). The tags provide additional context for the task.", + "suggested_title": "Fetch FluxCD HelmRelease Error Messages in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-helm-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-helm-health/runbook.robot", + "task": "Check for Available Helm Chart Updates in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific. It provides the 'What' (Helm Chart Updates) and the 'Where' (Namespace). 
The documentation and tags further clarify the task's purpose and context.", + "suggested_title": "Check for Available Helm Chart Updates in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-flux", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-flux/runbook.robot", + "task": "Suspend the Flux Resource Reconciliation", + "score": 3, + "reasoning": "The title is clear, human readable, and specific. It provides the 'What' (Flux Resource Reconciliation) and the 'Where' (specific scope) by using the imported variable FLUX_RESOURCE_NAME in backticks & curly braces to suspend a specific flux resource for chaos purposes in a Kubernetes cluster. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Suspend the Flux Resource Reconciliation for ${FLUX_RESOURCE_NAME} in namespace ${FLUX_RESOURCE_NAMESPACE}" + }, + { + "codebundle": "k8s-chaos-flux", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-flux/runbook.robot", + "task": "Find Random FluxCD Workload as Chaos Target", + "score": 3, + "reasoning": "The task title is clear and specific about finding a random FluxCD workload as a chaos target. It provides human-readable instructions and mentions the condition under which it runs. The documentation also adds to the clarity of the task. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Select Random FluxCD Workload for Chaos Target in Namespace `${FLUX_RESOURCE_NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-flux", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-flux/runbook.robot", + "task": "Execute Chaos Command", + "score": 3, + "reasoning": "The title is clear, specific, and human-readable. It provides a clear 'What' (Chaos Command) and 'Where' (targeted resource) by using the imported variables, such as KUBERNETES_DISTRIBUTION_BINARY, TARGET_NAMESPACE, and TARGET_RESOURCE. 
The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Execute Chaos Command on ${TARGET_RESOURCE} in Namespace ${TARGET_NAMESPACE}" + }, + { + "codebundle": "k8s-chaos-flux", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-flux/runbook.robot", + "task": "Execute Additional Chaos Command", + "score": 3, + "reasoning": "The task title is clear and specific, but lacks some human readability. The documentation provides clear instructions on what to do. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Execute Additional Chaos Command on ${FLUX_RESOURCE_TYPE} '${FLUX_RESOURCE_NAME}' in namespace '${FLUX_RESOURCE_NAMESPACE}'" + }, + { + "codebundle": "k8s-chaos-flux", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-flux/runbook.robot", + "task": "Resume Flux Resource Reconciliation", + "score": 3, + "reasoning": "The task title is clear and specific, indicating the action to be performed ('Resume Flux reconciliation') and the desired resource ('desired resource'). The tags provide additional context. The task lacks a specific 'Where' variable, so the most relevant imported variable 'TARGET_NAMESPACE' has been suggested. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Resume Flux Resource Reconciliation in `${TARGET_NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-kustomization-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-kustomization-health/runbook.robot", + "task": "List all available Kustomization objects in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear and specific, providing a clear instruction to list all available FluxCD Kustomization objects in a specific namespace. 
The use of backticks & curly braces for the 'Where' variable ensures that the 'NAMESPACE' variable will be replaced at runtime, making it clear where the action should take place.", + "suggested_title": "List all available Kustomization objects in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-fluxcd-kustomization-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-fluxcd-kustomization-health/runbook.robot", + "task": "Get details for unready Kustomizations in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, specific, and human-readable. It clearly states the task to list all unready Kustomizations in a specific namespace. It also uses the imported variable for the namespace to ensure clarity and specificity.", + "suggested_title": "List Unready Kustomizations in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "aws-cloudwatch-overused-ec2", + "file": "runbook.robot", + "filepath": "codebundles/aws-cloudwatch-overused-ec2/runbook.robot", + "task": "Check For Overutilized Ec2 Instances", + "score": 3, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action to be performed, the resource type (EC2 instances), and the specific scope (utilization) in the AWS region specified by the 'AWS_DEFAULT_REGION' variable. The utilization threshold is also clearly mentioned. The task lacks a specific 'Where' variable; consider using `AWS_DEFAULT_REGION`.", + "suggested_title": null + }, + { + "codebundle": "k8s-chaos-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-namespace/runbook.robot", + "task": "Kill Random Pods In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the action to be performed (kill pods) and the specific scope (namespace). It also includes a brief explanation of the purpose. 
The use of imported user variables adds to the clarity and specificity.", + "suggested_title": "Kill Random Pods In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-namespace/runbook.robot", + "task": "OOMKill Pods In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific about the action (OOMKill Pods) and the scope (Namespace `${NAMESPACE}`). It is also human-readable and provides enough information for someone familiar with Kubernetes to understand the task.", + "suggested_title": "OOMKill Pods In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-namespace/runbook.robot", + "task": "Mangle Service Selector In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title is clear and specific, mentioning the action to mangle a service selector in a particular namespace. The use of backticks and curly braces for the `NAMESPACE` variable adds specificity.", + "suggested_title": "Mangle Service Selector In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-namespace/runbook.robot", + "task": "Mangle Service Port In Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The title clearly states the action (Mangle Service Port) and includes the specific scope (Namespace `${NAMESPACE}`). It is also human-readable and specific.", + "suggested_title": "Mangle Service Port In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-chaos-namespace", + "file": "runbook.robot", + "filepath": "codebundles/k8s-chaos-namespace/runbook.robot", + "task": "Fill Random Pod Tmp Directory In Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. 
It includes both the 'What' (pod) and 'Where' (namespace) variables, making it very detailed.", + "suggested_title": "Fill Random Pod Tmp Directory In Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-app-troubleshoot", + "file": "runbook.robot", + "filepath": "codebundles/k8s-app-troubleshoot/runbook.robot", + "task": "Get `${CONTAINER_NAME}` Application Logs", + "score": 5, + "reasoning": "The title is clear, human-readable, and specific. It includes the specific task of getting application logs from a container, the variable `CONTAINER_NAME` is clearly marked for substitution, and it provides context for where the logs are being collected from. It also utilizes relevant imported user variables for NAMESPACE and CONTEXT.", + "suggested_title": "Get `${CONTAINER_NAME}` Application Logs from Workload `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-app-troubleshoot", + "file": "runbook.robot", + "filepath": "codebundles/k8s-app-troubleshoot/runbook.robot", + "task": "Scan `${CONTAINER_NAME}` Application For Misconfigured Environment", + "score": 4, + "reasoning": "The title is clear and specific, mentioning the action 'Scan', the resource type 'Application' and the specific scope '`${CONTAINER_NAME}`'. It is also human-readable and uses relevant imported user variables.", + "suggested_title": "Scan `${CONTAINER_NAME}` Application For Misconfigured Environment" + }, + { + "codebundle": "k8s-app-troubleshoot", + "file": "runbook.robot", + "filepath": "codebundles/k8s-app-troubleshoot/runbook.robot", + "task": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. 
It provides a clear action ('Tail'), specifies the container name using the imported variable, and describes the purpose of the task in detail.", + "suggested_title": "Tail `${CONTAINER_NAME}` Application Logs For Stacktraces in Workload `${WORKLOAD_NAME}`" + }, + { + "codebundle": "k8s-app-troubleshoot", + "file": "sli.robot", + "filepath": "codebundles/k8s-app-troubleshoot/sli.robot", + "task": "Measure Application Exceptions", + "score": 3, + "reasoning": "The title is clear in its purpose of measuring application exceptions, it is human-readable, and provides specificity by mentioning the examination of recent logs. It could score higher if it included a specific 'Where' variable, such as 'NAMESPACE', which is the most relevant imported variable. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Measure Application Exceptions in `${NAMESPACE}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is specific, clear, and human-readable. It includes the 'What' (App Service) and 'Where' (Resource Group) variables in backticks & curly braces, which provides a high level of clarity and specificity.", + "suggested_title": "Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Check App Service `${APP_SERVICE_NAME}` Health Check Metrics In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the app service and resource group. 
It provides a clear instruction to check the health status of a specific appservice workload.", + "suggested_title": "Check App Service `${APP_SERVICE_NAME}` Health in Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear about the action to be performed, which is fetching app service utilization metrics. It is also human-readable and specific as it includes the resource group variable as the 'where' scope.", + "suggested_title": "Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action of fetching logs for a specific app service in a specific resource group.", + "suggested_title": "Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Check Configuration Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. 
It includes the 'What' (App Service) and the 'Where' (Resource Group) variables in backticks and curly braces, making it easy for users to understand and execute the task.", + "suggested_title": "Check Configuration Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It provides the 'What' (App Service) and the 'Where' (Resource Group) with placeholders for substitution. It also matches the documentation and tags provided.", + "suggested_title": "Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. It includes the 'What' (app service) and 'Where' (resource group) variables within backticks and curly braces. 
The documentation and tags provide additional context and the imported user variables are used in an organized manner.", + "suggested_title": "Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-appservice-triage/runbook.robot", + "task": "Check Logs for Errors in App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The title is clear and specific in terms of what needs to be checked (logs for errors) and where (in the specified app service and resource group). It is also human-readable.", + "suggested_title": "Check Logs for Errors in App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is specific, clear, and human-readable. It includes the 'What' (App Service) and 'Where' (Resource Group) variables in backticks & curly braces, which provides a high level of clarity and specificity.", + "suggested_title": "Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Check App Service `${APP_SERVICE_NAME}` Health Check Metrics In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. 
It includes the 'What' (appservice) and the 'Where' (resource group) variables in backticks and curly braces, ensuring clarity and specificity.", + "suggested_title": "Check App Service `${APP_SERVICE_NAME}` Health Check Metrics In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Check App Service `${APP_SERVICE_NAME}` Configuration Health In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It includes the 'What' (app service) and the 'Where' (resource group) variables in backticks and curly braces, and provides a clear indication of the expected outcome.", + "suggested_title": "Check App Service `${APP_SERVICE_NAME}` Configuration Health In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It provides the 'What' (App Service) and the 'Where' (Resource Group) with placeholders for substitution. It also matches the documentation and tags provided.", + "suggested_title": "Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, specific, and human-readable. It includes the 'What' (app service) and 'Where' (resource group) variables within backticks and curly braces. 
The documentation and tags provide additional context and the imported user variables are used in an organized manner.", + "suggested_title": "Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-appservice-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-appservice-triage/sli.robot", + "task": "Generate App Service Health Score", + "score": 3, + "reasoning": "The task title is clear and specific, providing a clear objective to generate an app service health score. The human readability is good, but the inclusion of backticks & curly braces for the specific scope variable would improve readability. The imported variables provide clarity on what metrics are being used for the health score. The task lacks a specific 'Where' variable; consider using `AZ_RESOURCE_GROUP`.", + "suggested_title": "Generate App Service Health Score for `${APP_SERVICE_NAME}` in resource group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "cmd-test", + "file": "runbook.robot", + "filepath": "codebundles/cmd-test/runbook.robot", + "task": "Run CLI Command", + "score": 3, + "reasoning": "The title is clear about the task, the documentation provides a clear description of what the task does, and the imported user variables provide context for where the CLI command will be executed. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Run CLI Command in `${NAMESPACE}` namespace" + }, + { + "codebundle": "cmd-test", + "file": "runbook.robot", + "filepath": "codebundles/cmd-test/runbook.robot", + "task": "Run Bash File", + "score": 3, + "reasoning": "The task title is clear and specific, as it clearly states the action to be performed (Run Bash File) and the purpose of verifying script passthrough. It also includes relevant tags. However, it could be improved by including the specific file or location where the bash file is located. 
The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Run Bash File in `${NAMESPACE}`/${CONTEXT}/path/to/script.sh" + }, + { + "codebundle": "cmd-test", + "file": "runbook.robot", + "filepath": "codebundles/cmd-test/runbook.robot", + "task": "Log Suggestion", + "score": 3, + "reasoning": "The task title is clear in its purpose to generate a next step suggestion, format it, and log it. It provides enough information to understand the task, and it uses specific terminology like 'next step suggestion' and 'log'. It could be more specific if it included a 'Where' variable, such as logging the suggestion in a specific context or namespace. The task lacks a specific 'Where' variable; consider using `NAMESPACE`.", + "suggested_title": "Generate Next Step Suggestion and Log in `NAMESPACE`" + }, + { + "codebundle": "k8s-argocd-application-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-application-health/runbook.robot", + "task": "Fetch ArgoCD Application Sync Status & Health for `${APPLICATION}`", + "score": 5, + "reasoning": "The task title is clear and specific, indicating the action to be performed (fetch ArgoCD Application Sync Status & Health) and the specific application for which the status will be fetched. The title also includes a relevant tag, providing clarity and specificity.", + "suggested_title": "" + }, + { + "codebundle": "k8s-argocd-application-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-application-health/runbook.robot", + "task": "Fetch ArgoCD Application Last Sync Operation Details for `${APPLICATION}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It specifies the action of fetching ArgoCD Application last sync operation details for a specific application, using the imported variable `${APPLICATION}`. 
The documentation and tags also provide clarity on the purpose of the task.", + "suggested_title": "Fetch ArgoCD Application Last Sync Operation Details for `${APPLICATION}`" + }, + { + "codebundle": "k8s-argocd-application-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-application-health/runbook.robot", + "task": "Fetch Unhealthy ArgoCD Application Resources for `${APPLICATION}`", + "score": 5, + "reasoning": "The title clearly states the task, specifies the resource type (ArgoCD Application Resources), and provides a placeholder for the 'Where' variable (${APPLICATION}). The documentation also explains the purpose of the task and the tags provide additional context. Overall, it is clear, human-readable, and specific.", + "suggested_title": "Fetch Unhealthy ArgoCD Application Resources for `${APPLICATION}`" + }, + { + "codebundle": "k8s-argocd-application-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-application-health/runbook.robot", + "task": "Scan For Errors in Pod Logs Related to ArgoCD Application `${APPLICATION}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It clearly states the action (Scan For Errors), the resource type (Pod Logs), and the specific scope (ArgoCD Application `${APPLICATION}`). The documentation and tags provide additional context and clarity.", + "suggested_title": "Scan For Errors in Pod Logs Related to ArgoCD Application `${APPLICATION}`" + }, + { + "codebundle": "k8s-argocd-application-health", + "file": "runbook.robot", + "filepath": "codebundles/k8s-argocd-application-health/runbook.robot", + "task": "Fully Describe ArgoCD Application `${APPLICATION}`", + "score": 4, + "reasoning": "The title is specific, clear, and readable. 
It uses the imported user variable for 'APPLICATION' and mentions ArgoCD and application description.", + "suggested_title": "Fully Describe ArgoCD Application `${APPLICATION}`" + }, + { + "codebundle": "test-issue", + "file": "runbook.robot", + "filepath": "codebundles/test-issue/runbook.robot", + "task": "Raise Full Issue", + "score": 3, + "reasoning": "The title is clear and specific, indicating that the task is to raise an issue with full content. It is also human-readable as it uses common language. The tags provide additional context. The task lacks a specific 'Where' variable; consider using `N/A`.", + "suggested_title": "Raise Full Issue on `${RESOURCE_TYPE}` in `${SCOPE}`" + }, + { + "codebundle": "azure-vmss-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-vmss-triage/runbook.robot", + "task": "Check Scale Set `${VMSCALESET}` Key Metrics In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It includes the 'What' (VM Scale Set) and the 'Where' (specific resource group) variables in backticks & curly braces. The tags also support the clarity and specificity of the task.", + "suggested_title": "Check Scale Set `${VMSCALESET}` Key Metrics In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-vmss-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-vmss-triage/runbook.robot", + "task": "Fetch VM Scale Set `${VMSCALESET}` Config In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The title is clear and specific, indicating the task is to fetch the VM Scale Set config in a specific Azure Resource Group. 
The use of backticks and curly braces for the variables makes it clear where the substitution will occur.", + "suggested_title": "Fetch VM Scale Set `${VMSCALESET}` Config In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-vmss-triage", + "file": "runbook.robot", + "filepath": "codebundles/azure-vmss-triage/runbook.robot", + "task": "Fetch Activities for VM Scale Set `${VMSCALESET}` In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (VM Scale Set) and the 'Where' (Resource Group) variables in backticks & curly braces. The documentation and tags provide additional clarity and specificity.", + "suggested_title": "Fetch Activities for VM Scale Set `${VMSCALESET}` In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "azure-vmss-triage", + "file": "sli.robot", + "filepath": "codebundles/azure-vmss-triage/sli.robot", + "task": "Check Scale Set `${VMSCALESET}` Key Metrics In Resource Group `${AZ_RESOURCE_GROUP}`", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific. It provides the 'What' (VM Scale Set) and 'Where' (Resource Group) variables. It also includes relevant tags for easy categorization.", + "suggested_title": "Check Scale Set `${VMSCALESET}` Key Metrics In Resource Group `${AZ_RESOURCE_GROUP}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Check Deployment Log For Issues with `${DEPLOYMENT_NAME}`", + "score": 5, + "reasoning": "The title is very clear, human-readable, and specific. It includes both the 'What' (deployment) and the 'Where' (specific deployment name) variables. 
The use of backticks and curly braces around the 'Where' variable ensures clarity and specificity.", + "suggested_title": "Check Deployment Log For Issues with `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Fetch Deployments Logs for `${DEPLOYMENT_NAME}` and Add to Report", + "score": 4, + "reasoning": "The task title is clear and specific about fetching deployment logs, and the use of the `${DEPLOYMENT_NAME}` variable adds human readability and specificity. However, it could be improved by including the `NAMESPACE` variable as the 'Where' scope.", + "suggested_title": "Fetch Deployments Logs for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` and Add to Report" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}`", + "score": 5, + "reasoning": "The task title is very clear and specific. It includes the action 'Check', the resource type 'Liveness Probe Configuration', and the specific scope 'for Deployment ${DEPLOYMENT_NAME}'. The documentation and tags provide further context and the imported user variables are used appropriately.", + "suggested_title": "Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}`", + "score": 4, + "reasoning": "The title is clear and specific, indicating the task is to check the readiness probe configuration for a specific deployment. The use of backticks and curly braces shows that the title will be easily substitutable with the DEPLOYMENT_NAME variable. 
However, it could be improved by including the NAMESPACE as the 'Where' variable.", + "suggested_title": "Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Inspect Container Restarts for Deployment `${DEPLOYMENT_NAME}` Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing a clear action 'Inspect Container Restarts' and specifying the 'Where' with the placeholders ${DEPLOYMENT_NAME} and ${NAMESPACE}. It is human-readable and provides a specific action to be taken.", + "suggested_title": "Inspect Container Restarts for Deployment `${DEPLOYMENT_NAME}` Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (deployment workload) and the 'Where' (namespace), and it provides a clear action to be performed.", + "suggested_title": "Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Get Deployment Workload Details For `${DEPLOYMENT_NAME}` and Add to Report", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to fetch deployment workload details for a specific deployment name. 
It provides clarity on what needs to be done and where using the DEPLOYMENT_NAME variable.", + "suggested_title": "Fetch Deployment Workload Details For `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Inspect Deployment Replicas for `${DEPLOYMENT_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the resource type (deployment) and indicating the need to inspect its replicas. The documentation provides clarity on what the task entails, making it human-readable. The use of imported user variables like DEPLOYMENT_NAME ensures specificity.", + "suggested_title": "Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}`", + "score": 5, + "reasoning": "The title is very clear about the task, easy to understand, and specific. It includes the task to check for anomalies in deployment events for a specific deployment name.", + "suggested_title": "Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-deployment-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-healthcheck/runbook.robot", + "task": "Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}`", + "score": 4, + "reasoning": "The task title is clear and specific, providing the action to be performed (Check ReplicaSet Health) and the specific scope (for Deployment `${DEPLOYMENT_NAME}`). 
The documentation and tags provide further clarity on the purpose of the task.", + "suggested_title": "Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}`" + }, + { + "codebundle": "k8s-jenkins-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-jenkins-healthcheck/runbook.robot", + "task": "Query The Jenkins Kubernetes Workload HTTP Endpoint", + "score": 3, + "reasoning": "The title is clear, specific, and human-readable. It clearly defines the task of querying the Jenkins Kubernetes workload HTTP endpoint to check if the pod is up and healthy. It provides the necessary context for the task and includes specific tags for easy identification. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Query The Jenkins Kubernetes Workload HTTP Endpoint in Kubernetes StatefulSet `${STATEFULSET_NAME}`" + }, + { + "codebundle": "k8s-jenkins-healthcheck", + "file": "runbook.robot", + "filepath": "codebundles/k8s-jenkins-healthcheck/runbook.robot", + "task": "Query For Stuck Jenkins Jobs", + "score": 3, + "reasoning": "The title is clear and specific, providing a detailed task of querying for stuck Jenkins jobs within the kubernetes statefulset workload. It is human-readable and includes relevant tags for context. The imported variables are used for specificity. The task lacks a specific 'Where' variable; consider using `KUBERNETES_DISTRIBUTION_BINARY`.", + "suggested_title": "Query For Stuck Jenkins Jobs in Kubernetes Statefulset Workload `${STATEFULSET_NAME}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-bucket-health/runbook.robot", + "task": "Fetch GCP Bucket Storage Utilization for `${PROJECT_IDS}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action of fetching GCP Bucket Storage Utilization. It provides a specific resource type (GCP buckets) and scope (`${PROJECT_IDS}`).
The use of backticks and curly braces for the 'Where' variable is also present. The documentation and tags provide additional context and clarity.", + "suggested_title": "Fetch GCP Bucket Storage Utilization for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-bucket-health/runbook.robot", + "task": "Add GCP Bucket Storage Configuration for `${PROJECT_IDS}` to Report", + "score": 4, + "reasoning": "The task title is clear in its objective, readable to humans, and specific in what it aims to achieve. It includes the 'What' (GCP Bucket Storage Configuration) and the 'Where' (PROJECT_IDS). The documentation further clarifies the task's purpose and the tags provide additional context.", + "suggested_title": "Add GCP Bucket Storage Configuration for `${PROJECT_IDS}` to Report" + }, + { + "codebundle": "gcp-bucket-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-bucket-health/runbook.robot", + "task": "Check GCP Bucket Security Configuration for `${PROJECT_IDS}`", + "score": 5, + "reasoning": "The title is clear, human-readable, and specific. It includes the 'What' (GCP Bucket Security Configuration) and the 'Where' (project IDs) in backticks and curly braces.", + "suggested_title": "Check GCP Bucket Security Configuration for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-bucket-health/runbook.robot", + "task": "Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It clearly states the task to fetch GCP Bucket Storage Operations Rate for a specific PROJECT_IDS. 
It also mentions generating issues if the rate is above a specified threshold.", + "suggested_title": "Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "sli.robot", + "filepath": "codebundles/gcp-bucket-health/sli.robot", + "task": "Fetch GCP Bucket Storage Utilization for `${PROJECT_IDS}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It includes the action to be performed (Fetch), the resource type (GCP Bucket Storage), and the specific scope (for `${PROJECT_IDS}`). The documentation and tags provide additional clarity and context.", + "suggested_title": "Fetch GCP Bucket Storage Utilization for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "sli.robot", + "filepath": "codebundles/gcp-bucket-health/sli.robot", + "task": "Check GCP Bucket Security Configuration for `${PROJECT_IDS}`", + "score": 4, + "reasoning": "The title is clear about the task of checking GCP Bucket Security Configuration and includes a specific 'What' (resource type) and a placeholder for the 'Where' (specific scope) variable. The documentation also provides clear details on what will be checked, making it human-readable and specific.", + "suggested_title": "Check GCP Bucket Security Configuration for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "sli.robot", + "filepath": "codebundles/gcp-bucket-health/sli.robot", + "task": "Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the need to fetch GCP Bucket Storage Operations Rate for a specified project. 
The documentation and tags provide additional context and clarity.", + "suggested_title": "Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`" + }, + { + "codebundle": "gcp-bucket-health", + "file": "sli.robot", + "filepath": "codebundles/gcp-bucket-health/sli.robot", + "task": "Generate Bucket Score", + "score": 3, + "reasoning": "The task title is clear and specific, but lacks human readability and documentation. It also does not specify the 'Where' variable.", + "suggested_title": "Generate Bucket Score in Project `${PROJECT_IDS}`" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Restart Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It includes the 'What' (deployment) and the 'Where' (namespace) variables in backticks and curly braces, making it easily understandable and ready for variable substitution at runtime.", + "suggested_title": "Restart Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Force Delete Pods in Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the deployment name and namespace.
It is also human-readable and includes the necessary information for someone to understand what needs to be done.", + "suggested_title": "Force Delete Pods in Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Rollback Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` to Previous Version", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It provides a clear instruction to rollback a deployment to a previous version in a specific namespace. The tags and imported user variables are relevant and align with the task.", + "suggested_title": "" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Scale Down Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear, human-readable, and specific. It includes both the 'What' (deployment) and the 'Where' (namespace) variables. The documentation and tags provide additional context and clarity.", + "suggested_title": "Halt Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Scale Up Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` by ${SCALE_UP_FACTOR}x", + "score": 5, + "reasoning": "The task title is very clear and specific, providing the exact action ('Scale Up Deployment'), the specific deployment and namespace ('${DEPLOYMENT_NAME}' in Namespace '${NAMESPACE}'), and the scale up factor. 
The documentation and tags also support clarity and specificity.", + "suggested_title": "Scale Up Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` by ${SCALE_UP_FACTOR}x" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Clean Up Stale ReplicaSets for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (ReplicaSets for Deployment) and the 'Where' (Namespace). The documentation and tags provide additional clarity on what the task entails.", + "suggested_title": "" + }, + { + "codebundle": "k8s-deployment-ops", + "file": "runbook.robot", + "filepath": "codebundles/k8s-deployment-ops/runbook.robot", + "task": "Scale Down Stale ReplicaSets for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes both the 'What' (ReplicaSets for Deployment) and the 'Where' (Namespace). The documentation and tags provide additional context, and the imported user variables are used to specify the 'What' and 'Where'.", + "suggested_title": "Scale Down Stale ReplicaSets for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "gcp-cloud-function-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-cloud-function-health/runbook.robot", + "task": "List Unhealhy Cloud Functions in GCP Project `${GCP_PROJECT_ID}`", + "score": 4, + "reasoning": "The task title is clear, human-readable, and specific. 
It provides the 'What' (Cloud Functions) and the 'Where' (${GCP_PROJECT_ID}) variables in backticks and curly braces, and the documentation gives a clear explanation of the task.", + "suggested_title": "List Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "gcp-cloud-function-health", + "file": "runbook.robot", + "filepath": "codebundles/gcp-cloud-function-health/runbook.robot", + "task": "Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}`", + "score": 4, + "reasoning": "The task title is clear and specific, mentioning the GCP Project ID and the type of resource (Cloud Functions). It is also human-readable. The only improvement would be to specify a particular GCP region or environment for better specificity.", + "suggested_title": "Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` in `us-central1` Region" + }, + { + "codebundle": "gcp-cloud-function-health", + "file": "sli.robot", + "filepath": "codebundles/gcp-cloud-function-health/sli.robot", + "task": "Count unhealthy GCP Cloud Functions in GCP Project `${GCP_PROJECT_ID}`", + "score": 5, + "reasoning": "The title is clear, specific, and human-readable. It includes the task to count unhealthy GCP Cloud Functions, specifies the GCP Project using the imported variable, and provides a clear definition of what 'unhealthy' means in this context.", + "suggested_title": "Count unhealthy GCP Cloud Functions in GCP Project `${GCP_PROJECT_ID}`" + }, + { + "codebundle": "k8s-gitops-gh-remediate", + "file": "runbook.robot", + "filepath": "codebundles/k8s-gitops-gh-remediate/runbook.robot", + "task": "Remediate Readiness and Liveness Probe GitOps Manifests in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is very clear and specific, indicating the action to fix misconfigured probes in a specific namespace within a GitOps repository. 
It includes placeholders for variables, making it easily interpretable when executed.", + "suggested_title": "Remediate Readiness and Liveness Probe GitOps Manifests in Namespace `${NAMESPACE}`" + }, + { + "codebundle": "k8s-gitops-gh-remediate", + "file": "runbook.robot", + "filepath": "codebundles/k8s-gitops-gh-remediate/runbook.robot", + "task": "Increase ResourceQuota for Namespace `${NAMESPACE}`", + "score": 4, + "reasoning": "The task title is clear and specific, indicating the action to be taken ('Increase ResourceQuota') and the specific scope for the action ('Namespace ${NAMESPACE}'). The tags also provide relevant context. The title could be more human-readable by adding a verb and specifying the type of resourcequota.", + "suggested_title": "Increase ResourceQuota Limit for Namespace `${NAMESPACE}` in GitHub GitOps Repository" + }, + { + "codebundle": "k8s-gitops-gh-remediate", + "file": "runbook.robot", + "filepath": "codebundles/k8s-gitops-gh-remediate/runbook.robot", + "task": "Adjust Pod Resources to Match VPA Recommendation in `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title is clear, human-readable, and specific. It includes the 'What' (Pod Resources) and 'Where' (specific namespace) variables. The documentation and tags also provide additional context and clarity.", + "suggested_title": null + }, + { + "codebundle": "k8s-gitops-gh-remediate", + "file": "runbook.robot", + "filepath": "codebundles/k8s-gitops-gh-remediate/runbook.robot", + "task": "Expand Persistent Volume Claims in Namespace `${NAMESPACE}`", + "score": 5, + "reasoning": "The task title clearly states the specific action of expanding Persistent Volume Claims in a specific namespace. It is human-readable and provides clear direction for the intended action. 
The tags also provide additional context for the task.", + "suggested_title": "" + } + ] +} \ No newline at end of file From f59a1e934fa2c63526a66d7a5da029cbd71b7307 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Sun, 2 Mar 2025 21:33:46 -0500 Subject: [PATCH 2/2] updated task and tags --- codebundles/aws-eks-health/runbook.robot | 6 +- codebundles/aws-eks-node-reboot/runbook.robot | 2 +- .../runbook.robot | 2 +- codebundles/aws-lambda-health/runbook.robot | 8 +- codebundles/aws-lambda-health/sli.robot | 2 +- codebundles/azure-aks-triage/runbook.robot | 12 +- codebundles/azure-aks-triage/sli.robot | 2 +- .../azure-appgateway-health/runbook.robot | 55 +------- codebundles/azure-appgateway-health/sli.robot | 8 +- .../azure-appservice-triage/runbook.robot | 18 +-- codebundles/azure-appservice-triage/sli.robot | 2 +- codebundles/cli-test/meta.yaml | 1 - codebundles/cli-test/runbook.robot | 124 ------------------ codebundles/cmd-test/getdeploys.sh | 4 - codebundles/cmd-test/meta.yaml | 1 - codebundles/cmd-test/runbook.robot | 107 --------------- .../runbook.robot | 12 +- codebundles/curl-http-ok/runbook.robot | 2 +- .../gcloud-log-inspection/runbook.robot | 2 +- codebundles/gcloud-node-preempt/runbook.robot | 2 +- codebundles/gcp-bucket-health/sli.robot | 2 +- .../gcp-cloud-function-health/runbook.robot | 6 +- .../runbook.robot | 4 +- .../gh-actions-artifact-analysis/sli.robot | 2 +- .../k8s-app-troubleshoot/runbook.robot | 3 +- .../k8s-artifactory-health/runbook.robot | 2 +- codebundles/k8s-chaos-flux/runbook.robot | 14 +- codebundles/k8s-chaos-workload/runbook.robot | 10 +- .../k8s-cluster-node-health/runbook.robot | 2 +- .../k8s-daemonset-healthcheck/runbook.robot | 5 +- .../k8s-deployment-healthcheck/runbook.robot | 23 ++-- codebundles/k8s-deployment-ops/runbook.robot | 9 +- .../k8s-flux-suspend-namespace/runbook.robot | 10 +- .../runbook.robot | 8 +- .../k8s-fluxcd-reconcile/runbook.robot | 4 +- .../k8s-gitops-gh-remediate/runbook.robot | 8 +- 
.../k8s-ingress-gce-healthcheck/runbook.robot | 20 +-- .../k8s-jenkins-healthcheck/runbook.robot | 6 +- codebundles/k8s-kubectl-cmd/README.md | 15 --- codebundles/k8s-kubectl-cmd/meta.yaml | 1 - codebundles/k8s-kubectl-cmd/runbook.robot | 40 ------ codebundles/k8s-kubectl-cmd/sli.robot | 36 ----- .../k8s-labeledpods-healthcheck/sli.robot | 2 +- .../k8s-loki-healthcheck/runbook.robot | 2 + .../k8s-namespace-healthcheck/sli.robot | 4 +- codebundles/k8s-otelcollector/runbook.robot | 6 +- .../k8s-podresources-health/runbook.robot | 7 +- .../k8s-prometheus-healthcheck/runbook.robot | 5 + .../k8s-redis-healthcheck/runbook.robot | 4 +- .../k8s-restart-resource/runbook.robot | 6 +- .../k8s-statefulset-healthcheck/runbook.robot | 11 +- .../k8s-tail-logs-dynamic/runbook.robot | 3 +- .../k8s-vault-healthcheck/runbook.robot | 18 +-- .../runbook.robot | 2 +- codebundles/test-issue/meta.yaml | 1 - codebundles/test-issue/runbook.robot | 31 ----- 56 files changed, 164 insertions(+), 540 deletions(-) delete mode 100644 codebundles/cli-test/meta.yaml delete mode 100644 codebundles/cli-test/runbook.robot delete mode 100755 codebundles/cmd-test/getdeploys.sh delete mode 100644 codebundles/cmd-test/meta.yaml delete mode 100644 codebundles/cmd-test/runbook.robot delete mode 100644 codebundles/k8s-kubectl-cmd/README.md delete mode 100644 codebundles/k8s-kubectl-cmd/meta.yaml delete mode 100644 codebundles/k8s-kubectl-cmd/runbook.robot delete mode 100644 codebundles/k8s-kubectl-cmd/sli.robot delete mode 100644 codebundles/test-issue/meta.yaml delete mode 100644 codebundles/test-issue/runbook.robot diff --git a/codebundles/aws-eks-health/runbook.robot b/codebundles/aws-eks-health/runbook.robot index d27c8a65f..250163f06 100644 --- a/codebundles/aws-eks-health/runbook.robot +++ b/codebundles/aws-eks-health/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Check EKS Fargate Cluster Health Status in AWS Region `${AWS_REGION}` [Documentation] This script checks the 
health status of an Amazon EKS Fargate cluster. - [Tags] EKS Fargate Cluster Health AWS Kubernetes Pods Nodes + [Tags] EKS Fargate Cluster Health AWS Kubernetes Pods Nodes access:read-only ${process}= RW.CLI.Run Bash File check_eks_fargate_cluster_health_status.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} @@ -37,7 +37,7 @@ Check EKS Fargate Cluster Health Status in AWS Region `${AWS_REGION}` Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}` [Documentation] This script checks the health status of an Amazon EKS cluster. - [Tags] EKS Cluster Health AWS Kubernetes Pods Nodes + [Tags] EKS Cluster Health AWS Kubernetes Pods Nodes access:read-only ${process}= RW.CLI.Run Bash File check_eks_cluster_health.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} @@ -56,7 +56,7 @@ Check Amazon EKS Cluster Health Status in AWS Region `${AWS_REGION}` Monitor EKS Cluster Health in AWS Region `${AWS_REGION}` [Documentation] This bash script is designed to monitor the health and status of an Amazon EKS cluster. - [Tags] AWS EKS Fargate Bash Script Node Health + [Tags] AWS EKS Fargate Bash Script Node Health access:read-only ${process}= RW.CLI.Run Bash File list_eks_fargate_metrics.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} diff --git a/codebundles/aws-eks-node-reboot/runbook.robot b/codebundles/aws-eks-node-reboot/runbook.robot index b9c788b05..cf4ce55f0 100644 --- a/codebundles/aws-eks-node-reboot/runbook.robot +++ b/codebundles/aws-eks-node-reboot/runbook.robot @@ -13,7 +13,7 @@ Suite Setup Suite Initialization *** Tasks *** Check EKS Nodegroup Status in `${EKS_CLUSTER_NAME}` [Documentation] Performs a check on a given cluster's nodegroup, raising an issue if the status of the nodegroup is not healthy. - [Tags] aws eks node group status + [Tags] aws eks node group status access:read-only ${node_state}= RW.CLI.Run Cli ... 
cmd=${AWS_ASSUME_ROLE_CMD} aws eks describe-nodegroup --cluster-name ${EKS_CLUSTER_NAME} --nodegroup-name ${EKS_NODEGROUP} --output json ... target_service=${AWS_SERVICE} diff --git a/codebundles/aws-elasticache-redis-health/runbook.robot b/codebundles/aws-elasticache-redis-health/runbook.robot index 698df36ae..2c27bf19d 100644 --- a/codebundles/aws-elasticache-redis-health/runbook.robot +++ b/codebundles/aws-elasticache-redis-health/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Scan AWS Elasticache Redis Status in AWS Region `${AWS_REGION}` [Documentation] Checks the high level metrics and status of the elasticache redis instances in the region. - [Tags] AWS Elasticache configuration endpoint configuration + [Tags] AWS Elasticache configuration endpoint configuration access:read-only ${process}= RW.CLI.Run Bash File analyze_aws_elasticache_redis_metrics.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} diff --git a/codebundles/aws-lambda-health/runbook.robot b/codebundles/aws-lambda-health/runbook.robot index b8e002d6b..a11111180 100644 --- a/codebundles/aws-lambda-health/runbook.robot +++ b/codebundles/aws-lambda-health/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** List Lambda Versions and Runtimes in AWS Region `${AWS_REGION}` [Documentation] This script is designed to list all the versions and runtimes of a specified AWS Lambda function. - [Tags] AWS Lambda Versions Runtimes + [Tags] AWS Lambda Versions Runtimes access:read-only ${process}= RW.CLI.Run Bash File list_lambda_runtimes.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} @@ -26,9 +26,9 @@ List Lambda Versions and Runtimes in AWS Region `${AWS_REGION}` ... 
secret__AWS_ROLE_ARN=${AWS_ROLE_ARN} RW.Core.Add Pre To Report ${process.stdout} -Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}` +Analyze AWS Lambda Invocation Errors in Region `${AWS_REGION}` [Documentation] This bash script is designed to analyze AWS Lambda Invocation Errors for a specified function within a specified region. - [Tags] AWS Lambda Error Analysis Invocation Errors CloudWatch Logs + [Tags] AWS Lambda Error Analysis Invocation Errors CloudWatch Logs access:read-only ${process}= RW.CLI.Run Bash File analyze_lambda_invocation_errors.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} @@ -47,7 +47,7 @@ Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${A Monitor AWS Lambda Performance Metrics in AWS Region `${AWS_REGION}` [Documentation] This script is a bash utility for AWS Lambda functions the lists their notable metrics. - [Tags] AWS Lambda CloudWatch Logs Metrics + [Tags] AWS Lambda CloudWatch Logs Metrics access:read-only ${process}= RW.CLI.Run Bash File monitor_aws_lambda_performance_metrics.sh ... env=${env} ... secret__AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} diff --git a/codebundles/aws-lambda-health/sli.robot b/codebundles/aws-lambda-health/sli.robot index e73f16831..7b60e66b6 100644 --- a/codebundles/aws-lambda-health/sli.robot +++ b/codebundles/aws-lambda-health/sli.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Analyze AWS Lambda Invocation Errors for Function `${AWS_REGION}` in Region `${AWS_REGION}` +Analyze AWS Lambda Invocation Errors in Region `${AWS_REGION}` [Documentation] This bash script is designed to analyze AWS Lambda Invocation Errors for a specified function within a specified region. 
[Tags] AWS Lambda Error Analysis Invocation Errors CloudWatch Logs ${process}= RW.CLI.Run Bash File analyze_lambda_invocation_errors.sh diff --git a/codebundles/azure-aks-triage/runbook.robot b/codebundles/azure-aks-triage/runbook.robot index cbb63a913..48f245c77 100644 --- a/codebundles/azure-aks-triage/runbook.robot +++ b/codebundles/azure-aks-triage/runbook.robot @@ -13,9 +13,9 @@ Suite Setup Suite Initialization *** Tasks *** -Check for Overutilization and Networking Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` +Check for Resource Health Issues Affecting AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch a list of issues that might affect the AKS cluster - [Tags] aks config + [Tags] aks config access:read-only ${resource_health}= RW.CLI.Run Bash File ... bash_file=aks_resource_health.sh ... env=${env} @@ -55,7 +55,7 @@ Check for Overutilization and Networking Issues Affecting AKS Cluster `${AKS_CLU Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the config of the AKS cluster in azure - [Tags] AKS config + [Tags] AKS config access:read-only ${config}= RW.CLI.Run Bash File ... bash_file=aks_cluster_health.sh ... env=${env} @@ -82,9 +82,9 @@ Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${ ... details=${item["details"]} END END -Check Network Configuration of AKS Cluster `${{AKS_CLUSTER}}` In Resource Group `${{AZ_RESOURCE_GROUP}}` +Check Network Configuration of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the network configuration, generating resource URLs and basic recommendations - [Tags] AKS config network route firewall + [Tags] AKS config network route firewall access:read-only ${network}= RW.CLI.Run Bash File ... bash_file=aks_network.sh ... 
env=${env} @@ -95,7 +95,7 @@ Check Network Configuration of AKS Cluster `${{AKS_CLUSTER}}` In Resource Group Fetch Activities for AKS Cluster `${AKS_CLUSTER}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Gets the activities for the AKS cluster set and checks for errors - [Tags] AKS activities monitor events errors + [Tags] AKS activities monitor events errors access:read-only ${activites}= RW.CLI.Run Bash File ... bash_file=aks_activities.sh ... env=${env} diff --git a/codebundles/azure-aks-triage/sli.robot b/codebundles/azure-aks-triage/sli.robot index 36e35b8e0..85bbf1f91 100644 --- a/codebundles/azure-aks-triage/sli.robot +++ b/codebundles/azure-aks-triage/sli.robot @@ -77,7 +77,7 @@ Check Configuration Health of AKS Cluster `${AKS_CLUSTER}` In Resource Group `${ ${aks_config_score}= Evaluate 1 if len(@{issue_list["issues"]}) == 0 else 0 Set Global Variable ${aks_config_score} -Calculate AKS Cluster Health Score for AKS Cluster `${AKS_CLUSTER}` in Azure Resource Group `${AZ_RESOURCE_GROUP}` +Generate AKS Cluster Health Score ${aks_cluster_health_score}= Evaluate (${aks_resource_score} + ${aks_activities_score} + ${aks_config_score}) / 3 ${health_score}= Convert to Number ${aks_cluster_health_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/azure-appgateway-health/runbook.robot b/codebundles/azure-appgateway-health/runbook.robot index 3954b3114..3f044b172 100644 --- a/codebundles/azure-appgateway-health/runbook.robot +++ b/codebundles/azure-appgateway-health/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch a list of issues that might affect the application gateway cluster - [Tags] aks config + [Tags] appgateway resourcehealth access:read-only ${resource_health}= RW.CLI.Run Bash File ... bash_file=app_gateway_resource_health.sh ... 
env=${env} @@ -51,9 +51,9 @@ Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NA ... details=${issue_list} ... next_steps=Please escalate to the Azure service owner to enable provider Microsoft.ResourceHealth. END -Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` in Subscription `${AZURE_RESOURCE_SUBSCRIPTION_ID}` +Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the details and health of the application gateway configuration - [Tags] appservice logs tail + [Tags] appservice logs tail access:read-only ${config_health}= RW.CLI.Run Bash File ... bash_file=app_gateway_config_health.sh ... env=${env} @@ -82,7 +82,7 @@ Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resou Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the health of the application gateway backend pool members - [Tags] appservice logs tail + [Tags] appservice logs tail access:read-only ${config_health}= RW.CLI.Run Bash File ... bash_file=app_gateway_backend_health.sh ... env=${env} @@ -108,54 +108,7 @@ Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resou ... details=${item["details"]} END END -# Check Application Gateway `${APP_GATEWAY_NAME}` Health Status In Resource Group `${AZ_RESOURCE_GROUP}` -# [Documentation] Checks the health status of a APP_GATEWAY_NAME workload. -# [Tags] -# ${process}= RW.CLI.Run Bash File -# ... bash_file=APP_GATEWAY_NAME_health.sh -# ... env=${env} -# ... timeout_seconds=180 -# ... include_in_history=false -# IF ${process.returncode} > 0 -# RW.Core.Add Issue title=Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` Failing Health Check -# ... severity=2 -# ... 
next_steps=Tail the logs of the Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}`\nReview resource usage metrics of Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}` -# ... expected=Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}` should not be failing its health check -# ... actual=Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}` is failing its health check -# ... reproduce_hint=Run APP_GATEWAY_NAME_health.sh -# ... details=${process.stdout} -# END -# RW.Core.Add Pre To Report ${process.stdout} -# Check AppService `${APP_GATEWAY_NAME}` Key Metrics In Resource Group `${AZ_RESOURCE_GROUP}` -# [Documentation] Reviews key metrics for the application gateway and generates a report -# [Tags] -# ${process}= RW.CLI.Run Bash File -# ... bash_file=APP_GATEWAY_NAME_metrics.sh -# ... env=${env} -# ... timeout_seconds=180 -# ... include_in_history=false -# ${next_steps}= RW.CLI.Run Cli cmd=echo -e "${process.stdout}" | grep "Next Steps" -A 20 | tail -n +2 -# IF ${process.returncode} > 0 -# RW.Core.Add Issue title=Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` Failed Metric Check -# ... severity=2 -# ... next_steps=${next_steps.stdout} -# ... expected=Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}` has no unusual metrics -# ... actual=Application Gateway `${APP_GATEWAY_NAME}` in resource group `${AZ_RESOURCE_GROUP}` metric check did not pass -# ... reproduce_hint=Run APP_GATEWAY_NAME_metrics.sh -# ... details=${process.stdout} -# END -# RW.Core.Add Pre To Report ${process.stdout} - -# Fetch Application Gateway `${APP_GATEWAY_NAME}` Config In Resource Group `${AZ_RESOURCE_GROUP}` -# [Documentation] Fetch logs of APP_GATEWAY_NAME workload -# [Tags] APP_GATEWAY_NAME logs tail -# ${process}= RW.CLI.Run Bash File -# ... bash_file=APP_GATEWAY_NAME_config.sh -# ... env=${env} -# ... 
timeout_seconds=180 -# ... include_in_history=false -# RW.Core.Add Pre To Report ${process.stdout} *** Keywords *** Suite Initialization diff --git a/codebundles/azure-appgateway-health/sli.robot b/codebundles/azure-appgateway-health/sli.robot index e07c701c0..c32c0fb11 100644 --- a/codebundles/azure-appgateway-health/sli.robot +++ b/codebundles/azure-appgateway-health/sli.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch a list of issues that might affect the Application Gateway as reported from Azure. - [Tags] aks resource health service azure + [Tags] appgateway resource health service azure access:read-only ${resource_health}= RW.CLI.Run Bash File ... bash_file=app_gateway_resource_health.sh ... env=${env} @@ -36,9 +36,9 @@ Check for Resource Health Issues Affecting Application Gateway `${APP_GATEWAY_NA END Set Global Variable ${appgw_resource_score} -Fetch AKS Cluster Config in Resource Group `${AZ_RESOURCE_GROUP}` +Check Configuration Health of Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the config of the AKS cluster in azure - [Tags] AKS config + [Tags] appgateway config access:read-only ${config}= RW.CLI.Run Bash File ... bash_file=app_gateway_config_health.sh ... env=${env} @@ -57,7 +57,7 @@ Fetch AKS Cluster Config in Resource Group `${AZ_RESOURCE_GROUP}` Check Backend Pool Health for Application Gateway `${APP_GATEWAY_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the health of the application gateway backend pool members - [Tags] appservice logs tail + [Tags] appservice logs tail access:read-only ${config_health}= RW.CLI.Run Bash File ... bash_file=app_gateway_backend_health.sh ... 
env=${env} diff --git a/codebundles/azure-appservice-triage/runbook.robot b/codebundles/azure-appservice-triage/runbook.robot index 56ee53544..6f8d49ff5 100644 --- a/codebundles/azure-appservice-triage/runbook.robot +++ b/codebundles/azure-appservice-triage/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch a list of issues that might affect the APP Service as reported from Azure. - [Tags] aks resource health service azure + [Tags] aks resource health service azure access:read-only ${resource_health}= RW.CLI.Run Bash File ... bash_file=appservice_resource_health.sh ... env=${env} @@ -55,7 +55,7 @@ Check for Resource Health Issues Affecting App Service `${APP_SERVICE_NAME}` In Check App Service `${APP_SERVICE_NAME}` Health in Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Checks the health status of a appservice workload. - [Tags] + [Tags] access:read-only appservice health ${health_check_metric}= RW.CLI.Run Bash File ... bash_file=appservice_health_metric.sh ... env=${env} @@ -99,7 +99,7 @@ Check App Service `${APP_SERVICE_NAME}` Health in Resource Group `${AZ_RESOURCE_ Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Reviews key metrics for the app service and generates a report - [Tags] + [Tags] access:read-only appservice utilization ${metric_health}= RW.CLI.Run Bash File ... bash_file=appservice_metric_health.sh ... env=${env} @@ -135,7 +135,7 @@ Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `$ Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch logs of appservice workload - [Tags] appservice logs tail + [Tags] appservice logs tail access:read-only ${logs}= RW.CLI.Run Bash File ... bash_file=appservice_logs.sh ... 
env=${env} @@ -145,7 +145,7 @@ Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROU Check Configuration Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch the configuration health of the App Service - [Tags] appservice logs tail + [Tags] appservice logs tail access:read-only ${config_health}= RW.CLI.Run Bash File ... bash_file=appservice_config_health.sh ... env=${env} @@ -172,9 +172,9 @@ Check Configuration Health of App Service `${APP_SERVICE_NAME}` In Resource Grou END END -Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}` +Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch deployment health of the App Service - [Tags] appservice deployment + [Tags] appservice deployment access:read-only ${deployment_health}= RW.CLI.Run Bash File ... bash_file=appservice_deployment_health.sh ... env=${env} @@ -203,7 +203,7 @@ Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group ` Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Gets the events of appservice and checks for errors - [Tags] appservice monitor events errors + [Tags] appservice monitor events errors access:read-only ${activities}= RW.CLI.Run Bash File ... bash_file=appservice_activities.sh ... env=${env} @@ -227,7 +227,7 @@ Fetch App Service `${APP_SERVICE_NAME}` Activities In Resource Group `${AZ_RESOU END Check Logs for Errors in App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Gets the events of appservice and checks for errors - [Tags] appservice logs errors + [Tags] appservice logs errors access:read-only ${log_errors}= RW.CLI.Run Bash File ... bash_file=appservice_log_analysis.sh ... 
env=${env} diff --git a/codebundles/azure-appservice-triage/sli.robot b/codebundles/azure-appservice-triage/sli.robot index 616db7c6e..0c2cb70fb 100644 --- a/codebundles/azure-appservice-triage/sli.robot +++ b/codebundles/azure-appservice-triage/sli.robot @@ -89,7 +89,7 @@ Check App Service `${APP_SERVICE_NAME}` Configuration Health In Resource Group ` END END END -Fetch Deployment Health of App Service `${APP_SERVICE_NAME}` in Resource Group `${AZ_RESOURCE_GROUP}` +Check Deployment Health of App Service `${APP_SERVICE_NAME}` In Resource Group `${AZ_RESOURCE_GROUP}` [Documentation] Fetch deployment health of the App Service [Tags] appservice deployment ${deployment_health}= RW.CLI.Run Bash File diff --git a/codebundles/cli-test/meta.yaml b/codebundles/cli-test/meta.yaml deleted file mode 100644 index 649b28667..000000000 --- a/codebundles/cli-test/meta.yaml +++ /dev/null @@ -1 +0,0 @@ -commands: [] diff --git a/codebundles/cli-test/runbook.robot b/codebundles/cli-test/runbook.robot deleted file mode 100644 index 9268b28e1..000000000 --- a/codebundles/cli-test/runbook.robot +++ /dev/null @@ -1,124 +0,0 @@ -*** Settings *** -Metadata Author jon-funk -Documentation This taskset smoketests the CLI codebundle setup and run process -Suite Setup Suite Initialization -Library BuiltIn -Library RW.Core -Library RW.platform -Library OperatingSystem -Library RW.CLI - -*** Keywords *** -Suite Initialization - ${kubeconfig}= RW.Core.Import Secret kubeconfig - ... type=string - ... description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). - ... pattern=\w* - ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ - ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE - ... type=string - ... description=The name of the Kubernetes namespace to scope actions and searching to. - ... pattern=\w* - ... 
example=my-namespace - ${CONTEXT}= RW.Core.Import User Variable CONTEXT - ... type=string - ... description=Which Kubernetes context to operate within. - ... pattern=\w* - ... example=my-main-cluster - Set Suite Variable ${NAMESPACE} ${NAMESPACE} - Set Suite Variable ${CONTEXT} ${CONTEXT} - Set Suite Variable ${kubeconfig} ${kubeconfig} - Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} - -*** Tasks *** -Run CLI and Parse Output For Issues in `${NAMESPACE}` namespace and `${CONTEXT}` context - [Documentation] Fetch some output from the cluster in varying forms and run tests against it - [Tags] Stdout Test Output Pods - ${rsp}= RW.CLI.Run Cli - ... cmd=kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - # TODO: remove double slashes and find WYSIWYG method for regex passing - ${regexp}= Evaluate r'(?P[\\w-]+)\\s+[\\w//]+\\s+(?P\\w+)\\s+(?P[\\d]+)' - ${rsp}= RW.CLI.Parse Cli Output By Line - ... rsp=${rsp} - ... lines_like_regexp=${regexp} - ... set_severity_level=1 - ... set_issue_expected=No crashloops or errors in the output $_stdout! - ... set_issue_actual=We found crashloops in the output line $_line! - ... set_issue_reproduce_hint=Run 'kubectl get pods --context ${CONTEXT} -n ${NAMESPACE}' and check the output for crashloops - ... set_issue_title=The output should contain no crashloopbackoffs - ... pod_status__raise_issue_if_eq=CrashLoopBackOff - ... pod_status__raise_issue_if_eq=Error - ... pod_name__raise_issue_if_contains=crashi - ... pod_name__raise_issue_if_contains=bobbydroptables - ... pod_name__raise_issue_if_ncontains=Kyle - ... pod_restarts__raise_issue_if_gt=0 - ... nonsense__raise_issue_if_gt=0 - ... potatoes=0 - RW.Core.Add Pre To Report Found issues after parsing the output of: kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} - - ${rsp}= RW.CLI.Run Cli - ... cmd=kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} -ojson - ... env=${env} - ... 
secret_file__kubeconfig=${kubeconfig} - ${rsp}= RW.CLI.Parse Cli Json Output - ... rsp=${rsp} - ... set_severity_level=1 - ... set_issue_title=Pod Restarts Detected - ... set_issue_actual=We found $total_container_restarts in the namespace: ${NAMESPACE} - ... set_issue_details=There were a total of $total_container_restarts pod restarts in the stdout of the command we ran. - ... extract_path_to_var__pod_names=items[*].metadata.name - ... extract_path_to_var__pod_count=length(items) - ... extract_path_to_var__total_container_restarts=sum(items[*].status.containerStatuses[*].restartCount[]) - ... extract_path_to_var__all_data=@ - ... from_var_with_path__all_data__to__mycount=length(@.items) - ... total_container_restarts__raise_issue_if_gt=0 - ... total_container_restarts__raise_issue_if_lt=1 - RW.Core.Add Pre To Report Found issues after parsing the output of: kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} -ojson - -Exec Test in Pod `$${NAMESPACE}` - [Documentation] Used to verify that running CLI commands in remote workloads works - [Tags] Remote Exec Command Tags Workload Pod - ${df}= RW.CLI.Run Cli - ... cmd=df - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... run_in_workload_with_name=deploy/crashi - ... optional_namespace=${NAMESPACE} - ... optional_context=${CONTEXT} - ${ls}= RW.CLI.Run Cli - ... cmd=ls - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... run_in_workload_with_labels=app=crashi - ... optional_namespace=${NAMESPACE} - ... optional_context=${CONTEXT} - -Local Process Test for '$${NAMESPACE}' - [Documentation] Verifies that commands can be run locally directly within the runner - [Tags] Commands Direct Process Local - # run local 'ls' - ${ls}= RW.CLI.Run Cli - ... cmd=pwd - # run local 'kubectl get pods locally' - ${rsp}= RW.CLI.Run Cli - ... cmd=kubectl get pods --context ${CONTEXT} -n ${NAMESPACE} - ... env=${env} - ... 
secret_file__kubeconfig=${kubeconfig} - # run kubectl exec locally and df in a remote pod - ${rsp}= RW.CLI.Run Cli - ... cmd=df - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... run_in_workload_with_name=deploy/crashi - ... optional_namespace=${NAMESPACE} - ... optional_context=${CONTEXT} - # run kubectl exec locally and df in a remote pod with tags - ${rsp}= RW.CLI.Run Cli - ... cmd=df - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... run_in_workload_with_labels=app=crashi - ... optional_namespace=${NAMESPACE} - ... optional_context=${CONTEXT} \ No newline at end of file diff --git a/codebundles/cmd-test/getdeploys.sh b/codebundles/cmd-test/getdeploys.sh deleted file mode 100755 index 17ec24b3b..000000000 --- a/codebundles/cmd-test/getdeploys.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -myns=$NAMESPACE -myctxt=$CONTEXT -kubectl get deployments -n "$myns" --context $myctxt -ojson | jq -r '.items[].metadata.name' \ No newline at end of file diff --git a/codebundles/cmd-test/meta.yaml b/codebundles/cmd-test/meta.yaml deleted file mode 100644 index 649b28667..000000000 --- a/codebundles/cmd-test/meta.yaml +++ /dev/null @@ -1 +0,0 @@ -commands: [] diff --git a/codebundles/cmd-test/runbook.robot b/codebundles/cmd-test/runbook.robot deleted file mode 100644 index df16a83d0..000000000 --- a/codebundles/cmd-test/runbook.robot +++ /dev/null @@ -1,107 +0,0 @@ -*** Settings *** -Documentation This taskset smoketests the CLI codebundle setup and run process by running a bare command -Metadata Author jon-funk - -Library BuiltIn -Library RW.Core -Library RW.platform -Library OperatingSystem -Library RW.CLI -Library RW.NextSteps - -Suite Setup Suite Initialization - - -*** Tasks *** -Run CLI Command in `${NAMESPACE}` namespace - [Documentation] Runs a bare CLI command and captures the stderr and stdout for the report - [Tags] stdout test output pods - ${rsp}= RW.CLI.Run Cli - ... cmd=${CLI_COMMAND} - ... 
env={"KUBECONFIG":"./${kubeconfig.key}"} - ... secret_file__kubeconfig=${kubeconfig} - RW.Core.Add Pre To Report Command Stdout:\n${rsp.stdout} - RW.Core.Add Pre To Report Command Stderr:\n${rsp.stderr} - -Run Bash File in `${NAMESPACE}`/${CONTEXT}/path/to/script.sh - [Documentation] Runs a bash file to verify script passthrough works - [Tags] file script - ${rsp}= RW.CLI.Run Bash File in `${NAMESPACE}`/${CONTEXT}/path/to/script.sh - ... bash_file=getdeploys.sh - ... secret_file__kubeconfig=${kubeconfig} - ... env=${env} - RW.Core.Add Pre To Report Command Stdout:\n${rsp.stdout} - RW.Core.Add Pre To Report Command Stderr:\n${rsp.stderr} - -Generate Next Step Suggestion and Log in `NAMESPACE` - [Documentation] Generate a next step suggestion, format it, and log it - ${next_steps}= RW.NextSteps.Suggest Bind Mount - ${next_steps}= RW.NextSteps.Format ${next_steps} - ... pvc_name=cartservicestorage - RW.Core.Add Pre To Report ${next_steps} - - ${next_steps}= RW.NextSteps.Suggest Useless Error Message - ${next_steps}= RW.NextSteps.Format ${next_steps} - ... blah=foo - RW.Core.Add Pre To Report ${next_steps} - - ${next_steps}= RW.NextSteps.Suggest HTTP 500 errors found in logs - # pretend to fetch ingress name - ${db_name}= RW.CLI.Run Cli - ... cmd=echo "online-boutique" - ${next_steps}= RW.NextSteps.Format ${next_steps} - ... ingress_name=online-boutique - RW.Core.Add Pre To Report ${next_steps} - - # simulate a connection err pulled from API logs - ${next_steps}= RW.NextSteps.Suggest OperationalError: FATAL: connection limit exceeded for non-superusers - # simulate fetch object name from k8s api - ${db_name}= RW.CLI.Run Cli - ... cmd=echo "mypostgresdb" - # inject db name into next steps - ${next_steps}= RW.NextSteps.Format ${next_steps} - ... postgres_name=${db_name.stdout} - RW.Core.Add Pre To Report ${next_steps} - - -*** Keywords *** -Suite Initialization - ${kubeconfig}= RW.Core.Import Secret - ... kubeconfig - ... type=string - ... 
description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). - ... pattern=\w* - ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ - ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE - ... type=string - ... description=The name of the Kubernetes namespace to scope actions and searching to. - ... pattern=\w* - ... example=my-namespace - ... default=online-boutique - ${CONTEXT}= RW.Core.Import User Variable CONTEXT - ... type=string - ... description=Which Kubernetes context to operate within. - ... pattern=\w* - ... example=my-main-cluster - ... default=sandbox-cluster-1 - ${CLI_COMMAND}= RW.Core.Import User Variable CLI_COMMAND - ... type=string - ... description=The CLI command to run. - ... pattern=\w* - ... example=kubectl get pods - ${RUN_LOCAL}= RW.Core.Import User Variable RUN_LOCAL - ... type=string - ... description=Controls whether or not the command is run locally or uses the provided shell service. - ... enum=[YES,NO] - ... example=YES - ... default=YES - Set Suite Variable ${CONTEXT} ${CONTEXT} - Set Suite Variable ${NAMESPACE} ${NAMESPACE} - Set Suite Variable ${CLI_COMMAND} ${CLI_COMMAND} - Set Suite Variable ${kubeconfig} ${kubeconfig} - Set Suite Variable - ... ${env} - ... 
{"KUBECONFIG":"./${kubeconfig.key}", "CONTEXT":"${CONTEXT}", "NAMESPACE":"${NAMESPACE}"} - IF "${RUN_LOCAL}" == "YES" - ${kubectl}= Evaluate None - END diff --git a/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot b/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot index 87258c5f2..6714efa4c 100644 --- a/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot +++ b/codebundles/curl-gmp-kong-ingress-inspection/runbook.robot @@ -15,9 +15,9 @@ Suite Setup Suite Initialization *** Tasks *** -Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold in GCP Project `$${GCP_PROJECT_ID}` +Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches HTTP Error metrics for the Kong ingress host and service from GMP and performs an inspection on the results. If there are currently any results with more than the defined HTTP error threshold, their route and service names will be surfaced for further troubleshooting. - [Tags] curl http ingress errors metrics kong gmp + [Tags] curl http ingress errors metrics kong gmp access:read-only ${gmp_rsp}= RW.CLI.Run Cli ... cmd=gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && response=$(curl -s -d "query=rate(kong_http_requests_total{service='${INGRESS_SERVICE}',code=~'${HTTP_ERROR_CODES}'}[${TIME_SLICE}]) > ${HTTP_ERROR_RATE_THRESHOLD}" -H "Authorization: Bearer $(gcloud auth print-access-token)" 'https://monitoring.googleapis.com/v1/projects/runwhen-nonprod-sandbox/location/global/prometheus/api/v1/query') && echo "$response" | jq -e '.data.result | length > 0' && echo "$response" | jq -r '.data.result[] | "Route:" + .metric.route + " Service:" + .metric.service + " Kong Instance:" + .metric.instance + " HTTP Error Count:" + .value[1]' || echo "No HTTP Error threshold violations found for ${INGRESS_SERVICE}." ... 
show_in_rwl_cheatsheet=true @@ -48,9 +48,9 @@ Check If Kong Ingress HTTP Error Rate Violates HTTP Error Threshold in GCP Proje RW.Core.Add Pre To Report HTTP Error Violation & Details:\n${gmp_rsp.stdout} RW.Core.Add Pre To Report GMP Json Data:\n${gmp_json.stdout} -Check If Kong Ingress HTTP Request Latency Violates Threshold for Upstream `$${INGRESS_UPSTREAM}` +Check If Kong Ingress HTTP Request Latency Violates Threshold in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches metrics for the Kong ingress 99th percentile request latency from GMP and performs an inspection on the results. If there are currently any results with more than the defined request latency threshold, their route and service names will be surfaced for further troubleshooting. - [Tags] curl request ingress latency http kong gmp + [Tags] curl request ingress latency http kong gmp access:read-only ${gmp_rsp}= RW.CLI.Run Cli ... cmd=gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && response=$(curl -s -d "query=histogram_quantile(0.99, sum(rate(kong_request_latency_ms_bucket{service='${INGRESS_SERVICE}'}[${TIME_SLICE}])) by (le)) > ${REQUEST_LATENCY_THRESHOLD}" -H "Authorization: Bearer $(gcloud auth print-access-token)" 'https://monitoring.googleapis.com/v1/projects/runwhen-nonprod-sandbox/location/global/prometheus/api/v1/query') && echo "$response" | jq -e '.data.result | length > 0' && echo "$response" | jq -r '.data.result[] | "Service: ${INGRESS_SERVICE}" + " HTTP Request Latency(ms):" + .value[1]' || echo "No HTTP request latency threshold violations found for ${INGRESS_SERVICE}." ... 
show_in_rwl_cheatsheet=true @@ -79,9 +79,9 @@ Check If Kong Ingress HTTP Request Latency Violates Threshold for Upstream `$${I RW.Core.Add Pre To Report Commands Used: ${history} RW.Core.Add Pre To Report HTTP Request Latency Within Acceptable Parameters:\n${gmp_rsp.stdout} -Check If Kong Ingress Controller Reports Upstream Errors in GCP Project `$${GCP_PROJECT_ID}` +Check If Kong Ingress Controller Reports Upstream Errors in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches metrics for the Kong ingress controller related to upstream healthchecks or dns errors. - [Tags] curl request ingress upstream healthcheck dns errrors http kong gmp + [Tags] curl request ingress upstream healthcheck dns errrors http kong gmp access:read-only ${gmp_healthchecks_off_rsp}= RW.CLI.Run Cli ... cmd=gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && response=$(curl -s -d "query=kong_upstream_target_health{upstream='${INGRESS_UPSTREAM}',state='healthchecks_off'} > 0" -H "Authorization: Bearer $(gcloud auth print-access-token)" 'https://monitoring.googleapis.com/v1/projects/runwhen-nonprod-sandbox/location/global/prometheus/api/v1/query') && echo "$response" | jq -e '.data.result | length > 0' && echo "$response" | jq -r '.data.result[] | "Service: ${INGRESS_UPSTREAM}" + " Healthchecks Disabled!' || echo "${INGRESS_UPSTREAM} has healthchecks enabled." ... show_in_rwl_cheatsheet=true diff --git a/codebundles/curl-http-ok/runbook.robot b/codebundles/curl-http-ok/runbook.robot index 29f617890..809c47e14 100644 --- a/codebundles/curl-http-ok/runbook.robot +++ b/codebundles/curl-http-ok/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** Check HTTP URL Availability and Timeliness for `${URL}` [Documentation] Use cURL to validate the http response - [Tags] curl http ingress latency errors + [Tags] curl http ingress latency errors access:read-only ${curl_rsp}= RW.CLI.Run Cli ... 
cmd=curl -o /dev/null -w '{"http_code": \%{http_code}, "time_total": \%{time_total}}' -s ${URL} ... show_in_rwl_cheatsheet=true diff --git a/codebundles/gcloud-log-inspection/runbook.robot b/codebundles/gcloud-log-inspection/runbook.robot index cb9d335b8..de0352170 100644 --- a/codebundles/gcloud-log-inspection/runbook.robot +++ b/codebundles/gcloud-log-inspection/runbook.robot @@ -52,7 +52,7 @@ Suite Initialization *** Tasks *** Inspect GCP Logs For Common Errors in GCP Project `${GCP_PROJECT_ID}` - [Tags] Logs Query Gcloud GCP Errors Common + [Tags] Logs Query Gcloud GCP Errors Common access:read-only [Documentation] Fetches logs from a Google Cloud Project and filters for a count of common error messages. ${cmd} Set Variable gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && gcloud logging read "severity>=${SEVERITY}${ADD_FILTERS}" --freshness=120m --limit=50 --format=json ${rsp}= RW.CLI.Run Cli diff --git a/codebundles/gcloud-node-preempt/runbook.robot b/codebundles/gcloud-node-preempt/runbook.robot index 7a6324ab8..e11a00ca8 100644 --- a/codebundles/gcloud-node-preempt/runbook.robot +++ b/codebundles/gcloud-node-preempt/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** List all nodes in an active preempt operation for GCP Project `${GCP_PROJECT_ID}` within the last `${AGE}` hours [Documentation] Fetches all nodes that have been preempted within the defined time interval. - [Tags] stdout gcloud node preempt gcp ${gcp_project_id} + [Tags] stdout gcloud node preempt gcp ${gcp_project_id} access:read-only ${preempt_node_list}= RW.CLI.Run Cli ... 
cmd=gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && gcloud compute operations list --filter='operationType:(compute.instances.preempted)' --format=json --project=${GCP_PROJECT_ID} | jq -r --arg now "$(date -u +%s)" '[.[] | select((.startTime | sub("\\\\.[0-9]+"; "") | strptime("%Y-%m-%dT%H:%M:%S%z") | mktime) > ($now | tonumber - (${AGE}*60)))] ' ... env=${env} diff --git a/codebundles/gcp-bucket-health/sli.robot b/codebundles/gcp-bucket-health/sli.robot index bf8f44548..a22c07850 100644 --- a/codebundles/gcp-bucket-health/sli.robot +++ b/codebundles/gcp-bucket-health/sli.robot @@ -60,7 +60,7 @@ Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}` ${bucket_ops_rate_score}= Evaluate 1 if int(${buckets_over_ops_threshold.stdout}) == 0 else 0 Set Global Variable ${bucket_ops_rate_score} -Generate Bucket Score in Project `$${PROJECT_IDS}` +Generate Bucket Score in Project `${PROJECT_IDS}` ${bucket_health_score}= Evaluate (${buckets_over_utilization} + ${public_bucket_score} + ${bucket_ops_rate_score}) / 3 ${health_score}= Convert to Number ${bucket_health_score} 2 RW.Core.Push Metric ${health_score} diff --git a/codebundles/gcp-cloud-function-health/runbook.robot b/codebundles/gcp-cloud-function-health/runbook.robot index 5bcdf5296..6ca672c70 100644 --- a/codebundles/gcp-cloud-function-health/runbook.robot +++ b/codebundles/gcp-cloud-function-health/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** List Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches a list of GCP Cloud Functions that are not healthy. - [Tags] gcloud function gcp ${GCP_PROJECT_ID} + [Tags] gcloud function gcp ${GCP_PROJECT_ID} access:read-only # This command is cheat-sheet friendly ${unhealthy_cloud_function_list_simple_output}= RW.CLI.Run Cli ... 
cmd=gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && gcloud functions list --filter="state!=ACTIVE OR status!=ACTIVE" --format="table[box](name, state, status, stateMessages.severity, stateMessages.type, stateMessages.message:wrap=30)" --project=${GCP_PROJECT_ID} && echo "Run 'gcloud functions describe [name]' for full details." @@ -67,9 +67,9 @@ List Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` RW.Core.Add Pre To Report Failed GCP Functions Table:\n${unhealthy_cloud_function_list_simple_output.stdout} RW.Core.Add Pre To Report Commands Used:\n${history} -Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` in `us-central1` Region +Get Error Logs for Unhealthy Cloud Functions in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetches GCP logs related to unhealthy Cloud Functions within the last 14 days - [Tags] gcloud function gcp ${GCP_PROJECT_ID} + [Tags] gcloud function gcp ${GCP_PROJECT_ID} access:read-only # This command is cheat-sheet friendly ${error_logs_simple_output}= RW.CLI.Run Cli ... 
cmd=gcloud functions list --filter="state!=ACTIVE OR status!=ACTIVE" --format="value(name)" --project=${GCP_PROJECT_ID} | xargs -I {} gcloud logging read "severity=ERROR AND resource.type=cloud_function AND resource.labels.function_name={}" --limit 50 --freshness=14d diff --git a/codebundles/gh-actions-artifact-analysis/runbook.robot b/codebundles/gh-actions-artifact-analysis/runbook.robot index d68ccdd71..22fee9f44 100644 --- a/codebundles/gh-actions-artifact-analysis/runbook.robot +++ b/codebundles/gh-actions-artifact-analysis/runbook.robot @@ -14,9 +14,9 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` using command `${ANALYSIS_COMMAND}` +Analyze artifact from GitHub workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` [Documentation] Check GitHub workflow status and analyze artifact with a user provided command. - [Tags] github workflow actions artifact report + [Tags] github workflow actions artifact report access:read-only ${ESCAPED_ANALYSIS_COMMAND}= RW.CLI.Escape Bash Command ${ANALYSIS_COMMAND} Log ${ESCAPED_ANALYSIS_COMMAND} ${workflow_analysis}= RW.CLI.Run Bash File diff --git a/codebundles/gh-actions-artifact-analysis/sli.robot b/codebundles/gh-actions-artifact-analysis/sli.robot index 40445c15a..e7f2e42bc 100644 --- a/codebundles/gh-actions-artifact-analysis/sli.robot +++ b/codebundles/gh-actions-artifact-analysis/sli.robot @@ -14,7 +14,7 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push `${METRIC}` metric +Analyze artifact from GitHub Workflow `${WORKFLOW_NAME}` in repository `${GITHUB_REPO}` and push metric [Documentation] Check GitHub workflow status, run a user provided analysis command, and push the metric. The analysis command should result in a single metric. 
[Tags] github workflow actions artifact report ${ESCAPED_ANALYSIS_COMMAND}= RW.CLI.Escape Bash Command ${ANALYSIS_COMMAND} diff --git a/codebundles/k8s-app-troubleshoot/runbook.robot b/codebundles/k8s-app-troubleshoot/runbook.robot index caad00e70..18f6bd088 100644 --- a/codebundles/k8s-app-troubleshoot/runbook.robot +++ b/codebundles/k8s-app-troubleshoot/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Get `${CONTAINER_NAME}` Application Logs from Workload `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` [Documentation] Collects the last approximately 300 lines of logs from the workload - [Tags] resource application workload logs state ${container_name} ${workload_name} + [Tags] resource application workload logs state ${container_name} ${workload_name} access:read-only ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} logs -l ${LABELS} --tail=${MAX_LOG_LINES} --limit-bytes=256000 --since=${LOGS_SINCE} --container=${CONTAINER_NAME} ... show_in_rwl_cheatsheet=true @@ -54,6 +54,7 @@ Tail `${CONTAINER_NAME}` Application Logs For Stacktraces in Workload `${WORKLOA ... logs ... ${container_name} ... ${workload_name} + ... access:read-only ${cmd}= Set Variable ... ${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} logs -l ${LABELS} --tail=${MAX_LOG_LINES} --limit-bytes=256000 --since=${LOGS_SINCE} --container=${CONTAINER_NAME} IF $EXCLUDE_PATTERN != "" diff --git a/codebundles/k8s-artifactory-health/runbook.robot b/codebundles/k8s-artifactory-health/runbook.robot index cc584fbca..a205757e9 100644 --- a/codebundles/k8s-artifactory-health/runbook.robot +++ b/codebundles/k8s-artifactory-health/runbook.robot @@ -68,7 +68,7 @@ Suite Initialization *** Tasks *** Check Artifactory Liveness and Readiness Endpoints in `NAMESPACE` [Documentation] Runs a set of exec commands internally in the Artifactory workloads to curl the system health endpoints. 
- [Tags] Pods Statefulset Artifactory Health System Curl API OK HTTP + [Tags] Pods Statefulset Artifactory Health System Curl API OK HTTP access:read-only # these endpoints dont respect json type headers ${liveness}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec statefulset/${STATEFULSET_NAME} --context=${CONTEXT} -n ${NAMESPACE} -- curl -k --max-time 10 http://localhost:8091/artifactory/api/v1/system/liveness diff --git a/codebundles/k8s-chaos-flux/runbook.robot b/codebundles/k8s-chaos-flux/runbook.robot index 9b8036fdf..273b24455 100644 --- a/codebundles/k8s-chaos-flux/runbook.robot +++ b/codebundles/k8s-chaos-flux/runbook.robot @@ -104,9 +104,9 @@ Suite Initialization Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} *** Tasks *** -Suspend the Flux Resource Reconciliation for ${FLUX_RESOURCE_NAME} in namespace ${FLUX_RESOURCE_NAMESPACE} +Suspend the Flux Resource Reconciliation for `${FLUX_RESOURCE_NAME}` in namespace `${FLUX_RESOURCE_NAMESPACE}` [Documentation] Suspends a flux resource so that it can be manipulated for chaos purposes. - [Tags] Chaos Flux Kubernetes Resource Suspend + [Tags] Chaos Flux Kubernetes Resource Suspend access:read-write ${suspend_flux_resource}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} patch ${FLUX_RESOURCE_TYPE} ${FLUX_RESOURCE_NAME} -n ${FLUX_RESOURCE_NAMESPACE} --context ${CONTEXT} --type='json' -p='[{"op": "add", "path": "/spec/suspend", "value":true}]' ... env=${env} @@ -116,7 +116,7 @@ Suspend the Flux Resource Reconciliation for ${FLUX_RESOURCE_NAME} in namespace Select Random FluxCD Workload for Chaos Target in Namespace `${FLUX_RESOURCE_NAMESPACE}` [Documentation] Inspects the Flux resource and randomly selects a deployment to tickle. Tehe. Only runs if RANDOMIZE = Yes. - [Tags] Chaos Flux Kubernetes Resource Random + [Tags] Chaos Flux Kubernetes Resource Random access:read-write IF "${RANDOMIZE}" == "Yes" ${deployments}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployments -l kustomize.toolkit.fluxcd.io/name=${FLUX_RESOURCE_NAME} -n ${TARGET_NAMESPACE} --context ${CONTEXT} -o json @@ -138,9 +138,9 @@ Select Random FluxCD Workload for Chaos Target in Namespace `${FLUX_RESOURCE_NAM Set Suite Variable ${TARGET_RESOURCE} deployment/${deployment_name} END -Execute Chaos Command on ${TARGET_RESOURCE} in Namespace ${TARGET_NAMESPACE} +Execute Chaos Command on `${TARGET_RESOURCE}` in Namespace `${TARGET_NAMESPACE}` [Documentation] Run the desired chaos command within a targeted resource - [Tags] Chaos Flux Kubernetes Resource Kill OOM + [Tags] Chaos Flux Kubernetes Resource Kill OOM access:read-write FOR ${index} IN RANGE ${CHAOS_COMMAND_LOOP} ${run_chaos_command}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec ${TARGET_RESOURCE} -n ${TARGET_NAMESPACE} --context ${CONTEXT} -- ${CHAOS_COMMAND} @@ -152,7 +152,7 @@ Execute Chaos Command on ${TARGET_RESOURCE} in Namespace ${TARGET_NAMESPACE} Execute Additional Chaos Command on ${FLUX_RESOURCE_TYPE} '${FLUX_RESOURCE_NAME}' in namespace '${FLUX_RESOURCE_NAMESPACE}' [Documentation] Run the additional command as input, verbatim. - [Tags] Chaos Flux Kubernetes Resource + [Tags] Chaos Flux Kubernetes Resource access:read-write ${run_additional_command}= RW.CLI.Run Cli ... cmd=${ADDNL_COMMAND} -n ${TARGET_NAMESPACE} --context ${CONTEXT} ... env=${env} @@ -162,7 +162,7 @@ Execute Additional Chaos Command on ${FLUX_RESOURCE_TYPE} '${FLUX_RESOURCE_NAME} Resume Flux Resource Reconciliation in `${TARGET_NAMESPACE}` [Documentation] Resumes Flux reconciliation on desired resource. - [Tags] Chaos Flux Kubernetes Resource Resume + [Tags] Chaos Flux Kubernetes Resource Resume access:read-write ${resume_flux}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} patch ${FLUX_RESOURCE_TYPE} ${FLUX_RESOURCE_NAME} -n ${FLUX_RESOURCE_NAMESPACE} --context ${CONTEXT} --type='json' -p='[{"op": "remove", "path": "/spec/suspend", "value":true}]' ... 
env=${env} diff --git a/codebundles/k8s-chaos-workload/runbook.robot b/codebundles/k8s-chaos-workload/runbook.robot index ad83cff31..439381a1d 100644 --- a/codebundles/k8s-chaos-workload/runbook.robot +++ b/codebundles/k8s-chaos-workload/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Test `${WORKLOAD_NAME}` High Availability in Namespace `${NAMESPACE}` [Documentation] Kills a pod under this workload to test high availability. - [Tags] Kubernetes StatefulSet Deployments Pods Highly Available + [Tags] Kubernetes StatefulSet Deployments Pods Highly Available access:read-write ${process}= RW.CLI.Run Bash File ... bash_file=kill_workload_pod.sh ... env=${env} @@ -27,7 +27,7 @@ Test `${WORKLOAD_NAME}` High Availability in Namespace `${NAMESPACE}` OOMKill `${WORKLOAD_NAME}` Pod [Documentation] Kills the oldest pod running under the configured workload. - [Tags] Kubernetes StatefulSet Deployments Pods Highly Available OOMkill Memory + [Tags] Kubernetes StatefulSet Deployments Pods Highly Available OOMkill Memory access:read-write ${process}= RW.CLI.Run Bash File ... bash_file=oomkill_workload_pod.sh ... env=${env} @@ -36,7 +36,7 @@ OOMKill `${WORKLOAD_NAME}` Pod Mangle Service Selector For `${WORKLOAD_NAME}` in `${NAMESPACE}` [Documentation] Breaks a service's label selector to cause a network disruption - [Tags] Kubernetes networking Services Selector + [Tags] Kubernetes networking Services Selector access:read-write ${process}= RW.CLI.Run Bash File ... bash_file=change_service_selector.sh ... env=${env} @@ -45,7 +45,7 @@ Mangle Service Selector For `${WORKLOAD_NAME}` in `${NAMESPACE}` Mangle Service Port For `${WORKLOAD_NAME}` in `${NAMESPACE}` [Documentation] Changes a service's port to cause a network disruption - [Tags] Kubernetes networking Services Port + [Tags] Kubernetes networking Services Port access:read-write ${process}= RW.CLI.Run Bash File ... bash_file=change_service_port.sh ...
env=${env} @@ -54,7 +54,7 @@ Mangle Service Port For `${WORKLOAD_NAME}` in `${NAMESPACE}` Fill Tmp Directory Of Pod From `${WORKLOAD_NAME}` [Documentation] Attaches to a pod and fills the /tmp directory with random data - [Tags] Kubernetes pods volumes tmp + [Tags] Kubernetes pods volumes tmp access:read-write ${process}= RW.CLI.Run Bash File expand_tmp.sh ... cmd_override=./expand_tmp.sh ... env=${env} diff --git a/codebundles/k8s-cluster-node-health/runbook.robot b/codebundles/k8s-cluster-node-health/runbook.robot index 854bc709e..34b6ea2b7 100644 --- a/codebundles/k8s-cluster-node-health/runbook.robot +++ b/codebundles/k8s-cluster-node-health/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** Check for Node Restarts in Cluster `${CONTEXT}` within Interval `${INTERVAL}` [Documentation] Identify nodes that are starting and stopping within the time interval. - [Tags] cluster preempt spot reboot utilization saturation exhaustion starvation + [Tags] cluster preempt spot reboot utilization saturation exhaustion starvation access:read-only ${node_restart_details}= RW.CLI.Run Bash File ... bash_file=node_restart_check.sh ... env=${env} diff --git a/codebundles/k8s-daemonset-healthcheck/runbook.robot b/codebundles/k8s-daemonset-healthcheck/runbook.robot index 697fb6dc9..2936c92d3 100644 --- a/codebundles/k8s-daemonset-healthcheck/runbook.robot +++ b/codebundles/k8s-daemonset-healthcheck/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** Get DaemonSet Logs for `${DAEMONSET_NAME}` and Add to Report [Documentation] Fetches the last 100 lines of logs for the given daemonset in the namespace. - [Tags] fetch log pod container errors inspect trace info daemonset csi + [Tags] fetch log pod container errors inspect trace info daemonset csi access:read-only ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs --tail=100 daemonset/${DAEMONSET_NAME} --context ${CONTEXT} -n ${NAMESPACE} ...
env=${env} @@ -29,7 +29,7 @@ Get DaemonSet Logs for `${DAEMONSET_NAME}` and Add to Report Get Related Daemonset `${DAEMONSET_NAME}` Events in Namespace `${NAMESPACE}` [Documentation] Fetches events related to the daemonset workload in the namespace. - [Tags] events workloads errors warnings get daemonset csi + [Tags] access:read-only events workloads errors warnings get daemonset csi ${events}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --field-selector type=Warning --context ${CONTEXT} -n ${NAMESPACE} | grep -i "${DAEMONSET_NAME}" || true ... env=${env} @@ -55,6 +55,7 @@ Check Daemonset `${DAEMONSET_NAME}` Replicas ... rollout ... stuck ... pods + ... access:read-only ${daemonset_describe}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} describe daemonset/${DAEMONSET_NAME} --context ${CONTEXT} -n ${NAMESPACE} ... env=${env} diff --git a/codebundles/k8s-deployment-healthcheck/runbook.robot b/codebundles/k8s-deployment-healthcheck/runbook.robot index 0b851db74..46982572d 100644 --- a/codebundles/k8s-deployment-healthcheck/runbook.robot +++ b/codebundles/k8s-deployment-healthcheck/runbook.robot @@ -31,6 +31,7 @@ Check Deployment Log For Issues with `${DEPLOYMENT_NAME}` ... info ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-only ${logs}= RW.CLI.Run Bash File ... bash_file=deployment_logs.sh ... cmd_override=./deployment_logs.sh | tee "${SCRIPT_TMP_DIR}/log_analysis" @@ -69,6 +70,7 @@ Fetch Deployments Logs for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` and ... logs ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-only ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} -n ${NAMESPACE} --tail=${LOG_LINES} --all-containers=true --max-log-requests=20 --context ${CONTEXT} ... env=${env} @@ -93,6 +95,7 @@ Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` ... get ... deployment ... ${DEPLOYMENT_NAME} + ... 
access:read-only ${liveness_probe_health}= RW.CLI.Run Bash File ... bash_file=validate_probes.sh ... cmd_override=./validate_probes.sh livenessProbe | tee "${SCRIPT_TMP_DIR}/liveness_probe_output" @@ -130,6 +133,7 @@ Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` in Names ... get ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-only ${readiness_probe_health}= RW.CLI.Run Bash File ... bash_file=validate_probes.sh ... cmd_override=./validate_probes.sh readinessProbe | tee "${SCRIPT_TMP_DIR}/readiness_probe_output" @@ -155,9 +159,9 @@ Check Readiness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` in Names RW.Core.Add Pre To Report Readiness probe testing results:\n\n${readiness_probe_health.stdout} RW.Core.Add Pre To Report Commands Used: ${readiness_probe_health.cmd} -Inspect Container Restarts for Deployment `${DEPLOYMENT_NAME}` Namespace `${NAMESPACE}` +Inspect Container Restarts for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches pods that have container restarts and provides a report of the restart issues. - [Tags] namespace containers status restarts ${DEPLOYMENT_NAME} ${NAMESPACE} + [Tags] access:read-only namespace containers status restarts ${DEPLOYMENT_NAME} ${NAMESPACE} ${container_restart_analysis}= RW.CLI.Run Bash File ... bash_file=container_restarts.sh ... env=${env} @@ -187,9 +191,9 @@ Inspect Container Restarts for Deployment `${DEPLOYMENT_NAME}` Namespace `${NAME RW.Core.Add Pre To Report ${container_restart_analysis.stdout} RW.Core.Add Pre To Report Commands Used:\n${history} -Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}` +Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches warning events related to the deployment workload in the namespace and triages any issues found in the events. 
- [Tags] events workloads errors warnings get deployment ${DEPLOYMENT_NAME} + [Tags] access:read-only events workloads errors warnings get deployment ${DEPLOYMENT_NAME} ${events}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '(now - (60*60)) as $time_limit | [ .items[] | select(.type == "Warning" and (.involvedObject.kind == "Deployment" or .involvedObject.kind == "ReplicaSet" or .involvedObject.kind == "Pod") and (.involvedObject.name | tostring | contains("${DEPLOYMENT_NAME}")) and (.lastTimestamp | fromdateiso8601) >= $time_limit) | {kind: .involvedObject.kind, name: .involvedObject.name, reason: .reason, message: .message, firstTimestamp: .firstTimestamp, lastTimestamp: .lastTimestamp} ] | group_by([.kind, .name]) | map({kind: .[0].kind, name: .[0].name, count: length, reasons: map(.reason) | unique, messages: map(.message) | unique, firstTimestamp: map(.firstTimestamp | fromdateiso8601) | sort | .[0] | todateiso8601, lastTimestamp: map(.lastTimestamp | fromdateiso8601) | sort | reverse | .[0] | todateiso8601})' ... env=${env} @@ -229,9 +233,9 @@ Inspect Deployment Warning Events for `${DEPLOYMENT_NAME}` RW.Core.Add Pre To Report ${events.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Fetch Deployment Workload Details For `${DEPLOYMENT_NAME}` +Fetch Deployment Workload Details For `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches the current state of the deployment for future review in the report. - [Tags] deployment details manifest info ${DEPLOYMENT_NAME} + [Tags] access:read-only deployment details manifest info ${DEPLOYMENT_NAME} ${deployment}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o yaml ... env=${env} @@ -258,6 +262,7 @@ Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` ... stuck ... pods ... ${DEPLOYMENT_NAME} + ... 
access:read-only ${deployment_replicas}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '.status | {desired_replicas: .replicas, ready_replicas: (.readyReplicas // 0), missing_replicas: ((.replicas // 0) - (.readyReplicas // 0)), unavailable_replicas: (.unavailableReplicas // 0), available_condition: (if any(.conditions[]; .type == "Available") then (.conditions[] | select(.type == "Available")) else "Condition not available" end), progressing_condition: (if any(.conditions[]; .type == "Progressing") then (.conditions[] | select(.type == "Progressing")) else "Condition not available" end)}' ... secret_file__kubeconfig=${kubeconfig} @@ -303,7 +308,7 @@ Inspect Deployment Replicas for `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}` +Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Parses all events in a namespace within a timeframe and checks for unusual activity, raising issues for any found. [Tags] ... deployment @@ -315,6 +320,7 @@ Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}` ... occurences ... connection error ... ${DEPLOYMENT_NAME} + ... access:read-only ${recent_anomalies}= RW.CLI.Run Bash File ... bash_file=event_anomalies.sh ... env=${env} @@ -359,7 +365,7 @@ Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}` RW.Core.Add To Report ${anomalies_report_output}\n RW.Core.Add Pre To Report Commands Used:\n${history} -Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}` +Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches all replicasets related to deployment to ensure that conflicting versions don't exist. [Tags] ... 
replica @@ -369,6 +375,7 @@ Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}` ... pods ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-only ${check_replicaset}= RW.CLI.Run Bash File ... bash_file=check_replicaset.sh ... cmd_override=./check_replicaset.sh | tee "${SCRIPT_TMP_DIR}/rs_analysis" diff --git a/codebundles/k8s-deployment-ops/runbook.robot b/codebundles/k8s-deployment-ops/runbook.robot index eee3d26ef..9e47c7f2b 100644 --- a/codebundles/k8s-deployment-ops/runbook.robot +++ b/codebundles/k8s-deployment-ops/runbook.robot @@ -25,6 +25,7 @@ Restart Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` ... restart ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} --tail 50 --all-containers=true --max-log-requests=20 -n ${NAMESPACE} --context ${CONTEXT} @@ -73,6 +74,7 @@ Force Delete Pods in Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` ... restart ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} --tail 50 -n ${NAMESPACE} --all-containers=true --max-log-requests=20 --context ${CONTEXT} @@ -121,6 +123,7 @@ Rollback Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` to Previous ... rollback ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${logs}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} --tail 50 -n ${NAMESPACE} --all-containers=true --max-log-requests=20 --context ${CONTEXT} @@ -161,7 +164,7 @@ Rollback Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` to Previous ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report Commands Used: ${history} -Halt Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` +Scale Down Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Stops all running pods in a deployment to immediately halt a failing or runaway service. [Tags] ... log @@ -169,6 +172,7 @@ Halt Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` ... scaledown ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} --tail 50 -n ${NAMESPACE} --all-containers=true --max-log-requests=20 --context ${CONTEXT} @@ -210,6 +214,7 @@ Scale Up Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` by ${SCALE_ ... scaleup ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${scaleup}= RW.CLI.Run Cli # ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} scale deployment/${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} --replicas=$(($(${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o jsonpath='{.spec.replicas}') == 0 ? 1 : $(${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o jsonpath='{.spec.replicas}') * ${SCALE_UP_FACTOR} )) @@ -245,6 +250,7 @@ Clean Up Stale ReplicaSets for Deployment `${DEPLOYMENT_NAME}` in Namespace `${N ... stale ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${rs_cleanup}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get rs -n ${NAMESPACE} --context ${CONTEXT} --selector=$(${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o jsonpath='{.spec.selector.matchLabels}' | tr -d '{}" ' | tr ':' '=') -o json | jq -r '.items[] | select(.status.replicas == 0) | .metadata.name' | while read -r rs; do ${KUBERNETES_DISTRIBUTION_BINARY} delete rs "$rs" -n ${NAMESPACE} --context ${CONTEXT}; done @@ -276,6 +282,7 @@ Scale Down Stale ReplicaSets for Deployment `${DEPLOYMENT_NAME}` in Namespace `$ ... scaledown ... deployment ... ${DEPLOYMENT_NAME} + ... access:read-write ${rs_scaledown}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} scale rs -n ${NAMESPACE} --context ${CONTEXT} -l $(${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o jsonpath='{.spec.selector.matchLabels}' | tr -d '{}" ' | tr ':' '=') --replicas=0 diff --git a/codebundles/k8s-flux-suspend-namespace/runbook.robot b/codebundles/k8s-flux-suspend-namespace/runbook.robot index 0f0e6980f..bf04d4a5c 100644 --- a/codebundles/k8s-flux-suspend-namespace/runbook.robot +++ b/codebundles/k8s-flux-suspend-namespace/runbook.robot @@ -16,17 +16,19 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Flux Suspend Namespace `${NAMESPACE}` +Suspend FluxCD for Namespace `${NAMESPACE}` [Documentation] Applies a flux suspend to the spec of all flux objects reconciling in a given namespace. - [Tags] Kubernetes Namespace Flux Suspend + [Tags] Kubernetes Namespace FluxCD Suspend access:read-write + ${process}= RW.CLI.Run Bash File suspend_namespace.sh ... env=${env} ... secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Unsuspend Flux for Namespace `${NAMESPACE}` +Resume FluxCD for Namespace `${NAMESPACE}` [Documentation] Unsuspends any suspended flux objects in a given namespace, allowing reconciliation to resume. 
- [Tags] Kubernetes Namespace Flux Unsuspend + [Tags] Kubernetes Namespace FluxCD Resume access:read-write + ${process}= RW.CLI.Run Bash File unsuspend_namespace.sh ... env=${env} ... secret_file__kubeconfig=${kubeconfig} diff --git a/codebundles/k8s-fluxcd-kustomization-health/runbook.robot b/codebundles/k8s-fluxcd-kustomization-health/runbook.robot index d2db1831d..d0a617a7c 100644 --- a/codebundles/k8s-fluxcd-kustomization-health/runbook.robot +++ b/codebundles/k8s-fluxcd-kustomization-health/runbook.robot @@ -13,9 +13,9 @@ Suite Setup Suite Initialization *** Tasks *** -List all available Kustomization objects in Namespace `${NAMESPACE}` +List all available FluxCD Kustomization objects in Namespace `${NAMESPACE}` [Documentation] List all FluxCD kustomization objects found in ${NAMESPACE} - [Tags] FluxCD Kustomization Available List ${NAMESPACE} + [Tags] access:read-only FluxCD Kustomization Available List ${NAMESPACE} ${kustomizations}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${RESOURCE_NAME} -n ${NAMESPACE} --context ${CONTEXT} ... env=${env} @@ -26,9 +26,9 @@ List all available Kustomization objects in Namespace `${NAMESPACE}` RW.Core.Add Pre To Report Kustomizations available: \n ${kustomizations.stdout} RW.Core.Add Pre To Report Commands Used:\n${history} -List Unready Kustomizations in Namespace `${NAMESPACE}` +List Unready FluxCD Kustomizations in Namespace `${NAMESPACE}` [Documentation] List all Kustomizations that are not found in a ready state in namespace ${NAMESPACE} - [Tags] FluxCD Kustomization Versions ${NAMESPACE} + [Tags] access:read-only FluxCD Kustomization Versions ${NAMESPACE} ${kustomizations_not_ready}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${RESOURCE_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o json | jq '[.items[] | select(.status.conditions[] | select(.type == "Ready" and .status == "False")) | {KustomizationName: .metadata.name, ReadyStatus: {ready: (.status.conditions[] | select(.type == "Ready").status), message: (.status.conditions[] | select(.type == "Ready").message), reason: (.status.conditions[] | select(.type == "Ready").reason), last_transition_time: (.status.conditions[] | select(.type == "Ready").lastTransitionTime)}, ReconcileStatus: {reconciling: (.status.conditions[] | select(.type == "Reconciling").status), message: (.status.conditions[] | select(.type == "Reconciling").message)}}]' ... env=${env} diff --git a/codebundles/k8s-fluxcd-reconcile/runbook.robot b/codebundles/k8s-fluxcd-reconcile/runbook.robot index 5a40ebb69..c91505cf8 100644 --- a/codebundles/k8s-fluxcd-reconcile/runbook.robot +++ b/codebundles/k8s-fluxcd-reconcile/runbook.robot @@ -16,9 +16,9 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Health Check Flux Reconciliation in Kubernetes Namespace `${FLUX_NAMESPACE}` +Check FluxCD Reconciliation Health in Kubernetes Namespace `${FLUX_NAMESPACE}` [Documentation] Fetches reconciliation logs for flux and creates a report for them. - [Tags] Kubernetes Namespace Flux + [Tags] access:read-only Kubernetes Namespace Flux ${process}= RW.CLI.Run Bash File flux_reconcile_report.sh ... env=${env} ... 
secret_file__kubeconfig=${kubeconfig} diff --git a/codebundles/k8s-gitops-gh-remediate/runbook.robot b/codebundles/k8s-gitops-gh-remediate/runbook.robot index 9079736f8..8d8a561d3 100644 --- a/codebundles/k8s-gitops-gh-remediate/runbook.robot +++ b/codebundles/k8s-gitops-gh-remediate/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Remediate Readiness and Liveness Probe GitOps Manifests in Namespace `${NAMESPACE}` [Documentation] Fixes misconfigured readiness or liveness probe configurations for pods in a namespace that are managed in a GitHub GitOps repository - [Tags] readiness liveness probe remediate gitops github ${NAMESPACE} + [Tags] access:read-write readiness liveness probe remediate gitops github ${NAMESPACE} ${probe_health}= RW.CLI.Run Bash File ... bash_file=validate_all_probes.sh ... cmd_override=./validate_all_probes.sh ${NAMESPACE} @@ -56,7 +56,7 @@ Remediate Readiness and Liveness Probe GitOps Manifests in Namespace `${NAMESPAC Increase ResourceQuota Limit for Namespace `${NAMESPACE}` in GitHub GitOps Repository [Documentation] Looks for a resourcequota object in the namespace and increases it if applicable, and if it is managed in a GitHub GitOps repository - [Tags] resourcequota quota namespace remediate github gitops ${NAMESPACE} + [Tags] access:read-write resourcequota quota namespace remediate github gitops ${NAMESPACE} ${quota_usage}= RW.CLI.Run Bash File ... bash_file=resource_quota_check.sh ... env=${env} @@ -98,7 +98,7 @@ Increase ResourceQuota Limit for Namespace `${NAMESPACE}` in GitHub GitOps Repos Adjust Pod Resources to Match VPA Recommendation in `${NAMESPACE}` [Documentation] Queries the namespace for any Vertical Pod Autoscaler resource recommendations and applies them to GitOps GitHub controlled manifests. 
- [Tags] recommendation resources utilization gitops github pods cpu memory allocation vpa + [Tags] access:read-write recommendation resources utilization gitops github pods cpu memory allocation vpa ${vpa_usage}= RW.CLI.Run Bash File ... bash_file=vpa_recommendations.sh ... env=${env} @@ -140,7 +140,7 @@ Adjust Pod Resources to Match VPA Recommendation in `${NAMESPACE}` Expand Persistent Volume Claims in Namespace `${NAMESPACE}` [Documentation] Checks the disk utilization for all PVCs and updates the GitOps manifest for any that are highly utilized. - [Tags] recommendation pv pvc utilization gitops github persistentvolumeclaim persistentvolume storage capacity ${NAMESPACE} + [Tags] access:read-write recommendation pv pvc utilization gitops github persistentvolumeclaim persistentvolume storage capacity ${NAMESPACE} ${pvc_utilization}= RW.CLI.Run Bash File ... bash_file=pvc_utilization_check.sh ... env=${env} diff --git a/codebundles/k8s-ingress-gce-healthcheck/runbook.robot b/codebundles/k8s-ingress-gce-healthcheck/runbook.robot index 82a1b6ded..25ffff0ff 100644 --- a/codebundles/k8s-ingress-gce-healthcheck/runbook.robot +++ b/codebundles/k8s-ingress-gce-healthcheck/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** Search For GCE Ingress Warnings in GKE Context `${CONTEXT}` [Documentation] Find warning events related to GCE Ingress and services objects - [Tags] service ingress endpoint health ingress-gce gke + [Tags] access:read-only service ingress endpoint health ingress-gce gke ${event_warnings}= RW.CLI.Run Cli ... 
cmd=INGRESS_NAME=${INGRESS}; NAMESPACE=${NAMESPACE}; CONTEXT=${CONTEXT}; ${KUBERNETES_DISTRIBUTION_BINARY} get events -n $NAMESPACE --context $CONTEXT --field-selector involvedObject.kind=Ingress,involvedObject.name=$INGRESS_NAME,type!=Normal; for SERVICE_NAME in $(${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS_NAME -n $NAMESPACE --context $CONTEXT -o=jsonpath='{.spec.rules[*].http.paths[*].backend.service.name}'); do ${KUBERNETES_DISTRIBUTION_BINARY} get events -n $NAMESPACE --context $CONTEXT --field-selector involvedObject.kind=Service,involvedObject.name=$SERVICE_NAME,type!=Normal; done ... env=${env} @@ -31,15 +31,15 @@ Search For GCE Ingress Warnings in GKE Context `${CONTEXT}` ... set_issue_actual=Ingress and service objects have warnings in namespace `${NAMESPACE}` for ingress `${INGRESS}` ... set_issue_title=Unhealthy GCE ingress or service objects found in namespace `${NAMESPACE}` for ingress `${INGRESS}` ... set_issue_details=The following warning events were found:\n\n${event_warnings.stdout}\n\n - ... set_issue_next_steps=Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID}` for ${INGRESS} + ... set_issue_next_steps=Validate GCP HTTP Load Balancer Configurations in GCP Project `${GCP_PROJECT_ID}` for ${INGRESS} ... _line__raise_issue_if_contains=Warning ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report GCE Ingress warnings for ${NAMESPACE}:\n\n${event_warnings.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Identify Unhealthy GCE HTTP Ingress Backends in GKE Namespace `$${NAMESPACE}` +Identify Unhealthy GCE HTTP Ingress Backends in GKE Namespace `${NAMESPACE}` [Documentation] Checks the backend annotations on the ingress object to determine if they are not regstered as healthy - [Tags] service ingress endpoint health ingress-gce gke + [Tags] access:read-only service ingress endpoint health ingress-gce gke ${unhealthy_backends}= RW.CLI.Run Cli ... 
cmd=INGRESS_NAME=${INGRESS}; NAMESPACE=${NAMESPACE}; CONTEXT=${CONTEXT}; ${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS_NAME -n $NAMESPACE --context $CONTEXT -o=json | jq -r '.metadata.annotations["ingress.kubernetes.io/backends"] | fromjson | to_entries[] | select(.value != "HEALTHY") | "Backend: " + .key + " Status: " + .value' ... env=${env} @@ -61,9 +61,9 @@ Identify Unhealthy GCE HTTP Ingress Backends in GKE Namespace `$${NAMESPACE}` ... GCE unhealthy backends in `${NAMESPACE}` for ingress `${INGRESS}`:\n\n${unhealthy_backends.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID}` +Validate GCP HTTP Load Balancer Configurations in GCP Project `${GCP_PROJECT_ID}` [Documentation] Extract GCP HTTP Load Balancer components from ingress annotations and check health of each object - [Tags] service ingress endpoint health backends urlmap gce + [Tags] access:read-only service ingress endpoint health backends urlmap gce ${gce_config_objects}= RW.CLI.Run Bash File ... bash_file=check_gce_ingress_objects.sh ... secret_file__kubeconfig=${kubeconfig} @@ -90,9 +90,9 @@ Validate GCP HTTP Load Balancer Configurations in GCP Project `$${GCP_PROJECT_ID RW.Core.Add Pre To Report Ingress object summary for ingress: `${INGRESS}` in namespace: `${NAMESPACE}`:\n\n${gce_config_objects.stdout} -Fetch Network Error Logs from GCP Operations Manager for Ingress Backends in GCP Project `$${GCP_PROJECT_ID}` +Fetch Network Error Logs from GCP Operations Manager for Ingress Backends in GCP Project `${GCP_PROJECT_ID}` [Documentation] Fetch logs from the last 1d that are specific to the HTTP Load Balancer within the last 60 minutes - [Tags] service ingress endpoint health + [Tags] access:read-only service ingress endpoint health ${network_error_logs}= RW.CLI.Run Cli ... 
cmd=INGRESS_NAME=${INGRESS}; NAMESPACE=${NAMESPACE}; CONTEXT=${CONTEXT}; GCP_PROJECT_ID=${GCP_PROJECT_ID};for backend in $(${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS_NAME -n $NAMESPACE --context $CONTEXT -o=json | jq -r '.metadata.annotations["ingress.kubernetes.io/backends"] | fromjson | to_entries[] | select(.value != "HEALTHY") | .key'); do echo "Backend: \${backend}" && gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS && gcloud logging read 'severity="ERROR" AND resource.type="gce_network" AND protoPayload.resourceName=~"'\${backend}'"' --freshness=1d --limit=50 --project "$GCP_PROJECT_ID" --format=json | jq 'if length > 0 then [ .[] | select(.protoPayload.response.error.message? or .protoPayload.status.message?) | { timestamp: .timestamp, ip: (if .protoPayload.request.networkEndpoints? then .protoPayload.request.networkEndpoints[].ipAddress else null end), message: (.protoPayload.response.error.message? // .protoPayload.status.message?) } ] | group_by(.message) | map(max_by(.timestamp)) | .[] | (.timestamp + " | IP: " + (.ip // "N/A") + " | Error: " + .message) else "No results found" end'; done ... env=${env} @@ -115,9 +115,9 @@ Fetch Network Error Logs from GCP Operations Manager for Ingress Backends in GCP RW.Core.Add Pre To Report Network error logs possibly related to Ingress ${INGRESS}:\n\n${network_error_logs.stdout} RW.Core.Add Pre To Report Commands Used: ${history} -Review GCP Operations Logging Dashboard in GCP project `$${GCP_PROJECT_ID}` +Review GCP Operations Logging Dashboard in GCP project `${GCP_PROJECT_ID}` [Documentation] Create urls that will help users obtain logs from the GCP Dashboard - [Tags] service ingress endpoint health logging http loadbalancer + [Tags] access:read-only service ingress endpoint health logging http loadbalancer ${loadbalancer_log_url}= RW.CLI.Run CLI ... 
cmd=INGRESS=${INGRESS}; NAMESPACE=${NAMESPACE}; CONTEXT=${CONTEXT}; FORWARDING_RULE=$(${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS -n $NAMESPACE --context $CONTEXT -o=jsonpath='{.metadata.annotations.ingress\\.kubernetes\\.io/forwarding-rule}') && URL_MAP=$(${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS -n $NAMESPACE --context $CONTEXT -o=jsonpath='{.metadata.annotations.ingress\\.kubernetes\\.io/url-map}') && TARGET_PROXY=$(${KUBERNETES_DISTRIBUTION_BINARY} get ingress $INGRESS -n $NAMESPACE --context $CONTEXT -o=jsonpath='{.metadata.annotations.ingress\\.kubernetes\\.io/target-proxy}') && LOG_QUERY="resource.type=\\"http_load_balancer\\" AND resource.labels.forwarding_rule_name=\\"$FORWARDING_RULE\\" AND resource.labels.target_proxy_name=\\"$TARGET_PROXY\\" AND resource.labels.url_map_name=\\"$URL_MAP\\"" && ENCODED_LOG_QUERY=$(echo $LOG_QUERY | sed -e 's| |%20|g' -e 's|"|%22|g' -e 's|(|%28|g' -e 's|)|%29|g' -e 's|=|%3D|g' -e 's|/|%2F|g') && GCP_LOGS_URL="https://console.cloud.google.com/logs/query;query=$ENCODED_LOG_QUERY?project=$GCP_PROJECT_ID" && echo $GCP_LOGS_URL ... secret_file__kubeconfig=${kubeconfig} diff --git a/codebundles/k8s-jenkins-healthcheck/runbook.robot b/codebundles/k8s-jenkins-healthcheck/runbook.robot index 16679085f..fd837044a 100644 --- a/codebundles/k8s-jenkins-healthcheck/runbook.robot +++ b/codebundles/k8s-jenkins-healthcheck/runbook.robot @@ -69,7 +69,7 @@ Suite Initialization *** Tasks *** Query The Jenkins Kubernetes Workload HTTP Endpoint in Kubernetes StatefulSet `${STATEFULSET_NAME}` [Documentation] Performs a curl within the jenkins statefulset kubernetes workload to determine if the pod is up and healthy, and can serve requests. - [Tags] HTTP Curl Web Code OK Available Jenkins HTTP Endpoint API + [Tags] access:read-only HTTP Curl Web Code OK Available Jenkins HTTP Endpoint API ${rsp}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec statefulset/${STATEFULSET_NAME} --context=${CONTEXT} -n ${NAMESPACE} -- curl -s -o /dev/null -w "\%{http_code}" localhost:8080/login ... show_in_rwl_cheatsheet=true @@ -97,9 +97,9 @@ Query The Jenkins Kubernetes Workload HTTP Endpoint in Kubernetes StatefulSet `$ RW.Core.Add Pre To Report Commands Used: ${history} -Query For Stuck Jenkins Jobs in Kubernetes Statefulset Workload `$${STATEFULSET_NAME}` +Query For Stuck Jenkins Jobs in Kubernetes Statefulset Workload `${STATEFULSET_NAME}` [Documentation] Performs a curl within the jenkins statefulset kubernetes workload to check for stuck jobs in the jenkins piepline queue. - [Tags] HTTP Curl Web Code OK Available Queue Stuck Jobs Jenkins + [Tags] access:read-only HTTP Curl Web Code OK Available Queue Stuck Jobs Jenkins ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec statefulset/${STATEFULSET_NAME} --context=${CONTEXT} -n ${NAMESPACE} -- curl -s localhost:8080/queue/api/json --user $${JENKINS_SA_USERNAME.key}:$${JENKINS_SA_TOKEN.key} | jq -r '.items[] | select((.stuck == true) or (.blocked == true)) | "Why: " + .why + "\\nBlocked: " + (.blocked|tostring) + "\\nStuck: " + (.stuck|tostring)' ... secret__jenkins_sa_username=${JENKINS_SA_USERNAME} diff --git a/codebundles/k8s-kubectl-cmd/README.md b/codebundles/k8s-kubectl-cmd/README.md deleted file mode 100644 index 4128e7287..000000000 --- a/codebundles/k8s-kubectl-cmd/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Kubernetes kubectl cmd -A generic codebundle used for running bare kubectl commands in a bash shell. - -## SLI -The command provided must provide a single metric that is pushed to the RunWhen Platform. - -Example: `kubectl get pods -n online-boutique -o json | jq '[.items[]] | length` - -## TaskSet -The command has all output added to the report for review during a RunSession. 
- -Example: `kubectl describe pods -n online-boutique` - -## Requirements -- A kubeconfig with appropriate RBAC permissions to perform the desired command. \ No newline at end of file diff --git a/codebundles/k8s-kubectl-cmd/meta.yaml b/codebundles/k8s-kubectl-cmd/meta.yaml deleted file mode 100644 index 649b28667..000000000 --- a/codebundles/k8s-kubectl-cmd/meta.yaml +++ /dev/null @@ -1 +0,0 @@ -commands: [] diff --git a/codebundles/k8s-kubectl-cmd/runbook.robot b/codebundles/k8s-kubectl-cmd/runbook.robot deleted file mode 100644 index 9956a57ec..000000000 --- a/codebundles/k8s-kubectl-cmd/runbook.robot +++ /dev/null @@ -1,40 +0,0 @@ -*** Settings *** -Documentation This taskset runs a user provided kubectl command andadds the output to the report. Command line tools like jq are available. -Metadata Author stewartshea - -Library BuiltIn -Library RW.Core -Library RW.platform -Library OperatingSystem -Library RW.CLI - -Suite Setup Suite Initialization - - -*** Tasks *** -Run User Provided Kubectl Command on `${KUBECTL_COMMAND}` in Kubernetes Cluster - [Documentation] Runs a user provided kubectl command and adds the output to the report. - [Tags] kubectl cli - ${rsp}= RW.CLI.Run Cli - ... cmd=${KUBECTL_COMMAND} - ... env={"KUBECONFIG":"./${kubeconfig.key}"} - ... secret_file__kubeconfig=${kubeconfig} - ${history}= RW.CLI.Pop Shell History - RW.Core.Add Pre To Report Command stdout: ${rsp.stdout} - RW.Core.Add Pre To Report Command stderr: ${rsp.stderr} - RW.Core.Add Pre To Report Commands Used: ${history} - - -*** Keywords *** -Suite Initialization - ${kubeconfig}= RW.Core.Import Secret - ... kubeconfig - ... type=string - ... description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). - ... pattern=\w* - ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ - ${KUBECTL_COMMAND}= RW.Core.Import User Variable KUBECTL_COMMAND - ... type=string - ... 
description=The kubectl command to run. Can use tools like jq. - ... pattern=\w* - ... example="kubectl describe pods -n online-boutique" diff --git a/codebundles/k8s-kubectl-cmd/sli.robot b/codebundles/k8s-kubectl-cmd/sli.robot deleted file mode 100644 index f66fdc1bd..000000000 --- a/codebundles/k8s-kubectl-cmd/sli.robot +++ /dev/null @@ -1,36 +0,0 @@ -*** Settings *** -Documentation This taskset runs a user provided kubectl command and pushes the metric. The supplied command must result in distinct single metric. Command line tools like jq are available. -Metadata Author stewartshea - -Library BuiltIn -Library RW.Core -Library RW.platform -Library OperatingSystem -Library RW.CLI - -Suite Setup Suite Initialization - - -*** Tasks *** -Run User Provided Kubectl Command in Kubernetes Cluster `$${KUBECTL_CLUSTER}` - [Documentation] Runs a user provided kubectl command and pushes the metric as an SLI - [Tags] kubectl cli metric sli - ${rsp}= RW.CLI.Run Cli - ... cmd=${KUBECTL_COMMAND} - ... env={"KUBECONFIG":"./${kubeconfig.key}"} - ... secret_file__kubeconfig=${kubeconfig} - RW.Core.Push Metric ${rsp.stdout} - -*** Keywords *** -Suite Initialization - ${kubeconfig}= RW.Core.Import Secret - ... kubeconfig - ... type=string - ... description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). - ... pattern=\w* - ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ - ${KUBECTL_COMMAND}= RW.Core.Import User Variable KUBECTL_COMMAND - ... type=string - ... description=The kubectl command to run. Must produce a single value that can be pushed as a metric. Can use tools like jq. - ... pattern=\w* - ... 
example="kubectl get pods -n online-boutique -o json | jq '[.items[]] | length" \ No newline at end of file diff --git a/codebundles/k8s-labeledpods-healthcheck/sli.robot b/codebundles/k8s-labeledpods-healthcheck/sli.robot index b32419634..309688cb4 100644 --- a/codebundles/k8s-labeledpods-healthcheck/sli.robot +++ b/codebundles/k8s-labeledpods-healthcheck/sli.robot @@ -48,7 +48,7 @@ Suite Initialization *** Tasks *** Measure Number of Running Pods with Label in `${NAMESPACE}` [Documentation] Counts the number of running pods with the configured labels. - [Tags] Pods Containers Running Status Count Health + [Tags] access:read-only Pods Containers Running Status Count Health ${running_pods}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get pods --context=${CONTEXT} -n ${NAMESPACE} -l ${LABELS} --field-selector=status.phase=Running -ojson ... env=${env} diff --git a/codebundles/k8s-loki-healthcheck/runbook.robot b/codebundles/k8s-loki-healthcheck/runbook.robot index 97cda9cbe..cc4329e53 100644 --- a/codebundles/k8s-loki-healthcheck/runbook.robot +++ b/codebundles/k8s-loki-healthcheck/runbook.robot @@ -17,6 +17,7 @@ Suite Setup Suite Initialization *** Tasks *** Check Loki Ring API for Unhealthy Shards in Kubernetes Cluster `$${NAMESPACE}` [Documentation] Request and inspect the state of the Loki hash rings for non-active (potentially unhealthy) shards. + [Tags] access:read-only Loki # TODO: extend to dedicated script for parsing complex ring output/state ${rsp}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get pods -l app.kubernetes.io/component=single-binary -o=jsonpath='{.items[0].metadata.name}') -- wget -q --header="Accept: application/json" -O - http://localhost:3100/ring | jq -r '.shards[] | select(.state != "ACTIVE") | {name: .id, state: .state}' @@ -38,6 +39,7 @@ Check Loki Ring API for Unhealthy Shards in Kubernetes Cluster `$${NAMESPACE}` Check Loki API Ready in Kubernetes Cluster `${NAMESPACE}` [Documentation] Pings the internal Loki API to check it's ready. + [Tags] access:read-only Loki ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get pods -l app.kubernetes.io/component=single-binary -o=jsonpath='{.items[0].metadata.name}') -- wget -q --header="Accept: application/json" -O - http://localhost:3100/ready ... show_in_rwl_cheatsheet=true diff --git a/codebundles/k8s-namespace-healthcheck/sli.robot b/codebundles/k8s-namespace-healthcheck/sli.robot index d2505c6a5..30582dcbd 100644 --- a/codebundles/k8s-namespace-healthcheck/sli.robot +++ b/codebundles/k8s-namespace-healthcheck/sli.robot @@ -70,7 +70,7 @@ Suite Initialization *** Tasks *** Get Error Event Count within ${EVENT_AGE} and calculate Score [Documentation] Captures error events and counts them within a configurable timeframe. - [Tags] Event Count Warning + [Tags] Event Count Warning ${error_events}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --field-selector type=Warning --context ${CONTEXT} -n ${NAMESPACE} -o json ... env=${env} @@ -105,7 +105,7 @@ Get Container Restarts and Score in Namespace `${NAMESPACE}` Get NotReady Pods in `${NAMESPACE}` [Documentation] Fetches a count of unready pods. 
- [Tags] Pods Status Phase Ready Unready Running + [Tags] access:read-only Pods Status Phase Ready Unready Running ${unreadypods_results}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get pods --context ${CONTEXT} -n ${NAMESPACE} -o json | jq -r '.items[] | select(.status.conditions[]? | select(.type == "Ready" and .status == "False" and .reason != "PodCompleted")) | {kind: .kind, name: .metadata.name, conditions: .status.conditions}' | jq -s '. | length' | tr -d '\n' ... env=${env} diff --git a/codebundles/k8s-otelcollector/runbook.robot b/codebundles/k8s-otelcollector/runbook.robot index 850edd08f..6eb9eaeb8 100644 --- a/codebundles/k8s-otelcollector/runbook.robot +++ b/codebundles/k8s-otelcollector/runbook.robot @@ -15,7 +15,7 @@ Suite Setup Suite Initialization *** Tasks *** Query Collector Queued Spans in Namespace `${NAMESPACE}` [Documentation] Query the collector metrics endpoint and inspect queue size - [Tags] otel collector metrics queued back pressure + [Tags] access:read-only otel-collector metrics queued back pressure ${process}= RW.CLI.Run Bash File ... bash_file=otel_metrics_check.sh ... env=${env} @@ -35,7 +35,7 @@ Query Collector Queued Spans in Namespace `${NAMESPACE}` Check OpenTelemetry Collector Logs For Errors In Namespace `${NAMESPACE}` [Documentation] Fetch logs and check for errors - [Tags] otel collector metrics errors logs + [Tags] access:read-only otel-collector metrics errors logs ${process}= RW.CLI.Run Bash File ... bash_file=otel_error_check.sh ... env=${env} @@ -55,7 +55,7 @@ Check OpenTelemetry Collector Logs For Errors In Namespace `${NAMESPACE}` Query OpenTelemetry Logs For Dropped Spans In Namespace `${NAMESPACE}` [Documentation] Query the collector logs for dropped spans from errors - [Tags] otel collector metrics errors logs dropped rejected + [Tags] access:read-only otel-collector metrics errors logs dropped rejected ${process}= RW.CLI.Run Bash File ... bash_file=otel_dropped_check.sh ... 
env=${env} diff --git a/codebundles/k8s-podresources-health/runbook.robot b/codebundles/k8s-podresources-health/runbook.robot index 0f22f7342..e4765dd47 100644 --- a/codebundles/k8s-podresources-health/runbook.robot +++ b/codebundles/k8s-podresources-health/runbook.robot @@ -30,6 +30,7 @@ Show Pods Without Resource Limit or Resource Requests Set in Namespace `${NAMESP ... liveness ... readiness ... ${NAMESPACE} + ... access:read-only ${pods_without_limits}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get pods --context=${CONTEXT} -n ${NAMESPACE} ${LABELS} --field-selector=status.phase=Running -ojson | jq -r '[.items[] as $pod | ($pod.spec.containers // [][])[] | select(.resources.limits == null) | {pod: $pod.metadata.name, container_without_limits: .name}]' ... env=${env} @@ -74,7 +75,7 @@ Show Pods Without Resource Limit or Resource Requests Set in Namespace `${NAMESP Check Pod Resource Utilization with Top in Namespace `${NAMESPACE}` [Documentation] Performs and a top command on list of labeled workloads to check pod resources. - [Tags] top resources utilization pods workloads cpu memory allocation labeled ${NAMESPACE} + [Tags] access:read-only top resources utilization pods workloads cpu memory allocation labeled ${NAMESPACE} ${pods_top}= RW.CLI.Run Cli ... cmd=for pod in $(${KUBERNETES_DISTRIBUTION_BINARY} get pods ${LABELS} -n ${NAMESPACE} --context ${CONTEXT} -o custom-columns=":metadata.name" --field-selector=status.phase=Running); do ${KUBERNETES_DISTRIBUTION_BINARY} top pod $pod -n ${NAMESPACE} --context ${CONTEXT} --containers; done ... env=${env} @@ -91,7 +92,7 @@ Check Pod Resource Utilization with Top in Namespace `${NAMESPACE}` Identify VPA Pod Resource Recommendations in Namespace `${NAMESPACE}` [Documentation] Queries the namespace for any Vertical Pod Autoscaler resource recommendations. 
- [Tags] recommendation resources utilization pods cpu memory allocation vpa ${NAMESPACE} + [Tags] access:read-only recommendation resources utilization pods cpu memory allocation vpa ${NAMESPACE} ${vpa_usage}= RW.CLI.Run Bash File ... bash_file=vpa_recommendations.sh ... env=${env} @@ -120,7 +121,7 @@ Identify VPA Pod Resource Recommendations in Namespace `${NAMESPACE}` Identify Overutilized Pods in Namespace `${NAMESPACE}` [Documentation] Scans the namespace for pods that are over utilizing resources or may be experiencing resource problems like oomkills or restarts. - [Tags] overutilized resources utilization pods cpu memory allocation ${NAMESPACE} oomkill restarts + [Tags] access:read-only overutilized resources utilization pods cpu memory allocation ${NAMESPACE} oomkill restarts ${pod_usage_analysis}= RW.CLI.Run Bash File identify_resource_contrained_pods.sh ... env=${env} ... secret_file__kubeconfig=${kubeconfig} diff --git a/codebundles/k8s-prometheus-healthcheck/runbook.robot b/codebundles/k8s-prometheus-healthcheck/runbook.robot index 39dce14ea..6762d79a2 100644 --- a/codebundles/k8s-prometheus-healthcheck/runbook.robot +++ b/codebundles/k8s-prometheus-healthcheck/runbook.robot @@ -18,6 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Check Prometheus Service Monitors in namespace `${NAMESPACE}` [Documentation] Checks the selector mappings of service monitors are valid in the namespace + [Tags] access:read-only prometheus ${sm_report}= RW.CLI.Run Bash File ... bash_file=validate_servicemonitors.sh ... env=${env} @@ -41,6 +42,7 @@ Check Prometheus Service Monitors in namespace `${NAMESPACE}` Check For Successful Rule Setup in Kubernetes Namespace `${NAMESPACE}` [Documentation] Inspects operator instance logs for failed rules setup + [Tags] access:read-only prometheus Log To Console Prometheus ${rsp}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} logs $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} get pods -l app.kubernetes.io/name=prometheus -o=jsonpath='{.items[0].metadata.name}') -c prometheus | grep -iP "(load.*.fail)" || true @@ -67,6 +69,7 @@ Check For Successful Rule Setup in Kubernetes Namespace `${NAMESPACE}` Verify Prometheus RBAC Can Access ServiceMonitors in Namespace `${PROM_NAMESPACE}` [Documentation] Fetch operator rbac and verify it has ServiceMonitors in rbac. + [Tags] access:read-only prometheus ${clusterrole}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get clusterrole/kube-prometheus-stack-operator -ojson ... show_in_rwl_cheatsheet=true @@ -89,6 +92,7 @@ Verify Prometheus RBAC Can Access ServiceMonitors in Namespace `${PROM_NAMESPACE Inspect Prometheus Operator Logs for Scraping Errors in Namespace `${NAMESPACE}` [Documentation] Inspect the prometheus operator logs for scraping errors and raise issues if any found + [Tags] access:read-only prometheus ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} logs $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} get pods -l app.kubernetes.io/name=prometheus -o=jsonpath='{.items[0].metadata.name}') -c prometheus | grep -iP "(scrape.*.error)" || true ... show_in_rwl_cheatsheet=true @@ -114,6 +118,7 @@ Inspect Prometheus Operator Logs for Scraping Errors in Namespace `${NAMESPACE}` Check Prometheus API Healthy in Namespace `${PROM_NAMESPACE}` [Documentation] Ping Prometheus healthy API endpoint for a 200 response code. + [Tags] access:read-only prometheus ${rsp}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${PROM_NAMESPACE} get pods -l app.kubernetes.io/name=prometheus -o=jsonpath='{.items[0].metadata.name}') --container prometheus -- wget -qO- -S 127.0.0.1:9090/-/healthy 2>&1 | grep "HTTP/" | awk '{print $2}' ... show_in_rwl_cheatsheet=true diff --git a/codebundles/k8s-redis-healthcheck/runbook.robot b/codebundles/k8s-redis-healthcheck/runbook.robot index fe21269e3..f60c055dc 100644 --- a/codebundles/k8s-redis-healthcheck/runbook.robot +++ b/codebundles/k8s-redis-healthcheck/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Ping `${DEPLOYMENT_NAME}` Redis Workload [Documentation] Verifies that a PING can be peformed against the redis workload. - [Tags] redis cli ping pong alive probe ready + [Tags] access:read-only redis cli ping pong alive probe ready ${rsp}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec deployment/${DEPLOYMENT_NAME} --context=${CONTEXT} -n ${NAMESPACE} -- redis-cli PING ... show_in_rwl_cheatsheet=true @@ -40,7 +40,7 @@ Ping `${DEPLOYMENT_NAME}` Redis Workload Verify `${DEPLOYMENT_NAME}` Redis Read Write Operation in Kubernetes [Documentation] Attempts to perform a write and read operation on the redis workload, checking that a key can be set, incremented, and read from. - [Tags] redis cli increment health check read write + [Tags] access:read-only redis cli increment health check read write ${set_op}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} exec deployment/${DEPLOYMENT_NAME} --context=${CONTEXT} -n ${NAMESPACE} -- redis-cli SET ${REDIS_HEALTHCHECK_KEY} 0 ... 
show_in_rwl_cheatsheet=true diff --git a/codebundles/k8s-restart-resource/runbook.robot b/codebundles/k8s-restart-resource/runbook.robot index ac20073be..8c842bdef 100644 --- a/codebundles/k8s-restart-resource/runbook.robot +++ b/codebundles/k8s-restart-resource/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Get Current Resource State with Labels `${LABELS}` [Documentation] Gets the current state of the resource before applying the restart for report review. - [Tags] resource application restart state yaml + [Tags] access:read-only resource application restart state yaml ${resource}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get daemonset,deployment,statefulset -l ${LABELS} -oyaml ... show_in_rwl_cheatsheet=true @@ -31,7 +31,7 @@ Get Current Resource State with Labels `${LABELS}` Get Resource Logs with Labels `${LABELS}` [Documentation] Collects the last approximately 200 lines of logs from the resource before restarting it. - [Tags] resource application workload logs state + [Tags] access:read-only resource application workload logs state ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} logs -l ${LABELS} --tail=200 --limit-bytes=256000 ... show_in_rwl_cheatsheet=true @@ -44,7 +44,7 @@ Get Resource Logs with Labels `${LABELS}` Restart Resource with Labels `${LABELS}` in `${CONTEXT}` [Documentation] Restarts the labeled resource in an attempt to get it out of a bad state. - [Tags] resource application restart pod kill rollout revision + [Tags] access:read-write resource application restart pod kill rollout revision ${resource_name}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get daemonset,deployment,statefulset -l ${LABELS} -o=jsonpath='{.items[0].kind}/{.items[0].metadata.name}' ... 
show_in_rwl_cheatsheet=true diff --git a/codebundles/k8s-statefulset-healthcheck/runbook.robot b/codebundles/k8s-statefulset-healthcheck/runbook.robot index d58e12203..b9996266b 100644 --- a/codebundles/k8s-statefulset-healthcheck/runbook.robot +++ b/codebundles/k8s-statefulset-healthcheck/runbook.robot @@ -27,6 +27,7 @@ Check Readiness Probe Configuration for StatefulSet `${STATEFULSET_NAME}` ... get ... statefulset ... ${statefulset_name} + ... access:read-only ${readiness_probe_health}= RW.CLI.Run Bash File ... bash_file=validate_probes.sh ... cmd_overide=./validate_probes.sh readinessProbe @@ -63,6 +64,7 @@ Check Liveness Probe Configuration for StatefulSet `${STATEFULSET_NAME}` ... get ... statefulset ... ${statefulset_name} + ... access:read-only ${liveness_probe_health}= RW.CLI.Run Bash File ... bash_file=validate_probes.sh ... cmd_overide=./validate_probes.sh livenessProbe @@ -89,7 +91,7 @@ Check Liveness Probe Configuration for StatefulSet `${STATEFULSET_NAME}` Troubleshoot StatefulSet Warning Events for `${STATEFULSET_NAME}` [Documentation] Fetches warning events related to the statefulset workload in the namespace and triages any issues found in the events. - [Tags] events workloads errors warnings get statefulset ${statefulset_name} + [Tags] access:read-only events workloads errors warnings get statefulset ${statefulset_name} ${events}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '(now - (60*60)) as $time_limit | [ .items[] | select(.type == "Warning" and (.involvedObject.kind == "StatefulSet" or .involvedObject.kind == "Pod") and (.involvedObject.name | tostring | contains("${STATEFULSET_NAME}")) and (.lastTimestamp | fromdateiso8601) >= $time_limit) | {kind: .involvedObject.kind, name: .involvedObject.name, reason: .reason, message: .message, firstTimestamp: .firstTimestamp, lastTimestamp: .lastTimestamp} ] | group_by([.kind, .name]) | map({kind: .[0].kind, name: .[0].name, count: length, reasons: map(.reason) | unique, messages: map(.message) | unique, firstTimestamp: map(.firstTimestamp | fromdateiso8601) | sort | .[0] | todateiso8601, lastTimestamp: map(.lastTimestamp | fromdateiso8601) | sort | reverse | .[0] | todateiso8601})' ... env=${env} @@ -122,7 +124,7 @@ Troubleshoot StatefulSet Warning Events for `${STATEFULSET_NAME}` Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Parses all events in a namespace within a timeframe and checks for unusual activity, raising issues for any found. - [Tags] statefulset events info state anomolies count occurences ${statefulset_name} + [Tags] access:read-only statefulset events info state anomolies count occurences ${statefulset_name} ${recent_anomalies}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '(now - (60*60)) as $time_limit | [ .items[] | select(.type != "Warning" and (.involvedObject.kind == "StatefulSet" or .involvedObject.kind == "Pod") and (.involvedObject.name | tostring | contains("${STATEFULSET_NAME}"))) | {kind: .involvedObject.kind, count: .count, name: .involvedObject.name, reason: .reason, message: .message, firstTimestamp: .firstTimestamp, lastTimestamp: .lastTimestamp, duration: (if (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601)) == 0) then 1 else (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601))/60) end) } ] | group_by([.kind, .name]) | map({kind: .[0].kind, name: .[0].name, count: (map(.count) | add), reasons: map(.reason) | unique, messages: map(.message) | unique, average_events_per_minute: (if .[0].duration == 1 then 1 else ((map(.count) | add)/.[0].duration ) end),firstTimestamp: map(.firstTimestamp | fromdateiso8601) | sort | .[0] | todateiso8601, lastTimestamp: map(.lastTimestamp | fromdateiso8601) | sort | reverse | .[0] | todateiso8601})' ... env=${env} @@ -160,7 +162,7 @@ Check StatefulSet Event Anomalies for `${STATEFULSET_NAME}` in Namespace `${NAME Fetch StatefulSet Logs for `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` and Add to Report [Documentation] Fetches the last 100 lines of logs for the given statefulset in the namespace. - [Tags] fetch log pod container errors inspect trace info ${STATEFULSET_NAME} statefulset + [Tags] access:read-only fetch log pod container errors inspect trace info ${STATEFULSET_NAME} statefulset ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs --tail=100 statefulset/${STATEFULSET_NAME} --context ${CONTEXT} -n ${NAMESPACE} ... 
env=${env} @@ -186,7 +188,7 @@ Get Related StatefulSet `${STATEFULSET_NAME}` Events Fetch Manifest Details for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Fetches the current state of the statefulset manifest for inspection. - [Tags] statefulset details manifest info ${STATEFULSET_NAME} + [Tags] access:read-only statefulset details manifest info ${STATEFULSET_NAME} ${statefulset}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get statefulset ${LABELS} --context=${CONTEXT} -n ${NAMESPACE} -o yaml ... env=${env} @@ -212,6 +214,7 @@ List Unhealthy Replica Counts for StatefulSets in Namespace `${NAMESPACE}` ... stuck ... pods ... ${NAMESPACE} + ... access:read-only RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get statefulset -n ${NAMESPACE} -o json --context ${CONTEXT} | jq -r '.items[] | select(.status.availableReplicas < .status.replicas) | "---\\nStatefulSet Name: " + (.metadata.name|tostring) + "\\nDesired Replicas: " + (.status.replicas|tostring) + "\\nAvailable Replicas: " + (.status.availableReplicas|tostring)' ... env=${env} diff --git a/codebundles/k8s-tail-logs-dynamic/runbook.robot b/codebundles/k8s-tail-logs-dynamic/runbook.robot index 4d458b45f..db09fa983 100644 --- a/codebundles/k8s-tail-logs-dynamic/runbook.robot +++ b/codebundles/k8s-tail-logs-dynamic/runbook.robot @@ -18,7 +18,7 @@ Suite Setup Suite Initialization *** Tasks *** Get `${CONTAINER_NAME}` Application Logs in Namespace `${NAMESPACE}` [Documentation] Collects the last approximately 300 lines of logs from the workload - [Tags] resource application workload logs state ${container_name} ${workload_name} + [Tags] access:read-only resource application workload logs state ${container_name} ${workload_name} ${logs}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} logs -l ${LABELS} --tail=${MAX_LOG_LINES} --max-log-requests=10 --limit-bytes=${MAX_LOG_BYTES} --since=${LOGS_SINCE} --container=${CONTAINER_NAME} ... env=${env} @@ -40,6 +40,7 @@ Tail `${CONTAINER_NAME}` Application Logs For Stacktraces ... logs ... ${container_name} ... ${workload_name} + ... access:read-only ${cmd}= Set Variable ... ${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} logs -l ${LABELS} --tail=${MAX_LOG_LINES} --max-log-requests=10 --limit-bytes=${MAX_LOG_BYTES} --since=${LOGS_SINCE} --container=${CONTAINER_NAME} IF $EXCLUDE_PATTERN != "" diff --git a/codebundles/k8s-vault-healthcheck/runbook.robot b/codebundles/k8s-vault-healthcheck/runbook.robot index 95365c3d7..7cd5ac159 100644 --- a/codebundles/k8s-vault-healthcheck/runbook.robot +++ b/codebundles/k8s-vault-healthcheck/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** Fetch Vault CSI Driver Logs in Namespace `${NAMESPACE}` [Documentation] Fetches the last 100 lines of logs for the vault CSI driver. - [Tags] fetch log pod container errors inspect trace info vault csi driver + [Tags] access:read-only fetch log pod container errors inspect trace info vault csi driver ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs --tail=100 daemonset.apps/vault-csi-provider --context ${CONTEXT} -n ${NAMESPACE} ... env=${env} @@ -33,7 +33,7 @@ Fetch Vault CSI Driver Logs in Namespace `${NAMESPACE}` Get Vault CSI Driver Warning Events in `${NAMESPACE}` [Documentation] Fetches warning-type events related to the vault CSI driver. - [Tags] events errors warnings get vault csi driver + [Tags] access:read-only events errors warnings get vault csi driver ${events}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --field-selector type=Warning --context ${CONTEXT} -n ${NAMESPACE} | grep -i "vault-csi-provider" || true ... 
env=${env} @@ -62,6 +62,7 @@ Check Vault CSI Driver Replicas ... rollout ... stuck ... pods + ... access:read-only ${daemonset_describe}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} describe daemonset.apps/vault-csi-provider --context ${CONTEXT} -n ${NAMESPACE} ... env=${env} @@ -132,7 +133,7 @@ Check Vault CSI Driver Replicas Fetch Vault Pod Workload Logs in Namespace `${NAMESPACE}` with Labels `${LABELS}` [Documentation] Fetches the last 100 lines of logs for all vault pod workloads in the vault namespace. - [Tags] fetch log pod container errors inspect trace info statefulset vault + [Tags] access:read-only fetch log pod container errors inspect trace info statefulset vault ${logs}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs --tail=100 statefulset.apps/vault --context ${CONTEXT} -n ${NAMESPACE} ... env=${env} @@ -147,9 +148,9 @@ Fetch Vault Pod Workload Logs in Namespace `${NAMESPACE}` with Labels `${LABELS} RW.Core.Add Pre To Report ${found_logs} RW.Core.Add Pre To Report Commands Used: ${history} -Get Related Vault Events in Namespace `$${NAMESPACE}` +Get Related Vault Events in Namespace `${NAMESPACE}` [Documentation] Fetches all warning-type events related to vault in the vault namespace. - [Tags] events workloads errors warnings get statefulset vault + [Tags] access:read-only events workloads errors warnings get statefulset vault ${events}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --field-selector type=Warning --context ${CONTEXT} -n ${NAMESPACE} | grep -i "vault" || true ... env=${env} @@ -166,7 +167,7 @@ Get Related Vault Events in Namespace `$${NAMESPACE}` Fetch Vault StatefulSet Manifest Details in `${NAMESPACE}` [Documentation] Fetches the current state of the vault statefulset manifest for inspection. - [Tags] statefulset details manifest info vault + [Tags] access:read-only statefulset details manifest info vault ${statefulset}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get statefulset.apps/vault --context=${CONTEXT} -n ${NAMESPACE} -o yaml ... env=${env} @@ -179,7 +180,7 @@ Fetch Vault StatefulSet Manifest Details in `${NAMESPACE}` Fetch Vault DaemonSet Manifest Details in Kubernetes Cluster `${NAMESPACE}` [Documentation] Fetches the current state of the vault daemonset manifest for inspection. - [Tags] statefulset details manifest info vault + [Tags] access:read-only statefulset details manifest info vault ${statefulset}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get daemonset.apps/vault-csi-provider --context=${CONTEXT} -n ${NAMESPACE} -o yaml ... env=${env} @@ -192,7 +193,7 @@ Fetch Vault DaemonSet Manifest Details in Kubernetes Cluster `${NAMESPACE}` Verify Vault Availability in Namespace `${NAMESPACE}` and Context `${CONTEXT}` [Documentation] Curls the vault endpoint and checks the HTTP response code. - [Tags] http curl vault web code ok available + [Tags] access:read-only http curl vault web code ok available ${rsp}= RW.CLI.Run Cli ... cmd=curl ${VAULT_URL} ... show_in_rwl_cheatsheet=true @@ -222,6 +223,7 @@ Check Vault StatefulSet Replicas in `NAMESPACE` ... stuck ... pods ... vault + ... access:read-only ${statefulset}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get statefulset.apps/vault --context=${CONTEXT} -n ${NAMESPACE} -o json ... 
env=${env} diff --git a/codebundles/terraform-cloud-workspace-lock-check/runbook.robot b/codebundles/terraform-cloud-workspace-lock-check/runbook.robot index 6880295fc..4fb9edb30 100644 --- a/codebundles/terraform-cloud-workspace-lock-check/runbook.robot +++ b/codebundles/terraform-cloud-workspace-lock-check/runbook.robot @@ -16,7 +16,7 @@ Suite Setup Suite Initialization *** Tasks *** Checking whether the Terraform Cloud Workspace '${TERRAFORM_WORKSPACE_NAME}' is in a locked state [Documentation] Use curl to check whether the Terraform Cloud Workspace is in a locked state - [Tags] terraform cloud workspace lock + [Tags] access:read-only terraform cloud workspace lock ${curl_rsp}= RW.CLI.Run Cli ... cmd=TERRAFORM_API_TOKEN_VALUE=$(cat $TERRAFORM_API_TOKEN) && curl --header "Authorization: Bearer $TERRAFORM_API_TOKEN_VALUE" --header "Content-Type: application/vnd.api+json" -s '${TERRAFORM_API_URL}/organizations/${TERRAFORM_ORGANIZATION_NAME}/workspaces/${TERRAFORM_WORKSPACE_NAME}' ... show_in_rwl_cheatsheet=true diff --git a/codebundles/test-issue/meta.yaml b/codebundles/test-issue/meta.yaml deleted file mode 100644 index 649b28667..000000000 --- a/codebundles/test-issue/meta.yaml +++ /dev/null @@ -1 +0,0 @@ -commands: [] diff --git a/codebundles/test-issue/runbook.robot b/codebundles/test-issue/runbook.robot deleted file mode 100644 index cf90ef2b0..000000000 --- a/codebundles/test-issue/runbook.robot +++ /dev/null @@ -1,31 +0,0 @@ -*** Settings *** -Documentation A codebundle for testing the issues feature. Purely for testing flow. -Metadata Author jon-funk -Metadata Display Name Test Issues -Metadata Supports Test - -Library BuiltIn -Library RW.Core -Library RW.CLI -Library RW.platform - -Suite Setup Suite Initialization - - -*** Tasks *** -Raise Full Issue on `${RESOURCE_TYPE}` in `${SCOPE}` - [Documentation] Always raises an issue with full content - [Tags] test - ${issue}= RW.CLI.Run Cli - ... cmd=echo "issue" - RW.CLI.Parse Cli Output By Line - ... 
rsp=${issue} - ... set_severity_level=4 - ... set_issue_expected=We expected there to not be an issue. - ... set_issue_actual=We found a synthetic issue. - ... set_issue_title=Synthetic Issue Raised - ... set_issue_details=This issue was forcibly raised. - ... set_issue_next_steps=Next steps provided with: $_line - ... _line__raise_issue_if_contains=issue - ${history}= RW.CLI.Pop Shell History - RW.Core.Add Pre To Report Commands Used: ${history}