diff --git a/.gitignore b/.gitignore index 23fd82d9b..735ce0952 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ build *egg-info .kube *.out +# Ignore SSH private key files +*.pem # Ignore Terraform state files **/terraform.tfstate diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml new file mode 100644 index 000000000..8bb47cd19 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -0,0 +1,29 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + platform: aws + generationRules: + - resourceTypes: + - ec2_instance + matchRules: + - type: and + matches: + - type: pattern + pattern: "jenkins-server" + properties: [tag-values] + mode: substring + - type: pattern + pattern: "running" + properties: [state/Name] + mode: substring + + slxs: + - baseName: jenkins-instance-health + levelOfDetail: detailed + qualifiers: ["resource"] + baseTemplateName: jenkins-instance-health + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: jenkins-instance-health-taskset.yaml diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml new file mode 100644 index 000000000..ee567a0d4 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml @@ -0,0 +1,49 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{default_location}} + description: The number of failed Jenkins builds. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/jenkins-health/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + configProvided: + - name: JENKINS_URL + value: {{custom.jenkins_url}} + secretsProvided: + - name: JENKINS_USERNAME + workspaceKey: {{custom.jenkins_username}} + - name: JENKINS_TOKEN + workspaceKey: {{custom.jenkins_token}} + alerts: + warning: + operator: < + threshold: '1' + for: '20m' + ticket: + operator: < + threshold: '1' + for: '40m' + page: + operator: '==' + threshold: '0' + for: '' \ No newline at end of file diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml new file mode 100644 index 000000000..896506f86 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml @@ -0,0 +1,20 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/Jenkins_logo.svg + alias: {{match_resource.resource.tags.Name}} Instance Health + # alias: Jenkins Health + asMeasuredBy: The number of failed Jenkins builds. + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{workspace.owner_email}} + statement: The number of failed Jenkins builds should be zero. 
+ additionalContext: [] \ No newline at end of file diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml new file mode 100644 index 000000000..eabbe31f9 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml @@ -0,0 +1,31 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/jenkins-health/runbook.robot + configProvided: + - name: JENKINS_URL + value: {{custom.jenkins_url}} + secretsProvided: + - name: JENKINS_USERNAME + workspaceKey: {{custom.jenkins_username}} + - name: JENKINS_TOKEN + workspaceKey: {{custom.jenkins_token}} + \ No newline at end of file diff --git a/codebundles/jenkins-health/.test/README.md b/codebundles/jenkins-health/.test/README.md new file mode 100644 index 000000000..ee8b5a3c5 --- /dev/null +++ b/codebundles/jenkins-health/.test/README.md @@ -0,0 +1,135 @@ +### How to test this codebundle? 
+ +## Prerequisites + +The following credentials and configuration are required: + +- Jenkins URL +- Jenkins username +- Jenkins API token + +## Configuration + +**Infrastructure Deployment** + +Purpose: Cloud infrastructure provisioning and management using Terraform + +#### Credential Setup + +Navigate to the `.test/terraform` directory and configure two secret files for authentication: + +`cb.secret` - Jenkins and RunWhen Credentials + +Create this file with the following environment variables: + + ```sh + export RW_PAT="" + export RW_WORKSPACE="" + export RW_API_URL="papi.beta.runwhen.com" + + export JENKINS_URL="" + export JENKINS_USERNAME="" + export JENKINS_TOKEN="" + ``` + + +`tf.secret` - Terraform Deployment Credentials + +Create this file with the following environment variables: + + ```sh + export AWS_DEFAULT_REGION="" + export AWS_ACCESS_KEY_ID="" + export AWS_SECRET_ACCESS_KEY="" + export AWS_SESSION_TOKEN="" # Optional: Include if using temporary credentials + ``` + +#### Testing Workflow + +1. Build test infra: + ```sh + task build-infra + ``` + +2. Configure Jenkins and create pipelines: + + - **Initial Setup**: Follow the Jenkins UI prompts to install suggested plugins. + + - **Reproducing Scenarios**: + + - **Failed Pipeline Logs**: + Create a `Freestyle project` and choose the `Execute shell` option under `Build Steps` with an arbitrary script that will fail, such as a syntax error. + + - **Long Running Pipelines**: + Create a `Freestyle project` and choose the `Execute shell` option under `Build Steps`. Use the following script: + ```sh + #!/bin/bash + + # Print the start time + echo "Script started at: $(date)" + + # Sleep for 30 minutes (1800 seconds) + sleep 1800 + + # Print the end time + echo "Script ended at: $(date)" + ``` + + - **Queued Builds**: + Create three `Freestyle projects` using the above long-running script. 
With the default Jenkins setup having two executors, triggering all three projects will result in one being queued for a long time. + + - **Failed Tests**: + Create a `Pipeline` project and under the Definition section, paste the following Groovy script: + ```groovy + pipeline { + agent any + + tools { + // Install the Maven version configured as "M3" and add it to the path. + maven "M3" + } + + stages { + stage('Build') { + steps { + // Get some code from a GitHub repository + git 'https://github.com/saurabh3460/simple-maven-project-with-tests.git' + + // Run Maven on a Unix agent. + sh "mvn -Dmaven.test.failure.ignore=true clean package" + + // To run Maven on a Windows agent, use + // bat "mvn -Dmaven.test.failure.ignore=true clean package" + } + + post { + // If Maven was able to run the tests, even if some of the test + // failed, record the test results and archive the jar file. + success { + junit '**/target/surefire-reports/TEST-*.xml' + archiveArtifacts 'target/*.jar' + } + } + } + } + } + ``` + + +3. Generate RunWhen Configurations + ```sh + task + ``` + +4. Upload generated SLx to RunWhen Platform + + ```sh + task upload-slxs + ``` + +5. Finally, after testing, clean up the test infrastructure. 
+ + ```sh + task clean + ``` + diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml new file mode 100644 index 000000000..247ef0bd6 --- /dev/null +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -0,0 +1,381 @@ +version: "3" + +tasks: + default: + desc: "Generate workspaceInfo and rebuild/test" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: check-and-cleanup-terraform + # - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: build-terraform-infra + + check-unpushed-commits: + desc: Check if outstanding commits or file updates need to be pushed before testing. + vars: + # Specify the base directory relative to your Taskfile location + BASE_DIR: "../" + cmds: + - | + echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED_FILES" ]; then + echo "✗" + echo "Uncommitted changes found:" + echo "$UNCOMMITTED_FILES" + echo "Remember to commit & push changes before executing the 'run-rwl-discovery' task." + echo "------------" + exit 1 + else + echo "√" + echo "No uncommitted changes in specified directories." + echo "------------" + fi + - | + echo "Checking for unpushed commits in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + git fetch origin + UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNPUSHED_FILES" ]; then + echo "✗" + echo "Unpushed commits found:" + echo "$UNPUSHED_FILES" + echo "Remember to push changes before executing the 'run-rwl-discovery' task." 
+ echo "------------" + exit 1 + else + echo "√" + echo "No unpushed commits in specified directories." + echo "------------" + fi + silent: true + + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + AWS_ACCESS_KEY_ID: "{{.AWS_ACCESS_KEY_ID}}" + AWS_SECRET_ACCESS_KEY: "{{.AWS_SECRET_ACCESS_KEY}}" + AWS_DEFAULT_REGION: "{{.AWS_DEFAULT_REGION}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + cmds: + - | + source terraform/cb.secret + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + + ## Fetch the Jenkins URL from the Terraform state outputs + pushd terraform > /dev/null + jenkins_url=$(terraform show -json terraform.tfstate | jq -r ' + .values.outputs.jenkins_url.value // empty') + + popd > /dev/null + + # "// empty" makes jq print nothing (not "null") for a missing output, so -z catches it + if [ -z "$jenkins_url" ]; then + echo "Error: Missing jenkins_url details. Ensure Terraform plan has been applied." + exit 1 + fi + + # Generate workspaceInfo.yaml with the fetched Jenkins details + cat <<EOF > workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01-us-west1 + defaultLOD: detailed + cloudConfig: + aws: + awsAccessKeyId: "$AWS_ACCESS_KEY_ID" + awsSecretAccessKey: "$AWS_SECRET_ACCESS_KEY" + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + jenkins_url: $jenkins_url + jenkins_username: jenkins_username + jenkins_token: jenkins_token + EOF + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Stopping and removing existing container $CONTAINER_NAME..." 
+ docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Removing existing stopped container $CONTAINER_NAME..." + docker rm $CONTAINER_NAME + else + echo "No existing container named $CONTAINER_NAME found." + fi + + echo "Cleaning up output directory..." + sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; } + mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; } + + echo "Starting new container $CONTAINER_NAME..." + + docker run --name $CONTAINER_NAME -e DEBUG_LOGGING=true -p 8081:8081 -v "$(pwd)":/shared -d ghcr.io/runwhen-contrib/runwhen-local:latest || { + echo "Failed to start container"; exit 1; + } + + echo "Running workspace builder script in container..." + docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { + echo "Error executing script in container"; exit 1; + } + + echo "Review generated config files under output/workspaces/" + silent: true + + check-terraform-infra: + desc: "Check if Terraform has any deployed infrastructure in the terraform subdirectory" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Navigate to the Terraform directory + if [ ! -d "terraform" ]; then + echo "Terraform directory not found." + exit 1 + fi + cd terraform + + # Check if Terraform state file exists + if [ ! -f "terraform.tfstate" ]; then + echo "No Terraform state file found in the terraform directory. No infrastructure is deployed." + exit 0 + fi + + # List resources in Terraform state + resources=$(terraform state list) + + # Check if any resources are listed in the state file + if [ -n "$resources" ]; then + echo "Deployed infrastructure detected." + echo "$resources" + exit 0 + else + echo "No deployed infrastructure found in Terraform state." 
+ exit 0 + fi + silent: true + + build-terraform-infra: + desc: "Run terraform apply" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Terraform apply aborted." + exit 1 + fi + task format-and-init-terraform + echo "Starting Terraform Build of Terraform infrastructure..." + terraform apply -auto-approve || { + echo "Failed to build Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure build completed." + silent: true + + check-rwp-config: + desc: Check if env vars are set for RunWhen Platform + cmds: + - | + source terraform/cb.secret + missing_vars=() + + if [ -z "$RW_WORKSPACE" ]; then + missing_vars+=("RW_WORKSPACE") + fi + + if [ -z "$RW_API_URL" ]; then + missing_vars+=("RW_API_URL") + fi + + if [ -z "$RW_PAT" ]; then + missing_vars+=("RW_PAT") + fi + + if [ ${#missing_vars[@]} -ne 0 ]; then + echo "The following required environment variables are missing: ${missing_vars[*]}" + exit 1 + fi + silent: true + + upload-slxs: + desc: "Upload SLX files to the appropriate URL" + env: + RW_WORKSPACE: "{{.RW_WORKSPACE}}" + RW_API_URL: "{{.RW_API_URL}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Upload aborted." + exit 1 + fi + # Upload Secrets + pushd terraform > /dev/null + jenkins_token=$(terraform show -json | jq -r ' + .values.outputs.jenkins_api_token.value // empty') + + popd > /dev/null + + # "// empty" makes jq print nothing (not "null") for a missing output, so -z catches it + if [ -z "$jenkins_token" ]; then + echo "Error: Missing jenkins_token details. Ensure Terraform plan has been applied." 
+ exit 1 + fi + # Create Secrets (payload built with jq so the token value is JSON-escaped) + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" + PAYLOAD=$(jq -n --arg token "$jenkins_token" '{secrets: {jenkins_username: "admin", jenkins_token: $token}}') + echo "Uploading secrets to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded secrets to $URL" + else + echo "Failed to upload secrets to $URL. Unexpected response code: $response_code" + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + PAYLOAD=$(jq -n --arg commitMsg "Creating new SLX $SLX_NAME" '{ commitMsg: $commitMsg, files: {} }') + for file in slx.yaml runbook.yaml sli.yaml; do + if [ -f "$dir/$file" ]; then + CONTENT=$(cat "$dir/$file") + PAYLOAD=$(echo "$PAYLOAD" | jq --arg fileContent "$CONTENT" --arg fileName "$file" '.files[$fileName] = $fileContent') + fi + done + + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Uploading SLX: $SLX_NAME to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded SLX: $SLX_NAME to $URL" + elif [[ "$response_code" == "405" ]]; then + echo "Failed to upload SLX: $SLX_NAME to $URL. Method not allowed (405)." + else + echo "Failed to upload SLX: $SLX_NAME to $URL. 
Unexpected response code: $response_code" + fi + fi + done + silent: true + + delete-slxs: + desc: "Delete SLX objects from the appropriate URL" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + RW_API_URL: "{{.RW_API_URL}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Deletion aborted." + exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Deleting SLX: $SLX_NAME from $URL" + response_code=$(curl -X DELETE "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -w "%{http_code}" -o /dev/null -s) + + if [[ "$response_code" == "200" || "$response_code" == "204" ]]; then + echo "Successfully deleted SLX: $SLX_NAME from $URL" + elif [[ "$response_code" == "405" ]]; then + echo "Failed to delete SLX: $SLX_NAME from $URL. Method not allowed (405)." + else + echo "Failed to delete SLX: $SLX_NAME from $URL. Unexpected response code: $response_code" + fi + fi + done + silent: true + + cleanup-terraform-infra: + desc: "Cleanup deployed Terraform infrastructure" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Cleanup aborted." + exit 1 + fi + + echo "Starting cleanup of Terraform infrastructure..." + terraform destroy -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure cleanup completed." 
+ silent: true + + check-and-cleanup-terraform: + desc: "Check and clean up deployed Terraform infrastructure if it exists" + cmds: + - | + # Capture the output of check-terraform-infra + infra_output=$(task check-terraform-infra | tee /dev/tty) + + # Check if output contains indication of deployed infrastructure + if echo "$infra_output" | grep -q "Deployed infrastructure detected"; then + echo "Infrastructure detected; proceeding with cleanup." + task cleanup-terraform-infra + else + echo "No deployed infrastructure found; no cleanup required." + fi + silent: true + + clean-rwl-discovery: + desc: "Check and clean up RunWhen Local discovery output" + cmds: + - | + sudo rm -rf output + rm -f workspaceInfo.yaml + silent: true diff --git a/codebundles/jenkins-health/.test/terraform/Taskfile.yaml b/codebundles/jenkins-health/.test/terraform/Taskfile.yaml new file mode 100644 index 000000000..08e0e835d --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/Taskfile.yaml @@ -0,0 +1,68 @@ +version: '3' + +env: + TERM: screen-256color + +tasks: + default: + cmds: + - task: test + + test: + desc: Run tests. + cmds: + - task: test-terraform + + clean: + desc: Clean the environment. + cmds: + - task: clean-terraform + + clean-terraform: + desc: Clean the terraform environment (remove terraform directories and files) + cmds: + - find . -type d -name .terraform -exec rm -rf {} + + - find . -type f -name .terraform.lock.hcl -delete + + format-and-init-terraform: + desc: Run Terraform fmt and init + cmds: + - | + terraform fmt + terraform init + test-terraform: + desc: Run tests for all terraform directories. + silent: true + env: + DIRECTORIES: + sh: find . -path '*/.terraform/*' -prune -o -name '*.tf' -type f -exec dirname {} \; | sort -u + cmds: + - | + BOLD=$(tput bold) + NORM=$(tput sgr0) + + CWD=$PWD + + for d in $DIRECTORIES; do + cd $d + echo "${BOLD}$PWD:${NORM}" + if ! 
terraform fmt -check=true -list=false -recursive=false; then + echo " ✗ terraform fmt" && exit 1 + else + echo " √ terraform fmt" + fi + + if ! terraform init -backend=false -input=false -get=true -no-color > /dev/null; then + echo " ✗ terraform init" && exit 1 + else + echo " √ terraform init" + fi + + if ! terraform validate > /dev/null; then + echo " ✗ terraform validate" && exit 1 + else + echo " √ terraform validate" + fi + + cd $CWD + done \ No newline at end of file diff --git a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh new file mode 100755 index 000000000..f0b2ae3e3 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh @@ -0,0 +1,72 @@ +#!/usr/bin/bash +# echo '{"token": "hello"}' +set -x + +# 1) Read JSON input from stdin +read -r input + +# All debug statements go to stderr via >&2 +echo "[DEBUG] Received JSON input: $input" >&2 + +JENKINS_URL=$(echo "$input" | jq -r .jenkins_url) +USERNAME=$(echo "$input" | jq -r .username) +PASSWORD=$(echo "$input" | jq -r .password) + +echo "[DEBUG] Jenkins URL: $JENKINS_URL" >&2 +echo "[DEBUG] Username: $USERNAME" >&2 + +# 2) Wait for Jenkins up to MAX_ATTEMPTS +MAX_ATTEMPTS=100 +SLEEP_SECONDS=10 +echo "[DEBUG] Checking Jenkins readiness up to $MAX_ATTEMPTS attempts..." >&2 + +for i in $(seq 1 "$MAX_ATTEMPTS"); do + STATUS_CODE=$(curl -s -o /dev/null -w '%{http_code}' \ + --max-time 5 \ + -u "${USERNAME}:${PASSWORD}" \ + "${JENKINS_URL}/api/json" || echo "curl_error") + + if [ "$STATUS_CODE" = "200" ]; then + echo "[DEBUG] Jenkins responded HTTP 200 on attempt #$i." >&2 + break + else + echo "[DEBUG] Attempt #$i: HTTP $STATUS_CODE. Retrying in $SLEEP_SECONDS seconds..." >&2 + sleep "$SLEEP_SECONDS" + fi + + if [ "$i" -eq "$MAX_ATTEMPTS" ]; then + echo "[ERROR] Jenkins not ready after $MAX_ATTEMPTS attempts." >&2 + # Return some valid JSON to Terraform (it sees failure). 
+ echo '{"error":"Jenkins never returned 200"}' + exit 1 + fi +done + +# 3) Generate a new token +echo "[DEBUG] Generating a new token via REST..." >&2 + +RESPONSE=$(curl -s -X POST \ + --max-time 10 \ + -u "${USERNAME}:${PASSWORD}" \ + --data "newTokenName=terraformToken" \ + "${JENKINS_URL}/user/${USERNAME}/descriptorByName/jenkins.security.ApiTokenProperty/generateNewToken" || true) + +echo "[DEBUG] Response: $RESPONSE" >&2 + +TOKEN_VALUE=$(echo "$RESPONSE" | jq -r '.data.tokenValue' 2>/dev/null || echo "") + +if [ -z "$TOKEN_VALUE" ] || [ "$TOKEN_VALUE" = "null" ]; then + echo "[ERROR] Could not parse a valid token from the response." >&2 + echo "[ERROR] Full response: $RESPONSE" >&2 + echo '{"error":"Token generation failed"}' + exit 1 +fi + +echo "[DEBUG] Successfully generated token: $TOKEN_VALUE" >&2 + +# 4) Print only JSON to stdout +cat < + + + A Freestyle job deliberately configured to fail for testing. + false + + + + + + true + false + false + false + + + false + + + + + + + + + + + diff --git a/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml b/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml new file mode 100644 index 000000000..79da4b607 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml @@ -0,0 +1,46 @@ + + + + A sample Pipeline job that uses Maven (M3) to build a simple Maven project and run tests. + false + + + + + true + + + false + diff --git a/codebundles/jenkins-health/.test/terraform/long-running-job.xml b/codebundles/jenkins-health/.test/terraform/long-running-job.xml new file mode 100644 index 000000000..16e172d60 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/long-running-job.xml @@ -0,0 +1,43 @@ + + + + Freestyle job that runs a 30-minute sleep script. 
+ false + + + + + + true + false + false + false + + + + + false + + + + + + + + + + + + + diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf new file mode 100644 index 000000000..1a08ae4a4 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -0,0 +1,384 @@ +resource "random_password" "jenkins_admin_password" { + length = 12 + special = false + min_upper = 1 + min_lower = 1 + min_numeric = 1 +} + +# Get latest Ubuntu AMI +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +# Generate SSH key +resource "tls_private_key" "jenkins_key" { + algorithm = "RSA" + rsa_bits = 2048 +} + +resource "aws_key_pair" "generated_key" { + key_name = "jenkins-key" + public_key = tls_private_key.jenkins_key.public_key_openssh +} + +# Save private key locally +resource "local_file" "private_key" { + content = tls_private_key.jenkins_key.private_key_pem + filename = "jenkins-key.pem" + + provisioner "local-exec" { + command = "chmod 400 jenkins-key.pem" + } +} + +# VPC Configuration +resource "aws_vpc" "jenkins_vpc" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "jenkins-vpc" + } +} + +# Internet Gateway +resource "aws_internet_gateway" "jenkins_igw" { + vpc_id = aws_vpc.jenkins_vpc.id + + tags = { + Name = "jenkins-igw" + } +} + +# Public Subnet +resource "aws_subnet" "jenkins_subnet" { + vpc_id = aws_vpc.jenkins_vpc.id + cidr_block = "10.0.1.0/24" + map_public_ip_on_launch = true + availability_zone = "us-west-2a" + + tags = { + Name = "jenkins-subnet" + } +} + +# Route Table +resource "aws_route_table" "jenkins_rt" { + vpc_id = aws_vpc.jenkins_vpc.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = 
aws_internet_gateway.jenkins_igw.id + } + + tags = { + Name = "jenkins-rt" + } +} + +# Route Table Association +resource "aws_route_table_association" "jenkins_rta" { + subnet_id = aws_subnet.jenkins_subnet.id + route_table_id = aws_route_table.jenkins_rt.id +} + +# Security Group +resource "aws_security_group" "jenkins_sg" { + name = "jenkins-sg" + description = "Security group for Jenkins server" + vpc_id = aws_vpc.jenkins_vpc.id + + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 8080 + to_port = 8080 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +# EC2 Instance +resource "aws_instance" "jenkins_server" { + ami = data.aws_ami.ubuntu.id + instance_type = "t2.micro" + subnet_id = aws_subnet.jenkins_subnet.id + vpc_security_group_ids = [aws_security_group.jenkins_sg.id] + key_name = aws_key_pair.generated_key.key_name + associate_public_ip_address = true + + user_data = <<-EOF + #!/bin/bash + apt-get update + apt-get install -y openjdk-17-jdk + curl -fsSL https://pkg.jenkins.io/debian-stable/jenkins.io-2023.key | tee /usr/share/keyrings/jenkins-keyring.asc > /dev/null + echo deb [signed-by=/usr/share/keyrings/jenkins-keyring.asc] https://pkg.jenkins.io/debian-stable binary/ | tee /etc/apt/sources.list.d/jenkins.list > /dev/null + apt-get update && apt-get install -y jenkins && systemctl enable jenkins && systemctl start jenkins + + # Wait a bit for Jenkins to start + sleep 60 + + # Retrieve the initial admin password (only valid until we run our Groovy script) + JENKINS_PASS=$(sudo cat /var/lib/jenkins/secrets/initialAdminPassword) + + # Download Jenkins CLI + wget -q http://localhost:8080/jnlpJars/jenkins-cli.jar + + # Install the Job DSL plugin + echo "[INFO] Installing Job DSL plugin..." 
+ java -jar jenkins-cli.jar \ + -s http://localhost:8080 \ + -auth "admin:$JENKINS_PASS" \ + install-plugin job-dsl -deploy + + echo "[INFO] Installing Pipeline plugin (workflow-aggregator)..." + java -jar jenkins-cli.jar \ + -s "http://localhost:8080" \ + -auth "admin:$JENKINS_PASS" \ + install-plugin workflow-aggregator -deploy + + echo "[INFO] Installing git and docker plugin..." + java -jar jenkins-cli.jar \ + -s "http://localhost:8080" \ + -auth "admin:$JENKINS_PASS" \ + install-plugin git docker-plugin docker-workflow -deploy + + echo "[INFO] Restarting Jenkins..." + java -jar jenkins-cli.jar \ + -s http://localhost:8080 \ + -auth "admin:$JENKINS_PASS" \ + safe-restart + + sleep 30 + + # Create Groovy script to set Jenkins to "INITIAL_SETUP_COMPLETED" + # and create a new admin user with the random password + cat < create_admin.groovy + import jenkins.model.* + import hudson.security.* + import jenkins.install.* + + def instance = Jenkins.getInstance() + + // Skip the Jenkins setup wizard + instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) + + // Disable CSRF + instance.setCrumbIssuer(null) + + // Create admin user with a random password + def hudsonRealm = new HudsonPrivateSecurityRealm(false) + hudsonRealm.createAccount("admin", "${random_password.jenkins_admin_password.result}") + instance.setSecurityRealm(hudsonRealm) + + def strategy = new FullControlOnceLoggedInAuthorizationStrategy() + strategy.setAllowAnonymousRead(false) + instance.setAuthorizationStrategy(strategy) + + instance.save() + GROOVY + + # Use the initial Jenkins password to run the Groovy script + java -jar jenkins-cli.jar \ + -s http://localhost:8080 \ + -auth admin:$JENKINS_PASS \ + groovy = < create_admin.groovy || { + echo "Failed to create admin user" + exit 1 + } + + rm -f create_admin.groovy + + # (Optional) Additional setup commands, e.g. Docker, etc. + # ... 
+ # Add Docker's official GPG key: + apt-get update + apt-get -y install ca-certificates curl + install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + chmod a+r /etc/apt/keyrings/docker.asc + + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + apt-get update + apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + + TCP_ADDRESS="tcp://127.0.0.1:2375" + mkdir -p /etc/systemd/system/docker.service.d + cat < /etc/systemd/system/docker.service.d/override.conf + [Service] + ExecStart= + ExecStart=/usr/bin/dockerd -H $TCP_ADDRESS -H unix:///var/run/docker.sock + ELF + + # Reload systemctl configuration + echo "Reloading systemctl configuration..." + systemctl daemon-reload + + # Restart Docker + echo "Restarting Docker service..." + systemctl restart docker.service + groupadd docker + usermod -aG docker ubuntu + usermod -aG docker jenkins + EOF + + tags = { + Name = "jenkins-server", + lifecycle = "deleteme" + } +} + + +data "external" "jenkins_token" { + depends_on = [aws_instance.jenkins_server] + program = ["bash", "./create_jenkins_token.sh"] + + # These JSON values get passed on stdin to the script + query = { + jenkins_url = "http://${aws_instance.jenkins_server.public_ip}:8080" + username = "admin" + password = "${random_password.jenkins_admin_password.result}" + } +} + +resource "null_resource" "create_jobs" { + depends_on = [data.external.jenkins_token] + + provisioner "local-exec" { + command = <<-EOT + #!/usr/bin/env bash + + TOKEN='${data.external.jenkins_token.result["token"]}' + JENKINS_URL="http://${aws_instance.jenkins_server.public_ip}:8080" + + # Define a function to check if a job exists. 
If yes, update; if not, create. + function upsert_job() { + local job_name="$1" + local config_file="$2" + + # Check if job exists by hitting its /api/json + local status_code + status_code=$(curl -s -o /dev/null -w '%%{http_code}' -u "admin:$TOKEN" "$JENKINS_URL/job/$job_name/api/json") + + if [ "$status_code" = "200" ]; then + echo "Updating job: $job_name" + curl -X POST -u "admin:$TOKEN" \ + -H "Content-Type: application/xml" \ + --data-binary @"$config_file" \ + "$JENKINS_URL/job/$job_name/config.xml" + else + echo "Creating job: $job_name" + curl -X POST -u "admin:$TOKEN" \ + -H "Content-Type: application/xml" \ + --data-binary @"$config_file" \ + "$JENKINS_URL/createItem?name=$job_name" + fi + } + + # Upsert each job + upsert_job "the-fastest-job" "long-running-job.xml" + upsert_job "this-never-breaks" "failed-job.xml" + upsert_job "my-fun-pipeline" "failed-pipeline.xml" + upsert_job "python-docker" "python-docker-pipeline.xml" + + # Now queue the slow jobs (build them) -- same as before + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/this-never-breaks/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/my-fun-pipeline/build" + + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/python-docker/build" + EOT + # This ensures /bin/bash is used: + interpreter = ["/bin/bash", "-c"] + } +} + +# Create IAM user for Jenkins +resource "aws_iam_user" "jenkins_user" { + name = "jenkins-user" +} + +# Create access key for the IAM user +resource "aws_iam_access_key" "jenkins_user_key" { + user = aws_iam_user.jenkins_user.name +} + +# Attach policy to the user +resource "aws_iam_user_policy_attachment" "jenkins_user_policy" { + user = aws_iam_user.jenkins_user.name + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2FullAccess" +} + +# Output the 
credentials +output "jenkins_user_access_key" { + value = aws_iam_access_key.jenkins_user_key.id + sensitive = true +} + +output "jenkins_user_secret_key" { + value = aws_iam_access_key.jenkins_user_key.secret + sensitive = true +} + +output "jenkins_public_ip" { + value = aws_instance.jenkins_server.public_ip +} + +output "ssh_connection_string" { + value = "ssh -i jenkins-key.pem ubuntu@${aws_instance.jenkins_server.public_ip}" +} + +output "jenkins_admin_password" { + value = random_password.jenkins_admin_password.result + sensitive = true +} + +output "fetch_admin_passwrd" { + value = "JENKINS_PASSWORD=$(cd terraform && terraform show -json | jq -r '.values.outputs.jenkins_admin_password.value')" +} + +output "jenkins_url" { + value = "http://${aws_instance.jenkins_server.public_ip}:8080" +} + +output "jenkins_api_token" { + value = data.external.jenkins_token.result["token"] + sensitive = true +} + +output "fetch_jenkins_api_token" { + value = "JENKINS_TOKEN=$(cd terraform && terraform show -json | jq -r '.values.outputs.jenkins_api_token.value')" +} + diff --git a/codebundles/jenkins-health/.test/terraform/provider.tf b/codebundles/jenkins-health/.test/terraform/provider.tf new file mode 100644 index 000000000..aa39e393f --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/provider.tf @@ -0,0 +1,3 @@ +provider "aws" { + region = "us-west-2" # Replace with your desired region +} \ No newline at end of file diff --git a/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml b/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml new file mode 100644 index 000000000..5e6d93455 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + +false + + + +2 + + +https://github.com/saurabh3460/python-testing-ci.git + + + + +main + + +false + + + +Jenkinsfile +true + + +false + \ No newline at end of file diff --git a/codebundles/jenkins-health/README.md 
b/codebundles/jenkins-health/README.md new file mode 100644 index 000000000..db6d3096e --- /dev/null +++ b/codebundles/jenkins-health/README.md @@ -0,0 +1,25 @@ +# AWS Jenkins Health + +This CodeBundle monitors and evaluates the health of Jenkins using the Jenkins REST API. + +## SLI +The SLI produces a score of 0 (bad), 1 (good), or a value in between. This score is generated by capturing the following: +- Check if Jenkins instance is reachable and responding (endpoint) +- Check For Failed Build Logs in Jenkins +- Check For Long Running Builds in Jenkins +- Check For Long Queued Builds in Jenkins +- Check Jenkins Executor Utilization + +## TaskSet +Similar to the SLI, but produces a report on the specific Jenkins APIs and raises issues for each Jenkins check that requires attention. + +## Required Configuration + +``` + export JENKINS_URL="" + export JENKINS_USERNAME="" + export JENKINS_TOKEN="" +``` + +## Testing +See the `.test` directory for infrastructure test code. \ No newline at end of file diff --git a/codebundles/jenkins-health/error_patterns.json b/codebundles/jenkins-health/error_patterns.json new file mode 100644 index 000000000..a9697d081 --- /dev/null +++ b/codebundles/jenkins-health/error_patterns.json @@ -0,0 +1,268 @@ +{ + "COMPILATION_ERROR": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "Check detailed logs above for root cause and retry build after fixing issues." + }, + "TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failures:", + "suggestion": "Review failed test cases and ensure test data is correct." + }, + "DEPENDENCY_ERROR": { + "pattern": "Could not resolve dependencies", + "suggestion": "Verify dependency versions in pom.xml and check repository accessibility." + }, + "CHECKSTYLE_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-checkstyle-plugin", + "suggestion": "Fix code style violations and check checkstyle configuration." 
+    },
+    "JAVA_VERSION_MISMATCH": {
+        "pattern": "Unsupported major\\.minor version",
+        "suggestion": "Verify Java version in build environment and pom.xml."
+    },
+    "OUT_OF_MEMORY": {
+        "pattern": "java\\.lang\\.OutOfMemoryError",
+        "suggestion": "Increase memory allocation for the build or optimize memory usage."
+    },
+    "MAVEN_PLUGIN_ERROR": {
+        "pattern": "\\[ERROR\\] Failed to execute goal .*:maven-.*-plugin",
+        "suggestion": "Ensure the required Maven plugin is installed and correctly configured."
+    },
+    "MISSING_CLASS": {
+        "pattern": "java\\.lang\\.NoClassDefFoundError",
+        "suggestion": "Verify classpath configuration and ensure dependencies are properly included."
+    },
+    "CONNECTION_TIMEOUT": {
+        "pattern": "Connection timed out",
+        "suggestion": "Check network connectivity and increase connection timeout in Maven settings."
+    },
+    "AUTHENTICATION_FAILURE": {
+        "pattern": "401 Unauthorized|403 Forbidden",
+        "suggestion": "Verify credentials in settings.xml and check user permissions."
+    },
+    "GIT_ERROR": {
+        "pattern": "fatal: .+",
+        "suggestion": "Check Git repository URL, credentials, and branch availability."
+    },
+    "JAVA_SSL_ERROR": {
+        "pattern": "javax\\.net\\.ssl\\.SSLException",
+        "suggestion": "Verify SSL certificates and Java keystore settings."
+    },
+    "JAR_NOT_FOUND": {
+        "pattern": "Could not find artifact .+ in central",
+        "suggestion": "Ensure the artifact exists in Maven repository or check repository settings."
+    },
+    "MAVEN_COMPATIBILITY_ERROR": {
+        "pattern": "Non-resolvable parent POM",
+        "suggestion": "Ensure parent POM exists and is accessible."
+    },
+    "FORK_FAILURE": {
+        "pattern": "Execution default-test of goal org.apache.maven.plugins:maven-surefire-plugin failed",
+        "suggestion": "Check for missing dependencies or configuration issues in Surefire plugin."
+    },
+    "PORT_BIND_ERROR": {
+        "pattern": "Address already in use",
+        "suggestion": "Ensure the port is not already occupied by another process."
+ }, + "GROOVY_COMPILATION_ERROR": { + "pattern": "org\\.codehaus\\.groovy\\.control\\.MultipleCompilationErrorsException", + "suggestion": "Ensure that all required tools (e.g., Maven) are correctly installed and configured in Jenkins global tool settings." + }, + "MAVEN_TOOL_NOT_FOUND": { + "pattern": "Tool type \\\"maven\\\" does not have an install of \\\".*\\\" configured", + "suggestion": "Check Jenkins global tool configuration to ensure Maven is installed and referenced correctly in the pipeline script." + }, + "COMPILATION_ERROR": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "Check detailed logs above for root cause and retry build after fixing issues." + }, + "TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failures:", + "suggestion": "Review failed test cases and ensure test data is correct." + }, + "DEPENDENCY_ERROR": { + "pattern": "Could not resolve dependencies", + "suggestion": "Verify dependency versions in pom.xml and check repository accessibility." + }, + "CHECKSTYLE_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-checkstyle-plugin", + "suggestion": "Fix code style violations and check checkstyle configuration." + }, + "JAVA_VERSION_MISMATCH": { + "pattern": "Unsupported major\\.minor version", + "suggestion": "Verify Java version in build environment and pom.xml." + }, + "OUT_OF_MEMORY": { + "pattern": "java\\.lang\\.OutOfMemoryError", + "suggestion": "Increase memory allocation for the build or optimize memory usage." + }, + "MAVEN_PLUGIN_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal .*:maven-.*-plugin", + "suggestion": "Ensure the required Maven plugin is installed and correctly configured." + }, + "MISSING_CLASS": { + "pattern": "java\\.lang\\.NoClassDefFoundError", + "suggestion": "Verify classpath configuration and ensure dependencies are properly included." 
+ }, + "CONNECTION_TIMEOUT": { + "pattern": "Connection timed out", + "suggestion": "Check network connectivity and increase connection timeout in Maven settings." + }, + "AUTHENTICATION_FAILURE": { + "pattern": "401 Unauthorized|403 Forbidden", + "suggestion": "Verify credentials in settings.xml and check user permissions." + }, + "GROOVY_COMPILATION_ERROR": { + "pattern": "org\\.codehaus\\.groovy\\.control\\.MultipleCompilationErrorsException", + "suggestion": "Ensure that all required tools (e.g., Maven) are correctly installed and configured in Jenkins global tool settings." + }, + "MAVEN_TOOL_NOT_FOUND": { + "pattern": "Tool type \\\"maven\\\" does not have an install of \\\".*\\\" configured", + "suggestion": "Check Jenkins global tool configuration to ensure Maven is installed and referenced correctly in the pipeline script." + }, + "JENKINS_STARTUP_FAILURE": { + "pattern": "SEVERE: Failed to initialize Jenkins", + "suggestion": "This error usually indicates a corrupted Jenkins configuration file or plugin. Review the Jenkins logs to identify the problematic configuration or plugin and rectify it." + }, + "GROOVY_CLASS_RESOLUTION_ERROR": { + "pattern": "unable to resolve class", + "suggestion": "This error suggests that the Groovy script is unable to find a specified class. Ensure that all necessary classes are imported correctly and that the classpath is properly configured." + }, + "MISSING_FILE_OR_DIRECTORY": { + "pattern": "No such file or directory", + "suggestion": "This error indicates that Jenkins is unable to find the specified file or directory. Verify the path provided in your Jenkins job configuration and ensure the file or directory exists at the specified location." + }, + "JENKINS_PIPELINE_SYNTAX_ERROR": { + "pattern": "WorkflowScript: .*: Expected .* but found .*", + "suggestion": "This error points to a syntax issue in your Jenkins Pipeline script. 
Use the Jenkins Pipeline Syntax tool to validate the script and identify any errors or warnings." + }, + "JENKINS_GROOVY_SCRIPT_ERROR": { + "pattern": "groovy.lang.MissingPropertyException", + "suggestion": "This error occurs when a Groovy script tries to access a property that doesn't exist. Check the script for typos or incorrect property names." + }, + "JENKINS_DISK_SPACE_ERROR": { + "pattern": "No space left on device", + "suggestion": "Jenkins is unable to write to disk due to insufficient space. Free up disk space on the Jenkins server and consider setting up disk usage monitoring." + }, + "JENKINS_AGENT_CONNECTION_ERROR": { + "pattern": "java\\.io\\.IOException: Connection reset by peer", + "suggestion": "This error indicates a loss of connection between the Jenkins master and agent. Check the network connectivity and firewall settings between the master and agent machines." + }, + "JENKINS_GIT_AUTHENTICATION_ERROR": { + "pattern": "Authentication failed for 'https://.*'", + "suggestion": "Jenkins is unable to authenticate with the Git repository. Verify the credentials and ensure they are correctly configured in Jenkins." + }, + "JENKINS_SCM_ERROR": { + "pattern": "hudson\\.plugins\\.git\\.GitException: Command \"git .*\" returned status code 128", + "suggestion": "This error suggests an issue with the Source Code Management (SCM) configuration. Check the repository URL and credentials." + }, + "JENKINS_MAVEN_BUILD_FAILURE": { + "pattern": "\\[ERROR\\] BUILD FAILURE", + "suggestion": "The Maven build has failed. Review the error messages above this line in the log to identify the cause and address it accordingly." + }, + "JENKINS_MAVEN_COMPILATION_FAILURE": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "There are compilation errors in the Maven build. Check the detailed error messages to identify and fix the issues in the code." 
+ }, + "JENKINS_MAVEN_TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-surefire-plugin", + "suggestion": "Test execution has failed. Review the test results to identify failing tests and address the underlying issues." + }, + "DEPENDENCY_RESOLUTION_ERROR": { + "pattern": "Could not resolve dependencies for project", + "suggestion": "Ensure the required dependencies exist in the specified repository and check the internet connection or repository credentials." + }, + + + "SYNTAX_ERROR": { + "pattern": "SyntaxError: .+", + "suggestion": "Check for incorrect syntax, missing colons, incorrect indentation, or mismatched parentheses." + }, + "INDENTATION_ERROR": { + "pattern": "IndentationError: .+", + "suggestion": "Ensure consistent indentation using spaces or tabs. Avoid mixing both." + }, + "TYPE_ERROR": { + "pattern": "TypeError: .+", + "suggestion": "Check data types in function calls and operations. Convert variables if necessary." + }, + "NAME_ERROR": { + "pattern": "NameError: name '.+' is not defined", + "suggestion": "Ensure the variable or function is defined before use." + }, + "INDEX_ERROR": { + "pattern": "IndexError: list index out of range", + "suggestion": "Check list length before accessing elements by index." + }, + "KEY_ERROR": { + "pattern": "KeyError: '.+'", + "suggestion": "Ensure the dictionary contains the key before accessing it." + }, + "VALUE_ERROR": { + "pattern": "ValueError: .+", + "suggestion": "Check if the function arguments or data formats are valid." + }, + "ATTRIBUTE_ERROR": { + "pattern": "AttributeError: '.+' object has no attribute '.+'", + "suggestion": "Ensure the object has the specified attribute before accessing it." + }, + "MODULE_NOT_FOUND": { + "pattern": "ModuleNotFoundError: No module named '.+'", + "suggestion": "Ensure the required module is installed and the import statement is correct." 
+ }, + "IMPORT_ERROR": { + "pattern": "ImportError: .+", + "suggestion": "Check module paths and dependencies. Ensure required modules are installed." + }, + "IO_ERROR": { + "pattern": "OSError: .+|IOError: .+", + "suggestion": "Check file paths, permissions, and disk space before performing I/O operations." + }, + "ZERO_DIVISION_ERROR": { + "pattern": "ZeroDivisionError: division by zero", + "suggestion": "Avoid dividing by zero. Check for zero before performing division." + }, + "RECURSION_ERROR": { + "pattern": "RecursionError: maximum recursion depth exceeded", + "suggestion": "Check recursive function calls to prevent infinite recursion." + }, + "MEMORY_ERROR": { + "pattern": "MemoryError", + "suggestion": "Optimize memory usage. Consider processing data in smaller chunks." + }, + "TIMEOUT_ERROR": { + "pattern": "TimeoutError", + "suggestion": "Increase timeout limits or optimize code to prevent long execution times." + }, + "CONNECTION_ERROR": { + "pattern": "requests\\.exceptions\\.ConnectionError", + "suggestion": "Check network connectivity and ensure the server is reachable." + }, + "SSL_ERROR": { + "pattern": "requests\\.exceptions\\.SSLError", + "suggestion": "Verify SSL certificates and ensure secure connections." + }, + "DOCKER_BUILD_ERROR": { + "pattern": "docker build .* failed", + "suggestion": "Review the Dockerfile and check for syntax errors or missing files." + }, + "DOCKER_CONTAINER_NOT_RUNNING": { + "pattern": "Jenkins does not seem to be running inside a container", + "suggestion": "Ensure Jenkins is running inside a container or check the Docker configuration." + }, + "DOCKER_PERMISSION_DENIED": { + "pattern": "ERROR: permission denied while trying to connect to the Docker daemon socket", + "suggestion": "Add the Jenkins user to the Docker group with `sudo usermod -aG docker jenkins`, then restart Jenkins with `sudo systemctl restart jenkins`." 
+ }, + "PYTHON_REQUIREMENTS_FAIL": { + "pattern": "ERROR: Could not find a version that satisfies the requirement .*", + "suggestion": "Verify the package name and version in 'requirements.txt' and check the package index." + }, + "DEBCONF_FRONTEND_ERROR": { + "pattern": "debconf: unable to initialize frontend", + "suggestion": "Set the 'DEBIAN_FRONTEND' environment variable to 'noninteractive' for Docker builds." + }, + "NO_SUCH_DSL_METHOD": { + "pattern": "java\\.lang\\.NoSuchMethodError: No such DSL method '([^']+)'", + "suggestion": "Check if the plugin providing '{match}' is installed and up-to-date." + } +} diff --git a/codebundles/jenkins-health/failed_build_logs.sh b/codebundles/jenkins-health/failed_build_logs.sh new file mode 100755 index 000000000..b3f8aca4a --- /dev/null +++ b/codebundles/jenkins-health/failed_build_logs.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# set -x +# Check if required environment variables are set +if [ -z "${JENKINS_URL}" ] || [ -z "${JENKINS_USERNAME}" ] || [ -z "${JENKINS_TOKEN}" ]; then + echo "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." + exit 1 +fi + +# Authentication string for curl +AUTH_HEADER="${JENKINS_USERNAME}:${JENKINS_TOKEN}" + +# Temporary file for JSON output +OUTPUT_FILE="faild_build_logs.json" + +# Fetch Jenkins jobs data +jenkins_data=$(curl -s -u "${AUTH_HEADER}" "${JENKINS_URL}/api/json?depth=2") + + +# Validate if Jenkins data was retrieved successfully +if [ -z "$jenkins_data" ]; then + echo "Failed to fetch data from Jenkins. Please check your credentials or URL." 
+    exit 1
+fi
+
+# Start JSON array
+echo "[" > "$OUTPUT_FILE"
+first_entry=true
+
+# Process each job with a failed last build.
+# The while loop runs in a pipeline subshell; first_entry is only read and
+# written inside the loop, so its state does not need to survive the pipeline.
+echo "$jenkins_data" | jq -c '.jobs[] | select(.lastBuild.result == "FAILURE") | {name: .name, url: .lastBuild.url, number: .lastBuild.number}' | \
+while read -r job_info; do
+    # Extract job details. `// empty` makes jq print nothing for a null field
+    # instead of the literal string "null", so the -z checks below can catch it.
+    job_name=$(echo "$job_info" | jq -r '.name // empty')
+    build_url=$(echo "$job_info" | jq -r '.url // empty')
+    build_number=$(echo "$job_info" | jq -r '.number // empty')
+
+    # Skip if any of the required fields are missing.
+    # Diagnostics go to stderr so that stdout carries only the final JSON document.
+    if [ -z "$job_name" ] || [ -z "$build_url" ] || [ -z "$build_number" ]; then
+        echo "Skipping a job due to missing information." >&2
+        continue
+    fi
+
+    # Fetch build logs; -f turns HTTP errors into a non-zero exit status so an
+    # error page is never stored as if it were the build log.
+    if ! logs=$(curl -sf -u "$AUTH_HEADER" "${build_url}logText/progressiveText?start=0"); then
+        echo "Failed to fetch logs for job: $job_name, build: $build_number." >&2
+        continue
+    fi
+
+    # Build the entry with jq so the job name, URL and logs are all JSON-escaped.
+    # The logs are streamed on stdin (-sR reads them as one raw string) rather
+    # than passed as an argument, since a build log can be very large.
+    if ! entry=$(echo "$logs" | jq -sR \
+        --arg job_name "$job_name" \
+        --arg url "$build_url" \
+        --argjson build_number "$build_number" \
+        '{job_name: $job_name, result: "FAILURE", build_number: $build_number, logs: ., url: $url}'); then
+        echo "Failed to encode logs for job: $job_name, build: $build_number." >&2
+        continue
+    fi
+
+    # Add comma if not the first entry (only once the entry is known to be valid)
+    if [ "$first_entry" = false ]; then
+        echo "," >> "$OUTPUT_FILE"
+    fi
+    first_entry=false
+
+    # Write JSON entry to file
+    echo "$entry" >> "$OUTPUT_FILE"
+done
+
+# Close JSON array
+echo "]" >> "$OUTPUT_FILE"
+
+# Validate JSON and pretty-print the output
+if jq empty "$OUTPUT_FILE" > /dev/null 2>&1; then
+    cat "$OUTPUT_FILE"
+    # echo "Failed builds data has been saved to $OUTPUT_FILE"
+else
+    echo "Error: Invalid JSON generated. Check the output file for issues." >&2
+ exit 1 +fi diff --git a/codebundles/jenkins-health/long_running_builds.sh b/codebundles/jenkins-health/long_running_builds.sh new file mode 100755 index 000000000..36928f1d6 --- /dev/null +++ b/codebundles/jenkins-health/long_running_builds.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# set -x +# Check if required environment variables are set +if [ -z "${JENKINS_URL}" ] || [ -z "${JENKINS_USERNAME}" ] || [ -z "${JENKINS_TOKEN}" ]; then + echo "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." + exit 1 +fi + +convert_to_minutes() { + local time_str=$1 + # Convert to lowercase and remove any spaces + time_str=$(echo "$time_str" | tr '[:upper:]' '[:lower:]' | tr -d ' ') + + # Extract number using regex + local number=$(echo "$time_str" | grep -o '^[0-9]\+') + + # Extract unit by removing the number + local unit=$(echo "$time_str" | sed 's/^[0-9]\+//') + + case $unit in + m|min|minute|minutes) + if [ "$number" -lt 0 ] || [ "$number" -gt 59 ]; then + echo "Minutes should be between 0-59" >&2 + exit 1 + fi + echo $number ;; + h|hr|hour|hours) + if [ "$number" -lt 0 ] || [ "$number" -gt 23 ]; then + echo "Hours should be between 0-23" >&2 + exit 1 + fi + echo $((number * 60)) ;; + d|day|days) + echo $((number * 1440)) ;; + *) + echo "Invalid time format. 
Please use formats like '5m', '2h', '1d' or '5min', '2hours', '1day'" >&2
+            echo "Minutes should be between 0-59" >&2
+            echo "Hours should be between 0-23" >&2
+            exit 1
+            ;;
+    esac
+}
+
+# Check if threshold parameter is provided
+if [ -z "$1" ]; then
+    echo "Please provide time threshold (e.g., ./long_running_builds.sh 5m or 2h or 1d)" >&2
+    exit 1
+fi
+
+# convert_to_minutes is called through command substitution, so the `exit 1`
+# in its error paths only ends that subshell. Check its status here, and that
+# it actually produced a value, instead of carrying on with an empty threshold.
+if ! THRESHOLD_MINUTES=$(convert_to_minutes "$1") || [ -z "$THRESHOLD_MINUTES" ]; then
+    echo "Invalid time threshold: '$1'" >&2
+    exit 1
+fi
+
+# Authentication string for curl
+AUTH_HEADER="${JENKINS_USERNAME}:${JENKINS_TOKEN}"
+
+# Get current timestamp in milliseconds
+# NOTE: %3N (nanoseconds truncated to 3 digits) is a GNU date extension.
+current_time=$(date +%s%3N)
+
+# Fetch Jenkins data and process it using jq to find long running jobs
+jenkins_data=$(curl -s -u "${AUTH_HEADER}" "${JENKINS_URL}/api/json?depth=2")
+
+# Validate if Jenkins data was retrieved successfully
+if [ -z "$jenkins_data" ]; then
+    echo "Failed to fetch data from Jenkins. Please check your credentials or URL." >&2
+    exit 1
+fi
+
+# Process the data using jq to find long running jobs and output as JSON.
+# Jenkins build timestamps and $current are both in milliseconds, hence the
+# division by 1000 and 60 to get whole minutes.
+echo "$jenkins_data" | jq --arg threshold "$THRESHOLD_MINUTES" --arg current "$current_time" '
+{
+    "timestamp": ($current | tonumber),
+    "threshold": ($threshold | tonumber),
+    "long_running_jobs": [
+        .jobs[] |
+        select(.lastBuild != null and .lastBuild.building) |
+        {
+            "job_name": .name,
+            "build_number": .lastBuild.number,
+            "node": (if .lastBuild.builtOn == "" then "Built-in Node" else .lastBuild.builtOn end),
+            "start_time": .lastBuild.timestamp,
+            "duration_minutes": (((($current | tonumber) - .lastBuild.timestamp) / 1000 / 60) | floor),
+            "url": .lastBuild.url
+        } |
+        select(.duration_minutes >= ($threshold | tonumber))
+    ]
+}' | jq '.long_running_jobs[] |= .
+ { + "duration": ((.duration_minutes | tostring) + "m") +}' | jq 'walk( + if type == "object" and has("duration") then + .duration = (if .duration_minutes >= 1440 then + ((.duration_minutes / 1440) | floor | tostring) + "d " + + (((.duration_minutes % 1440) / 60) | floor | tostring) + "h " + + (.duration_minutes % 60 | tostring) + "m" + elif .duration_minutes >= 60 then + ((.duration_minutes / 60) | floor | tostring) + "h " + + (.duration_minutes % 60 | tostring) + "m" + else + (.duration_minutes | tostring) + "m" + end | sub("\\s+$"; "")) + else + . + end +)' diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot new file mode 100644 index 000000000..9264ce09d --- /dev/null +++ b/codebundles/jenkins-health/runbook.robot @@ -0,0 +1,325 @@ +*** Settings *** +Documentation List Jenkins health, failed builds, tests and long running builds +Metadata Author saurabh3460 +Metadata Display Name Jenkins Health +Metadata Supports Jenkins + +Library RW.Core +Library RW.CLI +Library RW.platform +Library String +Library Jenkins +Suite Setup Suite Initialization + +*** Tasks *** +List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API + [Tags] Jenkins Logs Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=${CURDIR}/failed_build_logs.sh + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${jobs}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${jobs}= Create List + END + IF len(@{jobs}) > 0 + FOR ${job} IN @{jobs} + ${job_name}= Set Variable ${job['job_name']} + ${build_number}= Set Variable ${job['build_number']} + ${json_str}= Evaluate json.dumps(${job}) json + ${formatted_results}= RW.CLI.Run Cli + ... 
cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Result", "URL"] as $headers | $headers, (. | [.job_name, .build_number, .result, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Failed Builds:\n=======================================\n${formatted_results.stdout} + ${next_steps}= Analyze Logs + ... logs=${job['logs']} + ... error_patterns_file=${CURDIR}/error_patterns.json + + ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint + + ${suggestions}= Set Variable ${EMPTY} + ${logs_details}= Set Variable ${EMPTY} + FOR ${step} IN @{next_steps} + ${suggestions}= Set Variable ${suggestions}${step['suggestion']}\n + ${logs_details}= Set Variable ${logs_details}Log: ${step['log']}\n + END + + RW.Core.Add Issue + ... severity=3 + ... expected=Jenkins job `${job_name}` should complete successfully + ... actual=Jenkins job `${job_name}` build #`${build_number}` failed + ... title=Jenkins Build Failure: `${job_name}` (Build #`${build_number}`) + ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` + ... details=Error Logs:\n${logs_details} + ... next_steps=${suggestions} + END + ELSE + RW.Core.Add Pre To Report "No failed builds found" + END + +List Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Identifies Jenkins builds that have been running longer than a specified threshold + [Tags] Jenkins Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=${CURDIR}/long_running_builds.sh + ... cmd_override=${CURDIR}/long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty object. 
WARN + ${data}= Create Dictionary + END + + ${long_running_jobs}= Set Variable ${data.get('long_running_jobs', [])} + + IF len(${long_running_jobs}) > 0 + ${json_str}= Evaluate json.dumps(${long_running_jobs}) json + ${formatted_results}= RW.CLI.Run Cli + ... cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Duration", "URL"] as $headers | $headers, (.[] | [.job_name, .build_number, .duration, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Long Running Jobs:\n=======================================\n${formatted_results.stdout} + + FOR ${job} IN @{long_running_jobs} + ${job_name}= Set Variable ${job['job_name']} + ${duration}= Set Variable ${job['duration']} + ${build_number}= Set Variable ${job['build_number']} + ${url}= Set Variable ${job['url']} + + ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint + RW.Core.Add Issue + ... severity=4 + ... expected=Jenkins job `${job_name}` (Build #`${build_number}`) should complete within ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... actual=Jenkins job `${job_name}` (Build #`${build_number}`) has been running for ${duration} (exceeds threshold) + ... title=Long Running Build: `${job_name}` (Build #`${build_number}`) - ${duration} + ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` + ... details=${pretty_item} + ... next_steps=Investigate build logs of job `${job_name}`\nCheck resource utilization on build node + END + ELSE + RW.Core.Add Pre To Report "No long running builds found" + END + +List Recent Failed Tests in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] List Recent Failed Tests in Jenkins Instance + [Tags] Jenkins Tests + ${failed_tests}= Jenkins.Get Failed Tests + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... 
jenkins_token=${JENKINS_TOKEN} + + IF len(${failed_tests}) > 0 + FOR ${test_suite} IN @{failed_tests} + ${pipeline_details}= Set Variable ${test_suite['pipeline_details']} + ${test_results}= Set Variable ${test_suite['test_results']} + ${pipeline_url}= Set Variable ${pipeline_details['pipeline_url']} + ${pipeline_name}= Set Variable ${pipeline_details['pipeline_name']} + ${build_number}= Set Variable ${pipeline_details['build_number']} + + ${json_str}= Evaluate json.dumps(${test_results}) json + ${formatted_results}= RW.CLI.Run Cli + ... cmd=echo '${json_str}' | jq -r '["FailedTests", "Duration", "StdErr", "StdOut", "Status"] as $headers | $headers, (.[] | [.name, .duration, .stderr, .stdout, .status]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Pipeline Name: ${pipeline_name} Build No.${build_number}:\n=======================================\n${formatted_results.stdout} + + FOR ${test} IN @{test_results} + ${class_name}= Set Variable ${test['className']} + ${test_name}= Set Variable ${test['name']} + ${error_details}= Set Variable ${test['errorDetails']} + ${stack_trace}= Set Variable ${test['errorStackTrace']} + + ${pretty_test}= Evaluate pprint.pformat(${test}) modules=pprint + RW.Core.Add Issue + ... severity=3 + ... expected=Test `${test_name}` in pipeline `${pipeline_name}` (Build #`${build_number}`) should pass successfully + ... actual=Test '${test_name}' failed with error:\n${error_details} + ... title=Test Failure: `${test_name}` in ${pipeline_name} (Build #`${build_number}`) + ... details=${pretty_test} + ... reproduce_hint=Navigate to Jenkins build `${pipeline_url}lastCompletedBuild/testReport/` + ... 
next_steps=Review the error message and stack trace
+            END
+        END
+    ELSE
+        RW.Core.Add Pre To Report    "No failed tests found"
+    END
+
+Check Jenkins Instance `${JENKINS_INSTANCE_NAME}` Health
+    [Documentation]    Check if Jenkins instance is reachable and responding
+    [Tags]    Jenkins    Health
+    # TODO: Capture more exceptions here
+    ${rsp}=    RW.CLI.Run Cli
+    ...    cmd=curl -s -u "$${JENKINS_USERNAME.key}:$${JENKINS_TOKEN.key}" "${JENKINS_URL}/api/json"
+    ...    env=${env}
+    ...    secret__jenkins_token=${JENKINS_TOKEN}
+    ...    secret__jenkins_username=${JENKINS_USERNAME}
+    TRY
+        # Raw string literal (r'''...''') so backslash escapes in the JSON response
+        # reach json.loads untouched, as in the other tasks of this suite. Without
+        # it a valid response containing \" or \n fails to parse and a healthy
+        # instance is reported as unreachable.
+        ${data}=    Evaluate    json.loads(r'''${rsp.stdout}''')    json
+        RW.Core.Add Pre To Report    Jenkins instance is up and responding
+    EXCEPT
+        RW.Core.Add Issue
+        ...    severity=1
+        ...    expected=Jenkins instance at ${JENKINS_URL}/api/json should be reachable and responding
+        ...    actual=Unable to connect to Jenkins instance or received invalid response
+        ...    title=Jenkins instance is not reachable
+        ...    details=Failed to connect to Jenkins instance at ${JENKINS_URL}/api/json response: ${rsp.stdout}
+        ...    reproduce_hint=Try accessing ${JENKINS_URL}/api/json in a web browser
+        ...    next_steps=- Check if Jenkins service is running\n- Verify network connectivity\n- Validate Jenkins URL\n- Check Jenkins logs for errors
+    END
+
+List Long Queued Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}`
+    [Documentation]    Check for builds stuck in queue beyond threshold
+    [Tags]    Jenkins    Queue    Builds
+
+    ${queued_builds}=    Jenkins.Get Queued Builds
+    ...    jenkins_url=${JENKINS_URL}
+    ...    jenkins_username=${JENKINS_USERNAME}
+    ...    jenkins_token=${JENKINS_TOKEN}
+    ...    wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME}
+
+    TRY
+        IF    ${queued_builds} == []
+            RW.Core.Add Pre To Report    No builds currently queued beyond threshold
+        ELSE
+            ${json_str}=    Evaluate    json.dumps(${queued_builds})    json
+            ${formatted_results}=    RW.CLI.Run Cli
+            ...
cmd=echo '${json_str}' | jq -r '["Job Name", "Wait Time", "Why", "Stuck", "Blocked", "URL"] as $headers | $headers, (.[] | [.job_name, .wait_time, .why, .stuck, .blocked, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Builds Currently Queued:\n=======================================\n${formatted_results.stdout} + + FOR ${build} IN @{queued_builds} + ${url}= Set Variable ${build['url']} + ${job_name}= Set Variable ${build['job_name']} + ${wait_time}= Set Variable ${build['wait_time']} + ${why}= Set Variable ${build['why']} + ${stuck}= Set Variable ${build['stuck']} + ${blocked}= Set Variable ${build['blocked']} + + # Add specific next steps based on status + ${next_steps}= Set Variable If ${stuck} + ... - Check Jenkins executor status\n- Review system resources\n- Consider restarting Jenkins if needed + ... ${blocked} + ... Consider increasing executors if bottlenecked + ... Consider adding more build agents + + RW.Core.Add Issue + ... severity=4 + ... expected=Builds should not be queued for more than ${QUEUED_BUILD_MAX_WAIT_TIME} + ... actual=Build '${job_name}' has been queued for ${wait_time} (exceeds threshold) + ... title=Long Queued Build: ${job_name} (${wait_time}) + ... details=${build} + ... reproduce_hint=Access Jenkins at ${JENKINS_URL} + ... next_steps=${next_steps} + END + END + + EXCEPT + RW.Core.Add Pre To Report No queued builds found + END + + +List Executor Utilization in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check Jenkins executor utilization across nodes + [Tags] Jenkins Executors Utilization + + ${executor_utilization}= Jenkins.Get Executor Utilization + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} + + TRY + IF ${executor_utilization} == [] + RW.Core.Add Pre To Report No executor utilization data found + ELSE + ${json_str}= Evaluate json.dumps(${executor_utilization}) json + ${formatted_results}= RW.CLI.Run Cli + ... 
cmd=echo '${json_str}' | jq -r '["Node Name", "Busy Executors", "Total Executors", "Utilization %"] as $headers | $headers, (.[] | [.node_name, .busy_executors, .total_executors, .utilization_percentage]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Executor Utilization:\n=======================================\n${formatted_results.stdout} + + FOR ${executor} IN @{executor_utilization} + ${node_name}= Set Variable ${executor['node_name']} + ${utilization}= Set Variable ${executor['utilization_percentage']} + ${busy_executors}= Set Variable ${executor['busy_executors']} + ${total_executors}= Set Variable ${executor['total_executors']} + + IF ${utilization} > float(${MAX_EXECUTOR_UTILIZATION}) + RW.Core.Add Issue + ... severity=3 + ... expected=Executor utilization should be below ${MAX_EXECUTOR_UTILIZATION}% + ... actual=Node '${node_name}' has ${utilization}% utilization (${busy_executors}/${total_executors} executors busy) + ... title=Jenkins High Executor Utilization: ${node_name} (${utilization}%) + ... details=${executor} + ... reproduce_hint=Check executor status at ${JENKINS_URL}/computer/ + ... next_steps=- Consider adding more executors\n- Review job distribution\n- Check for stuck builds + END + END + END + + EXCEPT + RW.Core.Add Pre To Report Failed to fetch executor utilization data + END + + +Fetch Jenkins Instance `${JENKINS_INSTANCE_NAME}` Logs and Add to Report + [Documentation] Fetches and displays Jenkins logs from the Atom feed + [Tags] Jenkins Logs + ${rsp}= Jenkins.Parse Atom Feed + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} + RW.Core.Add Pre To Report ${rsp} + + +*** Keywords *** +Suite Initialization + ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL + ... type=string + ... description=The URL of your Jenkins instance + ... pattern=\w* + ... example=https://jenkins.example.com + ${JENKINS_USERNAME}= RW.Core.Import Secret JENKINS_USERNAME + ... 
type=string + ... description=Jenkins username for authentication + ... pattern=\w* + ... example=admin + ${JENKINS_TOKEN}= RW.Core.Import Secret JENKINS_TOKEN + ... type=string + ... description=Jenkins API token for authentication + ... pattern=\w* + ... example=11aa22bb33cc44dd55ee66ff77gg88hh + ${LONG_RUNNING_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable LONG_RUNNING_BUILD_MAX_WAIT_TIME + ... type=string + ... description=The threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${QUEUED_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable QUEUED_BUILD_MAX_WAIT_TIME + ... type=string + ... description=The time threshold for builds in queue, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${MAX_EXECUTOR_UTILIZATION}= RW.Core.Import User Variable MAX_EXECUTOR_UTILIZATION + ... type=string + ... description=The maximum percentage of executor utilization to consider healthy + ... pattern=\d+ + ... example="80" + ... default="80" + ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME + ... type=string + ... description=Jenkins Instance Name + ... pattern=\w* + ... example="prod-jenkins" + ... 
default="prod-jenkins" + Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} + Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} + Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} + Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} + Set Suite Variable ${JENKINS_INSTANCE_NAME} ${JENKINS_INSTANCE_NAME} + Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} + #Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}", "JENKINS_USERNAME":"${JENKINS_USERNAME.key}", "JENKINS_TOKEN":"${JENKINS_TOKEN.key}"} diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot new file mode 100644 index 000000000..6566b4761 --- /dev/null +++ b/codebundles/jenkins-health/sli.robot @@ -0,0 +1,201 @@ +*** Settings *** +Documentation Check Jenkins health, failed builds, tests and long running builds +Metadata Author saurabh3460 +Metadata Display Name Jenkins Health +Metadata Supports Jenkins + +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library Jenkins + +Suite Setup Suite Initialization + +*** Tasks *** +Check For Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check For Failed Build Logs in Jenkins + [Tags] Jenkins Logs Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=${CURDIR}/failed_build_logs.sh + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${jobs}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. 
WARN + ${jobs}= Create List + END + ${failed_builds}= Evaluate len(@{jobs}) + ${failed_builds_score}= Evaluate 1 if int(${failed_builds}) <= int(${MAX_FAILED_BUILDS}) else 0 + Set Global Variable ${failed_builds_score} + +Check For Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check Jenkins builds that have been running longer than a specified threshold + [Tags] Jenkins Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=${CURDIR}/long_running_builds.sh + ... cmd_override=${CURDIR}/long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty object. WARN + ${data}= Create Dictionary + END + + ${long_running_builds}= Set Variable ${data.get('long_running_jobs', [])} + ${long_running_count}= Evaluate len($long_running_builds) + + ${long_running_score}= Evaluate 1 if int(${long_running_count}) <= int(${MAX_LONG_RUNNING_BUILDS}) else 0 + Set Global Variable ${long_running_score} + +Check For Recent Failed Tests in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check For Recent Failed Tests in Jenkins + [Tags] Jenkins Tests + ${failed_tests}= Jenkins.Get Failed Tests + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... 
jenkins_token=${JENKINS_TOKEN} + IF len(${failed_tests}) > 0 + ${total_failed_tests}= Evaluate sum([len(suite['test_results']) for suite in ${failed_tests}]) + ${failed_test_score}= Evaluate 1 if int(${total_failed_tests}) <= int(${MAX_ALLOWED_FAILED_TESTS}) else 0 + Set Global Variable ${failed_test_score} + ELSE + Set Global Variable ${failed_test_score} 1 + END + +Check For Jenkins Instance `${JENKINS_INSTANCE_NAME}` Health + [Documentation] Check if Jenkins instance is reachable and responding + [Tags] Jenkins Health + ${rsp}= RW.CLI.Run Cli + ... cmd=curl -s -u "$${JENKINS_USERNAME.key}:$${JENKINS_TOKEN.key}" "${JENKINS_URL}/api/json" + ... env=${env} + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads('''${rsp.stdout}''') json + Set Global Variable ${jenkins_health_score} 1 + EXCEPT + Set Global Variable ${jenkins_health_score} 0 + END + +Check For Long Queued Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check for builds stuck in queue beyond threshold and calculate SLI score + [Tags] Jenkins Queue Builds SLI + ${queued_builds}= Jenkins.Get Queued Builds + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} + ... wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} + ${queued_count}= Evaluate len(${queued_builds}) + ${queued_builds_score}= Evaluate 1 if int(${queued_count}) <= int(${MAX_QUEUED_BUILDS}) else 0 + Set Global Variable ${queued_builds_score} + +Check Jenkins Executor Utilization in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] Check if Jenkins executor utilization is above 80% + [Tags] Jenkins Executors Utilization + ${executor_utilization}= Jenkins.Get Executor Utilization + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... 
jenkins_token=${JENKINS_TOKEN} + ${high_utilization}= Set Variable False + FOR ${executor} IN @{executor_utilization} + IF ${executor['utilization_percentage']} > float(${MAX_EXECUTOR_UTILIZATION}) + ${high_utilization}= Set Variable True + BREAK + END + END + ${executor_utilization_score}= Evaluate 0 if ${high_utilization} else 1 + Set Global Variable ${executor_utilization_score} + + +Generate Health Score + ${health_score}= Evaluate (${failed_builds_score} + ${long_running_score} + ${failed_test_score} + ${jenkins_health_score} + ${queued_builds_score} + ${executor_utilization_score}) / 6 + ${health_score}= Convert to Number ${health_score} 2 + RW.Core.Push Metric ${health_score} + + +*** Keywords *** +Suite Initialization + ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL + ... type=string + ... description=The URL of your Jenkins instance + ... pattern=\w* + ... example=https://jenkins.example.com + ${JENKINS_USERNAME}= RW.Core.Import Secret JENKINS_USERNAME + ... type=string + ... description=Jenkins username for authentication + ... pattern=\w* + ... example=admin + ${JENKINS_TOKEN}= RW.Core.Import Secret JENKINS_TOKEN + ... type=string + ... description=Jenkins API token for authentication + ... pattern=\w* + ... example=11aa22bb33cc44dd55ee66ff77gg88hh + ${LONG_RUNNING_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable LONG_RUNNING_BUILD_MAX_WAIT_TIME + ... type=string + ... description=The time threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${MAX_LONG_RUNNING_BUILDS}= RW.Core.Import User Variable MAX_LONG_RUNNING_BUILDS + ... type=string + ... description=The maximum number of long running builds to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${QUEUED_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable QUEUED_BUILD_MAX_WAIT_TIME + ... type=string + ... 
description=The time threshold for builds in queue, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${MAX_FAILED_BUILDS}= RW.Core.Import User Variable MAX_FAILED_BUILDS + ... type=string + ... description=The maximum number of failed builds allowed and consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${MAX_ALLOWED_FAILED_TESTS}= RW.Core.Import User Variable MAX_ALLOWED_FAILED_TESTS + ... type=string + ... description=The maximum number of failed tests allowed and consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${MAX_QUEUED_BUILDS}= RW.Core.Import User Variable MAX_QUEUED_BUILDS + ... type=string + ... description=The maximum number of builds stuck in queue to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${MAX_EXECUTOR_UTILIZATION}= RW.Core.Import User Variable MAX_EXECUTOR_UTILIZATION + ... type=string + ... description=The maximum percentage of executor utilization to consider healthy + ... pattern=\d+ + ... example="80" + ... default="80" + ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME + ... type=string + ... description=Jenkins Instance Name + ... pattern=\w* + ... example="prod-jenkins" + ... 
default="prod-jenkins" + Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} + Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} + Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} + Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} + Set Suite Variable ${MAX_FAILED_BUILDS} ${MAX_FAILED_BUILDS} + Set Suite Variable ${MAX_LONG_RUNNING_BUILDS} ${MAX_LONG_RUNNING_BUILDS} + Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_ALLOWED_FAILED_TESTS} ${MAX_ALLOWED_FAILED_TESTS} + Set Suite Variable ${MAX_QUEUED_BUILDS} ${MAX_QUEUED_BUILDS} + Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} + Set Suite Variable ${JENKINS_INSTANCE_NAME} ${JENKINS_INSTANCE_NAME} + \ No newline at end of file diff --git a/interactive_console_output.xml b/interactive_console_output.xml new file mode 100644 index 000000000..852fdf171 --- /dev/null +++ b/interactive_console_output.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + +All Tests + + + + +Robot Interactive Console + + + +Error in file '/home/runwhen/.vscode-remote/extensions/robocorp.robotframework-lsp-1.13.0/src/robotframework_ls/vendored/robotframework_interactive/robot_interactive_console.robot' on line 4: Singular section headers like '*** Test Case ***' are deprecated. Use plural format like '*** Test Cases ***' instead. 
# Reconstructed from a whitespace-collapsed patch. The hunk that precedes this
# module in the patch adds libraries/Jenkins/__init__.py, whose only line is:
#     from .jenkins import *
"""Robot Framework keyword library for inspecting a Jenkins instance.

Every keyword takes the Jenkins base URL plus username/token secrets as
arguments; nothing is cached on the library instance.
"""
import itertools
import json
import re
import time
import xml.etree.ElementTree as ET
from collections import defaultdict

import requests
from robot.api.deco import keyword
from thefuzz import fuzz
from thefuzz import process as fuzzprocessor

from RW import platform

# Timeout (seconds) applied to every HTTP request sent to Jenkins.
_REQUEST_TIMEOUT = 10

# Matched case-insensitively because normalize_log() lowercases the text
# before substituting, which turns the ISO-8601 'T'/'Z' markers into 't'/'z'.
_TIMESTAMP_RE = re.compile(
    r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z', re.IGNORECASE
)
_LONG_HEX_RE = re.compile(r'[a-f0-9]{12,}')
_WHITESPACE_RE = re.compile(r'\s+')

# "<integer><unit>" once blanks and surrounding quotes have been removed.
_THRESHOLD_RE = re.compile(r'(\d+)([a-z]+)')
_MINUTES_PER_UNIT = {
    'm': 1, 'min': 1, 'mins': 1, 'minute': 1, 'minutes': 1,
    'h': 60, 'hr': 60, 'hrs': 60, 'hour': 60, 'hours': 60,
    'd': 24 * 60, 'day': 24 * 60, 'days': 24 * 60,
}

# Status values of a test case that count as a failure in a Jenkins test
# report. NOTE(review): taken from the JUnit plugin's CaseResult.Status enum
# (PASSED, SKIPPED, FAILED, FIXED, REGRESSION) - confirm against the plugin
# version in use.
_FAILING_TEST_STATUSES = frozenset({'FAILED', 'REGRESSION'})

# Markers used by build_logs_analytics to locate error sections in a console log.
_ERROR_TERMS = ('error:', 'exception', 'failed', 'failure')
_ERROR_SKIP_TERMS = ('finished: failure', 'build failure', '[info]')
_MAX_ERROR_SECTION_LINES = 10


def normalize_log(log: str) -> str:
    """Normalize log text to improve pattern matching.

    The text is lowercased, ISO-8601 timestamps of the form
    ``YYYY-MM-DDTHH:MM:SS.fffZ`` become ``{timestamp}``, runs of 12 or more
    hexadecimal characters become ``{hash}``, and every run of whitespace
    (newlines included) collapses to a single space.
    """
    log = log.lower()
    log = _TIMESTAMP_RE.sub('{timestamp}', log)
    log = _LONG_HEX_RE.sub('{hash}', log)
    return _WHITESPACE_RE.sub(' ', log).strip()


def _parse_wait_threshold(wait_threshold) -> int:
    """Convert a threshold such as '10m', '5min', '2h' or '1day' to minutes.

    Blanks and surrounding quotes are ignored, so the quoted defaults the
    robot suites pass (``"10m"``) are accepted.

    Raises:
        ValueError: the value is not ``<integer><unit>`` with a known unit.
    """
    cleaned = str(wait_threshold).lower().replace(' ', '').strip('"').strip("'")
    match = _THRESHOLD_RE.fullmatch(cleaned)
    if match is None or match.group(2) not in _MINUTES_PER_UNIT:
        raise ValueError(
            "Invalid threshold format. Use '10min', '1h', '30m', '1d', '1day', etc."
        )
    return int(match.group(1)) * _MINUTES_PER_UNIT[match.group(2)]


def _format_wait_time(wait_time_mins: float) -> str:
    """Render a wait expressed in minutes using the largest fitting unit."""
    if wait_time_mins >= 24 * 60:
        return f"{wait_time_mins / (24 * 60):.1f}d"
    if wait_time_mins >= 60:
        return f"{wait_time_mins / 60:.1f}h"
    return f"{wait_time_mins:.1f}min"


def _resolve_queued_job_name(item: dict, base_url: str, auth: tuple) -> str:
    """Return the job name of a queue item, asking Jenkins when it is absent.

    Falls back to 'Unknown Job' when the item carries neither a task name nor
    a URL, or when the follow-up request fails.
    """
    job_name = (item.get('task') or {}).get('name', '')
    if job_name:
        return job_name
    item_path = item.get('url', '')
    if not item_path:
        return 'Unknown Job'
    try:
        rsp = requests.get(
            f"{base_url}/{item_path}api/json?depth=1",
            auth=auth,
            timeout=_REQUEST_TIMEOUT,
        ).json()
    except (requests.exceptions.RequestException, ValueError):
        # Best effort only: the name is cosmetic, so never fail the keyword.
        return 'Unknown Job'
    return (rsp.get('task') or {}).get('name', 'Unknown Job')


def _extract_error_sections(log_content: str) -> list:
    """Split a console log into short sections that start at an error marker.

    A section opens on a line containing one of ``_ERROR_TERMS`` (unless the
    line also contains one of ``_ERROR_SKIP_TERMS``) and closes on the first
    blank line or once it holds more than ``_MAX_ERROR_SECTION_LINES`` lines.
    A marker line seen while a section is open restarts the section.
    """
    sections = []
    current = []
    in_error = False

    for line in log_content.split('\n'):
        lower_line = line.lower()
        if any(term in lower_line for term in _ERROR_TERMS):
            if any(skip_term in lower_line for skip_term in _ERROR_SKIP_TERMS):
                continue
            in_error = True
            current = [line]
        elif in_error and line.strip():
            if not lower_line.startswith('[info]'):
                current.append(line)
            if len(current) > _MAX_ERROR_SECTION_LINES:
                in_error = False
                sections.append('\n'.join(current))
        elif in_error:
            in_error = False
            if current:
                sections.append('\n'.join(current))

    # A log that ends without a trailing blank line would otherwise lose the
    # section that is still open.
    if in_error and current:
        sections.append('\n'.join(current))
    return sections


class Jenkins:
    """
    This Robot Framework library exposes its keywords so that each one
    accepts jenkins_url, jenkins_username, and jenkins_token directly.

    The `jenkins_username` and `jenkins_token` parameters are expected
    to be `platform.Secret` objects, so we do `jenkins_username.value`
    and `jenkins_token.value` to retrieve the actual strings.

    Example usage in Robot:

    *** Settings ***
    Library    Jenkins

    *** Variables ***
    ${JENKINS_URL}         https://my-jenkins.example
    ${JENKINS_USERNAME}    MyJenkinsUsernameSecret
    ${JENKINS_TOKEN}       MyJenkinsTokenSecret

    *** Test Cases ***
    List Recent Failed Tests in Jenkins
        ${failed_tests}=    Get Failed Tests    ${JENKINS_URL}    ${JENKINS_USERNAME}    ${JENKINS_TOKEN}
        Log    Found ${len(${failed_tests})} unstable builds
    """

    def __init__(self):
        # We don't store credentials or Jenkins data at construction time
        pass

    def _fetch_jenkins_data(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret
    ):
        """
        Helper method that calls Jenkins at /api/json?depth=2 and returns the parsed JSON.
        Raises ConnectionError if the request fails or the body is not JSON.
        """
        api_url = f"{jenkins_url.rstrip('/')}/api/json?depth=2"
        # Extract the actual secret values for Basic Auth
        auth = (jenkins_username.value, jenkins_token.value)
        try:
            response = requests.get(api_url, auth=auth, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") from e

    @keyword("Get Failed Tests")
    def get_failed_tests(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret
    ):
        """
        Returns a list of pipelines in the 'UNSTABLE' state along with their failed tests.

        Each entry is ``{'pipeline_details': {...}, 'test_results': [...]}`` where
        ``test_results`` holds only the test cases whose status marks a failure;
        passed and skipped cases from the report are left out.

        Raises ConnectionError if Jenkins or a test report cannot be fetched.

        Example:
        | ${failed_tests}=    Get Failed Tests    ${JENKINS_URL}    ${JENKINS_USERNAME}    ${JENKINS_TOKEN} |
        | FOR    ${pipeline}    IN    @{failed_tests} |
        |     Log    Pipeline name: ${pipeline['pipeline_details']['pipeline_name']} |
        |     Log    Test results: ${pipeline['test_results']} |
        | END |
        """
        jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token)
        # For requests during test-report fetching:
        auth = (jenkins_username.value, jenkins_token.value)

        failed_tests = []
        for job in jenkins_data.get('jobs', []):
            last_build = job.get('lastBuild') or {}
            if last_build.get('result') != 'UNSTABLE':
                continue

            pipeline_details = {
                'pipeline_name': job.get('name'),
                'pipeline_url': job.get('url'),
                'build_result': last_build.get('result'),
                'build_number': last_build.get('number'),
                'build_timestamp': last_build.get('timestamp'),
                'build_duration': last_build.get('duration'),
                'build_queueId': last_build.get('queueId'),
                'build_building': last_build.get('building'),
                'build_changeSet': last_build.get('changeSet')
            }
            test_report_url = f"{last_build.get('url')}testReport/api/json"
            try:
                tests_response = requests.get(
                    test_report_url, auth=auth, timeout=_REQUEST_TIMEOUT
                )
                tests_response.raise_for_status()
                suites = tests_response.json().get('suites', [])
            except (requests.exceptions.RequestException, ValueError) as e:
                raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") from e

            # The report lists every case; callers raise one issue per entry,
            # so only the failing ones may be returned.
            test_results = [
                case
                for suite in suites
                for case in suite.get('cases', [])
                if case.get('status') in _FAILING_TEST_STATUSES
            ]
            failed_tests.append({
                "pipeline_details": pipeline_details,
                "test_results": test_results
            })

        return failed_tests

    @keyword("Get Queued Builds")
    def get_queued_builds(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret,
        wait_threshold: str = "10m"
    ):
        """
        Get builds waiting in queue longer than the specified threshold (e.g., '10m', '1h', '1d').

        Returns a list of dictionaries with the keys job_name, waiting_since,
        wait_time, why, stuck, blocked and url for each queued build.

        Raises ValueError for an unparsable threshold and ConnectionError if
        the queue cannot be fetched.

        Example usage in Robot:
        | ${queued_builds}=    | Get Queued Builds    | ${JENKINS_URL}    | ${JENKINS_USERNAME}    | ${JENKINS_TOKEN}    | 15m |
        | FOR    ${build}    IN    @{queued_builds} |
        |     Log    Job ${build['job_name']} has been queued for ${build['wait_time']}. |
        | END |
        """
        threshold_value = _parse_wait_threshold(wait_threshold)

        # Use .value to extract the actual username/token
        auth = (jenkins_username.value, jenkins_token.value)
        base_url = jenkins_url.rstrip('/')
        queue_url = f"{base_url}/queue/api/json"

        try:
            queue_response = requests.get(queue_url, auth=auth, timeout=_REQUEST_TIMEOUT)
            queue_response.raise_for_status()
            queue_data = queue_response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") from e

        queued_builds = []
        # Jenkins reports inQueueSince in epoch milliseconds.
        current_time = int(time.time() * 1000)
        for item in queue_data.get('items', []):
            in_queue_since = item.get('inQueueSince', 0)
            wait_time_mins = (current_time - in_queue_since) / (1000 * 60)
            if wait_time_mins < threshold_value:
                continue

            queued_builds.append({
                'job_name': _resolve_queued_job_name(item, base_url, auth),
                'waiting_since': in_queue_since,
                'wait_time': _format_wait_time(wait_time_mins),
                'why': item.get('why', 'Unknown Reason'),
                'stuck': item.get('stuck', False),
                'blocked': item.get('blocked', False),
                'url': f"{base_url}/{item.get('url', '')}"
            })

        return queued_builds

    @keyword("Get Executor Utilization")
    def get_executor_utilization(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret
    ):
        """
        Returns a list with executor utilization info for each entry of the
        'assignedLabels' array in the Jenkins root API response.

        An entry with zero total executors is reported at 0% utilization.

        | ${utilization}=    | Get Executor Utilization    | ${JENKINS_URL}    | ${JENKINS_USERNAME}    | ${JENKINS_TOKEN} |
        | FOR    ${node}    IN    @{utilization} |
        |     Log    Node ${node['node_name']} is at ${node['utilization_percentage']}% utilization. |
        | END |
        """
        jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token)
        executor_utilization = []

        for label in jenkins_data.get('assignedLabels', []):
            busy_executors = label.get('busyExecutors', 0)
            total_executors = label.get('totalExecutors', 0)
            utilization = (busy_executors / total_executors) * 100 if total_executors else 0
            executor_utilization.append({
                'node_name': label.get('name', 'unknown'),
                'busy_executors': busy_executors,
                'total_executors': total_executors,
                'utilization_percentage': utilization
            })

        return executor_utilization

    @keyword("Build Logs Analytics")
    def build_logs_analytics(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret,
        history_limit: int = 5
    ):
        """
        For each job in Jenkins, retrieve up to `history_limit` failed builds,
        analyze their logs, and attempt to find common error patterns using fuzzy matching.

        Jobs with fewer than two retrievable failed-build logs are skipped
        because there is nothing to compare.

        Returns a list of dictionaries, each describing:
          - job_name
          - builds_analyzed
          - similarity_score
          - common_error_patterns

        Example usage:
        | ${analysis_results}=    | Build Logs Analytics    | ${JENKINS_URL}    | ${JENKINS_USERNAME}    | ${JENKINS_TOKEN}    | 5 |
        | FOR    ${analysis}    IN    @{analysis_results} |
        |     Log    Job ${analysis['job_name']} has average log similarity ${analysis['similarity_score']}. |
        |     Log    Common error patterns: ${analysis['common_error_patterns']} |
        | END |
        """
        # Robot passes arguments as strings; without the conversion the
        # counter below never equals the limit and the limit is ignored.
        history_limit = int(history_limit)
        auth = (jenkins_username.value, jenkins_token.value)
        jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token)
        failed_builds = []

        # Collect up to history_limit failed builds per job
        for job in jenkins_data.get('jobs', []):
            builds = []
            for build in job.get('builds', []):
                if build.get('result') == 'FAILURE':
                    builds.append({'number': build.get('number'), 'url': build.get('url')})
                    if len(builds) == history_limit:
                        break
            if builds:
                failed_builds.append({'job_name': job.get('name'), 'builds': builds})

        analysis_results = []
        for job_info in failed_builds:
            job_logs = []
            for build_info in job_info['builds']:
                try:
                    log_url = f"{build_info['url']}logText/progressiveText?start=0"
                    log_response = requests.get(log_url, auth=auth, timeout=_REQUEST_TIMEOUT)
                    log_response.raise_for_status()
                except requests.exceptions.RequestException as e:
                    # One unreachable log should not abort the whole analysis.
                    print(f"Failed to fetch logs for {job_info['job_name']} #{build_info['number']}: {e}")
                    continue
                job_logs.append({
                    'build_number': build_info['number'],
                    'log_content': log_response.text
                })

            # If there's only one failed build, can't compare logs across multiple builds
            if len(job_logs) < 2:
                continue

            # Extract error sections from every log into one flat list
            error_sections = []
            for log in job_logs:
                error_sections.extend(_extract_error_sections(log['log_content']))

            # Use fuzzy matching to find common error sections
            common_patterns = defaultdict(dict)
            processed_sections = set()

            for section in error_sections:
                if section in processed_sections:
                    continue
                matches = fuzzprocessor.extractBests(
                    section,
                    error_sections,
                    scorer=fuzz.token_set_ratio,
                    score_cutoff=85
                )
                similar_sections = [m[0] for m in matches]
                # Only keep the pattern when the number of similar sections
                # equals the number of logs analysed.
                if len(similar_sections) == len(job_logs):
                    common_patterns[similar_sections[0]] = {
                        'occurrences': len(similar_sections),
                        'similar_sections': similar_sections,
                        'similarity_scores': [m[1] for m in matches]
                    }
                processed_sections.update(similar_sections)

            # Calculate overall log similarity as the mean over all log pairs
            similarity_scores = [
                fuzz.token_set_ratio(first['log_content'], second['log_content'])
                for first, second in itertools.combinations(job_logs, 2)
            ]
            avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0

            analysis_results.append({
                'job_name': job_info['job_name'],
                'builds_analyzed': len(job_logs),
                'similarity_score': avg_similarity,
                'common_error_patterns': [
                    {
                        'pattern': pattern,
                        'occurrences': details['occurrences'],
                        'similar_sections': details['similar_sections'],
                        'similarity_scores': details['similarity_scores']
                    }
                    for pattern, details in common_patterns.items()
                ]
            })

        return analysis_results

    @keyword("Parse Atom Feed")
    def parse_atom_feed(
        self,
        jenkins_url: str,
        jenkins_username: platform.Secret,
        jenkins_token: platform.Secret
    ):
        """
        Fetches and parses the Jenkins manage/log Atom feed, returning the combined log text.
        Entries announcing the initial admin password are replaced by a redacted line.

        Raises ConnectionError if the feed cannot be fetched and
        xml.etree.ElementTree.ParseError if the response is not well-formed XML.

        Example usage:
        | ${logs}=    | Parse Atom Feed    | ${JENKINS_URL}    | ${JENKINS_USERNAME}    | ${JENKINS_TOKEN} |
        | Log         | Jenkins logs: ${logs} |
        """
        auth = (jenkins_username.value, jenkins_token.value)
        feed_url = f"{jenkins_url.rstrip('/')}/manage/log/rss"
        namespace = {'atom': 'http://www.w3.org/2005/Atom'}

        try:
            response = requests.get(feed_url, auth=auth, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") from e

        root = ET.fromstring(response.text)
        separator = '=' * 80
        chunks = []
        for entry in root.findall('atom:entry', namespace):
            content_elem = entry.find('atom:content', namespace)
            if content_elem is None or not content_elem.text:
                continue
            log_text = content_elem.text.strip()
            # Redact initial admin password lines
            if "Jenkins initial setup is required" in log_text and "This may also be found at:" in log_text:
                log_text = "Jenkins initial setup is required. [REDACTED]"
            chunks.append(f"{log_text}\n{separator}\n")

        return "".join(chunks)

    @keyword("Analyze Logs")
    def analyze_logs(self, logs: str, error_patterns_file: str = None):
        """Analyzes logs for common errors, prioritizing lines with ERROR or FAILURE, and suggests next steps.

        `error_patterns_file`, when given, is a JSON file mapping an error name
        to ``{"pattern": <regex>, "suggestion": <text>}``. Patterns are matched
        case-insensitively against the normalized (lowercased, single-line)
        log. Each ``{match}`` placeholder in a suggestion is replaced, in
        order, by the pattern's capture groups.

        Returns a list of ``{"suggestion": ..., "log": ...}`` dictionaries. When
        no pattern matches (or no file is given) a single generic suggestion is
        returned with the error context found in the log, or the raw log when
        no error context exists.
        """
        error_patterns = {}
        if error_patterns_file:
            with open(error_patterns_file, "r", encoding="utf-8") as f:
                error_patterns = json.load(f)

        normalized_logs = normalize_log(logs)
        error_lines = []  # (advice, context) pairs, highest priority first

        for details in error_patterns.values():
            pattern = details.get("pattern")
            advice = details.get("suggestion")
            if not (pattern and advice):
                continue

            # IGNORECASE rather than pattern.lower(): lowercasing the pattern
            # would invert escapes such as \S, \D and \W.
            for match in re.finditer(pattern, normalized_logs, re.MULTILINE | re.IGNORECASE):
                matched_line = match.group(0)
                # Keep 100 characters of context on each side of the match
                start = max(0, match.start() - 100)
                end = min(len(normalized_logs), match.end() + 100)
                context = normalized_logs[start:end]

                # Fill {match} placeholders left to right; a group that did
                # not participate in the match contributes an empty string.
                formatted_advice = advice
                for group in match.groups():
                    formatted_advice = formatted_advice.replace('{match}', group or '', 1)

                # The matched text is already lowercase at this point.
                if re.search(r"error|failure", matched_line):
                    error_lines.insert(0, (formatted_advice, context))
                else:
                    error_lines.append((formatted_advice, context))

        # Collect unique suggestions in order of priority
        suggestions = []
        seen_advice = set()
        for advice, line in error_lines:
            if advice not in seen_advice:
                suggestions.append({"suggestion": advice, "log": line})
                seen_advice.add(advice)

        if suggestions:
            return suggestions

        # Nothing specific matched: fall back to every error/failure line with
        # up to 500 characters of context on each side.
        error_blocks = []
        for match in re.finditer(r'.*\b(error|failure)\b.*', normalized_logs, re.MULTILINE | re.IGNORECASE):
            start = max(0, match.start() - 500)
            end = min(len(normalized_logs), match.end() + 500)
            error_blocks.append(normalized_logs[start:end])

        if error_blocks:
            # dict.fromkeys deduplicates while preserving first-seen order
            unique_errors = list(dict.fromkeys(error_blocks))
            suggestions.append({
                "suggestion": "Check detailed logs for root cause.",
                "log": "\n---\n".join(unique_errors)
            })
        else:
            # No error context at all: hand back the raw log for inspection
            suggestions.append({
                "suggestion": "Check detailed logs for root cause.",
                "log": logs
            })

        return suggestions