From fe31d3e9805a095209c8127e6c8a7837e7fd839b Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 24 Jan 2025 18:40:31 +0530 Subject: [PATCH 01/72] Update .gitignore to ignore SSH private key files (*.pem) --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 23fd82d9b..735ce0952 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ build *egg-info .kube *.out +# Ignore SSH private key files +*.pem # Ignore Terraform state files **/terraform.tfstate From d25406236618b8407464b5883f57d5644bdc893b Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 24 Jan 2025 18:41:31 +0530 Subject: [PATCH 02/72] Add Jenkins test infrastructure and Terraform configuration --- .../jenkins-health/.test/Taskfile.yaml | 354 ++++++++++++++++++ .../.test/terraform/Taskfile.yaml | 69 ++++ .../jenkins-health/.test/terraform/main.tf | 159 ++++++++ .../.test/terraform/provider.tf | 3 + 4 files changed, 585 insertions(+) create mode 100644 codebundles/jenkins-health/.test/Taskfile.yaml create mode 100644 codebundles/jenkins-health/.test/terraform/Taskfile.yaml create mode 100644 codebundles/jenkins-health/.test/terraform/main.tf create mode 100644 codebundles/jenkins-health/.test/terraform/provider.tf diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml new file mode 100644 index 000000000..6294e0844 --- /dev/null +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -0,0 +1,354 @@ +version: "3" + +tasks: + default: + desc: "Generate workspaceInfo and rebuild/test" + cmds: + - task: check-unpushed-commits + - task: generate-rwl-config + - task: run-rwl-discovery + + clean: + desc: "Run cleanup tasks" + cmds: + - task: check-and-cleanup-terraform + # - task: clean-rwl-discovery + + build-infra: + desc: "Build test infrastructure" + cmds: + - task: build-terraform-infra + + check-unpushed-commits: + desc: Check if outstanding commits or file updates need to be pushed before testing. 
+ vars: + # Specify the base directory relative to your Taskfile location + BASE_DIR: "../" + cmds: + - | + echo "Checking for uncommitted changes in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + UNCOMMITTED_FILES=$(git diff --name-only HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNCOMMITTED_FILES" ]; then + echo "✗" + echo "Uncommitted changes found:" + echo "$UNCOMMITTED_FILES" + echo "Remember to commit & push changes before executing the `run-rwl-discovery` task." + echo "------------" + exit 1 + else + echo "√" + echo "No uncommitted changes in specified directories." + echo "------------" + fi + - | + echo "Checking for unpushed commits in $BASE_DIR and $BASE_DIR.runwhen, excluding '.test'..." + git fetch origin + UNPUSHED_FILES=$(git diff --name-only origin/$(git rev-parse --abbrev-ref HEAD) HEAD | grep -E "^${BASE_DIR}(\.runwhen|[^/]+)" | grep -v "/\.test/" || true) + if [ -n "$UNPUSHED_FILES" ]; then + echo "✗" + echo "Unpushed commits found:" + echo "$UNPUSHED_FILES" + echo "Remember to push changes before executing the `run-rwl-discovery` task." + echo "------------" + exit 1 + else + echo "√" + echo "No unpushed commits in specified directories." 
+ echo "------------" + fi + silent: true + + generate-rwl-config: + desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)" + env: + AWS_ACCESS_KEY_ID: "{{.AWS_ACCESS_KEY_ID}}" + AWS_SECRET_ACCESS_KEY: "{{.AWS_SECRET_ACCESS_KEY}}" + AWS_DEFAULT_REGION: "{{.AWS_DEFAULT_REGION}}" + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + JENKINS_URL: '{{.JENKINS_URL | default "http://localhost:8080"}}' + cmds: + - | + source terraform/cb.secret + repo_url=$(git config --get remote.origin.url) + branch_name=$(git rev-parse --abbrev-ref HEAD) + codebundle=$(basename "$(dirname "$PWD")") + + # Fetch individual cluster details from Terraform state + # pushd terraform > /dev/null + # cluster1_name=$(terraform show -json terraform.tfstate | jq -r ' + # .values.outputs.cluster_1_name.value') + + # popd > /dev/null + + # # Check if any of the required cluster variables are empty + # if [ -z "$cluster1_name" ] || [ -z "$cluster1_server" ] || [ -z "$cluster1_resource_group" ]; then + # echo "Error: Missing cluster details. Ensure Terraform plan has been applied." + # exit 1 + # fi + + # Generate workspaceInfo.yaml with fetched cluster details + cat < workspaceInfo.yaml + workspaceName: "$RW_WORKSPACE" + workspaceOwnerEmail: authors@runwhen.com + defaultLocation: location-01-us-west1 + defaultLOD: detailed + cloudConfig: + aws: + awsAccessKeyId: "$AWS_ACCESS_KEY_ID" + awsSecretAccessKey: "$AWS_SECRET_ACCESS_KEY" + codeCollections: + - repoURL: "$repo_url" + branch: "$branch_name" + codeBundles: ["$codebundle"] + custom: + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + jenkins_url: $JENKINS_URL + EOF + silent: true + + run-rwl-discovery: + desc: "Run RunWhen Local Discovery on test infrastructure" + cmds: + - | + CONTAINER_NAME="RunWhenLocal" + if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Stopping and removing existing container $CONTAINER_NAME..." 
+ docker stop $CONTAINER_NAME && docker rm $CONTAINER_NAME + elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then + echo "Removing existing stopped container $CONTAINER_NAME..." + docker rm $CONTAINER_NAME + else + echo "No existing container named $CONTAINER_NAME found." + fi + + echo "Cleaning up output directory..." + sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; } + mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; } + + echo "Starting new container $CONTAINER_NAME..." + + docker run --name $CONTAINER_NAME -e DEBUG_LOGGING=true -p 8081:8081 -v "$(pwd)":/shared -d ghcr.io/runwhen-contrib/runwhen-local:latest || { + echo "Failed to start container"; exit 1; + } + + echo "Running workspace builder script in container..." + docker exec -w /workspace-builder $CONTAINER_NAME ./run.sh $1 --verbose || { + echo "Error executing script in container"; exit 1; + } + + echo "Review generated config files under output/workspaces/" + silent: true + + check-terraform-infra: + desc: "Check if Terraform has any deployed infrastructure in the terraform subdirectory" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Navigate to the Terraform directory + if [ ! -d "terraform" ]; then + echo "Terraform directory not found." + exit 1 + fi + cd terraform + + # Check if Terraform state file exists + if [ ! -f "terraform.tfstate" ]; then + echo "No Terraform state file found in the terraform directory. No infrastructure is deployed." + exit 0 + fi + + # List resources in Terraform state + resources=$(terraform state list) + + # Check if any resources are listed in the state file + if [ -n "$resources" ]; then + echo "Deployed infrastructure detected." + echo "$resources" + exit 0 + else + echo "No deployed infrastructure found in Terraform state." 
+ exit 0 + fi + silent: true + + build-terraform-infra: + desc: "Run terraform apply" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Terraform apply aborted." + exit 1 + fi + task format-and-init-terraform + echo "Starting Terraform Build of Terraform infrastructure..." + terraform apply -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure build completed." + silent: true + + check-rwp-config: + desc: Check if env vars are set for RunWhen Platform + cmds: + - | + source terraform/cb.secret + missing_vars=() + + if [ -z "$RW_WORKSPACE" ]; then + missing_vars+=("RW_WORKSPACE") + fi + + if [ -z "$RW_API_URL" ]; then + missing_vars+=("RW_API_URL") + fi + + if [ -z "$RW_PAT" ]; then + missing_vars+=("RW_PAT") + fi + + if [ ${#missing_vars[@]} -ne 0 ]; then + echo "The following required environment variables are missing: ${missing_vars[*]}" + exit 1 + fi + silent: true + + upload-slxs: + desc: "Upload SLX files to the appropriate URL" + env: + RW_WORKSPACE: "{{.RW_WORKSPACE}}" + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Upload aborted." 
+ exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + PAYLOAD=$(jq -n --arg commitMsg "Creating new SLX $SLX_NAME" '{ commitMsg: $commitMsg, files: {} }') + for file in slx.yaml runbook.yaml sli.yaml; do + if [ -f "$dir/$file" ]; then + CONTENT=$(cat "$dir/$file") + PAYLOAD=$(echo "$PAYLOAD" | jq --arg fileContent "$CONTENT" --arg fileName "$file" '.files[$fileName] = $fileContent') + fi + done + + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Uploading SLX: $SLX_NAME to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded SLX: $SLX_NAME to $URL" + elif [[ "$response_code" == "405" ]]; then + echo "Failed to upload SLX: $SLX_NAME to $URL. Method not allowed (405)." + else + echo "Failed to upload SLX: $SLX_NAME to $URL. Unexpected response code: $response_code" + fi + fi + done + silent: true + + delete-slxs: + desc: "Delete SLX objects from the appropriate URL" + env: + RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' + RW_API_URL: "{{.RW_API}}" + RW_PAT: "{{.RW_PAT}}" + cmds: + - task: check-rwp-config + - | + source terraform/cb.secret + BASE_DIR="output/workspaces/${RW_WORKSPACE}/slxs" + if [ ! -d "$BASE_DIR" ]; then + echo "Directory $BASE_DIR does not exist. Deletion aborted." 
+ exit 1 + fi + + for dir in "$BASE_DIR"/*; do + if [ -d "$dir" ]; then + SLX_NAME=$(basename "$dir") + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/branches/main/slxs/${SLX_NAME}" + echo "Deleting SLX: $SLX_NAME from $URL" + response_code=$(curl -X DELETE "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -w "%{http_code}" -o /dev/null -s) + + if [[ "$response_code" == "200" || "$response_code" == "204" ]]; then + echo "Successfully deleted SLX: $SLX_NAME from $URL" + elif [[ "$response_code" == "405" ]]; then + echo "Failed to delete SLX: $SLX_NAME from $URL. Method not allowed (405)." + else + echo "Failed to delete SLX: $SLX_NAME from $URL. Unexpected response code: $response_code" + fi + fi + done + silent: true + + cleanup-terraform-infra: + desc: "Cleanup deployed Terraform infrastructure" + cmds: + - | + # Source Envs for Auth + source terraform/tf.secret + + # Navigate to the Terraform directory + if [ -d "terraform" ]; then + cd terraform + else + echo "Terraform directory not found. Cleanup aborted." + exit 1 + fi + + echo "Starting cleanup of Terraform infrastructure..." + terraform destroy -auto-approve || { + echo "Failed to clean up Terraform infrastructure." + exit 1 + } + echo "Terraform infrastructure cleanup completed." + silent: true + + check-and-cleanup-terraform: + desc: "Check and clean up deployed Terraform infrastructure if it exists" + cmds: + - | + # Capture the output of check-terraform-infra + infra_output=$(task check-terraform-infra | tee /dev/tty) + + # Check if output contains indication of deployed infrastructure + if echo "$infra_output" | grep -q "Deployed infrastructure detected"; then + echo "Infrastructure detected; proceeding with cleanup." + task cleanup-terraform-infra + else + echo "No deployed infrastructure found; no cleanup required." 
+ fi + silent: true + + clean-rwl-discovery: + desc: "Check and clean up RunWhen Local discovery output" + cmds: + - | + sudo rm -rf output + rm workspaceInfo.yaml + silent: true diff --git a/codebundles/jenkins-health/.test/terraform/Taskfile.yaml b/codebundles/jenkins-health/.test/terraform/Taskfile.yaml new file mode 100644 index 000000000..08e0e835d --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/Taskfile.yaml @@ -0,0 +1,69 @@ +version: '3' + +env: + TERM: screen-256color + +tasks: + default: + cmds: + - task: test + + test: + desc: Run tests. + cmds: + - task: test-terraform + + clean: + desc: Clean the environment. + cmds: + - task: clean-go + - task: clean-terraform + + clean-terraform: + desc: Clean the terraform environment (remove terraform directories and files) + cmds: + - find . -type d -name .terraform -exec rm -rf {} + + - find . -type f -name .terraform.lock.hcl -delete + + format-and-init-terraform: + desc: Run Terraform fmt and init + cmds: + - | + terraform fmt + terraform init + test-terraform: + desc: Run tests for all terraform directories. + silent: true + env: + DIRECTORIES: + sh: find . -path '*/.terraform/*' -prune -o -name '*.tf' -type f -exec dirname {} \; | sort -u + cmds: + - | + BOLD=$(tput bold) + NORM=$(tput sgr0) + + CWD=$PWD + + for d in $DIRECTORIES; do + cd $d + echo "${BOLD}$PWD:${NORM}" + if ! terraform fmt -check=true -list=false -recursive=false; then + echo " ✗ terraform fmt" && exit 1 + else + echo " √ terraform fmt" + fi + + if ! terraform init -backend=false -input=false -get=true -no-color > /dev/null; then + echo " ✗ terraform init" && exit 1 + else + echo " √ terraform init" + fi + + if ! 
terraform validate > /dev/null; then + echo " ✗ terraform validate" && exit 1 + else + echo " √ terraform validate" + fi + + cd $CWD + done \ No newline at end of file diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf new file mode 100644 index 000000000..ee938dd96 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -0,0 +1,159 @@ +# Get latest Ubuntu AMI +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +# Generate SSH key +resource "tls_private_key" "jenkins_key" { + algorithm = "RSA" + rsa_bits = 2048 +} + +resource "aws_key_pair" "generated_key" { + key_name = "jenkins-key" + public_key = tls_private_key.jenkins_key.public_key_openssh +} + +# Save private key locally +resource "local_file" "private_key" { + content = tls_private_key.jenkins_key.private_key_pem + filename = "jenkins-key.pem" + + provisioner "local-exec" { + command = "chmod 400 jenkins-key.pem" + } +} + +# VPC Configuration +resource "aws_vpc" "jenkins_vpc" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "jenkins-vpc" + } +} + +# Internet Gateway +resource "aws_internet_gateway" "jenkins_igw" { + vpc_id = aws_vpc.jenkins_vpc.id + + tags = { + Name = "jenkins-igw" + } +} + +# Public Subnet +resource "aws_subnet" "jenkins_subnet" { + vpc_id = aws_vpc.jenkins_vpc.id + cidr_block = "10.0.1.0/24" + map_public_ip_on_launch = true + availability_zone = "us-west-2a" + + tags = { + Name = "jenkins-subnet" + } +} + +# Route Table +resource "aws_route_table" "jenkins_rt" { + vpc_id = aws_vpc.jenkins_vpc.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.jenkins_igw.id + } + + tags = { + Name = "jenkins-rt" + } +} + +# 
Route Table Association +resource "aws_route_table_association" "jenkins_rta" { + subnet_id = aws_subnet.jenkins_subnet.id + route_table_id = aws_route_table.jenkins_rt.id +} + +# Security Group +resource "aws_security_group" "jenkins_sg" { + name = "jenkins-sg" + description = "Security group for Jenkins server" + vpc_id = aws_vpc.jenkins_vpc.id + + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 8080 + to_port = 8080 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +# EC2 Instance +resource "aws_instance" "jenkins_server" { + ami = data.aws_ami.ubuntu.id + instance_type = "t2.micro" + + subnet_id = aws_subnet.jenkins_subnet.id + vpc_security_group_ids = [aws_security_group.jenkins_sg.id] + key_name = aws_key_pair.generated_key.key_name + associate_public_ip_address = true + + user_data = <<-EOF + #!/bin/bash + # Update package index + apt-get update + # Install Java 17 + apt-get install -y openjdk-17-jdk + # Add Jenkins repository key + curl -fsSL https://pkg.jenkins.io/debian-stable/jenkins.io-2023.key | sudo tee /usr/share/keyrings/jenkins-keyring.asc > /dev/null + # Add Jenkins repository + echo deb [signed-by=/usr/share/keyrings/jenkins-keyring.asc] https://pkg.jenkins.io/debian-stable binary/ | sudo tee /etc/apt/sources.list.d/jenkins.list > /dev/null + # Update package index again + apt-get update + # Install Jenkins + apt-get install -y jenkins + # Enable Jenkins to start on boot + systemctl enable jenkins + # Start Jenkins service + systemctl start jenkins + EOF + + tags = { + Name = "jenkins-server" + } +} + +output "jenkins_public_ip" { + value = aws_instance.jenkins_server.public_ip +} + +output "ssh_connection_string" { + value = "ssh -i jenkins-key.pem ubuntu@${aws_instance.jenkins_server.public_ip}" +} \ No newline at end of file diff --git 
a/codebundles/jenkins-health/.test/terraform/provider.tf b/codebundles/jenkins-health/.test/terraform/provider.tf new file mode 100644 index 000000000..aa39e393f --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/provider.tf @@ -0,0 +1,3 @@ +provider "aws" { + region = "us-west-2" # Replace with your desired region +} \ No newline at end of file From e0a2822636d199d1617c1bc711c5b3bcdcc5538a Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 24 Jan 2025 18:45:51 +0530 Subject: [PATCH 03/72] Add README for Jenkins health codebundle testing instructions --- codebundles/jenkins-health/.test/README.md | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 codebundles/jenkins-health/.test/README.md diff --git a/codebundles/jenkins-health/.test/README.md b/codebundles/jenkins-health/.test/README.md new file mode 100644 index 000000000..3eb34be8b --- /dev/null +++ b/codebundles/jenkins-health/.test/README.md @@ -0,0 +1,70 @@ +### How to test this codebundle? 
+ +## Prerequisites + +The following credentials and configuration are required: + +- Jenkins URL +- Jenkins username +- Jenkins API token + +## Configuration + +**Infrastructure Deployment** + +Purpose: Cloud infrastructure provisioning and management using Terraform + +#### Credential Setup + +Navigate to the `.test/terraform` directory and configure two secret files for authentication: + +`cb.secret` - Jenkins and RunWhen Credentials + +Create this file with the following environment variables: + + ```sh + export RW_PAT="" + export RW_WORKSPACE="" + export RW_API_URL="papi.beta.runwhen.com" + + export JENKINS_URL="" + export JENKINS_USERNAME="" + export JENKINS_TOKEN="" + ``` + + +`tf.secret` - Terraform Deployment Credentials + +Create this file with the following environment variables: + + ```sh + export AWS_DEFAULT_REGION="" + export AWS_ACCESS_KEY_ID="" + export AWS_SECRET_ACCESS_KEY="" + export AWS_SESSION_TOKEN="" # Optional: Include if using temporary credentials + ``` + +#### Testing Workflow + +1. Build test infra: + ```sh + task build-infra + ``` + +2. Generate RunWhen Configurations + ```sh + task + ``` + +3. Upload generated SLXs to RunWhen Platform + + ```sh + task upload-slxs + ``` + +4. Finally, after testing, clean up the test infrastructure.
+ +```sh + task clean +``` + From dc64a51306a335984bb3c7b65908c01683f9585c Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 27 Jan 2025 18:02:07 +0530 Subject: [PATCH 04/72] Update Jenkins server provisioning with Ubuntu 22.04, automated plugin installation, and admin user setup --- .../jenkins-health/.test/terraform/main.tf | 117 ++++++++++++++++-- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index ee938dd96..29886a50f 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -5,7 +5,7 @@ data "aws_ami" "ubuntu" { filter { name = "name" - values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"] + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] } filter { @@ -132,17 +132,114 @@ resource "aws_instance" "jenkins_server" { # Install Java 17 apt-get install -y openjdk-17-jdk # Add Jenkins repository key - curl -fsSL https://pkg.jenkins.io/debian-stable/jenkins.io-2023.key | sudo tee /usr/share/keyrings/jenkins-keyring.asc > /dev/null + curl -fsSL https://pkg.jenkins.io/debian-stable/jenkins.io-2023.key | tee /usr/share/keyrings/jenkins-keyring.asc > /dev/null # Add Jenkins repository - echo deb [signed-by=/usr/share/keyrings/jenkins-keyring.asc] https://pkg.jenkins.io/debian-stable binary/ | sudo tee /etc/apt/sources.list.d/jenkins.list > /dev/null + echo deb [signed-by=/usr/share/keyrings/jenkins-keyring.asc] https://pkg.jenkins.io/debian-stable binary/ | tee /etc/apt/sources.list.d/jenkins.list > /dev/null # Update package index again - apt-get update - # Install Jenkins - apt-get install -y jenkins - # Enable Jenkins to start on boot - systemctl enable jenkins - # Start Jenkins service - systemctl start jenkins + apt-get update && apt-get install -y jenkins && systemctl enable jenkins && systemctl start jenkins + sleep 60 + # Get the initial 
admin password + JENKINS_PASS=$(sudo cat /var/lib/jenkins/secrets/initialAdminPassword) + + # Install Jenkins CLI + wget -q http://localhost:8080/jnlpJars/jenkins-cli.jar + + # Create groovy script to create admin user + cat < create_admin.groovy + import jenkins.model.* + import hudson.security.* + import jenkins.install.* + + def instance = Jenkins.getInstance() + + // Skip setup wizard + instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) + + // Install suggested plugins + def pm = instance.getPluginManager() + def uc = instance.getUpdateCenter() + uc.updateAllSites() + + def plugins = [ + // Organization and Administration + "dashboard-view", + "cloudbees-folder", + "configuration-as-code", + "antisamy-markup-formatter", + + // Build Features + "build-name-setter", + "build-timeout", + "config-file-provider", + "credentials-binding", + "embeddable-build-status", + "rebuild", + "ssh-agent", + "throttle-concurrents", + "timestamper", + "ws-cleanup", + + // Build Tools + "ant", + "gradle", + + // Pipelines and Continuous Delivery + "workflow-aggregator", + "github-branch-source", + "pipeline-github-lib", + "pipeline-stage-view", + "conditional-buildstep", + "parameterized-trigger", + "copyartifact", + + // Source Code Management + "git", + "github", + + // Distributed Builds + "ssh-slaves", + + // User Management and Security + "matrix-auth", + "pam-auth", + "ldap", + + // Notifications and Publishing + "email-ext", + "mailer", + + "configuration-as-code", + "ec2" + ] + + plugins.each { plugin -> + if (!pm.getPlugin(plugin)) { + def installFuture = uc.getPlugin(plugin).deploy() + installFuture.get() + } + } + + // Create admin user + def hudsonRealm = new HudsonPrivateSecurityRealm(false) + hudsonRealm.createAccount("admin", "admin123!") + instance.setSecurityRealm(hudsonRealm) + + def strategy = new FullControlOnceLoggedInAuthorizationStrategy() + strategy.setAllowAnonymousRead(false) + instance.setAuthorizationStrategy(strategy) + + instance.save() + 
GROOVY + + # Execute the groovy script using Jenkins CLI + java -jar jenkins-cli.jar -s http://localhost:8080 -auth admin:$JENKINS_PASS groovy = < create_admin.groovy || { + echo "Failed to create admin user" + exit 1 + } + + # Clean up + rm -f create_admin.groovy + EOF tags = { From 3dbfdbcaf4251ea07c3412d812d734da2aa553ac Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 27 Jan 2025 18:02:34 +0530 Subject: [PATCH 05/72] Add Jenkins failed build log retrieval script --- .../jenkins-health/faild_build_logs.sh | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 codebundles/jenkins-health/faild_build_logs.sh diff --git a/codebundles/jenkins-health/faild_build_logs.sh b/codebundles/jenkins-health/faild_build_logs.sh new file mode 100755 index 000000000..68f375957 --- /dev/null +++ b/codebundles/jenkins-health/faild_build_logs.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# set -x +# Check if required environment variables are set +if [ -z "${JENKINS_URL}" ] || [ -z "${JENKINS_USERNAME}" ] || [ -z "${JENKINS_TOKEN}" ]; then + echo "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." + exit 1 +fi + +# Authentication string for curl +AUTH_HEADER="${JENKINS_USERNAME}:${JENKINS_TOKEN}" + +# Temporary file for JSON output +OUTPUT_FILE="faild_build_logs.json" + +# Fetch Jenkins jobs data +jenkins_data=$(curl -s -u "${AUTH_HEADER}" "${JENKINS_URL}/api/json?depth=2") + + +# Validate if Jenkins data was retrieved successfully +if [ -z "$jenkins_data" ]; then + echo "Failed to fetch data from Jenkins. Please check your credentials or URL." 
+ exit 1 +fi + +# Start JSON array +echo "[" > "$OUTPUT_FILE" +first_entry=true + +# Process each job with a failed last build +echo "$jenkins_data" | jq -c '.jobs[] | select(.lastBuild.result == "FAILURE") | {name: .name, url: .lastBuild.url, number: .lastBuild.number}' | \ +while read -r job_info; do + # Extract job details + job_name=$(echo "$job_info" | jq -r '.name') + build_url=$(echo "$job_info" | jq -r '.url') + build_number=$(echo "$job_info" | jq -r '.number') + + # Skip if any of the required fields are missing + if [ -z "$job_name" ] || [ -z "$build_url" ] || [ -z "$build_number" ]; then + echo "Skipping a job due to missing information." + continue + fi + + # Fetch build logs + logs=$(curl -s -u "$AUTH_HEADER" "${build_url}logText/progressiveText?start=0") + if [ $? -ne 0 ]; then + echo "Failed to fetch logs for job: $job_name, build: $build_number." + continue + fi + + # Escape special characters in logs for JSON + escaped_logs=$(echo "$logs" | jq -sR .) + + # Add comma if not the first entry + if [ "$first_entry" = false ]; then + echo "," >> "$OUTPUT_FILE" + fi + first_entry=false + + # Write JSON entry to file + cat << EOF >> "$OUTPUT_FILE" +{ + "job_name": "$job_name", + "result": "FAILURE", + "buildNumber": $build_number, + "logs": $escaped_logs +} +EOF +done + +# Close JSON array +echo "]" >> "$OUTPUT_FILE" + +# Validate JSON and pretty-print the output +if jq empty "$OUTPUT_FILE" > /dev/null 2>&1; then + cat "$OUTPUT_FILE" + # echo "Failed builds data has been saved to $OUTPUT_FILE" +else + echo "Error: Invalid JSON generated. Check the output file for issues." 
+ exit 1 +fi From c3efa1029ba2e30b37bc45ebdd4f74bb847868c0 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 27 Jan 2025 18:02:47 +0530 Subject: [PATCH 06/72] Add long-running Jenkins builds detection script --- .../jenkins-health/long_running_builds.sh | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100755 codebundles/jenkins-health/long_running_builds.sh diff --git a/codebundles/jenkins-health/long_running_builds.sh b/codebundles/jenkins-health/long_running_builds.sh new file mode 100755 index 000000000..ffc5e30b2 --- /dev/null +++ b/codebundles/jenkins-health/long_running_builds.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# set -x +# Check if required environment variables are set +if [ -z "${JENKINS_URL}" ] || [ -z "${JENKINS_USERNAME}" ] || [ -z "${JENKINS_TOKEN}" ]; then + echo "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." + exit 1 +fi + +convert_to_minutes() { + local time_str=$1 + # Convert to lowercase and remove any spaces + time_str=$(echo "$time_str" | tr '[:upper:]' '[:lower:]' | tr -d ' ') + + # Extract number using regex + local number=$(echo "$time_str" | grep -o '^[0-9]\+') + + # Extract unit by removing the number + local unit=$(echo "$time_str" | sed 's/^[0-9]\+//') + + case $unit in + m|min|minute|minutes) + if [ "$number" -lt 0 ] || [ "$number" -gt 59 ]; then + echo "Minutes should be between 0-59" >&2 + exit 1 + fi + echo $number ;; + h|hr|hour|hours) + if [ "$number" -lt 0 ] || [ "$number" -gt 23 ]; then + echo "Hours should be between 0-23" >&2 + exit 1 + fi + echo $((number * 60)) ;; + d|day|days) + echo $((number * 1440)) ;; + *) + echo "Invalid time format. 
Please use formats like '5m', '2h', '1d' or '5min', '2hours', '1day'" >&2 + echo "Minutes should be between 0-59" >&2 + echo "Hours should be between 0-23" >&2 + exit 1 + ;; + esac +} + +# Check if threshold parameter is provided +if [ -z "$1" ]; then + echo "Please provide time threshold (e.g., ./long_running_jobs.sh 5m or 2h or 1d)" + exit 1 +fi + +THRESHOLD_MINUTES=$(convert_to_minutes "$1") + +# Authentication string for curl +AUTH_HEADER="${JENKINS_USERNAME}:${JENKINS_TOKEN}" + +# Get current timestamp in milliseconds +current_time=$(date +%s%3N) + +# Fetch Jenkins data and process it using jq to find long running jobs +jenkins_data=$(curl -s -u "${AUTH_HEADER}" "${JENKINS_URL}/api/json?depth=2") + +# Validate if Jenkins data was retrieved successfully +if [ -z "$jenkins_data" ]; then + echo "Failed to fetch data from Jenkins. Please check your credentials or URL." + exit 1 +fi + +# Process the data using jq to find long running jobs and output as JSON +echo "$jenkins_data" | jq --arg threshold "$THRESHOLD_MINUTES" --arg current "$current_time" ' +{ + "timestamp": ($current | tonumber), + "threshold": ($threshold | tonumber), + "long_running_jobs": [ + .jobs[] | + select(.lastBuild != null and .lastBuild.building) | + { + "name": .name, + "build_number": .lastBuild.number, + "node": (if .lastBuild.builtOn == "" then "Built-in Node" else .lastBuild.builtOn end), + "start_time": .lastBuild.timestamp, + "duration_minutes": (((($current | tonumber) - .lastBuild.timestamp) / 1000 / 60) | floor), + "url": .lastBuild.url + } | + select(.duration_minutes >= ($threshold | tonumber)) + ] +}' | jq '.long_running_jobs[] |= . 
+ { + "duration": ((.duration_minutes | tostring) + "m") +}' | jq 'walk( + if type == "object" and has("duration") then + .duration = (if .duration_minutes >= 1440 then + ((.duration_minutes / 1440) | floor | tostring) + "d " + + (((.duration_minutes % 1440) / 60) | floor | tostring) + "h " + + (.duration_minutes % 60 | tostring) + "m" + elif .duration_minutes >= 60 then + ((.duration_minutes / 60) | floor | tostring) + "h " + + (.duration_minutes % 60 | tostring) + "m" + else + (.duration_minutes | tostring) + "m" + end | sub("\\s+$"; "")) + else + . + end +)' From 15897afbfbed52b1d2404d9fa198e4e55eccd6b8 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 27 Jan 2025 18:03:16 +0530 Subject: [PATCH 07/72] Add Jenkins health runbook with failed builds and long-running builds detection --- codebundles/jenkins-health/runbook.robot | 124 +++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 codebundles/jenkins-health/runbook.robot diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot new file mode 100644 index 000000000..ba05540e7 --- /dev/null +++ b/codebundles/jenkins-health/runbook.robot @@ -0,0 +1,124 @@ +*** Settings *** +Documentation This taskset collects information about Jenkins health and failed builds to help troubleshoot potential issues. +Metadata Author saurabh3460 +Metadata Display Name Jenkins Healthcheck +Metadata Supports Jenkins + +Library RW.Core +Library RW.CLI +Library RW.platform + +Suite Setup Suite Initialization + +*** Tasks *** +List Failed Jenkins Builds + [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API + [Tags] Jenkins Logs Failures API Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=faild_build_logs.sh + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... 
secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${jobs}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. WARN + ${jobs}= Create List + END + IF len(@{jobs}) > 0 + FOR ${job} IN @{jobs} + ${job_name}= Set Variable ${job['job_name']} + ${build_number}= Set Variable ${job['buildNumber']} + ${logs}= Set Variable ${job['logs']} + + RW.Core.Add Pre To Report ${job} + + ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint + RW.Core.Add Issue + ... severity=3 + ... expected=Jenkins job `${job_name}` should have a successful build + ... actual=Jenkins job `${job_name}` has a failed build + ... title=Jenkins job `${job_name}` has a failed build + ... reproduce_hint=${rsp.cmd} + ... details=${pretty_item} + ... next_steps=Review the Jenkins job `${job_name}` build number `${build_number}` + END + ELSE + RW.Core.Add Pre To Report "No failed builds found" + END + +List Long Running Builds + [Documentation] Identifies Jenkins builds that have been running longer than a specified threshold + [Tags] Jenkins Builds Monitoring + ${rsp}= RW.CLI.Run Bash File + ... bash_file=long_running_builds.sh + ... cmd_override=./long_running_builds.sh ${TIME_THRESHOLD} + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty object. WARN + ${data}= Create Dictionary + END + + ${long_running_jobs}= Set Variable ${data.get('long_running_jobs', [])} + + IF len(${long_running_jobs}) > 0 + ${json_str}= Evaluate json.dumps(${long_running_jobs}) json + ${formatted_results}= RW.CLI.Run Cli + ... 
cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Duration", "URL"] as $headers | $headers, (.[] | [.name, .build_number, .duration, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Long Running Jobs:\n${formatted_results.stdout} + + FOR ${job} IN @{long_running_jobs} + ${job_name}= Set Variable ${job['name']} + ${duration}= Set Variable ${job['duration']} + ${build_number}= Set Variable ${job['build_number']} + ${url}= Set Variable ${job['url']} + + ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint + RW.Core.Add Issue + ... severity=4 + ... expected=Jenkins job `${job_name}` should complete within ${TIME_THRESHOLD} + ... actual=Jenkins job `${job_name}` has been running for ${duration} + ... title=Jenkins job build `${job_name}` `${build_number}` has been running for ${duration} + ... reproduce_hint=${rsp.cmd} + ... details=${pretty_item} + ... next_steps=Review the Jenkins job `${job_name}` build number `${build_number}` at ${url} + END + ELSE + RW.Core.Add Pre To Report "No long running builds found" + END + +*** Keywords *** +Suite Initialization + ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL + ... type=string + ... description=The URL of your Jenkins instance + ... pattern=\w* + ... example=https://jenkins.example.com + ${JENKINS_USERNAME}= RW.Core.Import Secret JENKINS_USERNAME + ... type=string + ... description=Jenkins username for authentication + ... pattern=\w* + ... example=admin + ${JENKINS_TOKEN}= RW.Core.Import Secret JENKINS_TOKEN + ... type=string + ... description=Jenkins API token for authentication + ... pattern=\w* + ... example=11aa22bb33cc44dd55ee66ff77gg88hh + ${TIME_THRESHOLD}= RW.Core.Import User Variable TIME_THRESHOLD + ... type=string + ... description=The threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="1m" + ... 
default="1m" + Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} + Set Suite Variable ${TIME_THRESHOLD} ${TIME_THRESHOLD} + Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} + Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} + Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} + \ No newline at end of file From 765fee582c9331346b44d542825fedee89e5cd27 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 27 Jan 2025 18:03:35 +0530 Subject: [PATCH 08/72] Add Jenkins health SLI Robot Framework task for monitoring build health --- codebundles/jenkins-health/sli.robot | 106 +++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 codebundles/jenkins-health/sli.robot diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot new file mode 100644 index 000000000..fbeb41948 --- /dev/null +++ b/codebundles/jenkins-health/sli.robot @@ -0,0 +1,106 @@ +*** Settings *** +Documentation This taskset collects information about Jenkins health and failed builds +... to help troubleshoot potential issues. +Metadata Author saurabh3460 +Metadata Display Name Jenkins Healthcheck +Metadata Supports Jenkins + +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform + + +Suite Setup Suite Initialization + +*** Tasks *** +Check Failed Jenkins Builds + [Documentation] Check Failed Jenkins Builds + [Tags] Jenkins Logs Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=faild_build_logs.sh + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${jobs}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty list. 
WARN + ${jobs}= Create List + END + ${failed_builds}= Evaluate len(@{jobs}) + ${failed_builds_score}= Evaluate 1 if int(${failed_builds}) <= int(${MAX_FAILED_BUILDS}) else 0 + Set Global Variable ${failed_builds_score} + +Check For Long Running Builds + [Documentation] Check Jenkins builds that have been running longer than a specified threshold + [Tags] Jenkins Builds + ${rsp}= RW.CLI.Run Bash File + ... bash_file=long_running_builds.sh + ... cmd_override=./long_running_builds.sh ${TIME_THRESHOLD} + ... env=${env} + ... include_in_history=False + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads(r'''${rsp.stdout}''') json + EXCEPT + Log Failed to load JSON payload, defaulting to empty object. WARN + ${data}= Create Dictionary + END + + ${long_running_builds}= Set Variable ${data.get('long_running_jobs', [])} + ${long_running_count}= Evaluate len($long_running_builds) + + ${long_running_score}= Evaluate 1 if int(${long_running_count}) <= int(${MAX_LONG_RUNNING_BUILDS}) else 0 + Set Global Variable ${long_running_score} + +Generate Health Score + ${health_score}= Evaluate (${failed_builds_score} + ${long_running_score}) / 2 + ${health_score}= Convert to Number ${health_score} 2 + RW.Core.Push Metric ${health_score} + + +*** Keywords *** +Suite Initialization + ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL + ... type=string + ... description=The URL of your Jenkins instance + ... pattern=\w* + ... example=https://jenkins.example.com + ${JENKINS_USERNAME}= RW.Core.Import Secret JENKINS_USERNAME + ... type=string + ... description=Jenkins username for authentication + ... pattern=\w* + ... example=admin + ${JENKINS_TOKEN}= RW.Core.Import Secret JENKINS_TOKEN + ... type=string + ... description=Jenkins API token for authentication + ... pattern=\w* + ... example=11aa22bb33cc44dd55ee66ff77gg88hh + ${TIME_THRESHOLD}= RW.Core.Import User Variable TIME_THRESHOLD + ... 
type=string + ... description=The threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="1m" + ... default="1m" + ${MAX_FAILED_BUILDS}= RW.Core.Import User Variable MAX_FAILED_BUILDS + ... type=string + ... description=The maximum number of failed builds to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${MAX_LONG_RUNNING_BUILDS}= RW.Core.Import User Variable MAX_LONG_RUNNING_BUILDS + ... type=string + ... description=The maximum number of long running builds to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} + Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} + Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} + Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} + Set Suite Variable ${TIME_THRESHOLD} ${TIME_THRESHOLD} + Set Suite Variable ${MAX_FAILED_BUILDS} ${MAX_FAILED_BUILDS} + Set Suite Variable ${MAX_LONG_RUNNING_BUILDS} ${MAX_LONG_RUNNING_BUILDS} \ No newline at end of file From dd990d4d92922cb845d532b22bed35071d9db7f1 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:21:25 +0530 Subject: [PATCH 09/72] update faild_build_logs json --- codebundles/jenkins-health/faild_build_logs.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/faild_build_logs.sh b/codebundles/jenkins-health/faild_build_logs.sh index 68f375957..b3f8aca4a 100755 --- a/codebundles/jenkins-health/faild_build_logs.sh +++ b/codebundles/jenkins-health/faild_build_logs.sh @@ -61,8 +61,9 @@ while read -r job_info; do { "job_name": "$job_name", "result": "FAILURE", - "buildNumber": $build_number, - "logs": $escaped_logs + "build_number": $build_number, + "logs": $escaped_logs, + "url": "$build_url" } EOF done From ec0ae6641779bed6c3aeb65a91c4a9ac5a0c8526 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:22:00 +0530 Subject: [PATCH 
10/72] rename name to job_name in logn_running_builds.sh --- codebundles/jenkins-health/long_running_builds.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/long_running_builds.sh b/codebundles/jenkins-health/long_running_builds.sh index ffc5e30b2..36928f1d6 100755 --- a/codebundles/jenkins-health/long_running_builds.sh +++ b/codebundles/jenkins-health/long_running_builds.sh @@ -73,7 +73,7 @@ echo "$jenkins_data" | jq --arg threshold "$THRESHOLD_MINUTES" --arg current "$c .jobs[] | select(.lastBuild != null and .lastBuild.building) | { - "name": .name, + "job_name": .name, "build_number": .lastBuild.number, "node": (if .lastBuild.builtOn == "" then "Built-in Node" else .lastBuild.builtOn end), "start_time": .lastBuild.timestamp, From 39773a72ae17f4ac613c9305ebed2d6838ccd031 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:23:31 +0530 Subject: [PATCH 11/72] add utils python script --- codebundles/jenkins-health/util.py | 295 +++++++++++++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 codebundles/jenkins-health/util.py diff --git a/codebundles/jenkins-health/util.py b/codebundles/jenkins-health/util.py new file mode 100644 index 000000000..1839285c2 --- /dev/null +++ b/codebundles/jenkins-health/util.py @@ -0,0 +1,295 @@ +import os +import requests +import json +import time +from collections import defaultdict +from thefuzz import fuzz +from thefuzz import process as fuzzprocessor + +# Ensure required environment variables are set +JENKINS_URL = os.getenv("JENKINS_URL") +JENKINS_USERNAME = os.getenv("JENKINS_USERNAME") +JENKINS_TOKEN = os.getenv("JENKINS_TOKEN") + +if not all([JENKINS_URL, JENKINS_USERNAME, JENKINS_TOKEN]): + error_msg = "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." 
+ raise ValueError(error_msg) + +# Jenkins API URL +api_url = f"{JENKINS_URL}/api/json?depth=2" +# Basic authentication +auth = (JENKINS_USERNAME, JENKINS_TOKEN) + +# Fetch Jenkins jobs data +try: + response = requests.get(api_url, auth=auth, timeout=10) + response.raise_for_status() # Raises an HTTPError for bad responses (4xx, 5xx) + jenkins_data = response.json() +except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + + +def get_failed_tests(): + failed_tests = [] + for job in jenkins_data.get('jobs'): + if job.get('lastBuild').get('result') == 'UNSTABLE': + pipeline_details = { + 'pipeline_name': job.get('name'), + 'pipeline_url': job.get('url'), + 'build_result': job.get('lastBuild').get('result'), + 'build_number': job.get('lastBuild').get('number'), + 'build_timestamp': job.get('lastBuild').get('timestamp'), + 'build_duration': job.get('lastBuild').get('duration'), + 'build_queueId': job.get('lastBuild').get('queueId'), + 'build_building': job.get('lastBuild').get('building'), + 'build_changeSet': job.get('lastBuild').get('changeSet') + } + try: + tests_response = requests.get(job.get('lastBuild').get('url')+"testReport/api/json", auth=auth, timeout=10) + tests_response.raise_for_status() + suites = tests_response.json().get('suites') + test_results = [] + for suite in suites: + for case in suite.get('cases'): + test_results.append(case) + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") + + result = {"pipeline_details": pipeline_details, "test_results": test_results} + failed_tests.append(result) + return failed_tests + +def get_queued_builds(wait_threshold="10m"): + """Get builds waiting in queue longer than the specified threshold. 
+ + Args: + wait_threshold (str): Time threshold in format like '10min', '1h', '30m', '1d', '1day' + + Returns: + list: List of queued builds that exceed the wait threshold + """ + # Convert threshold to minutes + wait_threshold = wait_threshold.lower().replace(' ', '').strip('"').strip("'") + threshold_value = 0 + if 'min' in wait_threshold: + threshold_value = int(wait_threshold.replace('min', '')) + elif 'h' in wait_threshold: + threshold_value = int(wait_threshold.replace('h', '')) * 60 + elif 'm' in wait_threshold: + threshold_value = int(wait_threshold.replace('m', '')) + elif 'day' in wait_threshold: + threshold_value = int(wait_threshold.replace('day', '')) * 24 * 60 + elif 'd' in wait_threshold: + threshold_value = int(wait_threshold.replace('d', '')) * 24 * 60 + else: + raise ValueError("Invalid threshold format. Use formats like '10min', '1h', '30m', '1d', '1day'") + + queued_builds = [] + + try: + queue_url = f"{JENKINS_URL}/queue/api/json" + queue_response = requests.get(queue_url, auth=auth, timeout=10) + queue_response.raise_for_status() + queue_data = queue_response.json() + + current_time = int(time.time() * 1000) # Convert to milliseconds + + for item in queue_data.get('items', []): + # Get time in queue in minutes + in_queue_since = item.get('inQueueSince', 0) + wait_time_mins = (current_time - in_queue_since) / (1000 * 60) # Convert to minutes + + # Format wait time based on duration + if wait_time_mins >= 24*60: # More than a day + wait_time = f"{wait_time_mins/(24*60):.1f}d" + elif wait_time_mins >= 60: # More than an hour + wait_time = f"{wait_time_mins/60:.1f}h" + else: + wait_time = f"{wait_time_mins:.1f}min" + + if wait_time_mins >= threshold_value: + job_name = item.get('task', {}).get('name', '') + if job_name == '': + try: + queued_build_url = item.get('url', '') + if queued_build_url != '': + queued_build_url = f"{JENKINS_URL}/{queued_build_url}api/json?depth=1" + rsp = requests.get(queued_build_url, auth=auth, timeout=10).json() + 
job_name = rsp.get('task').get('name') + else: + job_name = 'Unknown Job' + except requests.exceptions.RequestException as e: + job_name = 'Unknown Job' + + queued_build = { + 'job_name': job_name, + 'waiting_since': in_queue_since, + 'wait_time': wait_time, + 'why': item.get('why', 'Unknown Reason'), + 'stuck': item.get('stuck', False), + 'blocked': item.get('blocked', False), + 'url': f"{JENKINS_URL}/{item.get('url', '')}" + } + queued_builds.append(queued_build) + + return queued_builds + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") + + +def get_executor_utilization(): + executor_utilization = [] + for label in jenkins_data.get('assignedLabels', []): + busy_executors = label.get('busyExecutors', 0) + total_executors = label.get('totalExecutors', 0) + if total_executors > 0: + utilization = (busy_executors / total_executors) * 100 + else: + utilization = 0 + executor_utilization.append({ + 'node_name': label.get('name', 'unknown'), + 'busy_executors': busy_executors, + 'total_executors': total_executors, + 'utilization_percentage': utilization + }) + return executor_utilization + +def build_logs_analytics(history_limit=5): + # Get failed builds (up to limit) for each job + failed_builds = [] + for job in jenkins_data.get('jobs', []): + builds = [] + failed_count = 0 + + # Iterate through all builds until we find limit failed ones + for build in job.get('builds', []): + if build.get('result') == 'FAILURE': + builds.append({ + 'number': build.get('number'), + 'url': build.get('url') + }) + failed_count += 1 + if failed_count == history_limit: + break + + if builds: + failed_builds.append({ + 'job_name': job.get('name'), + 'builds': builds + }) + + # Analyze logs for each failed job + analysis_results = [] + for job in failed_builds: + job_logs = [] + + # Get logs for each failed build + for build in job['builds']: + try: + log_url = f"{build['url']}logText/progressiveText?start=0" + 
log_response = requests.get(log_url, auth=auth, timeout=10) + log_response.raise_for_status() + job_logs.append({ + 'build_number': build['number'], + 'log_content': log_response.text + }) + except requests.exceptions.RequestException as e: + print(f"Failed to fetch logs for {job['job_name']} #{build['number']}: {e}") + continue + + if len(job_logs) < 2: + continue + + # Extract error sections from logs + error_sections = [] + for log in job_logs: + log_lines = log['log_content'].split('\n') + error_section = [] + in_error = False + + for line in log_lines: + # Start capturing on error indicators + if any(error_term in line.lower() for error_term in ['error:', 'exception', 'failed', 'failure']): + # Skip common, less meaningful lines + if any(skip_term in line.lower() for skip_term in + ['finished: failure', 'build failure', '[info]']): + continue + in_error = True + error_section = [line] + # Continue capturing context + elif in_error and line.strip(): + # Skip info/debug lines in error context + if not line.lower().startswith('[info]'): + error_section.append(line) + # Stop after capturing some context + if len(error_section) > 10: + in_error = False + if error_section: # Only add if we have meaningful content + error_sections.append('\n'.join(error_section)) + elif in_error: + in_error = False + if error_section: # Only add if we have meaningful content + error_sections.append('\n'.join(error_section)) + + # Use thefuzz to find similar error patterns + common_patterns = defaultdict(list) + processed_sections = set() + + for section in error_sections: + if section in processed_sections: + continue + + # Use process.extractBests to find similar sections + matches = fuzzprocessor.extractBests( + section, + error_sections, + scorer=fuzz.token_set_ratio, + score_cutoff=85 # 85% similarity threshold + ) + + similar_sections = [match[0] for match in matches] + # Only include patterns that occur in all builds + if len(similar_sections) == len(job_logs): # Must occur in 
all builds + pattern_key = similar_sections[0] + common_patterns[pattern_key] = { + 'occurrences': len(similar_sections), + 'similar_sections': similar_sections, + 'similarity_scores': [match[1] for match in matches] + } + processed_sections.update(similar_sections) + + # Calculate overall log similarity + similarity_scores = [] + for i in range(len(job_logs)): + for j in range(i + 1, len(job_logs)): + score = fuzz.token_set_ratio( + job_logs[i]['log_content'], + job_logs[j]['log_content'] + ) + similarity_scores.append(score) + + avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0 + + # Filter out patterns that don't meet minimum occurrence threshold + significant_patterns = { + pattern: details for pattern, details in common_patterns.items() + if details['occurrences'] == len(job_logs) # Must occur in all builds + } + + analysis_results.append({ + 'job_name': job['job_name'], + 'builds_analyzed': len(job_logs), + 'similarity_score': avg_similarity, + 'common_error_patterns': [ + { + 'pattern': pattern, + 'occurrences': details['occurrences'], + 'similar_sections': details['similar_sections'], + 'similarity_scores': details['similarity_scores'] + } + for pattern, details in significant_patterns.items() + ] + }) + + return analysis_results \ No newline at end of file From dbc41adcbae30e36f54ee15dfcebab3b6cefe1ad Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:24:11 +0530 Subject: [PATCH 12/72] Install docker in ec2 while installing jenkins --- .../jenkins-health/.test/terraform/main.tf | 217 ++++++++++++------ 1 file changed, 153 insertions(+), 64 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index 29886a50f..4f0ea0ee9 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -153,71 +153,19 @@ resource "aws_instance" "jenkins_server" { def instance = 
Jenkins.getInstance() // Skip setup wizard - instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) + // instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) // Install suggested plugins - def pm = instance.getPluginManager() - def uc = instance.getUpdateCenter() - uc.updateAllSites() - - def plugins = [ - // Organization and Administration - "dashboard-view", - "cloudbees-folder", - "configuration-as-code", - "antisamy-markup-formatter", - - // Build Features - "build-name-setter", - "build-timeout", - "config-file-provider", - "credentials-binding", - "embeddable-build-status", - "rebuild", - "ssh-agent", - "throttle-concurrents", - "timestamper", - "ws-cleanup", - - // Build Tools - "ant", - "gradle", - - // Pipelines and Continuous Delivery - "workflow-aggregator", - "github-branch-source", - "pipeline-github-lib", - "pipeline-stage-view", - "conditional-buildstep", - "parameterized-trigger", - "copyartifact", - - // Source Code Management - "git", - "github", - - // Distributed Builds - "ssh-slaves", - - // User Management and Security - "matrix-auth", - "pam-auth", - "ldap", - - // Notifications and Publishing - "email-ext", - "mailer", - - "configuration-as-code", - "ec2" - ] - - plugins.each { plugin -> - if (!pm.getPlugin(plugin)) { - def installFuture = uc.getPlugin(plugin).deploy() - installFuture.get() - } - } + // def pm = instance.getPluginManager() + // def uc = instance.getUpdateCenter() + // uc.updateAllSites() + + // plugins.each { plugin -> + // if (!pm.getPlugin(plugin)) { + // def installFuture = uc.getPlugin(plugin).deploy() + // installFuture.get() + // } + // } // Create admin user def hudsonRealm = new HudsonPrivateSecurityRealm(false) @@ -240,6 +188,26 @@ resource "aws_instance" "jenkins_server" { # Clean up rm -f create_admin.groovy + # Add Docker's official GPG key: + apt-get update + apt-get -y install ca-certificates curl + install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg 
-o /etc/apt/keyrings/docker.asc + chmod a+r /etc/apt/keyrings/docker.asc + + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + apt-get update + apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + + echo "DOCKER_OPTS=\"-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock\"" >> /etc/default/docker + systemctl restart docker + groupadd docker + usermod -aG docker $USER + EOF tags = { @@ -247,10 +215,131 @@ resource "aws_instance" "jenkins_server" { } } + +# # Instance Profile for Jenkins +# resource "aws_iam_instance_profile" "jenkins_profile" { +# name = "jenkins_profile" +# role = aws_iam_role.jenkins_role.name +# } + +# # Security Group for Jenkins Agents +# resource "aws_security_group" "jenkins_agent_sg" { +# name = "jenkins-agent-sg" +# description = "Security group for Jenkins agents" +# vpc_id = aws_vpc.jenkins_vpc.id + +# ingress { +# from_port = 22 +# to_port = 22 +# protocol = "tcp" +# security_groups = [aws_security_group.jenkins_sg.id] +# } + +# egress { +# from_port = 0 +# to_port = 0 +# protocol = "-1" +# cidr_blocks = ["0.0.0.0/0"] +# } + +# tags = { +# Name = "jenkins-agent-sg" +# } +# } + + +# Wait for Jenkins to be ready +resource "null_resource" "wait_for_jenkins" { + depends_on = [aws_instance.jenkins_server] + + provisioner "local-exec" { + command = <<-EOF + while ! nc -z ${aws_instance.jenkins_server.public_ip} 8080; do + echo "Waiting for Jenkins to be ready..." 
+ sleep 10 + done + EOF + } +} + + +# Configure Jenkins EC2 agents +# resource "null_resource" "configure_jenkins_agents" { +# depends_on = [null_resource.wait_for_jenkins] + +# connection { +# type = "ssh" +# user = "ubuntu" +# private_key = tls_private_key.jenkins_key.private_key_pem +# host = aws_instance.jenkins_server.public_ip +# } + +# provisioner "file" { +# content = tls_private_key.jenkins_key.private_key_pem +# destination = "/tmp/jenkins-key.pem" +# } + +# provisioner "file" { +# content = templatefile("${path.module}/configure_ec2_agent.groovy.tpl", { +# ami_id = data.aws_ami.ubuntu.id +# subnet_id = aws_subnet.jenkins_subnet.id +# security_group_id = aws_security_group.jenkins_sg.id +# }) +# destination = "/tmp/configure_ec2_agent.groovy" +# } + + +# provisioner "remote-exec" { +# inline = [ +# # Setup SSH key for Jenkins +# "sudo mkdir -p /var/lib/jenkins/.ssh", +# "sudo mv /tmp/jenkins-key.pem /var/lib/jenkins/.ssh/", +# "sudo chown -R jenkins:jenkins /var/lib/jenkins/.ssh", +# "sudo chmod 700 /var/lib/jenkins/.ssh", +# "sudo chmod 600 /var/lib/jenkins/.ssh/jenkins-key.pem", +# "cat /tmp/configure_ec2_agent.groovy", +# "wget -q http://localhost:8080/jnlpJars/jenkins-cli.jar", +# # Execute the Groovy script using Jenkins CLI +# "java -jar jenkins-cli.jar -s http://localhost:8080 -auth admin:admin123! 
groovy = < /tmp/configure_ec2_agent.groovy", + +# # Cleanup +# "rm /tmp/configure_ec2_agent.groovy" +# ] +# } +# } + +# Create IAM user for Jenkins +resource "aws_iam_user" "jenkins_user" { + name = "jenkins-user" +} + +# Create access key for the IAM user +resource "aws_iam_access_key" "jenkins_user_key" { + user = aws_iam_user.jenkins_user.name +} + +# Attach policy to the user +resource "aws_iam_user_policy_attachment" "jenkins_user_policy" { + user = aws_iam_user.jenkins_user.name + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2FullAccess" +} + +# Output the credentials +output "jenkins_user_access_key" { + value = aws_iam_access_key.jenkins_user_key.id + sensitive = true +} + +output "jenkins_user_secret_key" { + value = aws_iam_access_key.jenkins_user_key.secret + sensitive = true +} + output "jenkins_public_ip" { value = aws_instance.jenkins_server.public_ip } output "ssh_connection_string" { value = "ssh -i jenkins-key.pem ubuntu@${aws_instance.jenkins_server.public_ip}" -} \ No newline at end of file +} + From 287c02a5355587e2eed53949966616f5dce5caf2 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:25:25 +0530 Subject: [PATCH 13/72] added folwing tasks in sli.robot: - Check For Recent Failed Tests in Jenkins - Check For Jenkins Health - Check For Long Queued Builds in Jenkins - Check Jenkins Executor Utilization --- codebundles/jenkins-health/sli.robot | 113 ++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 18 deletions(-) diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot index fbeb41948..ae9f107a8 100644 --- a/codebundles/jenkins-health/sli.robot +++ b/codebundles/jenkins-health/sli.robot @@ -1,21 +1,20 @@ *** Settings *** -Documentation This taskset collects information about Jenkins health and failed builds -... to help troubleshoot potential issues. 
+Documentation Check Jenkins health, failed builds, tests and long running builds Metadata Author saurabh3460 -Metadata Display Name Jenkins Healthcheck +Metadata Display Name Jenkins Health Metadata Supports Jenkins Library BuiltIn Library RW.Core Library RW.CLI Library RW.platform - +Library util.py Suite Setup Suite Initialization *** Tasks *** -Check Failed Jenkins Builds - [Documentation] Check Failed Jenkins Builds +Check For Failed Build Logs in Jenkins + [Documentation] Check For Failed Build Logs in Jenkins [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File ... bash_file=faild_build_logs.sh @@ -33,12 +32,12 @@ Check Failed Jenkins Builds ${failed_builds_score}= Evaluate 1 if int(${failed_builds}) <= int(${MAX_FAILED_BUILDS}) else 0 Set Global Variable ${failed_builds_score} -Check For Long Running Builds +Check For Long Running Builds in Jenkins [Documentation] Check Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File ... bash_file=long_running_builds.sh - ... cmd_override=./long_running_builds.sh ${TIME_THRESHOLD} + ... cmd_override=./long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ... env=${env} ... include_in_history=False ... 
secret__jenkins_token=${JENKINS_TOKEN} @@ -56,8 +55,58 @@ Check For Long Running Builds ${long_running_score}= Evaluate 1 if int(${long_running_count}) <= int(${MAX_LONG_RUNNING_BUILDS}) else 0 Set Global Variable ${long_running_score} +Check For Recent Failed Tests in Jenkins + [Documentation] Check For Recent Failed Tests in Jenkins + [Tags] Jenkins Tests + ${failed_test_suites}= Get Failed Tests + IF len(${failed_test_suites}) > 0 + ${total_failed_tests}= Evaluate sum([len(suite['test_results']) for suite in ${failed_test_suites}]) + ${failed_test_score}= Evaluate 1 if int(${total_failed_tests}) <= int(${MAX_ALLOWED_FAILED_TESTS}) else 0 + Set Global Variable ${failed_test_score} + ELSE + Set Global Variable ${failed_test_score} 1 + END + +Check For Jenkins Health + [Documentation] Check if Jenkins instance is reachable and responding + [Tags] Jenkins Health + ${rsp}= RW.CLI.Run Cli + ... cmd=curl -s -u "$${JENKINS_USERNAME.key}:$${JENKINS_TOKEN.key}" "${JENKINS_URL}/api/json" + ... env=${env} + ... secret__jenkins_token=${JENKINS_TOKEN} + ... 
secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads('''${rsp.stdout}''') json + Set Global Variable ${jenkins_health_score} 1 + EXCEPT + Set Global Variable ${jenkins_health_score} 0 + END + +Check For Long Queued Builds in Jenkins + [Documentation] Check for builds stuck in queue beyond threshold and calculate SLI score + [Tags] Jenkins Queue Builds SLI + ${queued_builds}= Get Queued Builds wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} + ${queued_count}= Evaluate len(${queued_builds}) + ${queued_builds_score}= Evaluate 1 if int(${queued_count}) <= int(${MAX_QUEUED_BUILDS}) else 0 + Set Global Variable ${queued_builds_score} + +Check Jenkins Executor Utilization + [Documentation] Check if Jenkins executor utilization is above 80% + [Tags] Jenkins Executors Utilization + ${executor_utilization}= Get Executor Utilization + ${high_utilization}= Set Variable False + FOR ${executor} IN @{executor_utilization} + IF ${executor['utilization_percentage']} > float(${MAX_EXECUTOR_UTILIZATION}) + ${high_utilization}= Set Variable True + BREAK + END + END + ${executor_utilization_score}= Evaluate 0 if ${high_utilization} else 1 + Set Global Variable ${executor_utilization_score} + + Generate Health Score - ${health_score}= Evaluate (${failed_builds_score} + ${long_running_score}) / 2 + ${health_score}= Evaluate (${failed_builds_score} + ${long_running_score} + ${failed_test_score} + ${jenkins_health_score} + ${queued_builds_score} + ${executor_utilization_score}) / 6 ${health_score}= Convert to Number ${health_score} 2 RW.Core.Push Metric ${health_score} @@ -79,28 +128,56 @@ Suite Initialization ... description=Jenkins API token for authentication ... pattern=\w* ... example=11aa22bb33cc44dd55ee66ff77gg88hh - ${TIME_THRESHOLD}= RW.Core.Import User Variable TIME_THRESHOLD + ${LONG_RUNNING_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable LONG_RUNNING_BUILD_MAX_WAIT_TIME + ... type=string + ... 
description=The time threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${MAX_LONG_RUNNING_BUILDS}= RW.Core.Import User Variable MAX_LONG_RUNNING_BUILDS + ... type=string + ... description=The maximum number of long running builds to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${QUEUED_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable QUEUED_BUILD_MAX_WAIT_TIME ... type=string - ... description=The threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... description=The time threshold for builds in queue, formats like '5m', '2h', '1d' or '5min', '2h', '1d' ... pattern=\d+ - ... example="1m" - ... default="1m" + ... example="10m" + ... default="10m" ${MAX_FAILED_BUILDS}= RW.Core.Import User Variable MAX_FAILED_BUILDS ... type=string - ... description=The maximum number of failed builds to consider healthy + ... description=The maximum number of failed builds allowed and consider healthy ... pattern=\d+ ... example="1" ... default="0" - ${MAX_LONG_RUNNING_BUILDS}= RW.Core.Import User Variable MAX_LONG_RUNNING_BUILDS + ${MAX_ALLOWED_FAILED_TESTS}= RW.Core.Import User Variable MAX_ALLOWED_FAILED_TESTS ... type=string - ... description=The maximum number of long running builds to consider healthy + ... description=The maximum number of failed tests allowed and consider healthy ... pattern=\d+ ... example="1" ... default="0" + ${MAX_QUEUED_BUILDS}= RW.Core.Import User Variable MAX_QUEUED_BUILDS + ... type=string + ... description=The maximum number of builds stuck in queue to consider healthy + ... pattern=\d+ + ... example="1" + ... default="0" + ${MAX_EXECUTOR_UTILIZATION}= RW.Core.Import User Variable MAX_EXECUTOR_UTILIZATION + ... type=string + ... description=The maximum percentage of executor utilization to consider healthy + ... pattern=\d+ + ... example="80" + ... 
default="80" Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} - Set Suite Variable ${TIME_THRESHOLD} ${TIME_THRESHOLD} Set Suite Variable ${MAX_FAILED_BUILDS} ${MAX_FAILED_BUILDS} - Set Suite Variable ${MAX_LONG_RUNNING_BUILDS} ${MAX_LONG_RUNNING_BUILDS} \ No newline at end of file + Set Suite Variable ${MAX_LONG_RUNNING_BUILDS} ${MAX_LONG_RUNNING_BUILDS} + Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_ALLOWED_FAILED_TESTS} ${MAX_ALLOWED_FAILED_TESTS} + Set Suite Variable ${MAX_QUEUED_BUILDS} ${MAX_QUEUED_BUILDS} + Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} \ No newline at end of file From 84c188cacbf4b3e90a56f3bb6bf66570d64014c6 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:25:43 +0530 Subject: [PATCH 14/72] added folwing tasks in runbook.robot: - Check For Recent Failed Tests in Jenkins - Check For Jenkins Health - Check For Long Queued Builds in Jenkins - Check Jenkins Executor Utilization --- codebundles/jenkins-health/runbook.robot | 224 +++++++++++++++++++---- 1 file changed, 193 insertions(+), 31 deletions(-) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index ba05540e7..994577851 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -1,19 +1,20 @@ *** Settings *** -Documentation This taskset collects information about Jenkins health and failed builds to help troubleshoot potential issues. 
+Documentation List Jenkins health, failed builds, tests and long running builds Metadata Author saurabh3460 -Metadata Display Name Jenkins Healthcheck +Metadata Display Name Jenkins Health Metadata Supports Jenkins Library RW.Core Library RW.CLI Library RW.platform - +Library String +Library util.py Suite Setup Suite Initialization *** Tasks *** -List Failed Jenkins Builds +List Failed Build Logs in Jenkins [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API - [Tags] Jenkins Logs Failures API Builds + [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File ... bash_file=faild_build_logs.sh ... env=${env} @@ -29,31 +30,32 @@ List Failed Jenkins Builds IF len(@{jobs}) > 0 FOR ${job} IN @{jobs} ${job_name}= Set Variable ${job['job_name']} - ${build_number}= Set Variable ${job['buildNumber']} - ${logs}= Set Variable ${job['logs']} - - RW.Core.Add Pre To Report ${job} + ${build_number}= Set Variable ${job['build_number']} + ${json_str}= Evaluate json.dumps(${job}) json + ${formatted_results}= RW.CLI.Run Cli + ... cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Result", "URL"] as $headers | $headers, (. | [.job_name, .build_number, .result, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Failed Builds:\n=======================================\n${formatted_results.stdout} ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint - RW.Core.Add Issue + RW.Core.Add Issue ... severity=3 - ... expected=Jenkins job `${job_name}` should have a successful build - ... actual=Jenkins job `${job_name}` has a failed build - ... title=Jenkins job `${job_name}` has a failed build - ... reproduce_hint=${rsp.cmd} + ... expected=Jenkins job `${job_name}` should complete successfully + ... actual=Jenkins job `${job_name}` build #`${build_number}` failed + ... title=Jenkins Build Failure: `${job_name}` (Build #`${build_number}`) + ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` ... 
details=${pretty_item} - ... next_steps=Review the Jenkins job `${job_name}` build number `${build_number}` + ... next_steps=Review Failed build logs for Jenkins job `${job_name}` END ELSE RW.Core.Add Pre To Report "No failed builds found" END -List Long Running Builds +List Long Running Builds in Jenkins [Documentation] Identifies Jenkins builds that have been running longer than a specified threshold - [Tags] Jenkins Builds Monitoring + [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File ... bash_file=long_running_builds.sh - ... cmd_override=./long_running_builds.sh ${TIME_THRESHOLD} + ... cmd_override=./long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -70,11 +72,11 @@ List Long Running Builds IF len(${long_running_jobs}) > 0 ${json_str}= Evaluate json.dumps(${long_running_jobs}) json ${formatted_results}= RW.CLI.Run Cli - ... cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Duration", "URL"] as $headers | $headers, (.[] | [.name, .build_number, .duration, .url]) | @tsv' | column -t -s $'\t' - RW.Core.Add Pre To Report Long Running Jobs:\n${formatted_results.stdout} + ... cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Duration", "URL"] as $headers | $headers, (.[] | [.job_name, .build_number, .duration, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Long Running Jobs:\n=======================================\n${formatted_results.stdout} FOR ${job} IN @{long_running_jobs} - ${job_name}= Set Variable ${job['name']} + ${job_name}= Set Variable ${job['job_name']} ${duration}= Set Variable ${job['duration']} ${build_number}= Set Variable ${job['build_number']} ${url}= Set Variable ${job['url']} @@ -82,17 +84,164 @@ List Long Running Builds ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint RW.Core.Add Issue ... severity=4 - ... expected=Jenkins job `${job_name}` should complete within ${TIME_THRESHOLD} - ... 
actual=Jenkins job `${job_name}` has been running for ${duration} - ... title=Jenkins job build `${job_name}` `${build_number}` has been running for ${duration} - ... reproduce_hint=${rsp.cmd} + ... expected=Jenkins job `${job_name}` (Build #`${build_number}`) should complete within ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... actual=Jenkins job `${job_name}` (Build #`${build_number}`) has been running for ${duration} (exceeds threshold) + ... title=Long Running Build: `${job_name}` (Build #`${build_number}`) - ${duration} + ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` ... details=${pretty_item} - ... next_steps=Review the Jenkins job `${job_name}` build number `${build_number}` at ${url} + ... next_steps=Investigate build logs of job `${job_name}`\nCheck resource utilization on build node END ELSE RW.Core.Add Pre To Report "No long running builds found" END +List Recent Failed Tests in Jenkins + [Documentation] List Recent Failed Tests in Jenkins + [Tags] Jenkins Tests + ${failed_tests}= Get Failed Tests + + IF len(${failed_tests}) > 0 + FOR ${test_suite} IN @{failed_tests} + ${pipeline_details}= Set Variable ${test_suite['pipeline_details']} + ${test_results}= Set Variable ${test_suite['test_results']} + ${pipeline_url}= Set Variable ${pipeline_details['pipeline_url']} + ${pipeline_name}= Set Variable ${pipeline_details['pipeline_name']} + ${build_number}= Set Variable ${pipeline_details['build_number']} + + ${json_str}= Evaluate json.dumps(${test_results}) json + ${formatted_results}= RW.CLI.Run Cli + ... 
cmd=echo '${json_str}' | jq -r '["FailedTests", "Duration", "StdErr", "StdOut", "Status"] as $headers | $headers, (.[] | [.name, .duration, .stderr, .stdout, .status]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Pipeline Name: ${pipeline_name} Build No.${build_number}:\n=======================================\n${formatted_results.stdout} + + FOR ${test} IN @{test_results} + ${class_name}= Set Variable ${test['className']} + ${test_name}= Set Variable ${test['name']} + ${error_details}= Set Variable ${test['errorDetails']} + ${stack_trace}= Set Variable ${test['errorStackTrace']} + + ${pretty_test}= Evaluate pprint.pformat(${test}) modules=pprint + RW.Core.Add Issue + ... severity=3 + ... expected=Test `${test_name}` in pipeline `${pipeline_name}` (Build #`${build_number}`) should pass successfully + ... actual=Test '${test_name}' failed with error:\n${error_details} + ... title=Test Failure: `${test_name}` in ${pipeline_name} (Build #`${build_number}`) + ... details=${pretty_test} + ... reproduce_hint=Navigate to Jenkins build `${pipeline_url}lastCompletedBuild/testReport/` + ... next_steps=Review the error message and stack trace + END + END + ELSE + RW.Core.Add Pre To Report "No failed tests found" + END + +Check Jenkins Health + [Documentation] Check if Jenkins instance is reachable and responding + [Tags] Jenkins Health + # TODO: Capture more exceptions here + ${rsp}= RW.CLI.Run Cli + ... cmd=curl -s -u "$${JENKINS_USERNAME.key}:$${JENKINS_TOKEN.key}" "${JENKINS_URL}/api/json" + ... env=${env} + ... secret__jenkins_token=${JENKINS_TOKEN} + ... secret__jenkins_username=${JENKINS_USERNAME} + TRY + ${data}= Evaluate json.loads('''${rsp.stdout}''') json + RW.Core.Add Pre To Report Jenkins instance is up and responding + EXCEPT + RW.Core.Add Issue + ... severity=1 + ... expected=Jenkins instance at ${JENKINS_URL}/api/json should be reachable and responding + ... actual=Unable to connect to Jenkins instance or received invalid response + ... 
title=Jenkins instance is not reachable + ... details=Failed to connect to Jenkins instance at ${JENKINS_URL}/api/json response: ${rsp.stdout} + ... reproduce_hint=Try accessing ${JENKINS_URL}/api/json in a web browser + ... next_steps=- Check if Jenkins service is running\n- Verify network connectivity\n- Validate Jenkins URL\n- Check Jenkins logs for errors + END + +List Long Queued Builds in Jenkins + [Documentation] Check for builds stuck in queue beyond threshold + [Tags] Jenkins Queue Builds + + ${queued_builds}= GET QUEUED BUILDS wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} + + TRY + IF ${queued_builds} == [] + RW.Core.Add Pre To Report No builds currently queued beyond threshold + ELSE + ${json_str}= Evaluate json.dumps(${queued_builds}) json + ${formatted_results}= RW.CLI.Run Cli + ... cmd=echo '${json_str}' | jq -r '["Job Name", "Wait Time", "Why", "Stuck", "Blocked", "URL"] as $headers | $headers, (.[] | [.job_name, .wait_time, .why, .stuck, .blocked, .url]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Builds Currently Queued:\n=======================================\n${formatted_results.stdout} + + FOR ${build} IN @{queued_builds} + ${url}= Set Variable ${build['url']} + ${job_name}= Set Variable ${build['job_name']} + ${wait_time}= Set Variable ${build['wait_time']} + ${why}= Set Variable ${build['why']} + ${stuck}= Set Variable ${build['stuck']} + ${blocked}= Set Variable ${build['blocked']} + + # Add specific next steps based on status + ${next_steps}= Set Variable If ${stuck} + ... - Check Jenkins executor status\n- Review system resources\n- Consider restarting Jenkins if needed + ... ${blocked} + ... Consider increasing executors if bottlenecked + ... Consider adding more build agents + + RW.Core.Add Issue + ... severity=4 + ... expected=Builds should not be queued for more than ${QUEUED_BUILD_MAX_WAIT_TIME} + ... actual=Build '${job_name}' has been queued for ${wait_time} (exceeds threshold) + ... 
title=Long Queued Build: ${job_name} (${wait_time}) + ... details=${build} + ... reproduce_hint=Access Jenkins at ${JENKINS_URL} + ... next_steps=${next_steps} + END + END + + EXCEPT + RW.Core.Add Pre To Report No queued builds found + END + + +List Jenkins Executor Utilization + [Documentation] Check Jenkins executor utilization across nodes + [Tags] Jenkins Executors Utilization + + ${executor_utilization}= GET EXECUTOR UTILIZATION + + TRY + IF ${executor_utilization} == [] + RW.Core.Add Pre To Report No executor utilization data found + ELSE + ${json_str}= Evaluate json.dumps(${executor_utilization}) json + ${formatted_results}= RW.CLI.Run Cli + ... cmd=echo '${json_str}' | jq -r '["Node Name", "Busy Executors", "Total Executors", "Utilization %"] as $headers | $headers, (.[] | [.node_name, .busy_executors, .total_executors, .utilization_percentage]) | @tsv' | column -t -s $'\t' + RW.Core.Add Pre To Report Executor Utilization:\n=======================================\n${formatted_results.stdout} + + FOR ${executor} IN @{executor_utilization} + ${node_name}= Set Variable ${executor['node_name']} + ${utilization}= Set Variable ${executor['utilization_percentage']} + ${busy_executors}= Set Variable ${executor['busy_executors']} + ${total_executors}= Set Variable ${executor['total_executors']} + + IF ${utilization} > float(${MAX_EXECUTOR_UTILIZATION}) + RW.Core.Add Issue + ... severity=3 + ... expected=Executor utilization should be below ${MAX_EXECUTOR_UTILIZATION}% + ... actual=Node '${node_name}' has ${utilization}% utilization (${busy_executors}/${total_executors} executors busy) + ... title=Jenkins High Executor Utilization: ${node_name} (${utilization}%) + ... details=${executor} + ... reproduce_hint=Check executor status at ${JENKINS_URL}/computer/ + ... 
next_steps=- Consider adding more executors\n- Review job distribution\n- Check for stuck builds + END + END + END + + EXCEPT + RW.Core.Add Pre To Report Failed to fetch executor utilization data + END + + *** Keywords *** Suite Initialization ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL @@ -110,15 +259,28 @@ Suite Initialization ... description=Jenkins API token for authentication ... pattern=\w* ... example=11aa22bb33cc44dd55ee66ff77gg88hh - ${TIME_THRESHOLD}= RW.Core.Import User Variable TIME_THRESHOLD + ${LONG_RUNNING_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable LONG_RUNNING_BUILD_MAX_WAIT_TIME ... type=string ... description=The threshold for long running builds, formats like '5m', '2h', '1d' or '5min', '2h', '1d' ... pattern=\d+ - ... example="1m" - ... default="1m" + ... example="10m" + ... default="10m" + ${QUEUED_BUILD_MAX_WAIT_TIME}= RW.Core.Import User Variable QUEUED_BUILD_MAX_WAIT_TIME + ... type=string + ... description=The time threshold for builds in queue, formats like '5m', '2h', '1d' or '5min', '2h', '1d' + ... pattern=\d+ + ... example="10m" + ... default="10m" + ${MAX_EXECUTOR_UTILIZATION}= RW.Core.Import User Variable MAX_EXECUTOR_UTILIZATION + ... type=string + ... description=The maximum percentage of executor utilization to consider healthy + ... pattern=\d+ + ... example="80" + ... 
default="80" Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} - Set Suite Variable ${TIME_THRESHOLD} ${TIME_THRESHOLD} + Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} - \ No newline at end of file + Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} \ No newline at end of file From 3e53b0a786802edf3f38f9480981a93489a54ff5 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:37:01 +0530 Subject: [PATCH 15/72] add root README.md --- codebundles/jenkins-health/README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 codebundles/jenkins-health/README.md diff --git a/codebundles/jenkins-health/README.md b/codebundles/jenkins-health/README.md new file mode 100644 index 000000000..db6d3096e --- /dev/null +++ b/codebundles/jenkins-health/README.md @@ -0,0 +1,25 @@ +# AWS Jenkins Health + +This CodeBundle monitors and evaluates the health of Jenkins using the Jenkins REST API + +## SLI +The SLI produces a score of 0 (bad), 1(good), or a value in between. This score is generated by capturing the following: +- Check if Jenkins instance is reachable and responding (endpoint) +- Check For Failed Build Logs in Jenkins +- Check For Long Running Builds in Jenkins +- Check For Long Queued Builds in Jenkins +- Check Jenkins Executor Utilization + +## TaskSet +Similar to the SLI, but produces a report on the specific jenkns apis and raises issues for each Jenkins check that requires attention. + +## Required Configuration + +``` + export JENKINS_URL="" + export JENKINS_USERNAME="" + export JENKINS_TOKEN="" +``` + +## Testing +See the `.test` directory for infrastructure test code. 
\ No newline at end of file From 1206720c72de4e3dfbd593124553fb443029ec53 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 17:32:29 +0530 Subject: [PATCH 16/72] Update README.md with Jenkins configuration and pipeline examples --- codebundles/jenkins-health/.test/README.md | 78 ++++++++++++++++++++-- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/codebundles/jenkins-health/.test/README.md b/codebundles/jenkins-health/.test/README.md index 3eb34be8b..a95263ea3 100644 --- a/codebundles/jenkins-health/.test/README.md +++ b/codebundles/jenkins-health/.test/README.md @@ -51,20 +51,86 @@ Create this file with the following environment variables: task build-infra ``` -2. Generate RunWhen Configurations +2. Configure Jenkins and create pipelines: + + - **Initial Setup**: Follow the Jenkins UI prompts to install suggested plugins. + + - **Reproducing Scenarios**: + + - **Failed Pipeline Logs**: + Create a `Freestyle project` and choose the `Execute shell` option under `Build Steps` with an arbitrary script that will fail, such as a syntax error. + + - **Long Running Pipelines**: + Create a `Freestyle project` and choose the `Execute shell` option under `Build Steps`. Use the following script: + ```sh + #!/bin/bash + + error + # Print the start time + echo "Script started at: $(date)" + + # Sleep for 30 minutes (1800 seconds) + sleep 1800 + + # Print the end time + echo "Script ended at: $(date)" + ``` + + - **Queued Builds**: + Create three `Freestyle projects` using the above long-running script. With the default Jenkins setup having two executors, triggering all three projects will result in one being queued for a long time. + + - **Failed Tests**: + Create a `Pipeline` project and under the Definition section, paste the following Groovy script: + ```groovy + pipeline { + agent any + + tools { + // Install the Maven version configured as "M3" and add it to the path. 
+ maven "M3" + } + + stages { + stage('Build') { + steps { + // Get some code from a GitHub repository + git 'https://github.com/saurabh3460/simple-maven-project-with-tests.git' + + // Run Maven on a Unix agent. + sh "mvn -Dmaven.test.failure.ignore=true clean package" + + // To run Maven on a Windows agent, use + // bat "mvn -Dmaven.test.failure.ignore=true clean package" + } + + post { + // If Maven was able to run the tests, even if some of the test + // failed, record the test results and archive the jar file. + success { + junit '**/target/surefire-reports/TEST-*.xml' + archiveArtifacts 'target/*.jar' + } + } + } + } + } + ``` + + +3. Generate RunWhen Configurations ```sh tasks ``` -3. Upload generated SLx to RunWhen Platform +4. Upload generated SLx to RunWhen Platform ```sh task upload-slxs ``` -4. At last, after testing, clean up the test infrastructure. +5. At last, after testing, clean up the test infrastructure. -```sh - task clean -``` + ```sh + task clean + ``` From 3ab5c8edfa3ef0bcf14f811fc2b1982d4b57be46 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Feb 2025 15:37:12 +0530 Subject: [PATCH 17/72] wip --- .../generation-rules/jenkins-health.yaml | 20 ++++++++ .../templates/jenkins-health-sli.yaml | 49 +++++++++++++++++++ .../templates/jenkins-health-slx.yaml | 23 +++++++++ .../templates/jenkins-health-taskset.yaml | 31 ++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml create mode 100644 codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml create mode 100644 codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml create mode 100644 codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml new file mode 100644 index 000000000..4b7c40642 --- /dev/null 
+++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -0,0 +1,20 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + generationRules: + - resourceTypes: + - statefulset + matchRules: + - type: pattern + pattern: "jenkins" + properties: [name] + mode: substring + slxs: + - baseName: jenkins-health + levelOfDetail: detailed + qualifiers: ["resource", "namespace", "cluster"] + baseTemplateName: jenkins-health + outputItems: + - type: slx + - type: runbook + templateName: jenkins-health-taskset.yaml diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml new file mode 100644 index 000000000..5f76bd21e --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml @@ -0,0 +1,49 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{default_location}} + description: The number of failed Jenkins builds. 
+ codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/jenkins-healthcheck/runbook.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + configProvided: + - name: JENKINS_URL + workspaceKey: {{custom.jenkins_url}} + secretsProvided: + - name: JENKINS_USERNAME + workspaceKey: {{custom.jenkins_username}} + - name: JENKINS_TOKEN + workspaceKey: {{custom.jenkins_token}} + alerts: + warning: + operator: < + threshold: '1' + for: '20m' + ticket: + operator: < + threshold: '1' + for: '40m' + page: + operator: '==' + threshold: '0' + for: '' \ No newline at end of file diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml new file mode 100644 index 000000000..22b664225 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml @@ -0,0 +1,23 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/Jenkins_logo.svg + alias: {{match_resource.resource.metadata.name}} Health + asMeasuredBy: The number of failed Jenkins builds. + configProvided: + - name: SLX_PLACEHOLDER + value: SLX_PLACEHOLDER + owners: + - {{workspace.owner_email}} + statement: The number of failed Jenkins builds should be zero. 
+ additionalContext: + namespace: "{{match_resource.resource.metadata.namespace}}" + labelMap: "{{match_resource.resource.metadata.labels}}" + cluster: "{{ cluster.name }}" + context: "{{ cluster.context }}" \ No newline at end of file diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml new file mode 100644 index 000000000..75225ba16 --- /dev/null +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml @@ -0,0 +1,31 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/jenkins-healthcheck/runbook.robot + configProvided: + - name: JENKINS_URL + workspaceKey: {{custom.jenkins_url}} + secretsProvided: + - name: JENKINS_USERNAME + workspaceKey: {{custom.jenkins_username}} + - name: JENKINS_TOKEN + workspaceKey: {{custom.jenkins_token}} + \ No newline at end of file From 99138e32ff68f8c46f46534e17084d3747887037 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 5 Feb 2025 14:05:45 +0530 Subject: [PATCH 18/72] Add function to parse Jenkins Atom feed and extract logs --- codebundles/jenkins-health/util.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/util.py b/codebundles/jenkins-health/util.py index 1839285c2..97dec9354 100644 --- a/codebundles/jenkins-health/util.py +++ b/codebundles/jenkins-health/util.py @@ -5,6 +5,7 @@ from collections import defaultdict from thefuzz import fuzz from thefuzz import process as fuzzprocessor +import 
xml.etree.ElementTree as ET # Ensure required environment variables are set JENKINS_URL = os.getenv("JENKINS_URL") @@ -292,4 +293,24 @@ def build_logs_analytics(history_limit=5): ] }) - return analysis_results \ No newline at end of file + return analysis_results + + +def parse_jenkins_atom_feed(): + # Define the namespace for Atom feed + namespace = {'atom': 'http://www.w3.org/2005/Atom'} + # Fetch Jenkins log feed + try: + response = requests.get(f"{JENKINS_URL}/manage/log/rss", auth=auth, timeout=10) + response.raise_for_status() # Raise an error for bad responses + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + root = ET.fromstring(response.text) + + # Extract log contents + logs = "" + for entry in root.findall('atom:entry', namespace): + content = entry.find('atom:content', namespace).text.strip() + logs += f"{content}\n{'=' * 80}\n" + return logs + From 494cf3348018b6619f5dc73b0ce9018efd3a20b8 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 5 Feb 2025 14:06:12 +0530 Subject: [PATCH 19/72] Add keyword to list Jenkins logs from Atom feed --- codebundles/jenkins-health/runbook.robot | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index 994577851..4382b0c74 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -242,6 +242,13 @@ List Jenkins Executor Utilization END +List Jenkins Logs from Atom Feed + [Documentation] Fetches and displays Jenkins logs from the Atom feed + [Tags] Jenkins Logs + ${rsp}= Parse Jenkins Atom Feed + RW.Core.Add Pre To Report ${rsp} + + *** Keywords *** Suite Initialization ${JENKINS_URL}= RW.Core.Import User Variable JENKINS_URL From 5160704de9394e77060fa31e6b75408b8b96f6a8 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Thu, 13 Feb 2025 22:12:37 +0000 Subject: [PATCH 20/72] jenkins modifications. 
auto-generate password. remove nc dependency. update outputs. --- .../jenkins-health/.test/terraform/main.tf | 128 ++++++++++-------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index 4f0ea0ee9..81bd11986 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -1,3 +1,14 @@ +resource "random_password" "jenkins_admin_password" { + length = 16 + special = true + min_upper = 1 + min_lower = 1 + min_numeric = 1 + + # Optional: If you prefer fewer special characters, define allow_*: + # override_special = "!@#%" +} + # Get latest Ubuntu AMI data "aws_ami" "ubuntu" { most_recent = true @@ -119,32 +130,30 @@ resource "aws_security_group" "jenkins_sg" { resource "aws_instance" "jenkins_server" { ami = data.aws_ami.ubuntu.id instance_type = "t2.micro" - - subnet_id = aws_subnet.jenkins_subnet.id + subnet_id = aws_subnet.jenkins_subnet.id vpc_security_group_ids = [aws_security_group.jenkins_sg.id] key_name = aws_key_pair.generated_key.key_name associate_public_ip_address = true user_data = <<-EOF #!/bin/bash - # Update package index apt-get update - # Install Java 17 apt-get install -y openjdk-17-jdk - # Add Jenkins repository key curl -fsSL https://pkg.jenkins.io/debian-stable/jenkins.io-2023.key | tee /usr/share/keyrings/jenkins-keyring.asc > /dev/null - # Add Jenkins repository echo deb [signed-by=/usr/share/keyrings/jenkins-keyring.asc] https://pkg.jenkins.io/debian-stable binary/ | tee /etc/apt/sources.list.d/jenkins.list > /dev/null - # Update package index again apt-get update && apt-get install -y jenkins && systemctl enable jenkins && systemctl start jenkins + + # Wait a bit for Jenkins to start sleep 60 - # Get the initial admin password + + # Retrieve the initial admin password (only valid until we run our Groovy script) JENKINS_PASS=$(sudo cat 
/var/lib/jenkins/secrets/initialAdminPassword) - # Install Jenkins CLI + # Download Jenkins CLI wget -q http://localhost:8080/jnlpJars/jenkins-cli.jar - # Create groovy script to create admin user + # Create Groovy script to set Jenkins to "INITIAL_SETUP_COMPLETED" + # and create a new admin user with the random password cat < create_admin.groovy import jenkins.model.* import hudson.security.* @@ -152,24 +161,12 @@ resource "aws_instance" "jenkins_server" { def instance = Jenkins.getInstance() - // Skip setup wizard - // instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) + // Skip the Jenkins setup wizard + instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) - // Install suggested plugins - // def pm = instance.getPluginManager() - // def uc = instance.getUpdateCenter() - // uc.updateAllSites() - - // plugins.each { plugin -> - // if (!pm.getPlugin(plugin)) { - // def installFuture = uc.getPlugin(plugin).deploy() - // installFuture.get() - // } - // } - - // Create admin user + // Create admin user with a random password def hudsonRealm = new HudsonPrivateSecurityRealm(false) - hudsonRealm.createAccount("admin", "admin123!") + hudsonRealm.createAccount("admin", "${random_password.jenkins_admin_password.result}") instance.setSecurityRealm(hudsonRealm) def strategy = new FullControlOnceLoggedInAuthorizationStrategy() @@ -179,39 +176,24 @@ resource "aws_instance" "jenkins_server" { instance.save() GROOVY - # Execute the groovy script using Jenkins CLI - java -jar jenkins-cli.jar -s http://localhost:8080 -auth admin:$JENKINS_PASS groovy = < create_admin.groovy || { - echo "Failed to create admin user" - exit 1 - } + # Use the initial Jenkins password to run the Groovy script + java -jar jenkins-cli.jar \ + -s http://localhost:8080 \ + -auth admin:$JENKINS_PASS \ + groovy = < create_admin.groovy || { + echo "Failed to create admin user" + exit 1 + } - # Clean up rm -f create_admin.groovy - # Add Docker's official GPG key: - apt-get update - 
apt-get -y install ca-certificates curl - install -m 0755 -d /etc/apt/keyrings - curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc - chmod a+r /etc/apt/keyrings/docker.asc - - # Add the repository to Apt sources: - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ - $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ - tee /etc/apt/sources.list.d/docker.list > /dev/null - apt-get update - apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - - echo "DOCKER_OPTS=\"-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock\"" >> /etc/default/docker - systemctl restart docker - groupadd docker - usermod -aG docker $USER - + # (Optional) Additional setup commands, e.g. Docker, etc. + # ... EOF tags = { - Name = "jenkins-server" + Name = "jenkins-server", + lifecycle = "deleteme" } } @@ -248,21 +230,35 @@ resource "aws_instance" "jenkins_server" { # } -# Wait for Jenkins to be ready -resource "null_resource" "wait_for_jenkins" { + +resource "null_resource" "wait_for_jenkins_authenticated" { depends_on = [aws_instance.jenkins_server] provisioner "local-exec" { - command = <<-EOF - while ! nc -z ${aws_instance.jenkins_server.public_ip} 8080; do - echo "Waiting for Jenkins to be ready..." - sleep 10 + command = <<-EOT + while true; do + echo "Checking Jenkins with the new random password..." + + STATUS_CODE=$(curl -s -o /dev/null -w '%%{http_code}' \ + -u "admin:${random_password.jenkins_admin_password.result}" \ + http://${aws_instance.jenkins_server.public_ip}:8080/api/json) + + if [ "$STATUS_CODE" = "200" ]; then + echo "Jenkins is responding with HTTP 200 to admin:${random_password.jenkins_admin_password.result}" + break + else + echo "Got HTTP $STATUS_CODE. Waiting for Jenkins..." + sleep 10 + fi done - EOF + + echo "Jenkins is fully up and accepting authenticated requests." 
+ EOT } } + # Configure Jenkins EC2 agents # resource "null_resource" "configure_jenkins_agents" { # depends_on = [null_resource.wait_for_jenkins] @@ -343,3 +339,15 @@ output "ssh_connection_string" { value = "ssh -i jenkins-key.pem ubuntu@${aws_instance.jenkins_server.public_ip}" } +output "jenkins_admin_password" { + value = random_password.jenkins_admin_password.result + sensitive = true +} + +output "fetch_admin_passwrd" { + value = "cd terraform && terraform show -json | jq '.values.outputs.jenkins_admin_password.value'" +} + +output "jenkins_url" { + value = "http://${aws_instance.jenkins_server.public_ip}:8080" +} From b8604829e58dbba15f3aa5f188e7e970c34959ce Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 11:13:19 +0000 Subject: [PATCH 21/72] update template --- .../generation-rules/jenkins-health.yaml | 8 ++++---- .../jenkins-health/.test/terraform/main.tf | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index 4b7c40642..f7d2eabe4 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -3,16 +3,16 @@ kind: GenerationRules spec: generationRules: - resourceTypes: - - statefulset + - custom matchRules: - type: pattern - pattern: "jenkins" - properties: [name] + pattern: ".+" + properties: [custom/jenkins_url] mode: substring slxs: - baseName: jenkins-health levelOfDetail: detailed - qualifiers: ["resource", "namespace", "cluster"] + qualifiers: ["resource"] baseTemplateName: jenkins-health outputItems: - type: slx diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index 81bd11986..44599dbf4 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ 
b/codebundles/jenkins-health/.test/terraform/main.tf @@ -1,8 +1,8 @@ resource "random_password" "jenkins_admin_password" { - length = 16 - special = true - min_upper = 1 - min_lower = 1 + length = 16 + special = true + min_upper = 1 + min_lower = 1 min_numeric = 1 # Optional: If you prefer fewer special characters, define allow_*: @@ -128,9 +128,9 @@ resource "aws_security_group" "jenkins_sg" { # EC2 Instance resource "aws_instance" "jenkins_server" { - ami = data.aws_ami.ubuntu.id - instance_type = "t2.micro" - subnet_id = aws_subnet.jenkins_subnet.id + ami = data.aws_ami.ubuntu.id + instance_type = "t2.micro" + subnet_id = aws_subnet.jenkins_subnet.id vpc_security_group_ids = [aws_security_group.jenkins_sg.id] key_name = aws_key_pair.generated_key.key_name associate_public_ip_address = true @@ -192,7 +192,7 @@ resource "aws_instance" "jenkins_server" { EOF tags = { - Name = "jenkins-server", + Name = "jenkins-server", lifecycle = "deleteme" } } From 315ad5856b8b28542421faeec60ab8e7507a5498 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 11:48:02 +0000 Subject: [PATCH 22/72] update template --- .../generation-rules/jenkins-health.yaml | 18 +++++++++---- .../jenkins-health/.test/Taskfile.yaml | 25 ++++++++++--------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index f7d2eabe4..6010d7e97 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -1,14 +1,22 @@ apiVersion: runwhen.com/v1 kind: GenerationRules spec: + platform: aws generationRules: - resourceTypes: - - custom + - ec2_instance matchRules: - - type: pattern - pattern: ".+" - properties: [custom/jenkins_url] - mode: substring + - type: and + matches: + - type: pattern + pattern: "jenkins-server" + properties: 
[name] + mode: substring + - type: pattern + pattern: ".+" + properties: [custom/jenkins_url] + mode: substring + slxs: - baseName: jenkins-health levelOfDetail: detailed diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index 6294e0844..ef9c14580 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -65,7 +65,6 @@ tasks: AWS_SECRET_ACCESS_KEY: "{{.AWS_SECRET_ACCESS_KEY}}" AWS_DEFAULT_REGION: "{{.AWS_DEFAULT_REGION}}" RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}' - JENKINS_URL: '{{.JENKINS_URL | default "http://localhost:8080"}}' cmds: - | source terraform/cb.secret @@ -73,18 +72,18 @@ tasks: branch_name=$(git rev-parse --abbrev-ref HEAD) codebundle=$(basename "$(dirname "$PWD")") - # Fetch individual cluster details from Terraform state - # pushd terraform > /dev/null - # cluster1_name=$(terraform show -json terraform.tfstate | jq -r ' - # .values.outputs.cluster_1_name.value') + Fetch individual cluster details from Terraform state + pushd terraform > /dev/null + jenkins_url=$(terraform show -json terraform.tfstate | jq -r ' + .values.outputs.jenkins_url.value') - # popd > /dev/null + popd > /dev/null - # # Check if any of the required cluster variables are empty - # if [ -z "$cluster1_name" ] || [ -z "$cluster1_server" ] || [ -z "$cluster1_resource_group" ]; then - # echo "Error: Missing cluster details. Ensure Terraform plan has been applied." - # exit 1 - # fi + # Check if any of the required cluster variables are empty + if [ -z "$jenkins_url" ]; then + echo "Error: Missing jenkins_url details. Ensure Terraform plan has been applied." 
+ exit 1 + fi # Generate workspaceInfo.yaml with fetched cluster details cat < workspaceInfo.yaml @@ -103,7 +102,9 @@ tasks: custom: aws_access_key_id: AWS_ACCESS_KEY_ID aws_secret_access_key: AWS_SECRET_ACCESS_KEY - jenkins_url: $JENKINS_URL + jenkins_url: $jenkins_url + jenkins_username: jenkins_username + jenkins_token: jenkins_token EOF silent: true From c66c4ebdd2e8f4e842d6b07a816f94ea632f7f33 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 11:53:38 +0000 Subject: [PATCH 23/72] test tag match --- .../.runwhen/generation-rules/jenkins-health.yaml | 2 +- codebundles/jenkins-health/.test/Taskfile.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index 6010d7e97..29d70a851 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -10,7 +10,7 @@ spec: matches: - type: pattern pattern: "jenkins-server" - properties: [name] + properties: [resource/tags/Name] mode: substring - type: pattern pattern: ".+" diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index ef9c14580..0e0024085 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -72,7 +72,7 @@ tasks: branch_name=$(git rev-parse --abbrev-ref HEAD) codebundle=$(basename "$(dirname "$PWD")") - Fetch individual cluster details from Terraform state + ## Fetch individual cluster details from Terraform state pushd terraform > /dev/null jenkins_url=$(terraform show -json terraform.tfstate | jq -r ' .values.outputs.jenkins_url.value') From 79eb6127c3bd55939358ab960f25da825579b323 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 11:57:59 +0000 Subject: [PATCH 24/72] try just tags --- 
.../.runwhen/generation-rules/jenkins-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index 29d70a851..f28d9043d 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -10,7 +10,7 @@ spec: matches: - type: pattern pattern: "jenkins-server" - properties: [resource/tags/Name] + properties: [tags] mode: substring - type: pattern pattern: ".+" From e770200f2707d24949d474106b25b82e0a7e61ad Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:00:02 +0000 Subject: [PATCH 25/72] debug tag matching further --- .../.runwhen/generation-rules/jenkins-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index f28d9043d..4e6d86dc5 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -10,7 +10,7 @@ spec: matches: - type: pattern pattern: "jenkins-server" - properties: [tags] + properties: [resource/tags] mode: substring - type: pattern pattern: ".+" From 2d4e5e67d19fdc3535c756aa7ba2bcbaf08865b2 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:01:15 +0000 Subject: [PATCH 26/72] debug tag match --- .../.runwhen/generation-rules/jenkins-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index 4e6d86dc5..c735c4aeb 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ 
b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -10,7 +10,7 @@ spec: matches: - type: pattern pattern: "jenkins-server" - properties: [resource/tags] + properties: [tags/Name] mode: substring - type: pattern pattern: ".+" From d64874620348db5615bd9448054739279b62f377 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:06:44 +0000 Subject: [PATCH 27/72] try tag-values --- .../.runwhen/generation-rules/jenkins-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index c735c4aeb..3358b5cf0 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -10,7 +10,7 @@ spec: matches: - type: pattern pattern: "jenkins-server" - properties: [tags/Name] + properties: [tag-values] mode: substring - type: pattern pattern: ".+" From 712e3ebb887b90f226e4b80853d467f0d354fd8b Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:08:02 +0000 Subject: [PATCH 28/72] dump custom --- .../.runwhen/generation-rules/jenkins-health.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml index 3358b5cf0..e8cd3f0c1 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml @@ -12,10 +12,10 @@ spec: pattern: "jenkins-server" properties: [tag-values] mode: substring - - type: pattern - pattern: ".+" - properties: [custom/jenkins_url] - mode: substring + # - type: pattern + # pattern: ".+" + # properties: [custom/jenkins_url] + # mode: substring slxs: - baseName: jenkins-health From 
9ea28242cc7f7533c810c36cb47748c31e1839db Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:10:07 +0000 Subject: [PATCH 29/72] remove additional context in slx --- .../.runwhen/templates/jenkins-health-slx.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml index 22b664225..81503fa0e 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml @@ -16,8 +16,4 @@ spec: owners: - {{workspace.owner_email}} statement: The number of failed Jenkins builds should be zero. - additionalContext: - namespace: "{{match_resource.resource.metadata.namespace}}" - labelMap: "{{match_resource.resource.metadata.labels}}" - cluster: "{{ cluster.name }}" - context: "{{ cluster.context }}" \ No newline at end of file + additionalContext: [] \ No newline at end of file From 3a7da5d03f609b4360b1ff72f9cf52ab43a1959b Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:13:06 +0000 Subject: [PATCH 30/72] remove var from alias --- .../jenkins-health/.runwhen/templates/jenkins-health-slx.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml index 81503fa0e..c1de77901 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml @@ -8,7 +8,8 @@ metadata: {% include "common-annotations.yaml" %} spec: imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/Jenkins_logo.svg - alias: {{match_resource.resource.metadata.name}} Health + # alias: {{match_resource.resource.tags.Name}} Health + alias: Jenkins Health asMeasuredBy: The 
number of failed Jenkins builds. configProvided: - name: SLX_PLACEHOLDER From f9f3288509a1f3ce3e7d4d2f5e5ae7b3091da83b Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:17:57 +0000 Subject: [PATCH 31/72] rename templates and basename --- .../{jenkins-health.yaml => jenkins-instance-health.yaml} | 4 ++-- ...nkins-health-sli.yaml => jenkins-instance-health-sli.yaml} | 0 ...alth-taskset.yaml => jenkins-instance-health-taskset.yaml} | 0 .../{jenkins-health-slx.yaml => jenkins-instance-health.yaml} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename codebundles/jenkins-health/.runwhen/generation-rules/{jenkins-health.yaml => jenkins-instance-health.yaml} (87%) rename codebundles/jenkins-health/.runwhen/templates/{jenkins-health-sli.yaml => jenkins-instance-health-sli.yaml} (100%) rename codebundles/jenkins-health/.runwhen/templates/{jenkins-health-taskset.yaml => jenkins-instance-health-taskset.yaml} (100%) rename codebundles/jenkins-health/.runwhen/templates/{jenkins-health-slx.yaml => jenkins-instance-health.yaml} (100%) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml similarity index 87% rename from codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml rename to codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index e8cd3f0c1..c5ca0551c 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -18,10 +18,10 @@ spec: # mode: substring slxs: - - baseName: jenkins-health + - baseName: jenkins-instance-health levelOfDetail: detailed qualifiers: ["resource"] - baseTemplateName: jenkins-health + baseTemplateName: jenkins-instance-health outputItems: - type: slx - type: runbook diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml 
b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml similarity index 100% rename from codebundles/jenkins-health/.runwhen/templates/jenkins-health-sli.yaml rename to codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml similarity index 100% rename from codebundles/jenkins-health/.runwhen/templates/jenkins-health-taskset.yaml rename to codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health.yaml similarity index 100% rename from codebundles/jenkins-health/.runwhen/templates/jenkins-health-slx.yaml rename to codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health.yaml From bb55e32e736c47f4286dd92b492cbb851a42b155 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:19:23 +0000 Subject: [PATCH 32/72] fix file typo --- ...kins-instance-health.yaml => jenkins-instance-health-slx.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename codebundles/jenkins-health/.runwhen/templates/{jenkins-instance-health.yaml => jenkins-instance-health-slx.yaml} (100%) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml similarity index 100% rename from codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health.yaml rename to codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml From b2abed32e35406cc2d001599a2527a1bd12c14a3 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:23:39 +0000 Subject: [PATCH 33/72] fix template name --- 
.../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index c5ca0551c..d8622c4d1 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -25,4 +25,4 @@ spec: outputItems: - type: slx - type: runbook - templateName: jenkins-health-taskset.yaml + templateName: jenkins-instance-health-taskset.yaml From 6e727cc43d77aaf232c523211fa39488658512a9 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:29:11 +0000 Subject: [PATCH 34/72] add sli --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index d8622c4d1..590cd1ae5 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -24,5 +24,6 @@ spec: baseTemplateName: jenkins-instance-health outputItems: - type: slx + - type: sli - type: runbook templateName: jenkins-instance-health-taskset.yaml From 20ca81cbfb066069eb8691bac386affddcc09238 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 12:31:52 +0000 Subject: [PATCH 35/72] change alias --- .../.runwhen/templates/jenkins-instance-health-slx.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml index c1de77901..896506f86 100644 --- 
a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-slx.yaml @@ -8,8 +8,8 @@ metadata: {% include "common-annotations.yaml" %} spec: imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/Jenkins_logo.svg - # alias: {{match_resource.resource.tags.Name}} Health - alias: Jenkins Health + alias: {{match_resource.resource.tags.Name}} Instance Health + # alias: Jenkins Health asMeasuredBy: The number of failed Jenkins builds. configProvided: - name: SLX_PLACEHOLDER From 22af47020e4769652d30af8709a793833c10bdfc Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 13:29:10 +0000 Subject: [PATCH 36/72] add secrets upload for jenkins to taskfile --- .../jenkins-health/.test/Taskfile.yaml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index 0e0024085..c909a681c 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -239,6 +239,34 @@ tasks: echo "Directory $BASE_DIR does not exist. Upload aborted." exit 1 fi + # Upload Secrets + pushd terraform > /dev/null + jenkins_username=admin + jenkins_token=$(terraform show -json terraform.tfstate | jq -r ' + .values.outputs.jenkins_admin_password.value') + + popd > /dev/null + + # Check if any of the required cluster variables are empty + if [ -z "$jenkins_token" ]; then + echo "Error: Missing jenkins_token details. Ensure Terraform plan has been applied." 
+ exit 1 + fi + + # Create Secrets + URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" + PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_password": "$jenkins_password" }}' + echo "Uploading secrets to $URL" + response_code=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $RW_PAT" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" \ + -w "%{http_code}" -o /dev/null -s) + if [[ "$response_code" == "200" || "$response_code" == "201" ]]; then + echo "Successfully uploaded secrets to $URL" + else + echo "Failed to upload secrets: $SLX_NAME to $URL. Unexpected response code: $response_code" + fi for dir in "$BASE_DIR"/*; do if [ -d "$dir" ]; then From a5f463dfd68e6169b7f58bdff1b72f4aaae2198b Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 13:42:10 +0000 Subject: [PATCH 37/72] fix template --- .../.runwhen/templates/jenkins-instance-health-sli.yaml | 2 +- .../.runwhen/templates/jenkins-instance-health-taskset.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml index 5f76bd21e..afa1f1b9e 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml @@ -23,7 +23,7 @@ spec: {% else %} ref: main {% endif %} - pathToRobot: codebundles/jenkins-healthcheck/runbook.robot + pathToRobot: codebundles/jenkins-health/runbook.robot intervalStrategy: intermezzo intervalSeconds: 600 configProvided: diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml index 75225ba16..1e65c7cad 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml +++ 
b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml @@ -19,7 +19,7 @@ spec: {% else %} ref: main {% endif %} - pathToRobot: codebundles/jenkins-healthcheck/runbook.robot + pathToRobot: codebundles/jenkins-health/runbook.robot configProvided: - name: JENKINS_URL workspaceKey: {{custom.jenkins_url}} From 916befa5b64510af74ad6d3c54f65e48fb35c1d3 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 14:16:45 +0000 Subject: [PATCH 38/72] fix secret key --- codebundles/jenkins-health/.test/Taskfile.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index c909a681c..b84a7e233 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -255,7 +255,7 @@ tasks: # Create Secrets URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" - PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_password": "$jenkins_password" }}' + PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_token": "$jenkins_password" }}' echo "Uploading secrets to $URL" response_code=$(curl -X POST "$URL" \ -H "Authorization: Bearer $RW_PAT" \ From 42cd16a139b62df1307f471d6c7f2243b84bbd8f Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 14:39:01 +0000 Subject: [PATCH 39/72] update templates --- .../jenkins-instance-health-sli.yaml | 2 +- .../jenkins-instance-health-taskset.yaml | 2 +- libraries/Jenkins/__init__.py | 1 + libraries/Jenkins/jenkins.py | 320 ++++++++++++++++++ 4 files changed, 323 insertions(+), 2 deletions(-) create mode 100644 libraries/Jenkins/__init__.py create mode 100644 libraries/Jenkins/jenkins.py diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml index afa1f1b9e..fe1633913 100644 
--- a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml @@ -28,7 +28,7 @@ spec: intervalSeconds: 600 configProvided: - name: JENKINS_URL - workspaceKey: {{custom.jenkins_url}} + value: {{custom.jenkins_url}} secretsProvided: - name: JENKINS_USERNAME workspaceKey: {{custom.jenkins_username}} diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml index 1e65c7cad..eabbe31f9 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-taskset.yaml @@ -22,7 +22,7 @@ spec: pathToRobot: codebundles/jenkins-health/runbook.robot configProvided: - name: JENKINS_URL - workspaceKey: {{custom.jenkins_url}} + value: {{custom.jenkins_url}} secretsProvided: - name: JENKINS_USERNAME workspaceKey: {{custom.jenkins_username}} diff --git a/libraries/Jenkins/__init__.py b/libraries/Jenkins/__init__.py new file mode 100644 index 000000000..ac8b6c2f7 --- /dev/null +++ b/libraries/Jenkins/__init__.py @@ -0,0 +1 @@ +from .jenkins import * diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py new file mode 100644 index 000000000..db57ef05e --- /dev/null +++ b/libraries/Jenkins/jenkins.py @@ -0,0 +1,320 @@ +import os +import requests +import json +import time +from collections import defaultdict +from thefuzz import fuzz +from thefuzz import process as fuzzprocessor +import xml.etree.ElementTree as ET +from RW import platform +from RW.Core import Core + +# # Ensure required environment variables are set +# JENKINS_URL = os.getenv("JENKINS_URL") +# JENKINS_USERNAME = os.getenv("JENKINS_USERNAME") +# JENKINS_TOKEN = os.getenv("JENKINS_TOKEN") + +# if not all([JENKINS_URL, JENKINS_USERNAME, JENKINS_TOKEN]): +# error_msg = "Please 
set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." +# raise ValueError(error_msg) + + + +# Jenkins API URL +api_url = f"{JENKINS_URL}/api/json?depth=2" +# Basic authentication +auth = (JENKINS_USERNAME, JENKINS_TOKEN) + +# Fetch Jenkins jobs data +try: + response = requests.get(api_url, auth=auth, timeout=10) + response.raise_for_status() # Raises an HTTPError for bad responses (4xx, 5xx) + jenkins_data = response.json() +except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + + +def get_failed_tests(): + failed_tests = [] + for job in jenkins_data.get('jobs'): + if job.get('lastBuild').get('result') == 'UNSTABLE': + pipeline_details = { + 'pipeline_name': job.get('name'), + 'pipeline_url': job.get('url'), + 'build_result': job.get('lastBuild').get('result'), + 'build_number': job.get('lastBuild').get('number'), + 'build_timestamp': job.get('lastBuild').get('timestamp'), + 'build_duration': job.get('lastBuild').get('duration'), + 'build_queueId': job.get('lastBuild').get('queueId'), + 'build_building': job.get('lastBuild').get('building'), + 'build_changeSet': job.get('lastBuild').get('changeSet') + } + try: + tests_response = requests.get(job.get('lastBuild').get('url')+"testReport/api/json", auth=auth, timeout=10) + tests_response.raise_for_status() + suites = tests_response.json().get('suites') + test_results = [] + for suite in suites: + for case in suite.get('cases'): + test_results.append(case) + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") + + result = {"pipeline_details": pipeline_details, "test_results": test_results} + failed_tests.append(result) + return failed_tests + +def get_queued_builds(wait_threshold="10m"): + """Get builds waiting in queue longer than the specified threshold. 
+ + Args: + wait_threshold (str): Time threshold in format like '10min', '1h', '30m', '1d', '1day' + + Returns: + list: List of queued builds that exceed the wait threshold + """ + # Convert threshold to minutes + wait_threshold = wait_threshold.lower().replace(' ', '').strip('"').strip("'") + threshold_value = 0 + if 'min' in wait_threshold: + threshold_value = int(wait_threshold.replace('min', '')) + elif 'h' in wait_threshold: + threshold_value = int(wait_threshold.replace('h', '')) * 60 + elif 'm' in wait_threshold: + threshold_value = int(wait_threshold.replace('m', '')) + elif 'day' in wait_threshold: + threshold_value = int(wait_threshold.replace('day', '')) * 24 * 60 + elif 'd' in wait_threshold: + threshold_value = int(wait_threshold.replace('d', '')) * 24 * 60 + else: + raise ValueError("Invalid threshold format. Use formats like '10min', '1h', '30m', '1d', '1day'") + + queued_builds = [] + + try: + queue_url = f"{JENKINS_URL}/queue/api/json" + queue_response = requests.get(queue_url, auth=auth, timeout=10) + queue_response.raise_for_status() + queue_data = queue_response.json() + + current_time = int(time.time() * 1000) # Convert to milliseconds + + for item in queue_data.get('items', []): + # Get time in queue in minutes + in_queue_since = item.get('inQueueSince', 0) + wait_time_mins = (current_time - in_queue_since) / (1000 * 60) # Convert to minutes + + # Format wait time based on duration + if wait_time_mins >= 24*60: # More than a day + wait_time = f"{wait_time_mins/(24*60):.1f}d" + elif wait_time_mins >= 60: # More than an hour + wait_time = f"{wait_time_mins/60:.1f}h" + else: + wait_time = f"{wait_time_mins:.1f}min" + + if wait_time_mins >= threshold_value: + job_name = item.get('task', {}).get('name', '') + if job_name == '': + try: + queued_build_url = item.get('url', '') + if queued_build_url != '': + queued_build_url = f"{JENKINS_URL}/{queued_build_url}api/json?depth=1" + rsp = requests.get(queued_build_url, auth=auth, timeout=10).json() + 
job_name = rsp.get('task').get('name') + else: + job_name = 'Unknown Job' + except requests.exceptions.RequestException as e: + job_name = 'Unknown Job' + + queued_build = { + 'job_name': job_name, + 'waiting_since': in_queue_since, + 'wait_time': wait_time, + 'why': item.get('why', 'Unknown Reason'), + 'stuck': item.get('stuck', False), + 'blocked': item.get('blocked', False), + 'url': f"{JENKINS_URL}/{item.get('url', '')}" + } + queued_builds.append(queued_build) + + return queued_builds + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") + + +def get_executor_utilization(): + executor_utilization = [] + for label in jenkins_data.get('assignedLabels', []): + busy_executors = label.get('busyExecutors', 0) + total_executors = label.get('totalExecutors', 0) + if total_executors > 0: + utilization = (busy_executors / total_executors) * 100 + else: + utilization = 0 + executor_utilization.append({ + 'node_name': label.get('name', 'unknown'), + 'busy_executors': busy_executors, + 'total_executors': total_executors, + 'utilization_percentage': utilization + }) + return executor_utilization + +def build_logs_analytics(history_limit=5): + # Get failed builds (up to limit) for each job + failed_builds = [] + for job in jenkins_data.get('jobs', []): + builds = [] + failed_count = 0 + + # Iterate through all builds until we find limit failed ones + for build in job.get('builds', []): + if build.get('result') == 'FAILURE': + builds.append({ + 'number': build.get('number'), + 'url': build.get('url') + }) + failed_count += 1 + if failed_count == history_limit: + break + + if builds: + failed_builds.append({ + 'job_name': job.get('name'), + 'builds': builds + }) + + # Analyze logs for each failed job + analysis_results = [] + for job in failed_builds: + job_logs = [] + + # Get logs for each failed build + for build in job['builds']: + try: + log_url = f"{build['url']}logText/progressiveText?start=0" + 
log_response = requests.get(log_url, auth=auth, timeout=10) + log_response.raise_for_status() + job_logs.append({ + 'build_number': build['number'], + 'log_content': log_response.text + }) + except requests.exceptions.RequestException as e: + print(f"Failed to fetch logs for {job['job_name']} #{build['number']}: {e}") + continue + + if len(job_logs) < 2: + continue + + # Extract error sections from logs + error_sections = [] + for log in job_logs: + log_lines = log['log_content'].split('\n') + error_section = [] + in_error = False + + for line in log_lines: + # Start capturing on error indicators + if any(error_term in line.lower() for error_term in ['error:', 'exception', 'failed', 'failure']): + # Skip common, less meaningful lines + if any(skip_term in line.lower() for skip_term in + ['finished: failure', 'build failure', '[info]']): + continue + in_error = True + error_section = [line] + # Continue capturing context + elif in_error and line.strip(): + # Skip info/debug lines in error context + if not line.lower().startswith('[info]'): + error_section.append(line) + # Stop after capturing some context + if len(error_section) > 10: + in_error = False + if error_section: # Only add if we have meaningful content + error_sections.append('\n'.join(error_section)) + elif in_error: + in_error = False + if error_section: # Only add if we have meaningful content + error_sections.append('\n'.join(error_section)) + + # Use thefuzz to find similar error patterns + common_patterns = defaultdict(list) + processed_sections = set() + + for section in error_sections: + if section in processed_sections: + continue + + # Use process.extractBests to find similar sections + matches = fuzzprocessor.extractBests( + section, + error_sections, + scorer=fuzz.token_set_ratio, + score_cutoff=85 # 85% similarity threshold + ) + + similar_sections = [match[0] for match in matches] + # Only include patterns that occur in all builds + if len(similar_sections) == len(job_logs): # Must occur in 
all builds + pattern_key = similar_sections[0] + common_patterns[pattern_key] = { + 'occurrences': len(similar_sections), + 'similar_sections': similar_sections, + 'similarity_scores': [match[1] for match in matches] + } + processed_sections.update(similar_sections) + + # Calculate overall log similarity + similarity_scores = [] + for i in range(len(job_logs)): + for j in range(i + 1, len(job_logs)): + score = fuzz.token_set_ratio( + job_logs[i]['log_content'], + job_logs[j]['log_content'] + ) + similarity_scores.append(score) + + avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0 + + # Filter out patterns that don't meet minimum occurrence threshold + significant_patterns = { + pattern: details for pattern, details in common_patterns.items() + if details['occurrences'] == len(job_logs) # Must occur in all builds + } + + analysis_results.append({ + 'job_name': job['job_name'], + 'builds_analyzed': len(job_logs), + 'similarity_score': avg_similarity, + 'common_error_patterns': [ + { + 'pattern': pattern, + 'occurrences': details['occurrences'], + 'similar_sections': details['similar_sections'], + 'similarity_scores': details['similarity_scores'] + } + for pattern, details in significant_patterns.items() + ] + }) + + return analysis_results + + +def parse_jenkins_atom_feed(): + # Define the namespace for Atom feed + namespace = {'atom': 'http://www.w3.org/2005/Atom'} + # Fetch Jenkins log feed + try: + response = requests.get(f"{JENKINS_URL}/manage/log/rss", auth=auth, timeout=10) + response.raise_for_status() # Raise an error for bad responses + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + root = ET.fromstring(response.text) + + # Extract log contents + logs = "" + for entry in root.findall('atom:entry', namespace): + content = entry.find('atom:content', namespace).text.strip() + logs += f"{content}\n{'=' * 80}\n" + return logs + From 
85b327d85c8907a7ea84896cf91a2b3663c5d58c Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 15:51:48 +0000 Subject: [PATCH 40/72] moves utils.py into proper jenkins keyword with platform secret handling --- ...ild_build_logs.sh => failed_build_logs.sh} | 0 codebundles/jenkins-health/runbook.robot | 32 +- codebundles/jenkins-health/sli.robot | 24 +- codebundles/jenkins-health/util.py | 316 -------- interactive_console_output.xml | 25 + libraries/Jenkins/jenkins.py | 699 ++++++++++-------- 6 files changed, 459 insertions(+), 637 deletions(-) rename codebundles/jenkins-health/{faild_build_logs.sh => failed_build_logs.sh} (100%) delete mode 100644 codebundles/jenkins-health/util.py create mode 100644 interactive_console_output.xml diff --git a/codebundles/jenkins-health/faild_build_logs.sh b/codebundles/jenkins-health/failed_build_logs.sh similarity index 100% rename from codebundles/jenkins-health/faild_build_logs.sh rename to codebundles/jenkins-health/failed_build_logs.sh diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index 4382b0c74..1731b9895 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -8,7 +8,7 @@ Library RW.Core Library RW.CLI Library RW.platform Library String -Library util.py +Library Jenkins Suite Setup Suite Initialization *** Tasks *** @@ -16,7 +16,7 @@ List Failed Build Logs in Jenkins [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=faild_build_logs.sh + ... bash_file=failed_build_logs.sh ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -98,7 +98,10 @@ List Long Running Builds in Jenkins List Recent Failed Tests in Jenkins [Documentation] List Recent Failed Tests in Jenkins [Tags] Jenkins Tests - ${failed_tests}= Get Failed Tests + ${failed_tests}= Jenkins.Get Failed Tests + ... 
jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} IF len(${failed_tests}) > 0 FOR ${test_suite} IN @{failed_tests} @@ -161,7 +164,11 @@ List Long Queued Builds in Jenkins [Documentation] Check for builds stuck in queue beyond threshold [Tags] Jenkins Queue Builds - ${queued_builds}= GET QUEUED BUILDS wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} + ${queued_builds}= Jenkins.Get Queued Builds + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} + ... wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} TRY IF ${queued_builds} == [] @@ -207,7 +214,10 @@ List Jenkins Executor Utilization [Documentation] Check Jenkins executor utilization across nodes [Tags] Jenkins Executors Utilization - ${executor_utilization}= GET EXECUTOR UTILIZATION + ${executor_utilization}= Jenkins.Get Executor Utilization + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} TRY IF ${executor_utilization} == [] @@ -242,10 +252,13 @@ List Jenkins Executor Utilization END -List Jenkins Logs from Atom Feed +Fetch Jenkins Logs and Add to Report [Documentation] Fetches and displays Jenkins logs from the Atom feed [Tags] Jenkins Logs - ${rsp}= Parse Jenkins Atom Feed + ${rsp}= Jenkins.Parse Atom Feed + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} RW.Core.Add Pre To Report ${rsp} @@ -284,10 +297,11 @@ Suite Initialization ... pattern=\d+ ... example="80" ... 
default="80" - Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} - Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} \ No newline at end of file + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} + Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} + #Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}", "JENKINS_USERNAME":"${JENKINS_USERNAME.key}", "JENKINS_TOKEN":"${JENKINS_TOKEN.key}"} diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot index ae9f107a8..90fca3b4c 100644 --- a/codebundles/jenkins-health/sli.robot +++ b/codebundles/jenkins-health/sli.robot @@ -8,7 +8,7 @@ Library BuiltIn Library RW.Core Library RW.CLI Library RW.platform -Library util.py +Library Jenkins Suite Setup Suite Initialization @@ -17,7 +17,7 @@ Check For Failed Build Logs in Jenkins [Documentation] Check For Failed Build Logs in Jenkins [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=faild_build_logs.sh + ... bash_file=failed_build_logs.sh ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -58,9 +58,12 @@ Check For Long Running Builds in Jenkins Check For Recent Failed Tests in Jenkins [Documentation] Check For Recent Failed Tests in Jenkins [Tags] Jenkins Tests - ${failed_test_suites}= Get Failed Tests - IF len(${failed_test_suites}) > 0 - ${total_failed_tests}= Evaluate sum([len(suite['test_results']) for suite in ${failed_test_suites}]) + ${failed_tests}= Jenkins.Get Failed Tests + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... 
jenkins_token=${JENKINS_TOKEN} + IF len(${failed_tests}) > 0 + ${total_failed_tests}= Evaluate sum([len(suite['test_results']) for suite in ${failed_tests}]) ${failed_test_score}= Evaluate 1 if int(${total_failed_tests}) <= int(${MAX_ALLOWED_FAILED_TESTS}) else 0 Set Global Variable ${failed_test_score} ELSE @@ -85,7 +88,11 @@ Check For Jenkins Health Check For Long Queued Builds in Jenkins [Documentation] Check for builds stuck in queue beyond threshold and calculate SLI score [Tags] Jenkins Queue Builds SLI - ${queued_builds}= Get Queued Builds wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} + ${queued_builds}= Jenkins.Get Queued Builds + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... jenkins_token=${JENKINS_TOKEN} + ... wait_threshold=${QUEUED_BUILD_MAX_WAIT_TIME} ${queued_count}= Evaluate len(${queued_builds}) ${queued_builds_score}= Evaluate 1 if int(${queued_count}) <= int(${MAX_QUEUED_BUILDS}) else 0 Set Global Variable ${queued_builds_score} @@ -93,7 +100,10 @@ Check For Long Queued Builds in Jenkins Check Jenkins Executor Utilization [Documentation] Check if Jenkins executor utilization is above 80% [Tags] Jenkins Executors Utilization - ${executor_utilization}= Get Executor Utilization + ${executor_utilization}= Jenkins.Get Executor Utilization + ... jenkins_url=${JENKINS_URL} + ... jenkins_username=${JENKINS_USERNAME} + ... 
jenkins_token=${JENKINS_TOKEN} ${high_utilization}= Set Variable False FOR ${executor} IN @{executor_utilization} IF ${executor['utilization_percentage']} > float(${MAX_EXECUTOR_UTILIZATION}) diff --git a/codebundles/jenkins-health/util.py b/codebundles/jenkins-health/util.py deleted file mode 100644 index 97dec9354..000000000 --- a/codebundles/jenkins-health/util.py +++ /dev/null @@ -1,316 +0,0 @@ -import os -import requests -import json -import time -from collections import defaultdict -from thefuzz import fuzz -from thefuzz import process as fuzzprocessor -import xml.etree.ElementTree as ET - -# Ensure required environment variables are set -JENKINS_URL = os.getenv("JENKINS_URL") -JENKINS_USERNAME = os.getenv("JENKINS_USERNAME") -JENKINS_TOKEN = os.getenv("JENKINS_TOKEN") - -if not all([JENKINS_URL, JENKINS_USERNAME, JENKINS_TOKEN]): - error_msg = "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." - raise ValueError(error_msg) - -# Jenkins API URL -api_url = f"{JENKINS_URL}/api/json?depth=2" -# Basic authentication -auth = (JENKINS_USERNAME, JENKINS_TOKEN) - -# Fetch Jenkins jobs data -try: - response = requests.get(api_url, auth=auth, timeout=10) - response.raise_for_status() # Raises an HTTPError for bad responses (4xx, 5xx) - jenkins_data = response.json() -except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") - - -def get_failed_tests(): - failed_tests = [] - for job in jenkins_data.get('jobs'): - if job.get('lastBuild').get('result') == 'UNSTABLE': - pipeline_details = { - 'pipeline_name': job.get('name'), - 'pipeline_url': job.get('url'), - 'build_result': job.get('lastBuild').get('result'), - 'build_number': job.get('lastBuild').get('number'), - 'build_timestamp': job.get('lastBuild').get('timestamp'), - 'build_duration': job.get('lastBuild').get('duration'), - 'build_queueId': job.get('lastBuild').get('queueId'), - 'build_building': 
job.get('lastBuild').get('building'), - 'build_changeSet': job.get('lastBuild').get('changeSet') - } - try: - tests_response = requests.get(job.get('lastBuild').get('url')+"testReport/api/json", auth=auth, timeout=10) - tests_response.raise_for_status() - suites = tests_response.json().get('suites') - test_results = [] - for suite in suites: - for case in suite.get('cases'): - test_results.append(case) - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") - - result = {"pipeline_details": pipeline_details, "test_results": test_results} - failed_tests.append(result) - return failed_tests - -def get_queued_builds(wait_threshold="10m"): - """Get builds waiting in queue longer than the specified threshold. - - Args: - wait_threshold (str): Time threshold in format like '10min', '1h', '30m', '1d', '1day' - - Returns: - list: List of queued builds that exceed the wait threshold - """ - # Convert threshold to minutes - wait_threshold = wait_threshold.lower().replace(' ', '').strip('"').strip("'") - threshold_value = 0 - if 'min' in wait_threshold: - threshold_value = int(wait_threshold.replace('min', '')) - elif 'h' in wait_threshold: - threshold_value = int(wait_threshold.replace('h', '')) * 60 - elif 'm' in wait_threshold: - threshold_value = int(wait_threshold.replace('m', '')) - elif 'day' in wait_threshold: - threshold_value = int(wait_threshold.replace('day', '')) * 24 * 60 - elif 'd' in wait_threshold: - threshold_value = int(wait_threshold.replace('d', '')) * 24 * 60 - else: - raise ValueError("Invalid threshold format. 
Use formats like '10min', '1h', '30m', '1d', '1day'") - - queued_builds = [] - - try: - queue_url = f"{JENKINS_URL}/queue/api/json" - queue_response = requests.get(queue_url, auth=auth, timeout=10) - queue_response.raise_for_status() - queue_data = queue_response.json() - - current_time = int(time.time() * 1000) # Convert to milliseconds - - for item in queue_data.get('items', []): - # Get time in queue in minutes - in_queue_since = item.get('inQueueSince', 0) - wait_time_mins = (current_time - in_queue_since) / (1000 * 60) # Convert to minutes - - # Format wait time based on duration - if wait_time_mins >= 24*60: # More than a day - wait_time = f"{wait_time_mins/(24*60):.1f}d" - elif wait_time_mins >= 60: # More than an hour - wait_time = f"{wait_time_mins/60:.1f}h" - else: - wait_time = f"{wait_time_mins:.1f}min" - - if wait_time_mins >= threshold_value: - job_name = item.get('task', {}).get('name', '') - if job_name == '': - try: - queued_build_url = item.get('url', '') - if queued_build_url != '': - queued_build_url = f"{JENKINS_URL}/{queued_build_url}api/json?depth=1" - rsp = requests.get(queued_build_url, auth=auth, timeout=10).json() - job_name = rsp.get('task').get('name') - else: - job_name = 'Unknown Job' - except requests.exceptions.RequestException as e: - job_name = 'Unknown Job' - - queued_build = { - 'job_name': job_name, - 'waiting_since': in_queue_since, - 'wait_time': wait_time, - 'why': item.get('why', 'Unknown Reason'), - 'stuck': item.get('stuck', False), - 'blocked': item.get('blocked', False), - 'url': f"{JENKINS_URL}/{item.get('url', '')}" - } - queued_builds.append(queued_build) - - return queued_builds - - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") - - -def get_executor_utilization(): - executor_utilization = [] - for label in jenkins_data.get('assignedLabels', []): - busy_executors = label.get('busyExecutors', 0) - total_executors = label.get('totalExecutors', 
0) - if total_executors > 0: - utilization = (busy_executors / total_executors) * 100 - else: - utilization = 0 - executor_utilization.append({ - 'node_name': label.get('name', 'unknown'), - 'busy_executors': busy_executors, - 'total_executors': total_executors, - 'utilization_percentage': utilization - }) - return executor_utilization - -def build_logs_analytics(history_limit=5): - # Get failed builds (up to limit) for each job - failed_builds = [] - for job in jenkins_data.get('jobs', []): - builds = [] - failed_count = 0 - - # Iterate through all builds until we find limit failed ones - for build in job.get('builds', []): - if build.get('result') == 'FAILURE': - builds.append({ - 'number': build.get('number'), - 'url': build.get('url') - }) - failed_count += 1 - if failed_count == history_limit: - break - - if builds: - failed_builds.append({ - 'job_name': job.get('name'), - 'builds': builds - }) - - # Analyze logs for each failed job - analysis_results = [] - for job in failed_builds: - job_logs = [] - - # Get logs for each failed build - for build in job['builds']: - try: - log_url = f"{build['url']}logText/progressiveText?start=0" - log_response = requests.get(log_url, auth=auth, timeout=10) - log_response.raise_for_status() - job_logs.append({ - 'build_number': build['number'], - 'log_content': log_response.text - }) - except requests.exceptions.RequestException as e: - print(f"Failed to fetch logs for {job['job_name']} #{build['number']}: {e}") - continue - - if len(job_logs) < 2: - continue - - # Extract error sections from logs - error_sections = [] - for log in job_logs: - log_lines = log['log_content'].split('\n') - error_section = [] - in_error = False - - for line in log_lines: - # Start capturing on error indicators - if any(error_term in line.lower() for error_term in ['error:', 'exception', 'failed', 'failure']): - # Skip common, less meaningful lines - if any(skip_term in line.lower() for skip_term in - ['finished: failure', 'build failure', 
'[info]']): - continue - in_error = True - error_section = [line] - # Continue capturing context - elif in_error and line.strip(): - # Skip info/debug lines in error context - if not line.lower().startswith('[info]'): - error_section.append(line) - # Stop after capturing some context - if len(error_section) > 10: - in_error = False - if error_section: # Only add if we have meaningful content - error_sections.append('\n'.join(error_section)) - elif in_error: - in_error = False - if error_section: # Only add if we have meaningful content - error_sections.append('\n'.join(error_section)) - - # Use thefuzz to find similar error patterns - common_patterns = defaultdict(list) - processed_sections = set() - - for section in error_sections: - if section in processed_sections: - continue - - # Use process.extractBests to find similar sections - matches = fuzzprocessor.extractBests( - section, - error_sections, - scorer=fuzz.token_set_ratio, - score_cutoff=85 # 85% similarity threshold - ) - - similar_sections = [match[0] for match in matches] - # Only include patterns that occur in all builds - if len(similar_sections) == len(job_logs): # Must occur in all builds - pattern_key = similar_sections[0] - common_patterns[pattern_key] = { - 'occurrences': len(similar_sections), - 'similar_sections': similar_sections, - 'similarity_scores': [match[1] for match in matches] - } - processed_sections.update(similar_sections) - - # Calculate overall log similarity - similarity_scores = [] - for i in range(len(job_logs)): - for j in range(i + 1, len(job_logs)): - score = fuzz.token_set_ratio( - job_logs[i]['log_content'], - job_logs[j]['log_content'] - ) - similarity_scores.append(score) - - avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0 - - # Filter out patterns that don't meet minimum occurrence threshold - significant_patterns = { - pattern: details for pattern, details in common_patterns.items() - if details['occurrences'] == 
len(job_logs) # Must occur in all builds - } - - analysis_results.append({ - 'job_name': job['job_name'], - 'builds_analyzed': len(job_logs), - 'similarity_score': avg_similarity, - 'common_error_patterns': [ - { - 'pattern': pattern, - 'occurrences': details['occurrences'], - 'similar_sections': details['similar_sections'], - 'similarity_scores': details['similarity_scores'] - } - for pattern, details in significant_patterns.items() - ] - }) - - return analysis_results - - -def parse_jenkins_atom_feed(): - # Define the namespace for Atom feed - namespace = {'atom': 'http://www.w3.org/2005/Atom'} - # Fetch Jenkins log feed - try: - response = requests.get(f"{JENKINS_URL}/manage/log/rss", auth=auth, timeout=10) - response.raise_for_status() # Raise an error for bad responses - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") - root = ET.fromstring(response.text) - - # Extract log contents - logs = "" - for entry in root.findall('atom:entry', namespace): - content = entry.find('atom:content', namespace).text.strip() - logs += f"{content}\n{'=' * 80}\n" - return logs - diff --git a/interactive_console_output.xml b/interactive_console_output.xml new file mode 100644 index 000000000..852fdf171 --- /dev/null +++ b/interactive_console_output.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + +All Tests + + + + +Robot Interactive Console + + + +Error in file '/home/runwhen/.vscode-remote/extensions/robocorp.robotframework-lsp-1.13.0/src/robotframework_ls/vendored/robotframework_interactive/robot_interactive_console.robot' on line 4: Singular section headers like '*** Test Case ***' are deprecated. Use plural format like '*** Test Cases ***' instead. 
+ + diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py index db57ef05e..d481b9632 100644 --- a/libraries/Jenkins/jenkins.py +++ b/libraries/Jenkins/jenkins.py @@ -1,320 +1,409 @@ -import os import requests -import json import time +import xml.etree.ElementTree as ET from collections import defaultdict from thefuzz import fuzz from thefuzz import process as fuzzprocessor -import xml.etree.ElementTree as ET + +from robot.api.deco import keyword from RW import platform -from RW.Core import Core - -# # Ensure required environment variables are set -# JENKINS_URL = os.getenv("JENKINS_URL") -# JENKINS_USERNAME = os.getenv("JENKINS_USERNAME") -# JENKINS_TOKEN = os.getenv("JENKINS_TOKEN") - -# if not all([JENKINS_URL, JENKINS_USERNAME, JENKINS_TOKEN]): -# error_msg = "Please set JENKINS_URL, JENKINS_USERNAME, and JENKINS_TOKEN environment variables." -# raise ValueError(error_msg) - - - -# Jenkins API URL -api_url = f"{JENKINS_URL}/api/json?depth=2" -# Basic authentication -auth = (JENKINS_USERNAME, JENKINS_TOKEN) - -# Fetch Jenkins jobs data -try: - response = requests.get(api_url, auth=auth, timeout=10) - response.raise_for_status() # Raises an HTTPError for bad responses (4xx, 5xx) - jenkins_data = response.json() -except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") - - -def get_failed_tests(): - failed_tests = [] - for job in jenkins_data.get('jobs'): - if job.get('lastBuild').get('result') == 'UNSTABLE': - pipeline_details = { - 'pipeline_name': job.get('name'), - 'pipeline_url': job.get('url'), - 'build_result': job.get('lastBuild').get('result'), - 'build_number': job.get('lastBuild').get('number'), - 'build_timestamp': job.get('lastBuild').get('timestamp'), - 'build_duration': job.get('lastBuild').get('duration'), - 'build_queueId': job.get('lastBuild').get('queueId'), - 'build_building': job.get('lastBuild').get('building'), - 'build_changeSet': 
job.get('lastBuild').get('changeSet') - } - try: - tests_response = requests.get(job.get('lastBuild').get('url')+"testReport/api/json", auth=auth, timeout=10) - tests_response.raise_for_status() - suites = tests_response.json().get('suites') - test_results = [] - for suite in suites: - for case in suite.get('cases'): - test_results.append(case) - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") - - result = {"pipeline_details": pipeline_details, "test_results": test_results} - failed_tests.append(result) - return failed_tests - -def get_queued_builds(wait_threshold="10m"): - """Get builds waiting in queue longer than the specified threshold. - - Args: - wait_threshold (str): Time threshold in format like '10min', '1h', '30m', '1d', '1day' - - Returns: - list: List of queued builds that exceed the wait threshold + + +class Jenkins: """ - # Convert threshold to minutes - wait_threshold = wait_threshold.lower().replace(' ', '').strip('"').strip("'") - threshold_value = 0 - if 'min' in wait_threshold: - threshold_value = int(wait_threshold.replace('min', '')) - elif 'h' in wait_threshold: - threshold_value = int(wait_threshold.replace('h', '')) * 60 - elif 'm' in wait_threshold: - threshold_value = int(wait_threshold.replace('m', '')) - elif 'day' in wait_threshold: - threshold_value = int(wait_threshold.replace('day', '')) * 24 * 60 - elif 'd' in wait_threshold: - threshold_value = int(wait_threshold.replace('d', '')) * 24 * 60 - else: - raise ValueError("Invalid threshold format. 
Use formats like '10min', '1h', '30m', '1d', '1day'") - - queued_builds = [] - - try: - queue_url = f"{JENKINS_URL}/queue/api/json" - queue_response = requests.get(queue_url, auth=auth, timeout=10) - queue_response.raise_for_status() - queue_data = queue_response.json() - - current_time = int(time.time() * 1000) # Convert to milliseconds - - for item in queue_data.get('items', []): - # Get time in queue in minutes - in_queue_since = item.get('inQueueSince', 0) - wait_time_mins = (current_time - in_queue_since) / (1000 * 60) # Convert to minutes - - # Format wait time based on duration - if wait_time_mins >= 24*60: # More than a day - wait_time = f"{wait_time_mins/(24*60):.1f}d" - elif wait_time_mins >= 60: # More than an hour - wait_time = f"{wait_time_mins/60:.1f}h" - else: - wait_time = f"{wait_time_mins:.1f}min" - - if wait_time_mins >= threshold_value: - job_name = item.get('task', {}).get('name', '') - if job_name == '': - try: - queued_build_url = item.get('url', '') - if queued_build_url != '': - queued_build_url = f"{JENKINS_URL}/{queued_build_url}api/json?depth=1" - rsp = requests.get(queued_build_url, auth=auth, timeout=10).json() - job_name = rsp.get('task').get('name') - else: - job_name = 'Unknown Job' - except requests.exceptions.RequestException as e: - job_name = 'Unknown Job' - - queued_build = { - 'job_name': job_name, - 'waiting_since': in_queue_since, - 'wait_time': wait_time, - 'why': item.get('why', 'Unknown Reason'), - 'stuck': item.get('stuck', False), - 'blocked': item.get('blocked', False), - 'url': f"{JENKINS_URL}/{item.get('url', '')}" + This Robot Framework library exposes its keywords so that each one + accepts jenkins_url, jenkins_username, and jenkins_token directly. + + The `jenkins_username` and `jenkins_token` parameters are expected + to be `platform.Secret` objects, so we do `jenkins_username.value` + and `jenkins_token.value` to retrieve the actual strings. 
+ + Example usage in Robot: + + *** Settings *** + Library Jenkins + + *** Variables *** + ${JENKINS_URL} https://my-jenkins.example + ${JENKINS_USERNAME} MyJenkinsUsernameSecret + ${JENKINS_TOKEN} MyJenkinsTokenSecret + + *** Test Cases *** + List Recent Failed Tests in Jenkins + ${failed_tests}= Get Failed Tests ${JENKINS_URL} ${JENKINS_USERNAME} ${JENKINS_TOKEN} + Log Found ${len(${failed_tests})} unstable builds + """ + + def __init__(self): + # We don't store credentials or Jenkins data at construction time + pass + + def _fetch_jenkins_data( + self, + jenkins_url: str, + jenkins_username: platform.Secret, + jenkins_token: platform.Secret + ): + """ + Helper method that calls Jenkins at /api/json?depth=2 and returns the parsed JSON. + Raises ConnectionError if the request fails. + """ + api_url = f"{jenkins_url}/api/json?depth=2" + # Extract the actual secret values for Basic Auth + auth = (jenkins_username.value, jenkins_token.value) + try: + response = requests.get(api_url, auth=auth, timeout=10) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + + @keyword("Get Failed Tests") + def get_failed_tests( + self, + jenkins_url: str, + jenkins_username: platform.Secret, + jenkins_token: platform.Secret + ): + """ + Returns a list of pipelines in the 'UNSTABLE' state along with their failed tests. 
+ + Example: + | ${failed_tests}= Get Failed Tests ${JENKINS_URL} ${JENKINS_USERNAME} ${JENKINS_TOKEN} | + | FOR ${pipeline} IN @{failed_tests} | + | Log Pipeline name: ${pipeline['pipeline_details']['pipeline_name']} | + | Log Test results: ${pipeline['test_results']} | + | END | + """ + jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token) + # For requests during test-report fetching: + auth = (jenkins_username.value, jenkins_token.value) + + failed_tests = [] + for job in jenkins_data.get('jobs', []): + last_build = job.get('lastBuild') or {} + if last_build.get('result') == 'UNSTABLE': + pipeline_details = { + 'pipeline_name': job.get('name'), + 'pipeline_url': job.get('url'), + 'build_result': last_build.get('result'), + 'build_number': last_build.get('number'), + 'build_timestamp': last_build.get('timestamp'), + 'build_duration': last_build.get('duration'), + 'build_queueId': last_build.get('queueId'), + 'build_building': last_build.get('building'), + 'build_changeSet': last_build.get('changeSet') } - queued_builds.append(queued_build) - - return queued_builds - - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") - - -def get_executor_utilization(): - executor_utilization = [] - for label in jenkins_data.get('assignedLabels', []): - busy_executors = label.get('busyExecutors', 0) - total_executors = label.get('totalExecutors', 0) - if total_executors > 0: - utilization = (busy_executors / total_executors) * 100 - else: - utilization = 0 - executor_utilization.append({ - 'node_name': label.get('name', 'unknown'), - 'busy_executors': busy_executors, - 'total_executors': total_executors, - 'utilization_percentage': utilization - }) - return executor_utilization - -def build_logs_analytics(history_limit=5): - # Get failed builds (up to limit) for each job - failed_builds = [] - for job in jenkins_data.get('jobs', []): - builds = [] - failed_count = 0 - - # 
Iterate through all builds until we find limit failed ones - for build in job.get('builds', []): - if build.get('result') == 'FAILURE': - builds.append({ - 'number': build.get('number'), - 'url': build.get('url') + try: + test_report_url = f"{last_build.get('url')}testReport/api/json" + tests_response = requests.get(test_report_url, auth=auth, timeout=10) + tests_response.raise_for_status() + suites = tests_response.json().get('suites', []) + test_results = [] + for suite in suites: + for case in suite.get('cases', []): + test_results.append(case) + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch test data from Jenkins: {e}") + + failed_tests.append({ + "pipeline_details": pipeline_details, + "test_results": test_results }) - failed_count += 1 - if failed_count == history_limit: - break - - if builds: - failed_builds.append({ - 'job_name': job.get('name'), - 'builds': builds + + return failed_tests + + @keyword("Get Queued Builds") + def get_queued_builds( + self, + jenkins_url: str, + jenkins_username: platform.Secret, + jenkins_token: platform.Secret, + wait_threshold: str = "10m" + ): + """ + Get builds waiting in queue longer than the specified threshold (e.g., '10m', '1h', '1d'). + + Returns a list of dictionaries with details of each queued build. + + Example usage in Robot: + | ${queued_builds}= | Get Queued Builds | ${JENKINS_URL} | ${JENKINS_USERNAME} | ${JENKINS_TOKEN} | 15m | + | FOR ${build} IN @{queued_builds} | + | Log Job ${build['job_name']} has been queued for ${build['wait_time']}. 
| + | END | + """ + wt = wait_threshold.lower().replace(' ', '').strip('"').strip("'") + threshold_value = 0 + if 'min' in wt: + threshold_value = int(wt.replace('min', '')) + elif 'h' in wt: + threshold_value = int(wt.replace('h', '')) * 60 + elif 'm' in wt: + threshold_value = int(wt.replace('m', '')) + elif 'day' in wt: + threshold_value = int(wt.replace('day', '')) * 24 * 60 + elif 'd' in wt: + threshold_value = int(wt.replace('d', '')) * 24 * 60 + else: + raise ValueError( + "Invalid threshold format. Use '10min', '1h', '30m', '1d', '1day', etc." + ) + + # Use .value to extract the actual username/token + auth = (jenkins_username.value, jenkins_token.value) + queue_url = f"{jenkins_url}/queue/api/json" + queued_builds = [] + + try: + queue_response = requests.get(queue_url, auth=auth, timeout=10) + queue_response.raise_for_status() + queue_data = queue_response.json() + + current_time = int(time.time() * 1000) + for item in queue_data.get('items', []): + in_queue_since = item.get('inQueueSince', 0) + wait_time_mins = (current_time - in_queue_since) / (1000 * 60) + + if wait_time_mins >= threshold_value: + if wait_time_mins >= 24*60: + wait_time = f"{wait_time_mins/(24*60):.1f}d" + elif wait_time_mins >= 60: + wait_time = f"{wait_time_mins/60:.1f}h" + else: + wait_time = f"{wait_time_mins:.1f}min" + + job_name = item.get('task', {}).get('name', '') + if not job_name: + try: + queued_build_url = item.get('url', '') + if queued_build_url: + queued_build_url = f"{jenkins_url}/{queued_build_url}api/json?depth=1" + rsp = requests.get(queued_build_url, auth=auth, timeout=10).json() + job_name = rsp.get('task', {}).get('name', 'Unknown Job') + else: + job_name = 'Unknown Job' + except requests.exceptions.RequestException: + job_name = 'Unknown Job' + + queued_builds.append({ + 'job_name': job_name, + 'waiting_since': in_queue_since, + 'wait_time': wait_time, + 'why': item.get('why', 'Unknown Reason'), + 'stuck': item.get('stuck', False), + 'blocked': 
item.get('blocked', False), + 'url': f"{jenkins_url}/{item.get('url', '')}" + }) + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch queue data from Jenkins: {e}") + + return queued_builds + + @keyword("Get Executor Utilization") + def get_executor_utilization( + self, + jenkins_url: str, + jenkins_username: platform.Secret, + jenkins_token: platform.Secret + ): + """ + Returns a list with executor utilization info for each Jenkins node. + + | ${utilization}= | Get Executor Utilization | ${JENKINS_URL} | ${JENKINS_USERNAME} | ${JENKINS_TOKEN} | + | FOR ${node} IN @{utilization} | + | Log Node ${node['node_name']} is at ${node['utilization_percentage']}% utilization. | + | END | + """ + jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token) + executor_utilization = [] + + for label in jenkins_data.get('assignedLabels', []): + busy_executors = label.get('busyExecutors', 0) + total_executors = label.get('totalExecutors', 0) + utilization = (busy_executors / total_executors) * 100 if total_executors else 0 + executor_utilization.append({ + 'node_name': label.get('name', 'unknown'), + 'busy_executors': busy_executors, + 'total_executors': total_executors, + 'utilization_percentage': utilization }) - - # Analyze logs for each failed job - analysis_results = [] - for job in failed_builds: - job_logs = [] - - # Get logs for each failed build - for build in job['builds']: - try: - log_url = f"{build['url']}logText/progressiveText?start=0" - log_response = requests.get(log_url, auth=auth, timeout=10) - log_response.raise_for_status() - job_logs.append({ - 'build_number': build['number'], - 'log_content': log_response.text - }) - except requests.exceptions.RequestException as e: - print(f"Failed to fetch logs for {job['job_name']} #{build['number']}: {e}") + + return executor_utilization + + @keyword("Build Logs Analytics") + def build_logs_analytics( + self, + jenkins_url: str, + jenkins_username: 
platform.Secret, + jenkins_token: platform.Secret, + history_limit: int = 5 + ): + """ + For each job in Jenkins, retrieve up to `history_limit` failed builds, + analyze their logs, and attempt to find common error patterns using fuzzy matching. + + Returns a list of dictionaries, each describing: + - job_name + - builds_analyzed + - similarity_score + - common_error_patterns + + Example usage: + | ${analysis_results}= | Build Logs Analytics | ${JENKINS_URL} | ${JENKINS_USERNAME} | ${JENKINS_TOKEN} | 5 | + | FOR ${analysis} IN @{analysis_results} | + | Log Job ${analysis['job_name']} has average log similarity ${analysis['similarity_score']}. | + | Log Common error patterns: ${analysis['common_error_patterns']} | + | END | + """ + auth = (jenkins_username.value, jenkins_token.value) + jenkins_data = self._fetch_jenkins_data(jenkins_url, jenkins_username, jenkins_token) + failed_builds = [] + + # Collect up to history_limit failed builds per job + for job in jenkins_data.get('jobs', []): + builds = [] + failed_count = 0 + for build in job.get('builds', []): + if build.get('result') == 'FAILURE': + builds.append({'number': build.get('number'), 'url': build.get('url')}) + failed_count += 1 + if failed_count == history_limit: + break + + if builds: + failed_builds.append({'job_name': job.get('name'), 'builds': builds}) + + analysis_results = [] + for job_info in failed_builds: + job_logs = [] + for build_info in job_info['builds']: + try: + log_url = f"{build_info['url']}logText/progressiveText?start=0" + log_response = requests.get(log_url, auth=auth, timeout=10) + log_response.raise_for_status() + job_logs.append({ + 'build_number': build_info['number'], + 'log_content': log_response.text + }) + except requests.exceptions.RequestException as e: + print(f"Failed to fetch logs for {job_info['job_name']} #{build_info['number']}: {e}") + continue + + # If there's only one failed build, can't compare logs across multiple builds + if len(job_logs) < 2: continue - - if 
len(job_logs) < 2: - continue - - # Extract error sections from logs - error_sections = [] - for log in job_logs: - log_lines = log['log_content'].split('\n') - error_section = [] - in_error = False - - for line in log_lines: - # Start capturing on error indicators - if any(error_term in line.lower() for error_term in ['error:', 'exception', 'failed', 'failure']): - # Skip common, less meaningful lines - if any(skip_term in line.lower() for skip_term in - ['finished: failure', 'build failure', '[info]']): - continue - in_error = True - error_section = [line] - # Continue capturing context - elif in_error and line.strip(): - # Skip info/debug lines in error context - if not line.lower().startswith('[info]'): - error_section.append(line) - # Stop after capturing some context - if len(error_section) > 10: + + # Extract error sections + error_sections = [] + for log in job_logs: + lines = log['log_content'].split('\n') + error_section = [] + in_error = False + + for line in lines: + lower_line = line.lower() + if any(term in lower_line for term in ['error:', 'exception', 'failed', 'failure']): + if any(skip_term in lower_line for skip_term in ['finished: failure', 'build failure', '[info]']): + continue + in_error = True + error_section = [line] + elif in_error and line.strip(): + if not lower_line.startswith('[info]'): + error_section.append(line) + if len(error_section) > 10: + in_error = False + if error_section: + error_sections.append('\n'.join(error_section)) + elif in_error: in_error = False - if error_section: # Only add if we have meaningful content + if error_section: error_sections.append('\n'.join(error_section)) - elif in_error: - in_error = False - if error_section: # Only add if we have meaningful content - error_sections.append('\n'.join(error_section)) - - # Use thefuzz to find similar error patterns - common_patterns = defaultdict(list) - processed_sections = set() - - for section in error_sections: - if section in processed_sections: - continue - - # 
Use process.extractBests to find similar sections - matches = fuzzprocessor.extractBests( - section, - error_sections, - scorer=fuzz.token_set_ratio, - score_cutoff=85 # 85% similarity threshold - ) - - similar_sections = [match[0] for match in matches] - # Only include patterns that occur in all builds - if len(similar_sections) == len(job_logs): # Must occur in all builds - pattern_key = similar_sections[0] - common_patterns[pattern_key] = { - 'occurrences': len(similar_sections), - 'similar_sections': similar_sections, - 'similarity_scores': [match[1] for match in matches] - } - processed_sections.update(similar_sections) - - # Calculate overall log similarity - similarity_scores = [] - for i in range(len(job_logs)): - for j in range(i + 1, len(job_logs)): - score = fuzz.token_set_ratio( - job_logs[i]['log_content'], - job_logs[j]['log_content'] + + # Use fuzzy matching to find common error sections + common_patterns = defaultdict(dict) + processed_sections = set() + + for section in error_sections: + if section in processed_sections: + continue + matches = fuzzprocessor.extractBests( + section, + error_sections, + scorer=fuzz.token_set_ratio, + score_cutoff=85 ) - similarity_scores.append(score) - - avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0 - - # Filter out patterns that don't meet minimum occurrence threshold - significant_patterns = { - pattern: details for pattern, details in common_patterns.items() - if details['occurrences'] == len(job_logs) # Must occur in all builds - } - - analysis_results.append({ - 'job_name': job['job_name'], - 'builds_analyzed': len(job_logs), - 'similarity_score': avg_similarity, - 'common_error_patterns': [ - { - 'pattern': pattern, - 'occurrences': details['occurrences'], - 'similar_sections': details['similar_sections'], - 'similarity_scores': details['similarity_scores'] - } - for pattern, details in significant_patterns.items() - ] - }) - - return analysis_results - - -def 
parse_jenkins_atom_feed(): - # Define the namespace for Atom feed - namespace = {'atom': 'http://www.w3.org/2005/Atom'} - # Fetch Jenkins log feed - try: - response = requests.get(f"{JENKINS_URL}/manage/log/rss", auth=auth, timeout=10) - response.raise_for_status() # Raise an error for bad responses - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") - root = ET.fromstring(response.text) - - # Extract log contents - logs = "" - for entry in root.findall('atom:entry', namespace): - content = entry.find('atom:content', namespace).text.strip() - logs += f"{content}\n{'=' * 80}\n" - return logs + similar_sections = [m[0] for m in matches] + # Only keep if it appears in all logs + if len(similar_sections) == len(job_logs): + pattern_key = similar_sections[0] + common_patterns[pattern_key] = { + 'occurrences': len(similar_sections), + 'similar_sections': similar_sections, + 'similarity_scores': [m[1] for m in matches] + } + processed_sections.update(similar_sections) + + # Calculate overall log similarity + similarity_scores = [] + for i in range(len(job_logs)): + for j in range(i + 1, len(job_logs)): + score = fuzz.token_set_ratio(job_logs[i]['log_content'], job_logs[j]['log_content']) + similarity_scores.append(score) + + avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else 0 + # Filter out patterns that appear in all logs + significant_patterns = { + pattern: details + for pattern, details in common_patterns.items() + if details['occurrences'] == len(job_logs) + } + + analysis_results.append({ + 'job_name': job_info['job_name'], + 'builds_analyzed': len(job_logs), + 'similarity_score': avg_similarity, + 'common_error_patterns': [ + { + 'pattern': pattern, + 'occurrences': details['occurrences'], + 'similar_sections': details['similar_sections'], + 'similarity_scores': details['similarity_scores'] + } + for pattern, details in significant_patterns.items() + ] + }) + 
+ return analysis_results + + @keyword("Parse Atom Feed") + def parse_atom_feed( + self, + jenkins_url: str, + jenkins_username: platform.Secret, + jenkins_token: platform.Secret + ): + """ + Fetches and parses the Jenkins manage/log Atom feed, returning the combined log text. + + Example usage: + | ${logs}= | Parse Jenkins Atom Feed | ${JENKINS_URL} | ${JENKINS_USERNAME} | ${JENKINS_TOKEN} | + | Log | Jenkins logs: ${logs} | + """ + auth = (jenkins_username.value, jenkins_token.value) + feed_url = f"{jenkins_url}/manage/log/rss" + namespace = {'atom': 'http://www.w3.org/2005/Atom'} + + try: + response = requests.get(feed_url, auth=auth, timeout=10) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Failed to fetch data from Jenkins: {e}") + + root = ET.fromstring(response.text) + logs = "" + for entry in root.findall('atom:entry', namespace): + content_elem = entry.find('atom:content', namespace) + if content_elem is not None and content_elem.text: + logs += f"{content_elem.text.strip()}\n{'=' * 80}\n" + return logs From d40755151f82504423ed0ad6c390d489ef0f52e7 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 20:14:26 +0000 Subject: [PATCH 41/72] wip, automating the job defs --- .../jenkins-health/.test/Taskfile.yaml | 6 +- .../.test/terraform/create_jenkins_token.sh | 71 +++++++++++++++++++ .../.test/terraform/jenkins-job.xml | 44 ++++++++++++ .../jenkins-health/.test/terraform/main.tf | 57 ++++++++------- 4 files changed, 150 insertions(+), 28 deletions(-) create mode 100755 codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh create mode 100644 codebundles/jenkins-health/.test/terraform/jenkins-job.xml diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index b84a7e233..54002b2a3 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -241,9 +241,9 @@ tasks: 
fi # Upload Secrets pushd terraform > /dev/null - jenkins_username=admin + jenkins_username="admin" jenkins_token=$(terraform show -json terraform.tfstate | jq -r ' - .values.outputs.jenkins_admin_password.value') + .values.outputs.jenkins_api_token.value') popd > /dev/null @@ -255,7 +255,7 @@ tasks: # Create Secrets URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" - PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_token": "$jenkins_password" }}' + PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_token": "$jenkins_token" }}' echo "Uploading secrets to $URL" response_code=$(curl -X POST "$URL" \ -H "Authorization: Bearer $RW_PAT" \ diff --git a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh new file mode 100755 index 000000000..750b710a4 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +set -x + +# 1) Read JSON input from stdin +read -r input + +# All debug statements go to stderr via >&2 +echo "[DEBUG] Received JSON input: $input" >&2 + +JENKINS_URL=$(echo "$input" | jq -r .jenkins_url) +USERNAME=$(echo "$input" | jq -r .username) +PASSWORD=$(echo "$input" | jq -r .password) + +echo "[DEBUG] Jenkins URL: $JENKINS_URL" >&2 +echo "[DEBUG] Username: $USERNAME" >&2 + +# 2) Wait for Jenkins up to MAX_ATTEMPTS +MAX_ATTEMPTS=10 +SLEEP_SECONDS=5 +echo "[DEBUG] Checking Jenkins readiness up to $MAX_ATTEMPTS attempts..." >&2 + +for i in $(seq 1 "$MAX_ATTEMPTS"); do + STATUS_CODE=$(curl -s -o /dev/null -w '%{http_code}' \ + --max-time 5 \ + -u "${USERNAME}:${PASSWORD}" \ + "${JENKINS_URL}/api/json" || echo "curl_error") + + if [ "$STATUS_CODE" = "200" ]; then + echo "[DEBUG] Jenkins responded HTTP 200 on attempt #$i." >&2 + break + else + echo "[DEBUG] Attempt #$i: HTTP $STATUS_CODE. Retrying in $SLEEP_SECONDS seconds..." 
>&2 + sleep "$SLEEP_SECONDS" + fi + + if [ "$i" -eq "$MAX_ATTEMPTS" ]; then + echo "[ERROR] Jenkins not ready after $MAX_ATTEMPTS attempts." >&2 + # Return some valid JSON to Terraform (it sees failure). + echo '{"error":"Jenkins never returned 200"}' + exit 1 + fi +done + +# 3) Generate a new token +echo "[DEBUG] Generating a new token via REST..." >&2 + +RESPONSE=$(curl -s -X POST \ + --max-time 10 \ + -u "${USERNAME}:${PASSWORD}" \ + --data "newTokenName=terraformToken" \ + "${JENKINS_URL}/user/${USERNAME}/descriptorByName/jenkins.security.ApiTokenProperty/generateNewToken" || true) + +echo "[DEBUG] Response: $RESPONSE" >&2 + +TOKEN_VALUE=$(echo "$RESPONSE" | jq -r '.data.tokenValue' 2>/dev/null || echo "") + +if [ -z "$TOKEN_VALUE" ] || [ "$TOKEN_VALUE" = "null" ]; then + echo "[ERROR] Could not parse a valid token from the response." >&2 + echo "[ERROR] Full response: $RESPONSE" >&2 + echo '{"error":"Token generation failed"}' + exit 1 +fi + +echo "[DEBUG] Successfully generated token: $TOKEN_VALUE" >&2 + +# 4) Print only JSON to stdout +cat < + + + Freestyle job that runs a 30-minute sleep script. 
+ false + + + + + + true + false + false + false + + + + + false + + + + + + + + + + + + + diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index 44599dbf4..0d9cd684d 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -1,12 +1,9 @@ resource "random_password" "jenkins_admin_password" { - length = 16 - special = true + length = 12 + special = false min_upper = 1 min_lower = 1 min_numeric = 1 - - # Optional: If you prefer fewer special characters, define allow_*: - # override_special = "!@#%" } # Get latest Ubuntu AMI @@ -164,6 +161,9 @@ resource "aws_instance" "jenkins_server" { // Skip the Jenkins setup wizard instance.setInstallState(InstallState.INITIAL_SETUP_COMPLETED) + // Disable CSRF + instance.setCrumbIssuer(null) + // Create admin user with a random password def hudsonRealm = new HudsonPrivateSecurityRealm(false) hudsonRealm.createAccount("admin", "${random_password.jenkins_admin_password.result}") @@ -231,34 +231,31 @@ resource "aws_instance" "jenkins_server" { -resource "null_resource" "wait_for_jenkins_authenticated" { +data "external" "jenkins_token" { depends_on = [aws_instance.jenkins_server] + program = ["bash", "./create_jenkins_token.sh"] + + # These JSON values get passed on stdin to the script + query = { + jenkins_url = "http://${aws_instance.jenkins_server.public_ip}:8080" + username = "admin" + password = "${random_password.jenkins_admin_password.result}" + } +} + +resource "null_resource" "create_jobs" { + depends_on = [data.external.jenkins_token] provisioner "local-exec" { command = <<-EOT - while true; do - echo "Checking Jenkins with the new random password..." 
- - STATUS_CODE=$(curl -s -o /dev/null -w '%%{http_code}' \ - -u "admin:${random_password.jenkins_admin_password.result}" \ - http://${aws_instance.jenkins_server.public_ip}:8080/api/json) - - if [ "$STATUS_CODE" = "200" ]; then - echo "Jenkins is responding with HTTP 200 to admin:${random_password.jenkins_admin_password.result}" - break - else - echo "Got HTTP $STATUS_CODE. Waiting for Jenkins..." - sleep 10 - fi - done - - echo "Jenkins is fully up and accepting authenticated requests." + curl -X POST -u 'admin:${data.external.jenkins_token.result["token"]}' -H "Content-Type: application/xml" --data-binary @jenkins-job.xml http://${aws_instance.jenkins_server.public_ip}:8080/createItem?name=long-running-job + + EOT } } - # Configure Jenkins EC2 agents # resource "null_resource" "configure_jenkins_agents" { # depends_on = [null_resource.wait_for_jenkins] @@ -345,9 +342,19 @@ output "jenkins_admin_password" { } output "fetch_admin_passwrd" { - value = "cd terraform && terraform show -json | jq '.values.outputs.jenkins_admin_password.value'" + value = "JENKINS_PASSWORD=$(cd terraform && terraform show -json | jq -r '.values.outputs.jenkins_admin_password.value')" } output "jenkins_url" { value = "http://${aws_instance.jenkins_server.public_ip}:8080" } + +output "jenkins_api_token" { + value = data.external.jenkins_token.result["token"] + sensitive = true +} + +output "fetch_jenkins_api_token" { + value = "JENKINS_TOKEN=$(cd terraform && terraform show -json | jq -r '.values.outputs.jenkins_api_token.value')" +} + From a5202bf7cb24c8c583a6d106086d5870d4f33178 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 21:18:37 +0000 Subject: [PATCH 42/72] increase timeout --- .../jenkins-health/.test/terraform/create_jenkins_token.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh index 
750b710a4..b30861c4f 100755 --- a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh +++ b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh @@ -1,4 +1,5 @@ -#!/usr/bin/env bash +#!/usr/bin/bash +# echo '{"token": "hello"}' set -x # 1) Read JSON input from stdin @@ -15,8 +16,8 @@ echo "[DEBUG] Jenkins URL: $JENKINS_URL" >&2 echo "[DEBUG] Username: $USERNAME" >&2 # 2) Wait for Jenkins up to MAX_ATTEMPTS -MAX_ATTEMPTS=10 -SLEEP_SECONDS=5 +MAX_ATTEMPTS=30 +SLEEP_SECONDS=10 echo "[DEBUG] Checking Jenkins readiness up to $MAX_ATTEMPTS attempts..." >&2 for i in $(seq 1 "$MAX_ATTEMPTS"); do From 2e4ef65c1e59812f96b7a76ee8ea6bcdc5d355cb Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:09:18 +0000 Subject: [PATCH 43/72] add new jobs, plugins, and test varifying that the ec2 instance is running (to avoid code for terminated instances) --- .../jenkins-instance-health.yaml | 8 +-- codebundles/jenkins-health/.test/README.md | 1 - .../jenkins-health/.test/Taskfile.yaml | 6 +- .../.test/terraform/failed-job.xml | 39 +++++++++++ .../.test/terraform/failed-pipeline.xml | 46 +++++++++++++ .../{jenkins-job.xml => long-running-job.xml} | 3 +- .../jenkins-health/.test/terraform/main.tf | 68 ++++++++++++++++++- 7 files changed, 157 insertions(+), 14 deletions(-) create mode 100644 codebundles/jenkins-health/.test/terraform/failed-job.xml create mode 100644 codebundles/jenkins-health/.test/terraform/failed-pipeline.xml rename codebundles/jenkins-health/.test/terraform/{jenkins-job.xml => long-running-job.xml} (99%) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index 590cd1ae5..a665b4e1e 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -12,10 +12,10 @@ spec: pattern: 
"jenkins-server" properties: [tag-values] mode: substring - # - type: pattern - # pattern: ".+" - # properties: [custom/jenkins_url] - # mode: substring + - type: pattern + pattern: "Running" + properties: [state/Name] + mode: substring slxs: - baseName: jenkins-instance-health diff --git a/codebundles/jenkins-health/.test/README.md b/codebundles/jenkins-health/.test/README.md index a95263ea3..ee8b5a3c5 100644 --- a/codebundles/jenkins-health/.test/README.md +++ b/codebundles/jenkins-health/.test/README.md @@ -65,7 +65,6 @@ Create this file with the following environment variables: ```sh #!/bin/bash - error # Print the start time echo "Script started at: $(date)" diff --git a/codebundles/jenkins-health/.test/Taskfile.yaml b/codebundles/jenkins-health/.test/Taskfile.yaml index 54002b2a3..247ef0bd6 100644 --- a/codebundles/jenkins-health/.test/Taskfile.yaml +++ b/codebundles/jenkins-health/.test/Taskfile.yaml @@ -241,8 +241,7 @@ tasks: fi # Upload Secrets pushd terraform > /dev/null - jenkins_username="admin" - jenkins_token=$(terraform show -json terraform.tfstate | jq -r ' + jenkins_token=$(terraform show -json | jq -r ' .values.outputs.jenkins_api_token.value') popd > /dev/null @@ -252,10 +251,9 @@ tasks: echo "Error: Missing jenkins_token details. Ensure Terraform plan has been applied." 
exit 1 fi - # Create Secrets URL="https://${RW_API_URL}/api/v3/workspaces/${RW_WORKSPACE}/secrets" - PAYLOAD='{"secrets": {"jenkins_username": "$jenkins_username", "jenkins_token": "$jenkins_token" }}' + PAYLOAD="{\"secrets\": {\"jenkins_username\": \"admin\", \"jenkins_token\": \"${jenkins_token}\"}}" echo "Uploading secrets to $URL" response_code=$(curl -X POST "$URL" \ -H "Authorization: Bearer $RW_PAT" \ diff --git a/codebundles/jenkins-health/.test/terraform/failed-job.xml b/codebundles/jenkins-health/.test/terraform/failed-job.xml new file mode 100644 index 000000000..6f71aba03 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/failed-job.xml @@ -0,0 +1,39 @@ + + + + A Freestyle job deliberately configured to fail for testing. + false + + + + + + true + false + false + false + + + false + + + + + + + + + + + diff --git a/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml b/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml new file mode 100644 index 000000000..79da4b607 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/failed-pipeline.xml @@ -0,0 +1,46 @@ + + + + A sample Pipeline job that uses Maven (M3) to build a simple Maven project and run tests. 
+ false + + + + + true + + + false + diff --git a/codebundles/jenkins-health/.test/terraform/jenkins-job.xml b/codebundles/jenkins-health/.test/terraform/long-running-job.xml similarity index 99% rename from codebundles/jenkins-health/.test/terraform/jenkins-job.xml rename to codebundles/jenkins-health/.test/terraform/long-running-job.xml index 3057b2aa2..16e172d60 100644 --- a/codebundles/jenkins-health/.test/terraform/jenkins-job.xml +++ b/codebundles/jenkins-health/.test/terraform/long-running-job.xml @@ -23,8 +23,7 @@ create_admin.groovy @@ -248,14 +269,55 @@ resource "null_resource" "create_jobs" { provisioner "local-exec" { command = <<-EOT - curl -X POST -u 'admin:${data.external.jenkins_token.result["token"]}' -H "Content-Type: application/xml" --data-binary @jenkins-job.xml http://${aws_instance.jenkins_server.public_ip}:8080/createItem?name=long-running-job - - + #!/usr/bin/env bash + + TOKEN='${data.external.jenkins_token.result["token"]}' + JENKINS_URL="http://${aws_instance.jenkins_server.public_ip}:8080" + + # Define a function to check if a job exists. If yes, update; if not, create. 
+ function upsert_job() { + local job_name="$1" + local config_file="$2" + + # Check if job exists by hitting its /api/json + local status_code + status_code=$(curl -s -o /dev/null -w '%%{http_code}' -u "admin:$TOKEN" "$JENKINS_URL/job/$job_name/api/json") + + if [ "$status_code" = "200" ]; then + echo "Updating job: $job_name" + curl -X POST -u "admin:$TOKEN" \ + -H "Content-Type: application/xml" \ + --data-binary @"$config_file" \ + "$JENKINS_URL/job/$job_name/config.xml" + else + echo "Creating job: $job_name" + curl -X POST -u "admin:$TOKEN" \ + -H "Content-Type: application/xml" \ + --data-binary @"$config_file" \ + "$JENKINS_URL/createItem?name=$job_name" + fi + } + + # Upsert each job + upsert_job "the-fastest-job" "long-running-job.xml" + upsert_job "this-never-breaks" "failed-job.xml" + upsert_job "my-fun-pipeline" "failed-pipeline.xml" + + # Now queue the slow jobs (build them) -- same as before + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" + + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/this-never-breaks/build" + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/my-fun-pipeline/build" EOT + # This ensures /bin/bash is used: + interpreter = ["/bin/bash", "-c"] } } + # Configure Jenkins EC2 agents # resource "null_resource" "configure_jenkins_agents" { # depends_on = [null_resource.wait_for_jenkins] From 1c10324aca8f78f83aa826b42339519b79ff260e Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:27:57 +0000 Subject: [PATCH 44/72] try new property --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index 
a665b4e1e..b548fd44f 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -14,7 +14,7 @@ spec: mode: substring - type: pattern pattern: "Running" - properties: [state/Name] + properties: [resource/state/Name] mode: substring slxs: From b254f88ace760a76ee42da8494464d90461e558f Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:33:22 +0000 Subject: [PATCH 45/72] debug resource match --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index b548fd44f..0fa634dd5 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -13,8 +13,8 @@ spec: properties: [tag-values] mode: substring - type: pattern - pattern: "Running" - properties: [resource/state/Name] + pattern: "xen" + properties: [resource/hypervisor] mode: substring slxs: From 4330651b44aae411a4ec83b988d8e3a6dccafbcd Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:36:28 +0000 Subject: [PATCH 46/72] x --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index 0fa634dd5..2717fce53 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -14,7 +14,7 @@ spec: mode: substring - type: pattern pattern: "xen" - 
properties: [resource/hypervisor] + properties: [resource.hypervisor] mode: substring slxs: From 7f44b37c6e8dae73285589ad13b91ec7cea3404a Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:37:41 +0000 Subject: [PATCH 47/72] x --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index 2717fce53..3d5e7e1a3 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -14,7 +14,7 @@ spec: mode: substring - type: pattern pattern: "xen" - properties: [resource.hypervisor] + properties: [hypervisor] mode: substring slxs: From c3f29019c2d39b065e92ba6a1bc53427eb5763a0 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:40:26 +0000 Subject: [PATCH 48/72] debug --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index 3d5e7e1a3..a665b4e1e 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -13,8 +13,8 @@ spec: properties: [tag-values] mode: substring - type: pattern - pattern: "xen" - properties: [hypervisor] + pattern: "Running" + properties: [state/Name] mode: substring slxs: From 600a5929fe97880e8ce47dfc350de24b6f39e5ff Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:41:55 +0000 Subject: [PATCH 49/72] debug --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index a665b4e1e..abfef9d25 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -14,7 +14,7 @@ spec: mode: substring - type: pattern pattern: "Running" - properties: [state/Name] + properties: [state["Name"]] mode: substring slxs: From 143fa95d91cfa10f0b87c5e4b007a37372231029 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:43:05 +0000 Subject: [PATCH 50/72] x --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index abfef9d25..a665b4e1e 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -14,7 +14,7 @@ spec: mode: substring - type: pattern pattern: "Running" - properties: [state["Name"]] + properties: [state/Name] mode: substring slxs: From 20147c99cce8c73e6b47c2252269eca008d699d1 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 17 Feb 2025 23:46:05 +0000 Subject: [PATCH 51/72] x --- .../.runwhen/generation-rules/jenkins-instance-health.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml index a665b4e1e..8bb47cd19 100644 --- a/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml +++ 
b/codebundles/jenkins-health/.runwhen/generation-rules/jenkins-instance-health.yaml @@ -13,7 +13,7 @@ spec: properties: [tag-values] mode: substring - type: pattern - pattern: "Running" + pattern: "running" properties: [state/Name] mode: substring From 1931395621de118140d1366fdefdbcde7c3dadd8 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Tue, 18 Feb 2025 00:06:21 +0000 Subject: [PATCH 52/72] fix plugin installation --- codebundles/jenkins-health/.test/terraform/main.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index e5cd346af..96662b59b 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -151,19 +151,19 @@ resource "aws_instance" "jenkins_server" { # Install the Job DSL plugin echo "[INFO] Installing Job DSL plugin..." - java -jar /tmp/jenkins-cli.jar \ + java -jar jenkins-cli.jar \ -s http://localhost:8080 \ -auth "admin:$JENKINS_PASS" \ install-plugin job-dsl -deploy echo "[INFO] Installing Pipeline plugin (workflow-aggregator)..." - java -jar /tmp/jenkins-cli.jar \ + java -jar jenkins-cli.jar \ -s "http://localhost:8080" \ -auth "admin:$JENKINS_PASS" \ install-plugin workflow-aggregator -deploy echo "[INFO] Restarting Jenkins..." 
- java -jar /tmp/jenkins-cli.jar \ + java -jar jenkins-cli.jar \ -s http://localhost:8080 \ -auth "admin:$JENKINS_PASS" \ safe-restart From 1ae118c5c362f2c172d8e11da370289d32abfd5d Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 12:27:20 +0530 Subject: [PATCH 53/72] redact sensitive information in parse_atom_feed --- libraries/Jenkins/jenkins.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py index d481b9632..16704e2b3 100644 --- a/libraries/Jenkins/jenkins.py +++ b/libraries/Jenkins/jenkins.py @@ -384,6 +384,7 @@ def parse_atom_feed( ): """ Fetches and parses the Jenkins manage/log Atom feed, returning the combined log text. + Any sensitive information like initial admin passwords will be redacted. Example usage: | ${logs}= | Parse Jenkins Atom Feed | ${JENKINS_URL} | ${JENKINS_USERNAME} | ${JENKINS_TOKEN} | @@ -404,6 +405,10 @@ def parse_atom_feed( for entry in root.findall('atom:entry', namespace): content_elem = entry.find('atom:content', namespace) if content_elem is not None and content_elem.text: - logs += f"{content_elem.text.strip()}\n{'=' * 80}\n" + log_text = content_elem.text.strip() + # Redact initial admin password lines + if "Jenkins initial setup is required" in log_text and "This may also be found at:" in log_text: + log_text = "Jenkins initial setup is required. 
[REDACTED]" + logs += f"{log_text}\n{'=' * 80}\n" return logs From 2bf9c53ce35a019b5aa9f061db51a30fc71dce15 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 12:27:41 +0530 Subject: [PATCH 54/72] update Jenkins health checks to include instance name in task descriptions --- codebundles/jenkins-health/sli.robot | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot index 90fca3b4c..9d292634a 100644 --- a/codebundles/jenkins-health/sli.robot +++ b/codebundles/jenkins-health/sli.robot @@ -13,7 +13,7 @@ Library Jenkins Suite Setup Suite Initialization *** Tasks *** -Check For Failed Build Logs in Jenkins +Check For Failed Build Logs in Jenkins Instance ${JENKINS_INSTANCE_NAME} [Documentation] Check For Failed Build Logs in Jenkins [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File @@ -32,7 +32,7 @@ Check For Failed Build Logs in Jenkins ${failed_builds_score}= Evaluate 1 if int(${failed_builds}) <= int(${MAX_FAILED_BUILDS}) else 0 Set Global Variable ${failed_builds_score} -Check For Long Running Builds in Jenkins +Check For Long Running Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} [Documentation] Check Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File @@ -55,7 +55,7 @@ Check For Long Running Builds in Jenkins ${long_running_score}= Evaluate 1 if int(${long_running_count}) <= int(${MAX_LONG_RUNNING_BUILDS}) else 0 Set Global Variable ${long_running_score} -Check For Recent Failed Tests in Jenkins +Check For Recent Failed Tests in Jenkins Instance ${JENKINS_INSTANCE_NAME} [Documentation] Check For Recent Failed Tests in Jenkins [Tags] Jenkins Tests ${failed_tests}= Jenkins.Get Failed Tests @@ -70,7 +70,7 @@ Check For Recent Failed Tests in Jenkins Set Global Variable ${failed_test_score} 1 END -Check For Jenkins Health +Check For Jenkins Instance 
${JENKINS_INSTANCE_NAME} Health [Documentation] Check if Jenkins instance is reachable and responding [Tags] Jenkins Health ${rsp}= RW.CLI.Run Cli @@ -85,7 +85,7 @@ Check For Jenkins Health Set Global Variable ${jenkins_health_score} 0 END -Check For Long Queued Builds in Jenkins +Check For Long Queued Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} [Documentation] Check for builds stuck in queue beyond threshold and calculate SLI score [Tags] Jenkins Queue Builds SLI ${queued_builds}= Jenkins.Get Queued Builds @@ -97,7 +97,7 @@ Check For Long Queued Builds in Jenkins ${queued_builds_score}= Evaluate 1 if int(${queued_count}) <= int(${MAX_QUEUED_BUILDS}) else 0 Set Global Variable ${queued_builds_score} -Check Jenkins Executor Utilization +Check Jenkins Executor Utilization in Jenkins Instance ${JENKINS_INSTANCE_NAME} [Documentation] Check if Jenkins executor utilization is above 80% [Tags] Jenkins Executors Utilization ${executor_utilization}= Jenkins.Get Executor Utilization @@ -180,6 +180,12 @@ Suite Initialization ... pattern=\d+ ... example="80" ... default="80" + ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME + ... type=string + ... description=Jenkins Instance Name + ... pattern=\d+ + ... example="prod-jenkins" + ... 
default="prod-jenkins" Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} @@ -190,4 +196,6 @@ Suite Initialization Set Suite Variable ${MAX_ALLOWED_FAILED_TESTS} ${MAX_ALLOWED_FAILED_TESTS} Set Suite Variable ${MAX_QUEUED_BUILDS} ${MAX_QUEUED_BUILDS} Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} - Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} \ No newline at end of file + Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} + Set Suite Variable ${JENKINS_INSTANCE_NAME} ${JENKINS_INSTANCE_NAME} + \ No newline at end of file From 3af8102589eaae60a6379d6bdd9a9004023eacb9 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 12:27:54 +0530 Subject: [PATCH 55/72] update Jenkins health check tasks to include instance name in descriptions --- codebundles/jenkins-health/runbook.robot | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index 1731b9895..1f1c4b318 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -12,7 +12,7 @@ Library Jenkins Suite Setup Suite Initialization *** Tasks *** -List Failed Build Logs in Jenkins +List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File @@ -50,7 +50,7 @@ List Failed Build Logs in Jenkins RW.Core.Add Pre To Report "No failed builds found" END -List Long Running Builds in Jenkins +List Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Identifies Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File @@ -95,8 
+95,8 @@ List Long Running Builds in Jenkins RW.Core.Add Pre To Report "No long running builds found" END -List Recent Failed Tests in Jenkins - [Documentation] List Recent Failed Tests in Jenkins +List Recent Failed Tests in Jenkins Instance `${JENKINS_INSTANCE_NAME}` + [Documentation] List Recent Failed Tests in Jenkins Instance [Tags] Jenkins Tests ${failed_tests}= Jenkins.Get Failed Tests ... jenkins_url=${JENKINS_URL} @@ -137,7 +137,7 @@ List Recent Failed Tests in Jenkins RW.Core.Add Pre To Report "No failed tests found" END -Check Jenkins Health +Check Jenkins Instance `${JENKINS_INSTANCE_NAME}` Health [Documentation] Check if Jenkins instance is reachable and responding [Tags] Jenkins Health # TODO: Capture more exceptions here @@ -160,7 +160,7 @@ Check Jenkins Health ... next_steps=- Check if Jenkins service is running\n- Verify network connectivity\n- Validate Jenkins URL\n- Check Jenkins logs for errors END -List Long Queued Builds in Jenkins +List Long Queued Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check for builds stuck in queue beyond threshold [Tags] Jenkins Queue Builds @@ -210,7 +210,7 @@ List Long Queued Builds in Jenkins END -List Jenkins Executor Utilization +List Executor Utilization in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check Jenkins executor utilization across nodes [Tags] Jenkins Executors Utilization @@ -252,7 +252,7 @@ List Jenkins Executor Utilization END -Fetch Jenkins Logs and Add to Report +Fetch Jenkins Instance `${JENKINS_INSTANCE_NAME}` Logs and Add to Report [Documentation] Fetches and displays Jenkins logs from the Atom feed [Tags] Jenkins Logs ${rsp}= Jenkins.Parse Atom Feed @@ -297,11 +297,18 @@ Suite Initialization ... pattern=\d+ ... example="80" ... default="80" + ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME + ... type=string + ... description=Jenkins Instance Name + ... pattern=\d+ + ... example="prod-jenkins" + ... 
default="prod-jenkins" Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} Set Suite Variable ${JENKINS_URL} ${JENKINS_URL} Set Suite Variable ${JENKINS_USERNAME} ${JENKINS_USERNAME} Set Suite Variable ${JENKINS_TOKEN} ${JENKINS_TOKEN} Set Suite Variable ${QUEUED_BUILD_MAX_WAIT_TIME} ${QUEUED_BUILD_MAX_WAIT_TIME} Set Suite Variable ${MAX_EXECUTOR_UTILIZATION} ${MAX_EXECUTOR_UTILIZATION} + Set Suite Variable ${JENKINS_INSTANCE_NAME} ${JENKINS_INSTANCE_NAME} Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} #Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}", "JENKINS_USERNAME":"${JENKINS_USERNAME.key}", "JENKINS_TOKEN":"${JENKINS_TOKEN.key}"} From b5b96a44cabca5b8f5851a5e809857ac3b31f76b Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 13:10:02 +0530 Subject: [PATCH 56/72] jenkins cb: update path to robot file in sli template --- .../.runwhen/templates/jenkins-instance-health-sli.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml index fe1633913..ee567a0d4 100644 --- a/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml +++ b/codebundles/jenkins-health/.runwhen/templates/jenkins-instance-health-sli.yaml @@ -23,7 +23,7 @@ spec: {% else %} ref: main {% endif %} - pathToRobot: codebundles/jenkins-health/runbook.robot + pathToRobot: codebundles/jenkins-health/sli.robot intervalStrategy: intermezzo intervalSeconds: 600 configProvided: From 4e434e9e49f718e70f6a96655040361dfe627578 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 16:47:55 +0530 Subject: [PATCH 57/72] Jenkins hlt cb: add error patterns and suggestions --- .../jenkins-health/error_patterns.json | 195 ++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 codebundles/jenkins-health/error_patterns.json 
diff --git a/codebundles/jenkins-health/error_patterns.json b/codebundles/jenkins-health/error_patterns.json new file mode 100644 index 000000000..622a165dd --- /dev/null +++ b/codebundles/jenkins-health/error_patterns.json @@ -0,0 +1,195 @@ +{ + "COMPILATION_ERROR": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "Check detailed logs above for root cause and retry build after fixing issues." + }, + "TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failures:", + "suggestion": "Review failed test cases and ensure test data is correct." + }, + "DEPENDENCY_ERROR": { + "pattern": "Could not resolve dependencies", + "suggestion": "Verify dependency versions in pom.xml and check repository accessibility." + }, + "CHECKSTYLE_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-checkstyle-plugin", + "suggestion": "Fix code style violations and check checkstyle configuration." + }, + "JAVA_VERSION_MISMATCH": { + "pattern": "Unsupported major\\.minor version", + "suggestion": "Verify Java version in build environment and pom.xml." + }, + "OUT_OF_MEMORY": { + "pattern": "java\\.lang\\.OutOfMemoryError", + "suggestion": "Increase memory allocation for the build or optimize memory usage." + }, + "MAVEN_PLUGIN_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal .*:maven-.*-plugin", + "suggestion": "Ensure the required Maven plugin is installed and correctly configured." + }, + "MISSING_CLASS": { + "pattern": "java\\.lang\\.NoClassDefFoundError", + "suggestion": "Verify classpath configuration and ensure dependencies are properly included." + }, + "MISSING_METHOD": { + "pattern": "java\\.lang\\.NoSuchMethodError", + "suggestion": "Check if the method exists in the correct version of the dependency." + }, + "CONNECTION_TIMEOUT": { + "pattern": "Connection timed out", + "suggestion": "Check network connectivity and increase connection timeout in Maven settings." 
+ }, + "AUTHENTICATION_FAILURE": { + "pattern": "401 Unauthorized|403 Forbidden", + "suggestion": "Verify credentials in settings.xml and check user permissions." + }, + "PERMISSION_DENIED": { + "pattern": "Permission denied", + "suggestion": "Ensure appropriate file permissions and user access rights." + }, + "GIT_ERROR": { + "pattern": "fatal: .+", + "suggestion": "Check Git repository URL, credentials, and branch availability." + }, + "SSL_ERROR": { + "pattern": "javax.net.ssl.SSLException", + "suggestion": "Verify SSL certificates and Java keystore settings." + }, + "JAR_NOT_FOUND": { + "pattern": "Could not find artifact .+ in central", + "suggestion": "Ensure the artifact exists in Maven repository or check repository settings." + }, + "MAVEN_COMPATIBILITY_ERROR": { + "pattern": "Non-resolvable parent POM", + "suggestion": "Ensure parent POM exists and is accessible." + }, + "FORK_FAILURE": { + "pattern": "Execution default-test of goal org.apache.maven.plugins:maven-surefire-plugin failed", + "suggestion": "Check for missing dependencies or configuration issues in Surefire plugin." + }, + "PORT_BIND_ERROR": { + "pattern": "Address already in use", + "suggestion": "Ensure the port is not already occupied by another process." + }, + "GROOVY_COMPILATION_ERROR": { + "pattern": "org\\.codehaus\\.groovy\\.control\\.MultipleCompilationErrorsException", + "suggestion": "Ensure that all required tools (e.g., Maven) are correctly installed and configured in Jenkins global tool settings." + }, + "MAVEN_TOOL_NOT_FOUND": { + "pattern": "Tool type \\\"maven\\\" does not have an install of \\\".*\\\" configured", + "suggestion": "Check Jenkins global tool configuration to ensure Maven is installed and referenced correctly in the pipeline script." + }, + "COMPILATION_ERROR": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "Check detailed logs above for root cause and retry build after fixing issues." 
+ }, + "TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failures:", + "suggestion": "Review failed test cases and ensure test data is correct." + }, + "DEPENDENCY_ERROR": { + "pattern": "Could not resolve dependencies", + "suggestion": "Verify dependency versions in pom.xml and check repository accessibility." + }, + "CHECKSTYLE_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-checkstyle-plugin", + "suggestion": "Fix code style violations and check checkstyle configuration." + }, + "JAVA_VERSION_MISMATCH": { + "pattern": "Unsupported major\\.minor version", + "suggestion": "Verify Java version in build environment and pom.xml." + }, + "OUT_OF_MEMORY": { + "pattern": "java\\.lang\\.OutOfMemoryError", + "suggestion": "Increase memory allocation for the build or optimize memory usage." + }, + "MAVEN_PLUGIN_ERROR": { + "pattern": "\\[ERROR\\] Failed to execute goal .*:maven-.*-plugin", + "suggestion": "Ensure the required Maven plugin is installed and correctly configured." + }, + "MISSING_CLASS": { + "pattern": "java\\.lang\\.NoClassDefFoundError", + "suggestion": "Verify classpath configuration and ensure dependencies are properly included." + }, + "MISSING_RESOURCE": { + "pattern": "java\\.lang\\.NoSuchMethodError", + "suggestion": "Check if the method exists in the correct version of the dependency." + }, + "CONNECTION_TIMEOUT": { + "pattern": "Connection timed out", + "suggestion": "Check network connectivity and increase connection timeout in Maven settings." + }, + "AUTHENTICATION_FAILURE": { + "pattern": "401 Unauthorized|403 Forbidden", + "suggestion": "Verify credentials in settings.xml and check user permissions." + }, + "GROOVY_COMPILATION_ERROR": { + "pattern": "org\\.codehaus\\.groovy\\.control\\.MultipleCompilationErrorsException", + "suggestion": "Ensure that all required tools (e.g., Maven) are correctly installed and configured in Jenkins global tool settings." 
+ }, + "MAVEN_TOOL_NOT_FOUND": { + "pattern": "Tool type \\\"maven\\\" does not have an install of \\\".*\\\" configured", + "suggestion": "Check Jenkins global tool configuration to ensure Maven is installed and referenced correctly in the pipeline script." + }, + "JENKINS_STARTUP_FAILURE": { + "pattern": "SEVERE: Failed to initialize Jenkins", + "suggestion": "This error usually indicates a corrupted Jenkins configuration file or plugin. Review the Jenkins logs to identify the problematic configuration or plugin and rectify it." + }, + "GROOVY_CLASS_RESOLUTION_ERROR": { + "pattern": "unable to resolve class", + "suggestion": "This error suggests that the Groovy script is unable to find a specified class. Ensure that all necessary classes are imported correctly and that the classpath is properly configured." + }, + "MISSING_FILE_OR_DIRECTORY": { + "pattern": "No such file or directory", + "suggestion": "This error indicates that Jenkins is unable to find the specified file or directory. Verify the path provided in your Jenkins job configuration and ensure the file or directory exists at the specified location." + }, + "JENKINS_PIPELINE_SYNTAX_ERROR": { + "pattern": "WorkflowScript: .*: Expected .* but found .*", + "suggestion": "This error points to a syntax issue in your Jenkins Pipeline script. Use the Jenkins Pipeline Syntax tool to validate the script and identify any errors or warnings." + }, + "JENKINS_GROOVY_SCRIPT_ERROR": { + "pattern": "groovy.lang.MissingPropertyException", + "suggestion": "This error occurs when a Groovy script tries to access a property that doesn't exist. Check the script for typos or incorrect property names." + }, + "JENKINS_PLUGIN_ERROR": { + "pattern": "java.lang.NoSuchMethodError", + "suggestion": "This error often arises from plugin incompatibilities. Ensure all Jenkins plugins are up-to-date and compatible with your Jenkins version." 
+ }, + "JENKINS_DISK_SPACE_ERROR": { + "pattern": "No space left on device", + "suggestion": "Jenkins is unable to write to disk due to insufficient space. Free up disk space on the Jenkins server and consider setting up disk usage monitoring." + }, + "JENKINS_PERMISSION_DENIED_ERROR": { + "pattern": "Permission denied", + "suggestion": "Jenkins does not have the necessary permissions to execute a script or access a file. Verify the permissions of the Jenkins user and adjust them as needed." + }, + "JENKINS_AGENT_CONNECTION_ERROR": { + "pattern": "java\\.io\\.IOException: Connection reset by peer", + "suggestion": "This error indicates a loss of connection between the Jenkins master and agent. Check the network connectivity and firewall settings between the master and agent machines." + }, + "JENKINS_GIT_AUTHENTICATION_ERROR": { + "pattern": "Authentication failed for 'https://.*'", + "suggestion": "Jenkins is unable to authenticate with the Git repository. Verify the credentials and ensure they are correctly configured in Jenkins." + }, + "JENKINS_SCM_ERROR": { + "pattern": "hudson\\.plugins\\.git\\.GitException: Command \"git .*\" returned status code 128", + "suggestion": "This error suggests an issue with the Source Code Management (SCM) configuration. Check the repository URL and credentials." + }, + "JENKINS_MAVEN_BUILD_FAILURE": { + "pattern": "\\[ERROR\\] BUILD FAILURE", + "suggestion": "The Maven build has failed. Review the error messages above this line in the log to identify the cause and address it accordingly." + }, + "JENKINS_MAVEN_COMPILATION_FAILURE": { + "pattern": "\\[ERROR\\] COMPILATION ERROR", + "suggestion": "There are compilation errors in the Maven build. Check the detailed error messages to identify and fix the issues in the code." + }, + "JENKINS_MAVEN_TEST_FAILURE": { + "pattern": "\\[ERROR\\] Failed to execute goal org\\.apache\\.maven\\.plugins:maven-surefire-plugin", + "suggestion": "Test execution has failed. 
Review the test results to identify failing tests and address the underlying issues." + }, + "DEPENDENCY_RESOLUTION_ERROR": { + "pattern": "Could not resolve dependencies for project", + "suggestion": "Ensure the required dependencies exist in the specified repository and check the internet connection or repository credentials." + } + +} From 01b6da0f1f57a23c9b550bf11186ea4a7a300469 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 16:48:24 +0530 Subject: [PATCH 58/72] add log analysis feature to suggest next steps for Java/Maven errors --- libraries/Jenkins/jenkins.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py index 16704e2b3..06eb2883a 100644 --- a/libraries/Jenkins/jenkins.py +++ b/libraries/Jenkins/jenkins.py @@ -1,5 +1,7 @@ import requests import time +import re +import json import xml.etree.ElementTree as ET from collections import defaultdict from thefuzz import fuzz @@ -412,3 +414,26 @@ def parse_atom_feed( logs += f"{log_text}\n{'=' * 80}\n" return logs + + + @keyword("Analyze Logs") + def analyze_logs(self, logs: str, error_patterns_file: str = None): + """Analyzes logs for common Java/Maven errors and suggests next steps.""" + if error_patterns_file: + with open(error_patterns_file, "r") as f: + error_patterns = json.load(f) + + suggestions = [] + for error, details in error_patterns.items(): + pattern = details.get("pattern") + advice = details.get("suggestion") + + if pattern and advice: + if re.search(pattern, logs, re.MULTILINE): + suggestions.append(advice) + + # Use default suggestions if no specific issues found + if not suggestions: + suggestions = ["Check detailed logs for root cause."] + + return "\n".join(suggestions) From 7c4bd369320067f3b819de14525870472075c4a7 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 16:49:07 +0530 Subject: [PATCH 59/72] update Jenkins runbook to analyze logs and suggest next steps for 
failed builds --- codebundles/jenkins-health/runbook.robot | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index 1f1c4b318..1a3540474 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -35,6 +35,9 @@ List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` ${formatted_results}= RW.CLI.Run Cli ... cmd=echo '${json_str}' | jq -r '["Job Name", "Build #", "Result", "URL"] as $headers | $headers, (. | [.job_name, .build_number, .result, .url]) | @tsv' | column -t -s $'\t' RW.Core.Add Pre To Report Failed Builds:\n=======================================\n${formatted_results.stdout} + ${next_steps}= Analyze Logs + ... logs=${job['logs']} + ... error_patterns_file=${CURDIR}/error_patterns.json ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint RW.Core.Add Issue @@ -44,7 +47,7 @@ List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` ... title=Jenkins Build Failure: `${job_name}` (Build #`${build_number}`) ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` ... details=${pretty_item} - ... next_steps=Review Failed build logs for Jenkins job `${job_name}` + ... 
next_steps=${next_steps} END ELSE RW.Core.Add Pre To Report "No failed builds found" From 5a72a1c5b428c1b8b40e44ae1ec877e29e44168e Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 16:59:35 +0530 Subject: [PATCH 60/72] update runbook to use relative paths for bash scripts and adjust pattern for Jenkins instance name --- codebundles/jenkins-health/runbook.robot | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index 1a3540474..ea75fec06 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -16,7 +16,7 @@ List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Fetches logs from failed Jenkins builds using the Jenkins API [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=failed_build_logs.sh + ... bash_file=${CURDIR}/failed_build_logs.sh ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -57,8 +57,8 @@ List Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Identifies Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=long_running_builds.sh - ... cmd_override=./long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... bash_file=${CURDIR}/long_running_builds.sh + ... cmd_override=${CURDIR}/long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -303,7 +303,7 @@ Suite Initialization ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME ... type=string ... description=Jenkins Instance Name - ... pattern=\d+ + ... pattern=\w* ... example="prod-jenkins" ... 
default="prod-jenkins" Set Suite Variable ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} From e3c171730c77ba30dbd21679caafcca308a18eff Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 17:14:12 +0530 Subject: [PATCH 61/72] update Jenkins health checks to use backticks for instance name variable --- codebundles/jenkins-health/sli.robot | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot index 9d292634a..3397f0941 100644 --- a/codebundles/jenkins-health/sli.robot +++ b/codebundles/jenkins-health/sli.robot @@ -13,7 +13,7 @@ Library Jenkins Suite Setup Suite Initialization *** Tasks *** -Check For Failed Build Logs in Jenkins Instance ${JENKINS_INSTANCE_NAME} +Check For Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check For Failed Build Logs in Jenkins [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File @@ -32,7 +32,7 @@ Check For Failed Build Logs in Jenkins Instance ${JENKINS_INSTANCE_NAME} ${failed_builds_score}= Evaluate 1 if int(${failed_builds}) <= int(${MAX_FAILED_BUILDS}) else 0 Set Global Variable ${failed_builds_score} -Check For Long Running Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} +Check For Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File @@ -55,7 +55,7 @@ Check For Long Running Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} ${long_running_score}= Evaluate 1 if int(${long_running_count}) <= int(${MAX_LONG_RUNNING_BUILDS}) else 0 Set Global Variable ${long_running_score} -Check For Recent Failed Tests in Jenkins Instance ${JENKINS_INSTANCE_NAME} +Check For Recent Failed Tests in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check For Recent Failed Tests in Jenkins [Tags] Jenkins Tests 
${failed_tests}= Jenkins.Get Failed Tests @@ -70,7 +70,7 @@ Check For Recent Failed Tests in Jenkins Instance ${JENKINS_INSTANCE_NAME} Set Global Variable ${failed_test_score} 1 END -Check For Jenkins Instance ${JENKINS_INSTANCE_NAME} Health +Check For Jenkins Instance `${JENKINS_INSTANCE_NAME}` Health [Documentation] Check if Jenkins instance is reachable and responding [Tags] Jenkins Health ${rsp}= RW.CLI.Run Cli @@ -85,7 +85,7 @@ Check For Jenkins Instance ${JENKINS_INSTANCE_NAME} Health Set Global Variable ${jenkins_health_score} 0 END -Check For Long Queued Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} +Check For Long Queued Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check for builds stuck in queue beyond threshold and calculate SLI score [Tags] Jenkins Queue Builds SLI ${queued_builds}= Jenkins.Get Queued Builds @@ -97,7 +97,7 @@ Check For Long Queued Builds in Jenkins Instance ${JENKINS_INSTANCE_NAME} ${queued_builds_score}= Evaluate 1 if int(${queued_count}) <= int(${MAX_QUEUED_BUILDS}) else 0 Set Global Variable ${queued_builds_score} -Check Jenkins Executor Utilization in Jenkins Instance ${JENKINS_INSTANCE_NAME} +Check Jenkins Executor Utilization in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check if Jenkins executor utilization is above 80% [Tags] Jenkins Executors Utilization ${executor_utilization}= Jenkins.Get Executor Utilization @@ -183,7 +183,7 @@ Suite Initialization ${JENKINS_INSTANCE_NAME}= RW.Core.Import User Variable JENKINS_INSTANCE_NAME ... type=string ... description=Jenkins Instance Name - ... pattern=\d+ + ... pattern=\w* ... example="prod-jenkins" ... 
default="prod-jenkins" Set Suite Variable ${env} {"JENKINS_URL":"${JENKINS_URL}"} From b34eed7a0fc7edb0dc4af9b075f3b5a0094bc401 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 18:10:04 +0530 Subject: [PATCH 62/72] jenkins hlt cb: use relative paths for bash scripts in sli --- codebundles/jenkins-health/sli.robot | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codebundles/jenkins-health/sli.robot b/codebundles/jenkins-health/sli.robot index 3397f0941..6566b4761 100644 --- a/codebundles/jenkins-health/sli.robot +++ b/codebundles/jenkins-health/sli.robot @@ -17,7 +17,7 @@ Check For Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check For Failed Build Logs in Jenkins [Tags] Jenkins Logs Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=failed_build_logs.sh + ... bash_file=${CURDIR}/failed_build_logs.sh ... env=${env} ... include_in_history=False ... secret__jenkins_token=${JENKINS_TOKEN} @@ -36,8 +36,8 @@ Check For Long Running Builds in Jenkins Instance `${JENKINS_INSTANCE_NAME}` [Documentation] Check Jenkins builds that have been running longer than a specified threshold [Tags] Jenkins Builds ${rsp}= RW.CLI.Run Bash File - ... bash_file=long_running_builds.sh - ... cmd_override=./long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} + ... bash_file=${CURDIR}/long_running_builds.sh + ... cmd_override=${CURDIR}/long_running_builds.sh ${LONG_RUNNING_BUILD_MAX_WAIT_TIME} ... env=${env} ... include_in_history=False ... 
secret__jenkins_token=${JENKINS_TOKEN} From 891626b94b109d624c8fe8c269a5e74893df61ea Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Wed, 19 Feb 2025 18:26:25 +0530 Subject: [PATCH 63/72] add error patterns and suggestions for common Python exceptions --- .../jenkins-health/error_patterns.json | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/codebundles/jenkins-health/error_patterns.json b/codebundles/jenkins-health/error_patterns.json index 622a165dd..dd5576413 100644 --- a/codebundles/jenkins-health/error_patterns.json +++ b/codebundles/jenkins-health/error_patterns.json @@ -190,6 +190,77 @@ "DEPENDENCY_RESOLUTION_ERROR": { "pattern": "Could not resolve dependencies for project", "suggestion": "Ensure the required dependencies exist in the specified repository and check the internet connection or repository credentials." + }, + + + "SYNTAX_ERROR": { + "pattern": "SyntaxError: .+", + "suggestion": "Check for incorrect syntax, missing colons, incorrect indentation, or mismatched parentheses." + }, + "INDENTATION_ERROR": { + "pattern": "IndentationError: .+", + "suggestion": "Ensure consistent indentation using spaces or tabs. Avoid mixing both." + }, + "TYPE_ERROR": { + "pattern": "TypeError: .+", + "suggestion": "Check data types in function calls and operations. Convert variables if necessary." + }, + "NAME_ERROR": { + "pattern": "NameError: name '.+' is not defined", + "suggestion": "Ensure the variable or function is defined before use." + }, + "INDEX_ERROR": { + "pattern": "IndexError: list index out of range", + "suggestion": "Check list length before accessing elements by index." + }, + "KEY_ERROR": { + "pattern": "KeyError: '.+'", + "suggestion": "Ensure the dictionary contains the key before accessing it." + }, + "VALUE_ERROR": { + "pattern": "ValueError: .+", + "suggestion": "Check if the function arguments or data formats are valid." 
+ }, + "ATTRIBUTE_ERROR": { + "pattern": "AttributeError: '.+' object has no attribute '.+'", + "suggestion": "Ensure the object has the specified attribute before accessing it." + }, + "MODULE_NOT_FOUND": { + "pattern": "ModuleNotFoundError: No module named '.+'", + "suggestion": "Ensure the required module is installed and the import statement is correct." + }, + "IMPORT_ERROR": { + "pattern": "ImportError: .+", + "suggestion": "Check module paths and dependencies. Ensure required modules are installed." + }, + "IO_ERROR": { + "pattern": "OSError: .+|IOError: .+", + "suggestion": "Check file paths, permissions, and disk space before performing I/O operations." + }, + "ZERO_DIVISION_ERROR": { + "pattern": "ZeroDivisionError: division by zero", + "suggestion": "Avoid dividing by zero. Check for zero before performing division." + }, + "RECURSION_ERROR": { + "pattern": "RecursionError: maximum recursion depth exceeded", + "suggestion": "Check recursive function calls to prevent infinite recursion." + }, + "MEMORY_ERROR": { + "pattern": "MemoryError", + "suggestion": "Optimize memory usage. Consider processing data in smaller chunks." + }, + "TIMEOUT_ERROR": { + "pattern": "TimeoutError", + "suggestion": "Increase timeout limits or optimize code to prevent long execution times." + }, + "CONNECTION_ERROR": { + "pattern": "requests\\.exceptions\\.ConnectionError", + "suggestion": "Check network connectivity and ensure the server is reachable." + }, + "SSL_ERROR": { + "pattern": "requests\\.exceptions\\.SSLError", + "suggestion": "Verify SSL certificates and ensure secure connections." 
} + } From 2ea4552e25b981bafbc9def125eff18fee79941f Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 28 Feb 2025 16:54:22 +0530 Subject: [PATCH 64/72] increase maximum attempts for Jenkins readiness check from 30 to 100 --- .../jenkins-health/.test/terraform/create_jenkins_token.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh index b30861c4f..f0b2ae3e3 100755 --- a/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh +++ b/codebundles/jenkins-health/.test/terraform/create_jenkins_token.sh @@ -16,7 +16,7 @@ echo "[DEBUG] Jenkins URL: $JENKINS_URL" >&2 echo "[DEBUG] Username: $USERNAME" >&2 # 2) Wait for Jenkins up to MAX_ATTEMPTS -MAX_ATTEMPTS=30 +MAX_ATTEMPTS=100 SLEEP_SECONDS=10 echo "[DEBUG] Checking Jenkins readiness up to $MAX_ATTEMPTS attempts..." >&2 From 30b592fa665a855a7b30dc034d525473422bf004 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 28 Feb 2025 16:54:36 +0530 Subject: [PATCH 65/72] add error patterns and suggestions for common Jenkins and Docker issues --- .../jenkins-health/error_patterns.json | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/codebundles/jenkins-health/error_patterns.json b/codebundles/jenkins-health/error_patterns.json index dd5576413..f8c903d24 100644 --- a/codebundles/jenkins-health/error_patterns.json +++ b/codebundles/jenkins-health/error_patterns.json @@ -260,7 +260,41 @@ "SSL_ERROR": { "pattern": "requests\\.exceptions\\.SSLError", "suggestion": "Verify SSL certificates and ensure secure connections." - } - - + }, + "GIT_TOOL_NOT_FOUND": { + "pattern": "Selected Git installation does not exist\\. Using Default", + "suggestion": "Ensure Git is installed and configured correctly in Jenkins global tool settings." 
+ }, + "NO_CREDENTIALS_SPECIFIED": { + "pattern": "No credentials specified", + "suggestion": "Add appropriate credentials in Jenkins for secure repository access." + }, + "DOCKER_BUILD_ERROR": { + "pattern": "docker build .* failed", + "suggestion": "Review the Dockerfile and check for syntax errors or missing files." + }, + "DOCKER_CONTAINER_NOT_RUNNING": { + "pattern": "Jenkins does not seem to be running inside a container", + "suggestion": "Ensure Jenkins is running inside a container or check the Docker configuration." + }, + "JENKINS_DOCKER_PERMISSION_DENIED": { + "pattern": "permission denied while trying to connect to the Docker daemon socket", + "suggestion": "Add the Jenkins user to the Docker group and restart the service." + }, + "PYTHON_REQUIREMENTS_FAIL": { + "pattern": "ERROR: Could not find a version that satisfies the requirement .*", + "suggestion": "Verify the package name and version in 'requirements.txt' and check the package index." + }, + "DEBCONF_FRONTEND_ERROR": { + "pattern": "debconf: unable to initialize frontend", + "suggestion": "Set the 'DEBIAN_FRONTEND' environment variable to 'noninteractive' for Docker builds." + }, + "NO_SUCH_DSL_METHOD": { + "pattern": "java\\.lang\\.NoSuchMethodError: No such DSL method '.*'", + "suggestion": "Verify the Jenkins Pipeline script and ensure all necessary plugins are installed." + }, + "JUNIT_NOT_FOUND": { + "pattern": "No such DSL method 'junit'", + "suggestion": "Ensure the 'JUnit Plugin' is installed and configured in Jenkins." 
+ } } From 67fb87cb93ead1844c4b684a13c8f74501758663 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 28 Feb 2025 16:58:24 +0530 Subject: [PATCH 66/72] refactor suggestion handling in Jenkins error patterns to use a set for deduplication and improve match checking --- libraries/Jenkins/jenkins.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py index 06eb2883a..bf6ddaabc 100644 --- a/libraries/Jenkins/jenkins.py +++ b/libraries/Jenkins/jenkins.py @@ -423,17 +423,19 @@ def analyze_logs(self, logs: str, error_patterns_file: str = None): with open(error_patterns_file, "r") as f: error_patterns = json.load(f) - suggestions = [] + suggestions = set() # Use a set for deduplication + for error, details in error_patterns.items(): pattern = details.get("pattern") advice = details.get("suggestion") if pattern and advice: - if re.search(pattern, logs, re.MULTILINE): - suggestions.append(advice) + matches = re.findall(pattern, logs, re.MULTILINE) + if matches: # Only add advice if there are matches + suggestions.add(advice) - # Use default suggestions if no specific issues found + # Use default suggestion if no specific issues found if not suggestions: - suggestions = ["Check detailed logs for root cause."] + suggestions.add("Check detailed logs for root cause.") return "\n".join(suggestions) From 13a136cc6da26ca80e56facdc5c8895a2caf09ed Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 28 Feb 2025 17:24:23 +0530 Subject: [PATCH 67/72] add Jenkins pipeline configuration for Python Docker project --- .../terraform/python-docker-pipeline.xml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml diff --git a/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml b/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml new file mode 100644 index 
000000000..5e6d93455 --- /dev/null +++ b/codebundles/jenkins-health/.test/terraform/python-docker-pipeline.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + +false + + + +2 + + +https://github.com/saurabh3460/python-testing-ci.git + + + + +main + + +false + + + +Jenkinsfile +true + + +false + \ No newline at end of file From b3e8b5f7c8b24b59bcd2f5567dfa82b0cd40fa73 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Fri, 28 Feb 2025 17:24:40 +0530 Subject: [PATCH 68/72] add Docker and Git plugin installation to Jenkins setup --- .../jenkins-health/.test/terraform/main.tf | 109 +++++------------- 1 file changed, 29 insertions(+), 80 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index 96662b59b..b41ca0202 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -162,6 +162,12 @@ resource "aws_instance" "jenkins_server" { -auth "admin:$JENKINS_PASS" \ install-plugin workflow-aggregator -deploy + echo "[INFO] Installing git and docker plugin..." + java -jar jenkins-cli.jar \ + -s "http://localhost:8080" \ + -auth "admin:$JENKINS_PASS" \ + install-plugin git docker-plugin docker-workflow -deploy + echo "[INFO] Restarting Jenkins..." java -jar jenkins-cli.jar \ -s http://localhost:8080 \ @@ -210,6 +216,26 @@ resource "aws_instance" "jenkins_server" { # (Optional) Additional setup commands, e.g. Docker, etc. # ... + # Add Docker's official GPG key: + apt-get update + apt-get -y install ca-certificates curl + install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + chmod a+r /etc/apt/keyrings/docker.asc + + # Add the repository to Apt sources: + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null + apt-get update + apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + + echo "DOCKER_OPTS=\"-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock\"" >> /etc/default/docker + systemctl restart docker + groupadd docker + usermod -aG docker ubuntu + usermod -aG docker jenkins EOF tags = { @@ -219,39 +245,6 @@ resource "aws_instance" "jenkins_server" { } -# # Instance Profile for Jenkins -# resource "aws_iam_instance_profile" "jenkins_profile" { -# name = "jenkins_profile" -# role = aws_iam_role.jenkins_role.name -# } - -# # Security Group for Jenkins Agents -# resource "aws_security_group" "jenkins_agent_sg" { -# name = "jenkins-agent-sg" -# description = "Security group for Jenkins agents" -# vpc_id = aws_vpc.jenkins_vpc.id - -# ingress { -# from_port = 22 -# to_port = 22 -# protocol = "tcp" -# security_groups = [aws_security_group.jenkins_sg.id] -# } - -# egress { -# from_port = 0 -# to_port = 0 -# protocol = "-1" -# cidr_blocks = ["0.0.0.0/0"] -# } - -# tags = { -# Name = "jenkins-agent-sg" -# } -# } - - - data "external" "jenkins_token" { depends_on = [aws_instance.jenkins_server] program = ["bash", "./create_jenkins_token.sh"] @@ -302,6 +295,7 @@ resource "null_resource" "create_jobs" { upsert_job "the-fastest-job" "long-running-job.xml" upsert_job "this-never-breaks" "failed-job.xml" upsert_job "my-fun-pipeline" "failed-pipeline.xml" + upsert_job "python-docker" "python-docker-pipeline.xml" # Now queue the slow jobs (build them) -- same as before curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/the-fastest-job/build" @@ -310,59 +304,14 @@ resource "null_resource" "create_jobs" { curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/this-never-breaks/build" curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/my-fun-pipeline/build" + + curl -X POST -u "admin:$TOKEN" "$JENKINS_URL/job/python-docker/build" EOT # 
This ensures /bin/bash is used: interpreter = ["/bin/bash", "-c"] } } - - -# Configure Jenkins EC2 agents -# resource "null_resource" "configure_jenkins_agents" { -# depends_on = [null_resource.wait_for_jenkins] - -# connection { -# type = "ssh" -# user = "ubuntu" -# private_key = tls_private_key.jenkins_key.private_key_pem -# host = aws_instance.jenkins_server.public_ip -# } - -# provisioner "file" { -# content = tls_private_key.jenkins_key.private_key_pem -# destination = "/tmp/jenkins-key.pem" -# } - -# provisioner "file" { -# content = templatefile("${path.module}/configure_ec2_agent.groovy.tpl", { -# ami_id = data.aws_ami.ubuntu.id -# subnet_id = aws_subnet.jenkins_subnet.id -# security_group_id = aws_security_group.jenkins_sg.id -# }) -# destination = "/tmp/configure_ec2_agent.groovy" -# } - - -# provisioner "remote-exec" { -# inline = [ -# # Setup SSH key for Jenkins -# "sudo mkdir -p /var/lib/jenkins/.ssh", -# "sudo mv /tmp/jenkins-key.pem /var/lib/jenkins/.ssh/", -# "sudo chown -R jenkins:jenkins /var/lib/jenkins/.ssh", -# "sudo chmod 700 /var/lib/jenkins/.ssh", -# "sudo chmod 600 /var/lib/jenkins/.ssh/jenkins-key.pem", -# "cat /tmp/configure_ec2_agent.groovy", -# "wget -q http://localhost:8080/jnlpJars/jenkins-cli.jar", -# # Execute the Groovy script using Jenkins CLI -# "java -jar jenkins-cli.jar -s http://localhost:8080 -auth admin:admin123! 
groovy = < /tmp/configure_ec2_agent.groovy", - -# # Cleanup -# "rm /tmp/configure_ec2_agent.groovy" -# ] -# } -# } - # Create IAM user for Jenkins resource "aws_iam_user" "jenkins_user" { name = "jenkins-user" From a42029759f5ab3be315719bfeb6538cf63c8bba5 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Mon, 3 Mar 2025 15:19:56 +0530 Subject: [PATCH 69/72] update Docker service configuration to use TCP and systemd overrides --- .../jenkins-health/.test/terraform/main.tf | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/.test/terraform/main.tf b/codebundles/jenkins-health/.test/terraform/main.tf index b41ca0202..1a08ae4a4 100644 --- a/codebundles/jenkins-health/.test/terraform/main.tf +++ b/codebundles/jenkins-health/.test/terraform/main.tf @@ -231,8 +231,21 @@ resource "aws_instance" "jenkins_server" { apt-get update apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - echo "DOCKER_OPTS=\"-H tcp://0.0.0.0:2376 -H unix:///var/run/docker.sock\"" >> /etc/default/docker - systemctl restart docker + TCP_ADDRESS="tcp://127.0.0.1:2375" + mkdir -p /etc/systemd/system/docker.service.d + cat < /etc/systemd/system/docker.service.d/override.conf + [Service] + ExecStart= + ExecStart=/usr/bin/dockerd -H $TCP_ADDRESS -H unix:///var/run/docker.sock + ELF + + # Reload systemctl configuration + echo "Reloading systemctl configuration..." + systemctl daemon-reload + + # Restart Docker + echo "Restarting Docker service..." 
+ systemctl restart docker.service groupadd docker usermod -aG docker ubuntu usermod -aG docker jenkins From e07ef6b86964f95e4e2b3c77e9390a0252a83b4c Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Mar 2025 18:32:09 +0530 Subject: [PATCH 70/72] enhance log analysis by normalizing input and prioritizing error context in suggestions --- libraries/Jenkins/jenkins.py | 77 +++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/libraries/Jenkins/jenkins.py b/libraries/Jenkins/jenkins.py index bf6ddaabc..e98e53a3b 100644 --- a/libraries/Jenkins/jenkins.py +++ b/libraries/Jenkins/jenkins.py @@ -10,6 +10,13 @@ from robot.api.deco import keyword from RW import platform +def normalize_log(log: str) -> str: + """Normalize logs to improve pattern matching.""" + log = log.lower() # Convert to lowercase for case-insensitive matching + log = re.sub(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z', '{timestamp}', log) # Remove timestamps + log = re.sub(r'[a-f0-9]{12,}', '{hash}', log) # Replace long hashes or container IDs + log = re.sub(r'\s+', ' ', log).strip() # Collapse multiple spaces + return log class Jenkins: """ @@ -418,24 +425,76 @@ def parse_atom_feed( @keyword("Analyze Logs") def analyze_logs(self, logs: str, error_patterns_file: str = None): - """Analyzes logs for common Java/Maven errors and suggests next steps.""" + """Analyzes logs for common errors, prioritizing lines with ERROR, and suggests next steps.""" if error_patterns_file: with open(error_patterns_file, "r") as f: error_patterns = json.load(f) - suggestions = set() # Use a set for deduplication - + suggestions = [] # Collect suggestions with error lines + error_lines = [] # Store error-prone log sections + normalized_logs = normalize_log(logs) for error, details in error_patterns.items(): pattern = details.get("pattern") advice = details.get("suggestion") if pattern and advice: - matches = re.findall(pattern, logs, re.MULTILINE) - if matches: # Only add advice 
if there are matches - suggestions.add(advice) + matches = re.finditer(pattern.lower(), normalized_logs, re.MULTILINE) + for match in matches: + matched_line = match.group(0) + # Get context around the error + start = max(0, match.start() - 100) # Get 100 chars before match + end = min(len(normalized_logs), match.end() + 100) # Get 100 chars after match + context = normalized_logs[start:end] + + # Format advice with matches if needed + formatted_advice = advice + if '{match}' in advice: + # Replace all {match} placeholders with corresponding group matches + formatted_advice = advice + for i, group in enumerate(match.groups(), start=1): + formatted_advice = formatted_advice.replace('{match}', group, 1) + + # Prioritize lines with ERROR or FAILURE + if re.search(r"(ERROR|error|FAILURE)", matched_line): + error_lines.insert(0, (formatted_advice, context)) # Add to the front for higher priority + else: + error_lines.append((formatted_advice, context)) # Add normally + + # Collect unique suggestions in order of priority + seen_advice = set() + for advice, line in error_lines: + if advice not in seen_advice: + suggestions.append({"suggestion": advice, "log": line}) + seen_advice.add(advice) # Use default suggestion if no specific issues found if not suggestions: - suggestions.add("Check detailed logs for root cause.") - - return "\n".join(suggestions) + # Find all error/failure lines with context (5 lines before and after) + error_blocks = [] + for match in re.finditer(r'.*\b(error|failure)\b.*', normalized_logs, re.MULTILINE | re.IGNORECASE): + start = max(0, match.start() - 500) # Get 500 chars before for context + end = min(len(normalized_logs), match.end() + 500) # Get 500 chars after for context + error_blocks.append(normalized_logs[start:end]) + + if error_blocks: + # Deduplicate while preserving order + unique_errors = [] + seen = set() + for block in error_blocks: + if block not in seen: + seen.add(block) + unique_errors.append(block) + + suggestions.append({ + 
"suggestion": "Check detailed logs for root cause.", + "log": "\n---\n".join(unique_errors) + }) + else: + # Provide more specific guidance when no errors found + suggestions.append({ + "suggestion": "Check detailed logs for root cause.", + "log": logs + }) + + return suggestions + From d2c93f2103ac404c0fd48b4f5679002b0fc80d4f Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Mar 2025 18:33:09 +0530 Subject: [PATCH 71/72] refactor error patterns in Jenkins health checks to remove outdated suggestions and improve clarity --- .../jenkins-health/error_patterns.json | 44 +++---------------- 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/codebundles/jenkins-health/error_patterns.json b/codebundles/jenkins-health/error_patterns.json index f8c903d24..a9697d081 100644 --- a/codebundles/jenkins-health/error_patterns.json +++ b/codebundles/jenkins-health/error_patterns.json @@ -31,10 +31,6 @@ "pattern": "java\\.lang\\.NoClassDefFoundError", "suggestion": "Verify classpath configuration and ensure dependencies are properly included." }, - "MISSING_METHOD": { - "pattern": "java\\.lang\\.NoSuchMethodError", - "suggestion": "Check if the method exists in the correct version of the dependency." - }, "CONNECTION_TIMEOUT": { "pattern": "Connection timed out", "suggestion": "Check network connectivity and increase connection timeout in Maven settings." @@ -43,10 +39,6 @@ "pattern": "401 Unauthorized|403 Forbidden", "suggestion": "Verify credentials in settings.xml and check user permissions." }, - "PERMISSION_DENIED": { - "pattern": "Permission denied", - "suggestion": "Ensure appropriate file permissions and user access rights." - }, "GIT_ERROR": { "pattern": "fatal: .+", "suggestion": "Check Git repository URL, credentials, and branch availability." @@ -111,10 +103,6 @@ "pattern": "java\\.lang\\.NoClassDefFoundError", "suggestion": "Verify classpath configuration and ensure dependencies are properly included." 
}, - "MISSING_RESOURCE": { - "pattern": "java\\.lang\\.NoSuchMethodError", - "suggestion": "Check if the method exists in the correct version of the dependency." - }, "CONNECTION_TIMEOUT": { "pattern": "Connection timed out", "suggestion": "Check network connectivity and increase connection timeout in Maven settings." @@ -151,18 +139,10 @@ "pattern": "groovy.lang.MissingPropertyException", "suggestion": "This error occurs when a Groovy script tries to access a property that doesn't exist. Check the script for typos or incorrect property names." }, - "JENKINS_PLUGIN_ERROR": { - "pattern": "java.lang.NoSuchMethodError", - "suggestion": "This error often arises from plugin incompatibilities. Ensure all Jenkins plugins are up-to-date and compatible with your Jenkins version." - }, "JENKINS_DISK_SPACE_ERROR": { "pattern": "No space left on device", "suggestion": "Jenkins is unable to write to disk due to insufficient space. Free up disk space on the Jenkins server and consider setting up disk usage monitoring." }, - "JENKINS_PERMISSION_DENIED_ERROR": { - "pattern": "Permission denied", - "suggestion": "Jenkins does not have the necessary permissions to execute a script or access a file. Verify the permissions of the Jenkins user and adjust them as needed." - }, "JENKINS_AGENT_CONNECTION_ERROR": { "pattern": "java\\.io\\.IOException: Connection reset by peer", "suggestion": "This error indicates a loss of connection between the Jenkins master and agent. Check the network connectivity and firewall settings between the master and agent machines." @@ -261,14 +241,6 @@ "pattern": "requests\\.exceptions\\.SSLError", "suggestion": "Verify SSL certificates and ensure secure connections." }, - "GIT_TOOL_NOT_FOUND": { - "pattern": "Selected Git installation does not exist\\. Using Default", - "suggestion": "Ensure Git is installed and configured correctly in Jenkins global tool settings." 
- }, - "NO_CREDENTIALS_SPECIFIED": { - "pattern": "No credentials specified", - "suggestion": "Add appropriate credentials in Jenkins for secure repository access." - }, "DOCKER_BUILD_ERROR": { "pattern": "docker build .* failed", "suggestion": "Review the Dockerfile and check for syntax errors or missing files." @@ -277,9 +249,9 @@ "pattern": "Jenkins does not seem to be running inside a container", "suggestion": "Ensure Jenkins is running inside a container or check the Docker configuration." }, - "JENKINS_DOCKER_PERMISSION_DENIED": { - "pattern": "permission denied while trying to connect to the Docker daemon socket", - "suggestion": "Add the Jenkins user to the Docker group and restart the service." + "DOCKER_PERMISSION_DENIED": { + "pattern": "ERROR: permission denied while trying to connect to the Docker daemon socket", + "suggestion": "Add the Jenkins user to the Docker group with `sudo usermod -aG docker jenkins`, then restart Jenkins with `sudo systemctl restart jenkins`." }, "PYTHON_REQUIREMENTS_FAIL": { "pattern": "ERROR: Could not find a version that satisfies the requirement .*", @@ -290,11 +262,7 @@ "suggestion": "Set the 'DEBIAN_FRONTEND' environment variable to 'noninteractive' for Docker builds." }, "NO_SUCH_DSL_METHOD": { - "pattern": "java\\.lang\\.NoSuchMethodError: No such DSL method '.*'", - "suggestion": "Verify the Jenkins Pipeline script and ensure all necessary plugins are installed." - }, - "JUNIT_NOT_FOUND": { - "pattern": "No such DSL method 'junit'", - "suggestion": "Ensure the 'JUnit Plugin' is installed and configured in Jenkins." - } + "pattern": "java\\.lang\\.NoSuchMethodError: No such DSL method '([^']+)'", + "suggestion": "Check if the plugin providing '{match}' is installed and up-to-date." 
+ } } From dac2d47b8e456c3afed47800bb28bcbfdd28c8f0 Mon Sep 17 00:00:00 2001 From: saurabh3460 Date: Tue, 4 Mar 2025 18:33:15 +0530 Subject: [PATCH 72/72] improve error reporting in Jenkins health checks by enhancing suggestion and log detail formatting --- codebundles/jenkins-health/runbook.robot | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/codebundles/jenkins-health/runbook.robot b/codebundles/jenkins-health/runbook.robot index ea75fec06..9264ce09d 100644 --- a/codebundles/jenkins-health/runbook.robot +++ b/codebundles/jenkins-health/runbook.robot @@ -40,14 +40,22 @@ List Failed Build Logs in Jenkins Instance `${JENKINS_INSTANCE_NAME}` ... error_patterns_file=${CURDIR}/error_patterns.json ${pretty_item}= Evaluate pprint.pformat(${job}) modules=pprint + + ${suggestions}= Set Variable ${EMPTY} + ${logs_details}= Set Variable ${EMPTY} + FOR ${step} IN @{next_steps} + ${suggestions}= Set Variable ${suggestions}${step['suggestion']}\n + ${logs_details}= Set Variable ${logs_details}Log: ${step['log']}\n + END + RW.Core.Add Issue ... severity=3 ... expected=Jenkins job `${job_name}` should complete successfully ... actual=Jenkins job `${job_name}` build #`${build_number}` failed ... title=Jenkins Build Failure: `${job_name}` (Build #`${build_number}`) ... reproduce_hint=Navigate to Jenkins build `${job_name}` #`${build_number}` - ... details=${pretty_item} - ... next_steps=${next_steps} + ... details=Error Logs:\n${logs_details} + ... next_steps=${suggestions} END ELSE RW.Core.Add Pre To Report "No failed builds found"