Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
3255521
User given slurm node info
Nagachandan-P Feb 24, 2026
6b584ed
fallback scenario and input dir
Nagachandan-P Feb 24, 2026
e196a70
lint issues fixed
Nagachandan-P Feb 24, 2026
6a60aa5
lint errors fixed
Nagachandan-P Feb 24, 2026
3fc791d
warnings of jinja spacing
Nagachandan-P Feb 25, 2026
c594d09
Update omnia.sh
mithileshreddy04 Feb 25, 2026
9917e06
Merge branch 'pub/q1_dev' of https://github.com/mithileshreddy04/omni…
mithileshreddy04 Feb 25, 2026
61ff25e
Merge pull request #4044 from mithileshreddy04/pub/q1_dev
abhishek-sa1 Feb 26, 2026
327f929
Update image not found error in omnia.sh
mithileshreddy04 Feb 26, 2026
db6bf53
Merge branch 'pub/q1_dev' of https://github.com/mithileshreddy04/omni…
mithileshreddy04 Feb 26, 2026
c6c60ed
Update omnia.sh
mithileshreddy04 Feb 26, 2026
abf03fb
Update omnia.sh
mithileshreddy04 Feb 26, 2026
16ec53a
Merge pull request #4049 from mithileshreddy04/pub/q1_dev
abhishek-sa1 Feb 26, 2026
1d807f9
Updating validation for user_repo_url repo names
Katakam-Rakesh Feb 26, 2026
d0865b8
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
Katakam-Rakesh Feb 26, 2026
6b34371
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
Nagachandan-P Feb 26, 2026
e9d3d2f
upgrade to 2.1
Nagachandan-P Feb 26, 2026
47b7d9e
upgrade to 2.1 fixed spacing
Nagachandan-P Feb 26, 2026
a8426b9
upgrade lint fixed spacing
Nagachandan-P Feb 26, 2026
994b72c
upgrade lint
Nagachandan-P Feb 26, 2026
4bf3d0f
Merge pull request #4050 from Katakam-Rakesh/pub/q1_dev
jagadeeshnv Feb 26, 2026
2b8f504
Delete node when busy with choice to abort
jagadeeshnv Feb 26, 2026
3224d1d
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
jagadeeshnv Feb 26, 2026
9183463
bmc inv validation
jagadeeshnv Feb 26, 2026
056c374
Lint fix
jagadeeshnv Feb 27, 2026
c729c2e
Update ssh ci-group-login_compiler_node_aarch64.yaml.j2
Rohith-Ravut Feb 27, 2026
84bbc0e
Update ssh ci-group-login_node_aarch64.yaml.j2
Rohith-Ravut Feb 27, 2026
9722f8b
Update ssh ci-group-slurm_node_aarch64.yaml.j2
Rohith-Ravut Feb 27, 2026
092aa3d
Update ssh main.yml
Rohith-Ravut Feb 27, 2026
92699f6
Merge pull request #4054 from jagadeeshnv/pub/q1_dev
snarthan Feb 27, 2026
e58b999
Merge pull request #4055 from Rohith-Ravut/pub/q1_dev
snarthan Feb 27, 2026
944ba28
Updated pxe boot to handle poweroff case
jagadeeshnv Feb 27, 2026
28112cb
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
jagadeeshnv Feb 27, 2026
1cac11b
Merge pull request #4056 from jagadeeshnv/pub/q1_dev
jagadeeshnv Feb 27, 2026
25e9684
Error update for openLDAP container not running in discovery.yml
mithileshreddy04 Feb 27, 2026
833776c
Update discovery.yml
mithileshreddy04 Feb 27, 2026
1e89503
localrepo pulp cleanup for all types
pullan1 Feb 27, 2026
334dbf0
updated copyright info
pullan1 Feb 27, 2026
fcb069f
Update main.yml
mithileshreddy04 Feb 27, 2026
9d4fa52
Update main.yml
mithileshreddy04 Feb 27, 2026
a4129ff
Delete scenario fix
jagadeeshnv Feb 27, 2026
a6bd9e1
Merge pull request #4061 from jagadeeshnv/pub/q1_dev
snarthan Feb 27, 2026
596b282
Merge pull request #4036 from Nagachandan-P/pub/q1_dev
snarthan Feb 27, 2026
0d1f036
Merge pull request #4059 from pullan1/pub/q1_dev
snarthan Feb 27, 2026
66df2ca
default values for slurm node info
Nagachandan-P Feb 27, 2026
48e576c
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
Nagachandan-P Feb 27, 2026
3622743
Merge pull request #4062 from Nagachandan-P/pub/q1_dev
snarthan Feb 27, 2026
b0cff18
Merge pull request #4058 from mithileshreddy04/pub/q1_dev
abhishek-sa1 Feb 27, 2026
53b70f0
Added retry for unreachable iDRACs
jagadeeshnv Mar 1, 2026
ad253ff
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
jagadeeshnv Mar 2, 2026
8a6d40b
multi-node mpi jobs firewall permission
Nagachandan-P Mar 2, 2026
9ded573
Merge pull request #4071 from Nagachandan-P/pub/q1_dev
snarthan Mar 2, 2026
884011d
Updated gpu default for slurm conf
jagadeeshnv Mar 2, 2026
748e347
Pxe mapping x86 update
jagadeeshnv Mar 2, 2026
d5a0bd7
Update main.yml
jagadeeshnv Mar 2, 2026
f5464e9
Merge branch 'dell:pub/q1_dev' into pub/q1_dev
jagadeeshnv Mar 2, 2026
2bfbbf5
Update pxe_mapping_file.csv
jagadeeshnv Mar 2, 2026
d192280
Update pxe_mapping_file.csv
jagadeeshnv Mar 2, 2026
2d6583f
Update slurm_custom.json
jagadeeshnv Mar 2, 2026
fff42d3
Update slurm_custom.json
jagadeeshnv Mar 2, 2026
b967b44
Merge pull request #4073 from jagadeeshnv/pub/q1_dev
jagadeeshnv Mar 2, 2026
75f2602
Delete when mix of busy and idle nodes, scenario for abort
jagadeeshnv Mar 2, 2026
b133a9d
initialize
jagadeeshnv Mar 3, 2026
4654f38
Merge pull request #4081 from jagadeeshnv/pub/q1_dev
snarthan Mar 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ def schema(config):
error_bucket.append("input data reading failed.")
return error_bucket

# Normalize case-sensitive fields for omnia_config.yml
if "omnia_config" in input_file_path:
if "slurm_cluster" in input_data:
for cluster in input_data["slurm_cluster"]:
if "node_discovery_mode" in cluster and isinstance(cluster["node_discovery_mode"], str):
cluster["node_discovery_mode"] = cluster["node_discovery_mode"].lower()

# Load schema
with open(schema_file_path, "r", encoding="utf-8") as schema_file:
j_schema = json.load(schema_file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@
LDMS_REQUIRES_SLURM_MSG = (
"requires Slurm package 'slurm_custom' to be present in the 'softwares' list in software_config.json."
)
USER_REPO_NAME_PREFIX_FAIL_MSG = (
"Repository name '{repo_name}' in {repo_key} must start with '{expected_prefix}'. "
"Please update the name to '{expected_prefix}{repo_name}'."
)

# omnia_config.yml
INVALID_PASSWORD_MSG = ("Provided password is invalid. Password must meet the specified "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,50 @@
"type": "boolean",
"description": "Variable indicates whether a specific configuration file path under config_sources should be used as-is without merging"
},
"node_discovery_mode": {
"type": "string",
"enum": ["homogeneous", "heterogeneous"],
"description": "Node hardware discovery mode. 'homogeneous' for group-based discovery, 'heterogeneous' for individual node discovery. Default: heterogeneous"
},
"node_hardware_defaults": {
"type": "object",
"description": "Hardware specifications for homogeneous node groups. Key is group name (grp0-grp100), value is hardware specs.",
"patternProperties": {
"^grp([0-9]|[1-9][0-9]|100)$": {
"type": "object",
"properties": {
"sockets": {
"type": "integer",
"minimum": 1,
"description": "Number of CPU sockets per node"
},
"cores_per_socket": {
"type": "integer",
"minimum": 1,
"description": "Number of CPU cores per socket"
},
"threads_per_core": {
"type": "integer",
"minimum": 1,
"description": "Number of CPU threads per core"
},
"real_memory": {
"type": "integer",
"minimum": 1,
"description": "Memory in MB (exact value to use in Slurm)"
},
"gres": {
"type": "string",
"pattern": "^gpu:[0-9]+$",
"description": "GPU resources in format 'gpu:N' (optional)"
}
},
"required": ["sockets", "cores_per_socket", "threads_per_core", "real_memory"],
"additionalProperties": false
}
},
"additionalProperties": false
},
"config_sources": {
"type": "object",
"description": "Config can be a file path or inline mapping",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,30 @@ def validate_omnia_config(
"slurm NFS not provided",
f"NFS name {', '.join(diff_set)} required for slurm is not defined in {storage_config}"
))


# Validate node_hardware_defaults requires node_discovery_mode=homogeneous
for clst in data.get('slurm_cluster', []):
node_hardware_defaults = clst.get('node_hardware_defaults')
node_discovery_mode = clst.get('node_discovery_mode')

# Normalize mode to lowercase for case-insensitive comparison
if node_discovery_mode and isinstance(node_discovery_mode, str):
node_discovery_mode = node_discovery_mode.lower()

if node_hardware_defaults and len(node_hardware_defaults) > 0:
if not node_discovery_mode or node_discovery_mode != 'homogeneous':
group_names = list(node_hardware_defaults.keys())
errors.append(
create_error_msg(
input_file_path,
"slurm_cluster configuration inconsistency",
f"'node_hardware_defaults' is specified for groups {group_names}, but 'node_discovery_mode' is not set to 'homogeneous'. "
f"Current mode: {node_discovery_mode if node_discovery_mode else 'not set (defaults to heterogeneous)'}. "
f"Either set 'node_discovery_mode: \"homogeneous\"' to use the hardware specifications, "
f"or remove 'node_hardware_defaults' to use heterogeneous discovery."
))

cnfg_src = [clst.get('config_sources', {}) for clst in data.get('slurm_cluster')]
skip_conf_validation = os.path.exists("/opt/omnia/input/.skip_slurm_conf_validation")
cnfg_src = [clst.get('config_sources', {}) for clst in data.get('slurm_cluster')]
skip_merge_list = [clst.get('skip_merge', False) for clst in data.get('slurm_cluster')]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import re
from ansible.module_utils.input_validation.common_utils import validation_utils
from ansible.module_utils.input_validation.common_utils import config
from ansible.module_utils.input_validation.common_utils import en_us_validation_msg
from ansible.module_utils.local_repo.software_utils import load_yaml, load_json

file_names = config.files
Expand Down Expand Up @@ -128,6 +129,27 @@ def validate_local_repo_config(input_file_path, data,
if key_path and not os.path.exists(key_path):
errors.append(create_error_msg(local_repo_yml, "user_registry",
f"Key file not found: {key_path}"))

# Validate user_repo_url name prefixes
user_repo_prefix_map = {
"user_repo_url_x86_64": "x86_64_",
"user_repo_url_aarch64": "aarch64_",
}
for repo_key, expected_prefix in user_repo_prefix_map.items():
user_repos = data.get(repo_key)
if user_repos:
for repo in user_repos:
repo_name = repo.get("name", "")
if repo_name and not repo_name.startswith(expected_prefix):
errors.append(create_error_msg(
local_repo_yml, repo_key,
en_us_validation_msg.USER_REPO_NAME_PREFIX_FAIL_MSG.format(
repo_name=repo_name,
repo_key=repo_key,
expected_prefix=expected_prefix
)
))

repo_names = {}
sub_result = check_subscription_status(logger)
logger.info(f"validate_local_repo_config: Subscription status: {sub_result}")
Expand Down
10 changes: 5 additions & 5 deletions common/library/module_utils/local_repo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,10 @@
}

CLI_FILE_PATH = "/root/.config/pulp/cli.toml"
POST_TIMEOUT = 3600 # seconds
TAR_POLL_VAL = 45 # minutes
FILE_POLL_VAL = 1 # minutes
ISO_POLL_VAL = 45 # minutes
TAR_TIMEOUT_MIN = 45 # minutes
FILE_TIMEOUT_MIN = 1 # minutes
ISO_TIMEOUT_MIN = 45 # minutes
TASK_POLL_INTERVAL = 10 # seconds
FILE_URI = "/pulp/api/v3/content/file/files/"
PULP_SSL_CA_CERT = "/etc/pki/ca-trust/source/anchors/pulp_webserver.crt"
# ----------------------------
Expand Down Expand Up @@ -160,7 +160,7 @@
"get_repo_version": "pulp container repository show --href %s",
"list_tags_by_version": "pulp show --href /pulp/api/v3/content/container/tags/?repository_version=%s",
"rename_repository": "pulp container repository update --name %s --new-name %s",
"orphan_cleanup": "pulp orphan cleanup",
"orphan_cleanup": "pulp orphan cleanup --protection-time 0",
"container_distribution_show": "pulp container distribution show --name %s | jq .repository",
"show_repository_version": "pulp container repository show --href %s | jq .latest_version_href",
"list_image_tags": "pulp show --href /pulp/api/v3/content/container/tags/?repository_version=%s"
Expand Down
33 changes: 17 additions & 16 deletions common/library/module_utils/local_repo/download_common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,10 +37,10 @@
pulp_file_commands,
pulp_rpm_commands,
CLI_FILE_PATH,
POST_TIMEOUT,
ISO_POLL_VAL,
TAR_POLL_VAL,
FILE_POLL_VAL,
ISO_TIMEOUT_MIN,
TAR_TIMEOUT_MIN,
FILE_TIMEOUT_MIN,
TASK_POLL_INTERVAL,
FILE_URI,
PULP_SSL_CA_CERT
)
Expand Down Expand Up @@ -201,15 +201,15 @@ def wait_for_task(task_href, base_url, username, password, logger, timeout=3600,
logger.error("Timeout waiting for task to complete")
return False

def handle_file_upload(repository_name, relative_path, file_url, poll_interval, logger):
def handle_file_upload(repository_name, relative_path, file_url, timeout_minutes, logger):
"""
Ensure repository exists, then POST a file to Pulp and wait for the task to complete.

Args:
repository_name (str): Name of the repository.
relative_path (str): Relative path for the file in the repository.
file_url (str): URL of the file to upload.
poll_interval: Polling time
timeout_minutes (int): Maximum time in minutes to wait for task completion.
logger (logging.Logger): Logger instance.

Returns:
Expand Down Expand Up @@ -262,31 +262,32 @@ def handle_file_upload(repository_name, relative_path, file_url, poll_interval,
return "Failed"

# Wait for task completion
logger.info(f"Waiting for task {task_href} to complete...")
timeout_seconds = timeout_minutes * 60
logger.info(f"Waiting for task {task_href} to complete (timeout: {timeout_minutes} min)...")
task_result = wait_for_task(task_href, base_url, config["username"], passcode,
logger, timeout=POST_TIMEOUT, interval=poll_interval)
logger, timeout=timeout_seconds, interval=TASK_POLL_INTERVAL)
if task_result:
logger.info(f"File successfully uploaded to repository '{repository_name}'.")
return "Success"
else:
logger.error(f"Task {task_href} failed or timed out. File upload to repository '{repository_name}' failed.")
return "Failed"

def handle_post_request(repository_name, relative_path, base_path, file_url, poll_interval,logger):
def handle_post_request(repository_name, relative_path, base_path, file_url, timeout_minutes,logger):
"""
Handles the full Pulp upload and distribution process for a given repository and file.
Args:
repository_name (str): Name of the Pulp repository.
relative_path (str): Path where the file should be stored inside the repository.
base_path (str): The base path for the distribution.
file_url (str): URL of the file to be uploaded.
poll_interval: Interval for polling
timeout_minutes (int): Maximum time in minutes to wait for upload task completion.
logger (logging.Logger): Logger for logging messages and errors.

Returns:
str: "Success" if the operation completes successfully, "Failed" otherwise.
"""
result = handle_file_upload(repository_name, relative_path, file_url, poll_interval,logger)
result = handle_file_upload(repository_name, relative_path, file_url, timeout_minutes,logger)
if result =="Success":
distribution_name = repository_name
logger.info("Creating publication...")
Expand Down Expand Up @@ -483,7 +484,7 @@ def process_manifest(file,repo_store_path, status_file_path, cluster_os_type, cl
relative_path = output_file
base_path = manifest_directory.strip("/")
status = handle_post_request(repository_name, relative_path,
base_path, url, FILE_POLL_VAL, logger)
base_path, url, FILE_TIMEOUT_MIN, logger)
except Exception as e:
logger.error(f"Error processing manifest: {e}")
status= "Failed"
Expand Down Expand Up @@ -775,7 +776,7 @@ def process_tarball(package, repo_store_path, status_file_path, version_variable
if url:
try:
status = handle_post_request(repository_name, relative_path,
base_path, url, TAR_POLL_VAL,logger)
base_path, url, TAR_TIMEOUT_MIN,logger)
except Exception as e:
logger.error(f"Error processing tarball: {e}")
status = "Failed"
Expand Down Expand Up @@ -882,7 +883,7 @@ def process_iso(package, repo_store_path, status_file_path,
# non-zero for failure)
subprocess.run(['wget', '-q', '--spider', '--tries=1', url], check=True)
status = handle_post_request(repository_name, relative_path,
base_path, url, ISO_POLL_VAL,logger)
base_path, url, ISO_TIMEOUT_MIN,logger)
except subprocess.CalledProcessError as e:
logger.error(f"Error executing iso commands: {e}")
status = "Failed"
Expand Down Expand Up @@ -1176,4 +1177,4 @@ def process_rpm_file(package, repo_store_path, status_file_path, cluster_os_type
# Write the status to the file
write_status_to_file(status_file_path, package_name, package_type, status, logger, file_lock, repo_name)
logger.info("#" * 30 + f" {process_rpm_file.__name__} end " + "#" * 30)
return status
return status
2 changes: 1 addition & 1 deletion common/library/modules/parallel_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def update_status_csv(csv_dir, software, overall_status,slogger):

# Transform the new status.
transformed_status = re.sub(r'failure', 'failed', overall_status.lower())
transformed_status = re.sub(r'partial', 'failed', overall_status.lower())
transformed_status = re.sub(r'timeout', 'failed', transformed_status)

# Update or add the entry for each given software.
if isinstance(software, list):
Expand Down
Loading