Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions discovery/roles/slurm_config/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@ conf_dict_items: {}

__default_config:
cgroup:
# CgroupAutomount: true
CgroupPlugin: autodetect
ConstrainCores: true
ConstrainDevices: true
ConstrainRAMSpace: true
ConstrainSwapSpace: true
ConstrainCores: 'yes'
ConstrainDevices: 'yes'
ConstrainRAMSpace: 'yes'
ConstrainSwapSpace: 'yes'
slurm:
SlurmUser: "{{ slurm_user }}"
SlurmctldPort: 6817
Expand Down
8 changes: 8 additions & 0 deletions examples/slurm_conf/cgroup.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Sample Cgroup Configuration
# This is a sample configuration - customize according to your environment
# For more information, see https://slurm.schedmd.com/cgroup.conf.html
CgroupPlugin=autodetect
ConstrainCores=yes
ConstrainDevices=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
64 changes: 64 additions & 0 deletions examples/slurm_conf/slurm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Sample Slurm Configuration File
# Replace values marked with <PLACEHOLDER> with your actual values
# This is a sample configuration - customize according to your environment

# By default, Omnia merges custom configuration sources with defaults
# and existing configurations to ensure a complete and valid setup.

# For supported conf parameters, see https://slurm.schedmd.com/slurm.conf.html

# CLUSTER IDENTITY
ClusterName=slurm_cluster
SlurmctldHost=<CONTROLLER_HOSTNAME>

# AUTHENTICATION
AuthType=auth/munge
CredType=cred/munge

# SLURM USER
SlurmUser=slurm

# DIRECTORIES AND FILES
StateSaveLocation=/var/spool/slurmctld
SlurmdSpoolDir=/var/spool/slurmd
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
Epilog=/etc/slurm/epilog.sh

# PORTS
SlurmctldPort=6817
SlurmdPort=6818

# PLUGINS
PluginDir=/usr/lib64/slurm
ProctrackType=proctrack/cgroup
PrologFlags=contain
TaskPlugin=task/cgroup
MpiDefault=none
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30

# SCHEDULING
SchedulerType=sched/backfill
SelectType=select/cons_tres

# TIMEOUTS
SlurmctldTimeout=120
SlurmdTimeout=300

# PARAMETERS
ReturnToService=2
SlurmctldParameters=enable_configless

# ACCOUNTING (Optional)
AccountingStorageHost=<SLURMDBD_HOSTNAME>
AccountingStoragePort=6819
AccountingStorageType=accounting_storage/slurmdbd

# COMPUTE NODES
NodeName=<NODE_HOSTNAME> Sockets=2 CoresPerSocket=8 ThreadsPerCore=2 RealMemory=32000 State=UNKNOWN

# PARTITIONS
# Define at least one partition
PartitionName=DEFAULT Nodes=ALL MaxTime=INFINITE State=UP
PartitionName=normal Nodes=<NODE_LIST> Default=YES MaxTime=INFINITE State=UP
23 changes: 23 additions & 0 deletions examples/slurm_conf/slurmdbd.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Sample SlurmDBD Configuration File
# Replace values marked with <PLACEHOLDER> with your actual values
# This is a sample configuration - customize according to your environment
# For more information, see https://slurm.schedmd.com/slurmdbd.conf.html

# Authentication
AuthType=auth/munge
SlurmUser=slurm

# Database Daemon Configuration
DbdHost=<DBD_HOST>
DbdPort=6819
LogFile=/var/log/slurm/slurmdbd.log
PidFile=/var/run/slurmdbd.pid
PluginDir=/usr/lib64/slurm

# Database Connection
StorageType=accounting_storage/mysql
StorageHost=<DB_HOST>
StoragePort=3306
StorageLoc=slurm_acct_db
StorageUser=slurm
StoragePass=<db_password>
6 changes: 5 additions & 1 deletion input/config/aarch64/rhel/10.0/openmpi.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
{ "package": "openmpi",
"type": "tarball",
"url": "https://download.open-mpi.org/release/open-mpi/v{{ openmpi_version.split('.')[:2] | join('.') }}/openmpi-{{ openmpi_version }}.tar.gz"
}
},
{"package": "pmix-devel", "type": "rpm", "repo_name": "aarch64_appstream"},
{"package": "munge-devel", "type": "rpm", "repo_name": "aarch64_codeready-builder"},
{"package": "gcc-c++", "type": "rpm", "repo_name": "aarch64_appstream"},
{"package": "make", "type": "rpm", "repo_name": "aarch64_appstream"}
]
}
}
2 changes: 0 additions & 2 deletions input/config/aarch64/rhel/10.0/slurm_custom.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
"slurm_custom": {
"cluster": [
{"package": "munge", "type": "rpm", "repo_name": "aarch64_appstream"},
{"package": "munge-devel", "type": "rpm", "repo_name": "aarch64_codeready-builder"},
{"package": "firewalld", "type": "rpm", "repo_name": "aarch64_baseos"},
{"package": "python3-firewall", "type": "rpm", "repo_name": "aarch64_baseos"},
{"package": "pmix", "type": "rpm", "repo_name": "aarch64_appstream"},
{"package": "pmix-devel", "type": "rpm", "repo_name": "aarch64_appstream"},
{"package": "nvcr.io/nvidia/hpc-benchmarks", "tag": "25.09", "type": "image"},
{"package": "apptainer", "type": "rpm", "repo_name": "epel" },
{"package": "doca-ofed", "type": "rpm_repo", "repo_name": "doca" }
Expand Down
10 changes: 6 additions & 4 deletions input/omnia_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@
# <mapping> or <filepath>
# <mapping> Supply the configuration values directly as a key–value map
# <filepath> Supply the absolute path to a custom configuration file
# The path can be any location inside the omnia_core container.
# Custom conf files can also be placed in the default input path
# "/opt/omnia/input/project_default".
# The conf files supported by slurm are
# slurm
# cgroup
Expand Down Expand Up @@ -135,10 +138,9 @@ slurm_cluster:
# OR

# config_sources:
# slurm: /path/to/custom_slurm.conf
# cgroup: /path/to/custom_cgroup.conf
# slurmdbd: /path/to/custom_slurmdbd.conf
# gres: /path/to/custom_gres.conf
# slurm: /opt/omnia/input/project_default/slurm.conf
# cgroup: /opt/omnia/input/project_default/cgroup.conf
# slurmdbd: /opt/omnia/input/project_default/slurmdbd.conf

# ----------------------------SERVICE K8S------------------------------------------------------
# For the service k8s cluster, the parameters below are required (List)
Expand Down