Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions discovery/roles/slurm_config/tasks/create_slurm_dir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@
- slurm_custom_aarch64.slurm_node is defined
- slurm_custom_aarch64.slurm_node.cluster | selectattr('package', 'equalto', 'cuda-run') | list | length > 0

# Store the package name of the first x86_64 NVHPC tarball entry found in
# slurm_custom_x86_64.slurm_node.cluster as the fact `nvhpc_pkg_name_x86_64`.
# The task is skipped when the input is undefined or no entry matches.
- name: Extract NVHPC package name for x86_64 from slurm_custom.json
  vars:
    # Regex applied with the `match` test to each entry's `package` field.
    nvhpc_pkg_pattern_x86_64: '^nvhpc_.*_Linux_x86_64_cuda_.*'
    # Entries of type 'tarball' whose package name matches the pattern.
    # Evaluated lazily, so it is only templated after the `is defined`
    # guards in `when` have passed.
    nvhpc_matches_x86_64: >-
      {{ slurm_custom_x86_64.slurm_node.cluster
         | selectattr('type', 'equalto', 'tarball')
         | selectattr('package', 'match', nvhpc_pkg_pattern_x86_64)
         | list }}
  ansible.builtin.set_fact:
    nvhpc_pkg_name_x86_64: "{{ (nvhpc_matches_x86_64 | first).package }}"
  when:
    - slurm_custom_x86_64 is defined
    - slurm_custom_x86_64.slurm_node is defined
    - nvhpc_matches_x86_64 | length > 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move the pattern string `nvhpc_.*_Linux_x86_64_cuda_.*` into a variable, if possible?


# Store the package name of the first aarch64 NVHPC tarball entry found in
# slurm_custom_aarch64.slurm_node.cluster as the fact `nvhpc_pkg_name_aarch64`.
# The task is skipped when the input is undefined or no entry matches.
- name: Extract NVHPC package name for aarch64 from slurm_custom.json
  vars:
    # Regex applied with the `match` test to each entry's `package` field.
    nvhpc_pkg_pattern_aarch64: '^nvhpc_.*_Linux_aarch64_cuda_.*'
    # Entries of type 'tarball' whose package name matches the pattern.
    # Evaluated lazily, so it is only templated after the `is defined`
    # guards in `when` have passed.
    nvhpc_matches_aarch64: >-
      {{ slurm_custom_aarch64.slurm_node.cluster
         | selectattr('type', 'equalto', 'tarball')
         | selectattr('package', 'match', nvhpc_pkg_pattern_aarch64)
         | list }}
  ansible.builtin.set_fact:
    nvhpc_pkg_name_aarch64: "{{ (nvhpc_matches_aarch64 | first).package }}"
  when:
    - slurm_custom_aarch64 is defined
    - slurm_custom_aarch64.slurm_node is defined
    - nvhpc_matches_aarch64 | length > 0

- name: Set facts for slurm
ansible.builtin.set_fact:
nfs_storage_name: "{{ slurm_cluster[0].nfs_storage_name }}"
Expand Down
7 changes: 3 additions & 4 deletions discovery/roles/slurm_config/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ offline_path_aarch64: []
# Absolute path of the SSH private key file referenced by this role.
# NOTE(review): the tasks that consume this value are not visible here — confirm usage.
ssh_private_key_path: /root/.ssh/oim_rsa

# nvidia sdk vars
# Dynamically extracted from slurm_custom.json
# The package names are read from facts stored on the 'oim' host instead of
# being pinned here; each name is defined exactly once so no duplicate key
# silently overrides another.
# NOTE(review): presumably these facts are the ones set by the
# "Extract NVHPC package name ..." tasks in tasks/create_slurm_dir.yml —
# confirm those tasks run against 'oim'; if they are skipped, these lookups
# are undefined when templated.
nvhpc_pkg_name_x86_64: "{{ hostvars['oim']['nvhpc_pkg_name_x86_64'] }}"
nvhpc_pkg_name_aarch64: "{{ hostvars['oim']['nvhpc_pkg_name_aarch64'] }}"

# Relative paths of the NVHPC tarballs, built from the cluster OS version
# stored on 'localhost' and the per-architecture package name above.
nvhpc_tarball_x86_64_relpath: "offline_repo/cluster/x86_64/rhel/{{ hostvars['localhost']['cluster_os_version'] }}/tarball/{{ nvhpc_pkg_name_x86_64 }}/{{ nvhpc_pkg_name_x86_64 }}.tar.gz" # noqa: yaml[line-length]
nvhpc_tarball_aarch64_relpath: "offline_repo/cluster/aarch64/rhel/{{ hostvars['localhost']['cluster_os_version'] }}/tarball/{{ nvhpc_pkg_name_aarch64 }}/{{ nvhpc_pkg_name_aarch64 }}.tar.gz" # noqa: yaml[line-length]
Expand Down
Loading