Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions discovery/roles/discovery_validations/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@
bmc_password: "{{ hostvars['localhost']['bmc_password'] }}"
no_log: true

# Read the file at `oim_metadata_file_path` and parse its YAML content into the
# `oim_metadata` fact used by the tasks that follow.
- name: Load OIM metadata
  ansible.builtin.set_fact:
    oim_metadata: >-
      {{ lookup('ansible.builtin.file', oim_metadata_file_path) | from_yaml }}

# Derive `upgrade_enabled` from the loaded metadata in a single step: the flag
# is true only when `upgrade_backup_dir` is present and non-empty.
# `default('', true)` maps a missing, null, or otherwise falsy value to '' so
# that `length` is never applied to a null value (key present but left empty).
- name: Set upgrade_enabled flag from metadata
  ansible.builtin.set_fact:
    upgrade_enabled: "{{ (oim_metadata.upgrade_backup_dir | default('', true) | length) > 0 }}"

- name: Include discovery inputs
ansible.builtin.include_tasks: include_inputs.yml
with_items: "{{ discovery_inputs }}"
Expand Down
148 changes: 148 additions & 0 deletions discovery/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---

# Re-apply the iDRAC telemetry StatefulSet during an upgrade and report whether
# the receiver pod becomes Ready. The whole block runs only when `kube_vip` is
# set and iDRAC telemetry support is enabled on localhost; that condition is
# inherited by every task below, so it is not repeated on individual tasks.
# Cluster-facing steps use `failed_when: false`, so failures are displayed
# rather than aborting the play.
- name: Apply telemetry configurations for upgrade
  when:
    - kube_vip is defined
    - kube_vip | length > 0
    - hostvars['localhost']['idrac_telemetry_support'] | default(false) | bool
  block:
    # NOTE(review): this stat is not delegated while kubectl below runs on
    # `kube_vip` - presumably the path is on a share visible to both; confirm.
    - name: Check if telemetry deployment file exists
      ansible.builtin.stat:
        path: "{{ idrac_telemetry_statefulset_path }}"
      register: telemetry_stat

    # `argv` passes the manifest path as a single argument, so a path that
    # contains whitespace is not split into several kubectl arguments.
    - name: Apply iDRAC telemetry StatefulSet using kubectl
      ansible.builtin.command:
        argv:
          - kubectl
          - apply
          - "-f"
          - "{{ idrac_telemetry_statefulset_path }}"
      delegate_to: "{{ kube_vip }}"
      register: kubectl_apply_result
      changed_when: "'configured' in kubectl_apply_result.stdout or 'created' in kubectl_apply_result.stdout"
      failed_when: false
      when:
        - telemetry_stat.stat.exists | default(false)

    # Skipped automatically when the apply task did not run (no stdout_lines).
    - name: Display kubectl apply result
      ansible.builtin.debug:
        msg: "{{ kubectl_apply_result.stdout_lines }}"
      when:
        - kubectl_apply_result is defined
        - kubectl_apply_result.stdout_lines is defined

    # Waits up to 120 seconds for pods labelled app=idrac-telemetry-receiver
    # to report the Ready condition; a timeout is tolerated.
    - name: Wait for idrac telemetry receiver to be ready
      kubernetes.core.k8s_info:
        api_version: v1
        kind: Pod
        namespace: "{{ telemetry_namespace }}"
        label_selectors:
          - "app=idrac-telemetry-receiver"
        wait: true
        wait_condition:
          type: Ready
          status: "True"
        wait_timeout: 120
      delegate_to: "{{ kube_vip }}"
      register: idrac_telemetry_receiver_ready
      failed_when: false

    - name: Display idrac telemetry receiver ready status
      ansible.builtin.debug:
        msg: "{{ idrac_telemetry_receiver_ready }}"
      when:
        - idrac_telemetry_receiver_ready is defined

# Refresh the LDMS aggregator during an upgrade: detect whether the
# `nersc-ldms-aggr` StatefulSet exists, make sure decomp.json is present on the
# k8s client share, then trigger a rolling restart and report pod readiness.
# Runs only when `kube_vip` is set and LDMS support is enabled on localhost.
# Cluster-facing steps use `failed_when: false`, so failures do not abort the
# play.
- name: Apply LDMS configurations for upgrade
  when:
    - kube_vip is defined
    - kube_vip | length > 0
    - hostvars['localhost']['ldms_support'] | default(false) | bool
  block:
    - name: Check if LDMS aggregator is running on service k8s cluster
      kubernetes.core.k8s_info:
        api_version: apps/v1
        kind: StatefulSet
        name: nersc-ldms-aggr
        namespace: "{{ telemetry_namespace }}"
      delegate_to: "{{ kube_vip }}"
      register: ldms_statefulset_info
      failed_when: false

    # True only when the query returned at least one StatefulSet resource.
    - name: Set LDMS running state
      ansible.builtin.set_fact:
        ldms_running: "{{ ldms_statefulset_info.resources is defined and ldms_statefulset_info.resources | length > 0 }}"

    - name: Check if decomp.json exists
      ansible.builtin.stat:
        path: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
      register: decomp_json_stat

    # Only seeds the file when it is missing; an existing file is left as is.
    - name: Copy decomp.json if it doesn't exist
      ansible.builtin.copy:
        src: files/scripts/decomp.json
        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
      when: not decomp_json_stat.stat.exists

    # The restart, wait and report steps share one precondition, so they are
    # grouped under a single `when`.
    # NOTE(review): `ldms_conf_file` and `ldms_bin_file` are not registered in
    # this task file; they presumably come from an earlier task. If they are
    # absent, the `default(false)` guards appear intended to skip the restart
    # silently - confirm that is the desired behaviour on upgrade.
    - name: Restart LDMS aggregator and report readiness
      when:
        - ldms_running | default(false) | bool
        - ldms_conf_file.stat.exists | default(false)
        - ldms_bin_file.stat.exists | default(false)
      block:
        # Patching the pod-template annotation with a new timestamp makes the
        # StatefulSet roll its pods. `now()` is evaluated at task time, so it
        # does not depend on gathered facts and is never a stale cached value
        # that would leave the annotation unchanged.
        - name: Restart LDMS aggregator StatefulSet
          kubernetes.core.k8s:
            state: present
            definition:
              apiVersion: apps/v1
              kind: StatefulSet
              metadata:
                name: nersc-ldms-aggr
                namespace: "{{ telemetry_namespace }}"
              spec:
                template:
                  metadata:
                    annotations:
                      kubectl.kubernetes.io/restartedAt: "{{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}"
          delegate_to: "{{ kube_vip }}"
          failed_when: false

        # Waits up to 120 seconds for pods labelled app=nersc-ldms-aggr to
        # report the Ready condition; a timeout is tolerated.
        - name: Wait for LDMS aggregator pod to be ready after restart
          kubernetes.core.k8s_info:
            api_version: v1
            kind: Pod
            namespace: "{{ telemetry_namespace }}"
            label_selectors:
              - "app=nersc-ldms-aggr"
            wait: true
            wait_condition:
              type: Ready
              status: "True"
            wait_timeout: 120
          delegate_to: "{{ kube_vip }}"
          register: ldms_pod_ready
          failed_when: false

        # Picks the ready / not-ready message based on whether the wait
        # returned any pod resources.
        - name: Display LDMS aggregator restart status
          ansible.builtin.debug:
            msg: "{{ ldms_pod_ready_msg if (ldms_pod_ready.resources | default([]) | length > 0) else ldms_pod_not_ready_msg }}"
5 changes: 5 additions & 0 deletions discovery/roles/telemetry/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,8 @@
when:
- hostvars['localhost']['ldms_support']
- pxe_changed | default(false) | bool

# Include apply_telemetry_on_upgrade.yml only when localhost's
# `upgrade_enabled` fact is truthy (treated as false when unset).
- name: Apply telemetry configurations on upgrade
  when: hostvars['localhost']['upgrade_enabled'] | default(false) | bool
  ansible.builtin.include_tasks: apply_telemetry_on_upgrade.yml
6 changes: 6 additions & 0 deletions discovery/roles/telemetry/tasks/update_ldms_agg_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
mode: '0755'
when: not values_file_exists.stat.exists

# Copy the role's decomp.json into the LDMS aggregator scripts directory under
# the k8s client share path taken from localhost's hostvars.
- name: Copy ldms decompose.json
  ansible.builtin.copy:
    mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
    src: files/scripts/decomp.json

- name: Generate ldms_machine_config.json from template
ansible.builtin.template:
src: 'telemetry/ldms/ldms_machine_config.json.j2'
Expand Down
2 changes: 2 additions & 0 deletions discovery/roles/telemetry/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ victoria_templates_common:
- src: 'telemetry/victoria/victoria-agent-deployment.yaml.j2'
dest: 'victoria-agent-deployment.yaml'

# Location of the iDRAC telemetry StatefulSet manifest, built from localhost's
# `k8s_client_share_path`.
idrac_telemetry_statefulset_path: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/idrac_telemetry_statefulset.yaml"

# Single-node templates (used when victoria_cluster.enabled: false)
victoria_templates_single:
- src: 'telemetry/victoria/victoria-statefulset.yaml.j2'
Expand Down
Loading