diff --git a/discovery/roles/discovery_validations/tasks/main.yml b/discovery/roles/discovery_validations/tasks/main.yml
index 2a71f86f7b..890498ea78 100644
--- a/discovery/roles/discovery_validations/tasks/main.yml
+++ b/discovery/roles/discovery_validations/tasks/main.yml
@@ -20,6 +20,19 @@
     bmc_password: "{{ hostvars['localhost']['bmc_password'] }}"
   no_log: true
 
+# Parse the OIM metadata YAML (file lookups are read on the control node).
+- name: Load OIM metadata
+  ansible.builtin.set_fact:
+    oim_metadata: "{{ lookup('file', oim_metadata_file_path) | from_yaml }}"
+
+# upgrade_enabled is true only when the metadata carries a non-empty
+# upgrade_backup_dir. default('', true) maps a missing key and a null value
+# to '', so an empty `upgrade_backup_dir:` entry yields false instead of
+# breaking the length filter.
+- name: Set upgrade_enabled flag from metadata
+  ansible.builtin.set_fact:
+    upgrade_enabled: "{{ (oim_metadata.upgrade_backup_dir | default('', true) | length) > 0 }}"
+
 - name: Include discovery inputs
   ansible.builtin.include_tasks: include_inputs.yml
   with_items: "{{ discovery_inputs }}"
diff --git a/discovery/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml b/discovery/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml
new file mode 100644
index 0000000000..d173d7e6f8
--- /dev/null
+++ b/discovery/roles/telemetry/tasks/apply_telemetry_on_upgrade.yml
@@ -0,0 +1,148 @@
+# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Re-apply the iDRAC telemetry StatefulSet when upgrading. The kubectl and
+# k8s_info steps use failed_when: false, so their errors do not stop the run.
+- name: Apply telemetry configurations for upgrade
+  when:
+    - kube_vip is defined
+    - kube_vip | length > 0
+    - hostvars['localhost']['idrac_telemetry_support'] | default(false) | bool
+  block:
+    # NOTE(review): stat is not delegated but kubectl runs on kube_vip;
+    # assumes both hosts see the same share path - confirm.
+    - name: Check if telemetry deployment file exists
+      ansible.builtin.stat:
+        path: "{{ idrac_telemetry_statefulset_path }}"
+      register: telemetry_stat
+
+    - name: Apply iDRAC telemetry StatefulSet using kubectl
+      ansible.builtin.command:
+        cmd: "kubectl apply -f {{ idrac_telemetry_statefulset_path }}"
+      delegate_to: "{{ kube_vip }}"
+      register: kubectl_apply_result
+      changed_when: "'configured' in kubectl_apply_result.stdout or 'created' in kubectl_apply_result.stdout"
+      failed_when: false
+      when:
+        - telemetry_stat.stat.exists | default(false)
+
+    # stderr is shown too, so an apply error masked by failed_when is visible.
+    - name: Display kubectl apply result
+      ansible.builtin.debug:
+        msg: "{{ kubectl_apply_result.stdout_lines + (kubectl_apply_result.stderr_lines | default([])) }}"
+      when:
+        - kubectl_apply_result.stdout_lines is defined
+
+    - name: Wait for idrac telemetry receiver to be ready
+      kubernetes.core.k8s_info:
+        api_version: v1
+        kind: Pod
+        namespace: "{{ telemetry_namespace }}"
+        label_selectors:
+          - "app=idrac-telemetry-receiver"
+        wait: true
+        wait_condition:
+          type: Ready
+          status: "True"
+        wait_timeout: 120
+      delegate_to: "{{ kube_vip }}"
+      register: idrac_telemetry_receiver_ready
+      failed_when: false
+
+    - name: Display idrac telemetry receiver ready status
+      ansible.builtin.debug:
+        msg: "{{ idrac_telemetry_receiver_ready }}"
+
+# Restart the LDMS aggregator StatefulSet when upgrading (k8s steps are best-effort).
+- name: Apply LDMS configurations for upgrade
+  when:
+    - kube_vip is defined
+    - kube_vip | length > 0
+    - hostvars['localhost']['ldms_support'] | default(false) | bool
+  block:
+    - name: Check if LDMS aggregator is running on service k8s cluster
+      kubernetes.core.k8s_info:
+        api_version: apps/v1
+        kind: StatefulSet
+        name: nersc-ldms-aggr
+        namespace: "{{ telemetry_namespace }}"
+      delegate_to: "{{ kube_vip }}"
+      register: ldms_statefulset_info
+      failed_when: false
+
+    - name: Set LDMS running state
+      ansible.builtin.set_fact:
+        ldms_running: "{{ ldms_statefulset_info.resources is defined and ldms_statefulset_info.resources | length > 0 }}"
+
+    # force: false copies only when the destination is missing, so an
+    # existing decomp.json is never overwritten.
+    - name: Copy decompose.json if it doesn't exist
+      ansible.builtin.copy:
+        src: files/scripts/decomp.json
+        dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
+        mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
+        force: false
+
+    # NOTE(review): ldms_conf_file / ldms_bin_file are not registered in this
+    # file; when undefined, default(false) silently skips the restart - confirm.
+    # now() keeps the annotation fresh without depending on gathered facts.
+    - name: Restart LDMS aggregator StatefulSet
+      kubernetes.core.k8s:
+        state: present
+        definition:
+          apiVersion: apps/v1
+          kind: StatefulSet
+          metadata:
+            name: nersc-ldms-aggr
+            namespace: "{{ telemetry_namespace }}"
+          spec:
+            template:
+              metadata:
+                annotations:
+                  kubectl.kubernetes.io/restartedAt: "{{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}"
+      delegate_to: "{{ kube_vip }}"
+      failed_when: false
+      when:
+        - ldms_running | default(false) | bool
+        - ldms_conf_file.stat.exists | default(false)
+        - ldms_bin_file.stat.exists | default(false)
+
+    - name: Wait for LDMS aggregator pod to be ready after restart
+      kubernetes.core.k8s_info:
+        api_version: v1
+        kind: Pod
+        namespace: "{{ telemetry_namespace }}"
+        label_selectors:
+          - "app=nersc-ldms-aggr"
+        wait: true
+        wait_condition:
+          type: Ready
+          status: "True"
+        wait_timeout: 120
+      delegate_to: "{{ kube_vip }}"
+      register: ldms_pod_ready
+      failed_when: false
+      when:
+        - ldms_running | default(false) | bool
+        - ldms_conf_file.stat.exists | default(false)
+        - ldms_bin_file.stat.exists | default(false)
+
+    - name: Display LDMS aggregator restart status
+      ansible.builtin.debug:
+        msg: "{{ ldms_pod_ready_msg if (ldms_pod_ready.resources | default([]) | length > 0) else ldms_pod_not_ready_msg }}"
+      when:
+        - ldms_running | default(false) | bool
+        - ldms_conf_file.stat.exists | default(false)
+        - ldms_bin_file.stat.exists | default(false)
diff --git a/discovery/roles/telemetry/tasks/main.yml b/discovery/roles/telemetry/tasks/main.yml
index e4e3d1846a..2e9c3ac0da 100644
--- a/discovery/roles/telemetry/tasks/main.yml
+++ b/discovery/roles/telemetry/tasks/main.yml
@@ -65,3 +65,9 @@
   when:
     - hostvars['localhost']['ldms_support']
     - pxe_changed | default(false) | bool
+
+# Upgrade-only steps, gated on the upgrade_enabled fact read from localhost.
+- name: Apply telemetry configurations on upgrade
+  ansible.builtin.include_tasks: apply_telemetry_on_upgrade.yml
+  when:
+    - hostvars['localhost']['upgrade_enabled'] | default(false) | bool
diff --git a/discovery/roles/telemetry/tasks/update_ldms_agg_config.yml b/discovery/roles/telemetry/tasks/update_ldms_agg_config.yml
index ee6c0c7d75..64a5d694ca 100644
--- a/discovery/roles/telemetry/tasks/update_ldms_agg_config.yml
+++ b/discovery/roles/telemetry/tasks/update_ldms_agg_config.yml
@@ -25,6 +25,12 @@
     mode: '0755'
   when: not values_file_exists.stat.exists
 
+- name: Copy ldms decompose.json
+  ansible.builtin.copy:
+    src: files/scripts/decomp.json
+    dest: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/ldms/nersc-ldms-aggr/scripts/decomp.json"
+    mode: "{{ hostvars['localhost']['file_permissions_644'] }}"
+
 - name: Generate ldms_machine_config.json from template
   ansible.builtin.template:
     src: 'telemetry/ldms/ldms_machine_config.json.j2'
diff --git a/discovery/roles/telemetry/vars/main.yml b/discovery/roles/telemetry/vars/main.yml
index b757a752ae..af36a0b0d2 100644
--- a/discovery/roles/telemetry/vars/main.yml
+++ b/discovery/roles/telemetry/vars/main.yml
@@ -202,6 +202,9 @@ victoria_templates_common:
   - src: 'telemetry/victoria/victoria-agent-deployment.yaml.j2'
     dest: 'victoria-agent-deployment.yaml'
 
+# Manifest path that apply_telemetry_on_upgrade.yml stats and passes to kubectl apply.
+idrac_telemetry_statefulset_path: "{{ hostvars['localhost']['k8s_client_share_path'] }}/telemetry/deployments/idrac_telemetry_statefulset.yaml"
+
 # Single-node templates (used when victoria_cluster.enabled: false)
 victoria_templates_single:
   - src: 'telemetry/victoria/victoria-statefulset.yaml.j2'