diff --git a/callback_plugins/custom_logger.py b/callback_plugins/custom_logger.py index 1dec950b..50dd0e00 100644 --- a/callback_plugins/custom_logger.py +++ b/callback_plugins/custom_logger.py @@ -85,7 +85,7 @@ def log_summary_results(self, host): print("The host %s does not have any results" % host) return - with open(file_path, 'w') as f: + with open(file_path, 'a') as f: f.write(f"Host: {host}\n") f.write(f"Tasks Succeeded: {self.results[host]['passed']}\n") f.write(f"Tasks Failed: {self.results[host]['failed']}\n") diff --git a/roles/test_alerts/tasks/test_create_an_alert.yml b/roles/test_alerts/tasks/test_create_an_alert.yml index 6d1c12f9..54a85c33 100644 --- a/roles/test_alerts/tasks/test_create_an_alert.yml +++ b/roles/test_alerts/tasks/test_create_an_alert.yml @@ -16,13 +16,13 @@ labels: prometheus: default role: alert-rules - name: prometheus-alarm-rules + name: fvt-testing-prometheus-alarm-rules namespace: service-telemetry spec: groups: - name: ./openstack.rules rules: - - alert: Collectd metrics receive rate is zero + - alert: FVT_TESTING Collectd metrics receive rate is zero expr: rate(sg_total_collectd_msg_received_count[1m]) == 0 EOF changed_when: false @@ -34,5 +34,21 @@ cmd: | curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules register: cmd_output + changed_when: true + + always: + - name: "Delete the PrometheusRule" + ansible.builtin.command: + cmd: | + oc delete prometheusrule.{{ observability_api }} fvt-testing-prometheus-alarm-rules + register: delete_prom + changed_when: delete_prom.rc == 0 + + - name: Wait up to two minutes until the rule is deleted + ansible.builtin.command: + cmd: | + curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules + retries: 12 + delay: 10 + until: 'not "FVT_TESTING Collectd metrics receive rate is zero" in cmd_output.stdout' changed_when: false - failed_when: cmd_output.rc != 0 diff --git a/roles/test_alerts/tasks/test_creating_a_standard_alert_route_in_alert_manager.yml b/roles/test_alerts/tasks/test_creating_a_standard_alert_route_in_alert_manager.yml index 7a63c7f8..b14f692a 100644 --- a/roles/test_alerts/tasks/test_creating_a_standard_alert_route_in_alert_manager.yml +++ b/roles/test_alerts/tasks/test_creating_a_standard_alert_route_in_alert_manager.yml @@ -36,19 +36,42 @@ ansible.builtin.debug: var: alertmanager_secret - - name: "RHELOSP-148697 Interrupt metrics flow by preventing the QDR from running" + - name: "RHELOSP-144965 Create the alert" ansible.builtin.shell: cmd: | - for i in {1..15}; do oc delete po -l application=default-interconnect; sleep 1; done + oc apply -f - <- oc exec -it prometheus-default-0 -c prometheus -- /bin/sh -c 'curl -k -H \ "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ - https://default-alertmanager-proxy:9095/api/v1/alerts' | grep 'active' | grep 'Collectd metrics receive rate is zero' + https://default-alertmanager-proxy:9095/api/v1/alerts' | grep 'active' | grep 'FVT_TESTING Collectd metrics receive rate is zero' register: cmd_output changed_when: false failed_when: cmd_output.stdout_lines | length == 0 @@ -56,11 +79,28 @@ - name: "RHELOSP-148699 Verify that the alert is firing in Prometheus" ansible.builtin.shell: cmd: >- - /usr/bin/curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts | grep 'firing' | grep 'Collectd metrics receive rate is zero' + /usr/bin/curl -k {{ prom_auth_string }} -g https://{{ prom_url }}/api/v1/alerts | grep 'firing' | grep 'FVT_TESTING Collectd metrics receive rate is zero' register: cmd_output changed_when: false failed_when: cmd_output.stdout_lines | length == 0 + always: + - name: "Delete the PrometheusRule" + ansible.builtin.command: + cmd: | + oc delete prometheusrule.{{ observability_api }} fvt-testing-prometheus-alarm-rules-alertmanager + register: delete_prom + changed_when: delete_prom.rc == 0 + + - name: "Wait up to two minutes until the rule is deleted" + ansible.builtin.command: + cmd: | + curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules + retries: 12 + delay: 10 + until: 'not "FVT_TESTING Collectd metrics receive rate is zero" in cmd_output.stdout' + changed_when: false + - name: "Wait 2 minutes to make sure all SG pods are back to normal" ansible.builtin.pause: minutes: 2 diff --git a/roles/test_verify_email/tasks/main.yml b/roles/test_verify_email/tasks/main.yml index 6cd00379..033d6e38 100644 --- a/roles/test_verify_email/tasks/main.yml +++ b/roles/test_verify_email/tasks/main.yml @@ -1,5 +1,16 @@ --- # tasks file for roles/test_verify_email + +- name: "Set the prom auth" + ansible.builtin.include_role: + name: client_side_tests + tasks_from: get_prom_info.yml + vars: + prom_auth_method: token + when: + - prom_auth_string is not defined + - prom_url is not defined + - name: "Get the observability strategy and set observability_api" ansible.builtin.include_role: name: test_alerts @@ -18,13 +29,13 @@ labels: prometheus: default role: alert-rules - name: prometheus-alarm-rules + name: fvt-testing-prometheus-alarm-rules-email namespace: service-telemetry spec: groups: - name: ./openstack.rules rules: - - alert: Collectd metrics receive rate is zero + - alert: FVT_TESTING Collectd metrics receive rate is zero expr: rate(sg_total_collectd_msg_received_count[1m]) == 0 EOF changed_when: false @@ -54,6 +65,21 @@ changed_when: false always: + - name: "Delete the PrometheusRule" + ansible.builtin.command: | + oc delete prometheusrule.{{ observability_api }} fvt-testing-prometheus-alarm-rules-email + register: delete_prom + changed_when: delete_prom.rc == 0 + + - name: "Wait up to two minutes until the rule is deleted" + ansible.builtin.command: + cmd: | + curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules + retries: 12 + delay: 10 + until: 'not "FVT_TESTING Collectd metrics receive rate is zero" in cmd_output.stdout' + changed_when: false + - name: "RHELOSP-176046 Remove alertmanagerConfigManifest from the ServiceTelemetry object" ansible.builtin.shell: cmd: |