From 75db5bd36d4fd1d952f3434dd65a74c645b37df1 Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Mon, 4 Mar 2024 02:03:19 +0100 Subject: [PATCH 1/2] ceph-exporter: add installation role Signed-off-by: Seena Fallah --- group_vars/all.yml.sample | 2 - group_vars/exporters.yml.sample | 30 ++++++++++ group_vars/mdss.yml.sample | 1 - infrastructure-playbooks/add-mon.yml | 5 ++ infrastructure-playbooks/cephadm-adopt.yml | 37 ++++++++++++ infrastructure-playbooks/cephadm.yml | 6 ++ infrastructure-playbooks/docker-to-podman.yml | 11 ++++ infrastructure-playbooks/purge-cluster.yml | 34 +++++++++++ infrastructure-playbooks/rolling_update.yml | 44 ++++++++++++++ ...inerized-to-containerized-ceph-daemons.yml | 31 ++++++++++ plugins/callback/installer_checkpoint.py | 5 ++ roles/ceph-exporter/defaults/main.yml | 24 ++++++++ roles/ceph-exporter/meta/main.yml | 14 +++++ roles/ceph-exporter/tasks/main.yml | 58 +++++++++++++++++++ roles/ceph-exporter/tasks/systemd.yml | 9 +++ .../templates/ceph-exporter.service.j2 | 50 ++++++++++++++++ roles/ceph-handler/handlers/main.yml | 12 ++++ .../tasks/check_running_containers.yml | 14 +++++ .../tasks/check_socket_non_container.yml | 14 +++++ roles/ceph-handler/tasks/handler_exporter.yml | 18 ++++++ roles/ceph-handler/tasks/main.yml | 11 ++++ roles/ceph-mds/defaults/main.yml | 1 - site-container.yml.sample | 40 +++++++++++++ tests/conftest.py | 3 + tests/functional/tests/test_install.py | 9 +++ tests/pytest.ini | 1 + 26 files changed, 480 insertions(+), 4 deletions(-) create mode 100644 group_vars/exporters.yml.sample create mode 100644 roles/ceph-exporter/defaults/main.yml create mode 100644 roles/ceph-exporter/meta/main.yml create mode 100644 roles/ceph-exporter/tasks/main.yml create mode 100644 roles/ceph-exporter/tasks/systemd.yml create mode 100644 roles/ceph-exporter/templates/ceph-exporter.service.j2 create mode 100644 roles/ceph-handler/tasks/handler_exporter.yml diff --git a/group_vars/all.yml.sample b/group_vars/all.yml.sample index a2651614a3..d596d53a72 100644 --- a/group_vars/all.yml.sample +++ b/group_vars/all.yml.sample @@ -541,8 +541,6 @@ dummy: # CONTAINER_IMAGE: "{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}" # TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES: "{{ ceph_tcmalloc_max_total_thread_cache }}" # args: -# - --setuser=ceph -# - --setgroup=ceph # - --default-log-to-file=false # - --default-log-to-stderr=true # - --default-log-stderr-prefix="debug " diff --git a/group_vars/exporters.yml.sample b/group_vars/exporters.yml.sample new file mode 100644 index 0000000000..9bb126b901 --- /dev/null +++ b/group_vars/exporters.yml.sample @@ -0,0 +1,30 @@ +--- +# Variables here are applicable to all host groups NOT roles + +# This sample file generated by generate_group_vars_sample.sh + +# Dummy variable to avoid error because ansible does not recognize the +# file as a good configuration file when no variable in it. +dummy: + +########### +# GENERAL # +########### + +#ceph_exporter_addr: "0.0.0.0" +#ceph_exporter_port: 9926 +#ceph_exporter_stats_period: 5 # seconds + +########## +# DOCKER # +########## + +# If you want to add parameters, you should retain the existing ones and include the new ones. 
+#ceph_exporter_container_params: +# args: +# - -f +# - --sock-dir=/var/run/ceph +# - --addrs={{ ceph_exporter_addr }} +# - --port={{ ceph_exporter_port }} +# - --stats-period={{ ceph_exporter_stats_period }} + diff --git a/group_vars/mdss.yml.sample b/group_vars/mdss.yml.sample index 592421b5c1..edc556532d 100644 --- a/group_vars/mdss.yml.sample +++ b/group_vars/mdss.yml.sample @@ -26,7 +26,6 @@ dummy: # Resource limitation # For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints # Default values are based from: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations -# These options can be passed using the 'ceph_mds_docker_extra_env' variable. #ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m" #ceph_mds_docker_cpu_limit: 4 diff --git a/infrastructure-playbooks/add-mon.yml b/infrastructure-playbooks/add-mon.yml index 166d9ceb60..78709a615b 100644 --- a/infrastructure-playbooks/add-mon.yml +++ b/infrastructure-playbooks/add-mon.yml @@ -101,6 +101,11 @@ name: ceph-crash when: containerized_deployment | bool + - name: Import ceph-exporter role + ansible.builtin.import_role: + name: ceph-exporter + when: containerized_deployment | bool + - name: Update config file on OSD nodes hosts: osds gather_facts: true diff --git a/infrastructure-playbooks/cephadm-adopt.yml b/infrastructure-playbooks/cephadm-adopt.yml index 1fb2aabeff..1ea7534d95 100644 --- a/infrastructure-playbooks/cephadm-adopt.yml +++ b/infrastructure-playbooks/cephadm-adopt.yml @@ -1187,6 +1187,43 @@ CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' +- name: Redeploy ceph-exporter daemons + hosts: + - "{{ mon_group_name|default('mons') }}" + - "{{ osd_group_name|default('osds') }}" + - "{{ mds_group_name|default('mdss') }}" + - "{{ rgw_group_name|default('rgws') }}" + - "{{ mgr_group_name|default('mgrs') }}" + - "{{ rbdmirror_group_name|default('rbdmirrors') }}" + become: true + gather_facts: false + any_errors_fatal: true + tasks: + - name: Import ceph-defaults role + ansible.builtin.import_role: + name: ceph-defaults + + - name: Stop and disable ceph-exporter systemd service + ansible.builtin.service: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + state: stopped + enabled: false + failed_when: false + + - name: Remove ceph-exporter systemd unit file + ansible.builtin.file: + path: /etc/systemd/system/ceph-exporter@.service + state: absent + + - name: Update the placement of ceph-exporter hosts + ansible.builtin.command: "{{ cephadm_cmd }} shell -k /etc/ceph/{{ cluster }}.client.admin.keyring --fsid {{ fsid }} -- ceph orch apply ceph-exporter --placement='label:ceph'" + run_once: true + changed_when: false + delegate_to: '{{ groups[mon_group_name][0] }}' + environment: + CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' + + - name: Redeploy alertmanager/grafana/prometheus daemons hosts: "{{ monitoring_group_name|default('monitoring') }}" serial: 1 diff --git a/infrastructure-playbooks/cephadm.yml b/infrastructure-playbooks/cephadm.yml index eae68da436..95c3896da9 100644 --- a/infrastructure-playbooks/cephadm.yml +++ b/infrastructure-playbooks/cephadm.yml @@ -312,6 +312,12 @@ environment: CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' + - name: Update the placement of 
ceph-exporter hosts + ansible.builtin.command: "{{ cephadm_cmd }} shell -- ceph --cluster {{ cluster }} orch apply ceph-exporter --placement='label:ceph'" + changed_when: false + environment: + CEPHADM_IMAGE: '{{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }}' + - name: Adjust monitoring service placement hosts: "{{ monitoring_group_name|default('monitoring') }}" become: true diff --git a/infrastructure-playbooks/docker-to-podman.yml b/infrastructure-playbooks/docker-to-podman.yml index b40363d503..784a244a55 100644 --- a/infrastructure-playbooks/docker-to-podman.yml +++ b/infrastructure-playbooks/docker-to-podman.yml @@ -194,6 +194,17 @@ inventory_hostname in groups.get(mgr_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Import ceph-exporter role + ansible.builtin.import_role: + name: ceph-exporter + tasks_from: systemd.yml + when: inventory_hostname in groups.get(mon_group_name, []) or + inventory_hostname in groups.get(osd_group_name, []) or + inventory_hostname in groups.get(mds_group_name, []) or + inventory_hostname in groups.get(rgw_group_name, []) or + inventory_hostname in groups.get(mgr_group_name, []) or + inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Dashboard configuration when: dashboard_enabled | bool block: diff --git a/infrastructure-playbooks/purge-cluster.yml b/infrastructure-playbooks/purge-cluster.yml index 6f7a378410..26cdf275cf 100644 --- a/infrastructure-playbooks/purge-cluster.yml +++ b/infrastructure-playbooks/purge-cluster.yml @@ -726,6 +726,40 @@ state: absent +- name: Purge ceph-exporter daemons + hosts: + - mons + - osds + - mdss + - rgws + - rbdmirrors + - mgrs + gather_facts: false + become: true + tasks: + - name: Import ceph-defaults role + ansible.builtin.import_role: + name: ceph-defaults + + - name: Stop ceph-exporter service + ansible.builtin.service: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + state: stopped + enabled: false + failed_when: false + + - name: Systemctl reset-failed ceph-exporter # noqa command-instead-of-module + ansible.builtin.command: "systemctl reset-failed {{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + changed_when: false + failed_when: false + + - name: Remove service file + ansible.builtin.file: + name: "/etc/systemd/system/ceph-exporter{{ '@' if containerized_deployment | bool else '' }}.service" + state: absent + failed_when: false + + - name: Check container hosts hosts: - mons diff --git a/infrastructure-playbooks/rolling_update.yml b/infrastructure-playbooks/rolling_update.yml index 2bcb73cf78..de0a08cf38 100644 --- a/infrastructure-playbooks/rolling_update.yml +++ b/infrastructure-playbooks/rolling_update.yml @@ -1143,6 +1143,50 @@ ansible.builtin.import_role: name: ceph-crash +- name: Upgrade ceph-exporter daemons + hosts: + - "{{ mon_group_name | default('mons') }}" + - "{{ osd_group_name | default('osds') }}" + - "{{ mds_group_name | default('mdss') }}" + - "{{ rgw_group_name | default('rgws') }}" + - "{{ rbdmirror_group_name | default('rbdmirrors') }}" + - "{{ mgr_group_name | default('mgrs') }}" + tags: + - post_upgrade + - ceph-exporter + gather_facts: false + become: true + tasks: + - name: Exit ceph-exporter upgrade if non containerized deployment + ansible.builtin.meta: end_play + when: not containerized_deployment | bool + + - name: Stop the ceph-exporter service + 
ansible.builtin.systemd: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + state: stopped + + # it needs to be done in a separate task otherwise the stop just before doesn't work. + - name: Mask and disable the ceph-exporter service + ansible.builtin.systemd: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + enabled: false + masked: true + + - name: Import ceph-defaults role + ansible.builtin.import_role: + name: ceph-defaults + - name: Import ceph-facts role + ansible.builtin.import_role: + name: ceph-facts + tasks_from: container_binary.yml + - name: Import ceph-handler role + ansible.builtin.import_role: + name: ceph-handler + - name: Import ceph-exporter role + ansible.builtin.import_role: + name: ceph-exporter + - name: Complete upgrade hosts: "{{ mon_group_name | default('mons') }}" tags: post_upgrade diff --git a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml index ee19b8ba2e..0e059159a0 100644 --- a/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml +++ b/infrastructure-playbooks/switch-from-non-containerized-to-containerized-ceph-daemons.yml @@ -755,6 +755,37 @@ ansible.builtin.import_role: name: ceph-crash +- name: Switching from non-containerized to containerized ceph-exporter + + hosts: + - "{{ mon_group_name | default('mons') }}" + - "{{ osd_group_name | default('osds') }}" + - "{{ mds_group_name | default('mdss') }}" + - "{{ rgw_group_name | default('rgws') }}" + - "{{ rbdmirror_group_name | default('rbdmirrors') }}" + - "{{ mgr_group_name | default('mgrs') }}" + + vars: + containerized_deployment: true + become: true + tasks: + - name: Import ceph-defaults role + ansible.builtin.import_role: + name: ceph-defaults + + - name: Import ceph-facts role + ansible.builtin.import_role: + name: ceph-facts + tasks_from: container_binary.yml + + - name: Import ceph-handler role + ansible.builtin.import_role: + name: ceph-handler + + - name: Import ceph-exporter role + ansible.builtin.import_role: + name: ceph-exporter + - name: Final task hosts: - "{{ mon_group_name|default('mons') }}" diff --git a/plugins/callback/installer_checkpoint.py b/plugins/callback/installer_checkpoint.py index 976ccefb6f..de9234d218 100644 --- a/plugins/callback/installer_checkpoint.py +++ b/plugins/callback/installer_checkpoint.py @@ -34,6 +34,7 @@ def v2_playbook_on_stats(self, stats): 'installer_phase_ceph_grafana', 'installer_phase_ceph_node_exporter', 'installer_phase_ceph_crash', + 'installer_phase_ceph_exporter', ] # Define the attributes of the installer phases @@ -90,6 +91,10 @@ def v2_playbook_on_stats(self, stats): 'title': 'Install Ceph Crash', 'playbook': 'roles/ceph-crash/tasks/main.yml' }, + 'installer_phase_ceph_exporter': { + 'title': 'Install Ceph Exporter', + 'playbook': 'roles/ceph-exporter/tasks/main.yml' + }, } # Find the longest phase title diff --git a/roles/ceph-exporter/defaults/main.yml b/roles/ceph-exporter/defaults/main.yml new file mode 100644 index 0000000000..2633c6efde --- /dev/null +++ b/roles/ceph-exporter/defaults/main.yml @@ -0,0 +1,24 @@ +--- +########### +# GENERAL # +########### + +ceph_exporter_addr: "0.0.0.0" +ceph_exporter_port: 9926 +ceph_exporter_stats_period: 5 # seconds +ceph_exporter_prio_limit: 5 + +########## +# DOCKER # +########## + +# If 
you want to add parameters, you should retain the existing ones and include the new ones. +ceph_exporter_container_params: + args: + - -f + - -n=client.ceph-exporter + - --sock-dir=/var/run/ceph + - --addrs={{ ceph_exporter_addr }} + - --port={{ ceph_exporter_port }} + - --stats-period={{ ceph_exporter_stats_period }} + - --prio-limit={{ ceph_exporter_prio_limit }} diff --git a/roles/ceph-exporter/meta/main.yml b/roles/ceph-exporter/meta/main.yml new file mode 100644 index 0000000000..61fc0ce21b --- /dev/null +++ b/roles/ceph-exporter/meta/main.yml @@ -0,0 +1,14 @@ +--- +galaxy_info: + company: Red Hat + author: Guillaume Abrioux + description: Deploy ceph-exporter + license: Apache + min_ansible_version: '2.7' + platforms: + - name: EL + versions: + - 'all' + galaxy_tags: + - system +dependencies: [] diff --git a/roles/ceph-exporter/tasks/main.yml b/roles/ceph-exporter/tasks/main.yml new file mode 100644 index 0000000000..3980a38660 --- /dev/null +++ b/roles/ceph-exporter/tasks/main.yml @@ -0,0 +1,58 @@ +--- +- name: Create and copy client.ceph-exporter keyring + when: cephx | bool + block: + - name: Create client.ceph-exporter keyring + ceph_key: + name: "client.ceph-exporter" + caps: + mon: 'allow r' + mgr: 'allow r' + osd: 'allow r' + cluster: "{{ cluster }}" + dest: "{{ ceph_conf_key_directory }}" + import_key: true + mode: "{{ ceph_keyring_permissions }}" + owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}" + group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}" + environment: + CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}" + CEPH_CONTAINER_BINARY: "{{ container_binary }}" + delegate_to: "{{ groups.get(mon_group_name, [])[0] }}" + run_once: true + no_log: "{{ no_log_on_ceph_key_tasks }}" + + - name: Get keys from monitors + ceph_key: + name: client.ceph-exporter + cluster: "{{ cluster }}" + output_format: plain + state: info + environment: + CEPH_CONTAINER_IMAGE: "{{ ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else None }}" + CEPH_CONTAINER_BINARY: "{{ container_binary }}" + register: _exporter_keys + delegate_to: "{{ groups.get(mon_group_name)[0] }}" + run_once: true + no_log: "{{ no_log_on_ceph_key_tasks }}" + + - name: Copy ceph key(s) if needed + ansible.builtin.copy: + dest: "{{ ceph_conf_key_directory }}/{{ cluster }}.client.ceph-exporter.keyring" + content: "{{ _exporter_keys.stdout + '\n' }}" + owner: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}" + group: "{{ ceph_uid if containerized_deployment | bool else 'ceph' }}" + mode: "{{ ceph_keyring_permissions }}" + no_log: "{{ no_log_on_ceph_key_tasks }}" + +- name: Include_tasks systemd.yml + ansible.builtin.include_tasks: systemd.yml + when: containerized_deployment | bool + +- name: Start the ceph-exporter service + ansible.builtin.systemd: + name: "{{ 'ceph-exporter@' + ansible_facts['hostname'] if containerized_deployment | bool else 'ceph-exporter.service' }}" + state: started + enabled: true + masked: false + daemon_reload: true diff --git a/roles/ceph-exporter/tasks/systemd.yml b/roles/ceph-exporter/tasks/systemd.yml new file mode 100644 index 0000000000..4e4733f9d6 --- /dev/null +++ b/roles/ceph-exporter/tasks/systemd.yml @@ -0,0 +1,9 @@ +--- +- name: Generate systemd unit file for ceph-exporter container + ansible.builtin.template: + src: "{{ role_path }}/templates/ceph-exporter.service.j2" + 
dest: /etc/systemd/system/ceph-exporter@.service + owner: "root" + group: "root" + mode: "0644" + notify: Restart ceph exporter diff --git a/roles/ceph-exporter/templates/ceph-exporter.service.j2 b/roles/ceph-exporter/templates/ceph-exporter.service.j2 new file mode 100644 index 0000000000..171bcd9d7c --- /dev/null +++ b/roles/ceph-exporter/templates/ceph-exporter.service.j2 @@ -0,0 +1,50 @@ +[Unit] +Description=Ceph exporter +{% if container_binary == 'docker' %} +After=docker.service network-online.target local-fs.target time-sync.target +Requires=docker.service +{% else %} +After=network-online.target local-fs.target time-sync.target +{% endif %} +Wants=network-online.target local-fs.target time-sync.target + +[Service] +{% if container_binary == 'podman' %} +ExecStartPre=-/usr/bin/rm -f /%t/%n-pid /%t/%n-cid +ExecStartPre=-/usr/bin/{{ container_binary }} rm --storage ceph-exporter-%i +{% endif %} +ExecStartPre=-/usr/bin/{{ container_binary }} rm -f ceph-exporter-%i +ExecStart=/usr/bin/{{ container_binary }} run --rm --name ceph-exporter-%i \ +{% if container_binary == 'podman' %} +-d --log-driver journald --conmon-pidfile /%t/%n-pid --cidfile /%t/%n-cid \ +{% endif %} +--pids-limit={{ 0 if container_binary == 'podman' else -1 }} \ +--security-opt label=disable \ +--net=host \ +{% for v in ceph_common_container_params['volumes'] + ceph_exporter_container_params['volumes'] | default([]) %} + -v {{ v }} \ +{% endfor %} +{% for k, v in (ceph_common_container_params['envs'] | combine(ceph_exporter_container_params['envs'] | default({}))).items() %} + -e {{ k }}={{ v }} \ +{% endfor %} +--entrypoint=/usr/bin/ceph-exporter {{ ceph_docker_registry }}/{{ ceph_docker_image }}:{{ ceph_docker_image_tag }} \ + {{ (ceph_common_container_params['args'] + ceph_exporter_container_params['args'] | default([])) | join(' ') }} +{% if container_binary == 'podman' %} +ExecStop=-/usr/bin/sh -c "/usr/bin/{{ container_binary }} rm -f `cat /%t/%n-cid`" +{% else %} +ExecStop=-/usr/bin/{{ container_binary }} stop ceph-exporter-%i +{% endif %} +StartLimitInterval=10min +StartLimitBurst=30 +{% if container_binary == 'podman' %} +Type=forking +PIDFile=/%t/%n-pid +{% endif %} +KillMode=none +Restart=always +RestartSec=10s +TimeoutStartSec=120 +TimeoutStopSec=10 + +[Install] +WantedBy=ceph.target diff --git a/roles/ceph-handler/handlers/main.yml b/roles/ceph-handler/handlers/main.yml index d634b9dd87..9de73241a5 100644 --- a/roles/ceph-handler/handlers/main.yml +++ b/roles/ceph-handler/handlers/main.yml @@ -65,6 +65,17 @@ or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Ceph exporter handler + ansible.builtin.include_tasks: handler_exporter.yml + listen: "Restart ceph exporter" + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) + - name: Remove tempdir for scripts ansible.builtin.file: path: "{{ tmpdirpath.path }}" @@ -80,6 +91,7 @@ register: tmpdirpath when: - tmpdirpath.path is defined + - not _exporter_handler_called | default(false) | bool - not _crash_handler_called | default(false) | bool - not _mds_handler_called | default(false) | bool - not _mgr_handler_called | default(false) | bool diff --git 
a/roles/ceph-handler/tasks/check_running_containers.yml b/roles/ceph-handler/tasks/check_running_containers.yml index 82f7c5c135..551a15c83b 100644 --- a/roles/ceph-handler/tasks/check_running_containers.yml +++ b/roles/ceph-handler/tasks/check_running_containers.yml @@ -68,3 +68,17 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Check for a ceph-exporter container + ansible.builtin.command: "{{ container_binary }} ps -q --filter='name=ceph-exporter-{{ ansible_facts['hostname'] }}'" + register: ceph_exporter_container_stat + changed_when: false + failed_when: false + check_mode: false + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-handler/tasks/check_socket_non_container.yml b/roles/ceph-handler/tasks/check_socket_non_container.yml index 37e1b0f959..96c492ffcc 100644 --- a/roles/ceph-handler/tasks/check_socket_non_container.yml +++ b/roles/ceph-handler/tasks/check_socket_non_container.yml @@ -218,3 +218,17 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Check for a ceph-exporter process + ansible.builtin.command: pgrep ceph-exporter + changed_when: false + failed_when: false + check_mode: false + register: exporter_process + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-handler/tasks/handler_exporter.yml b/roles/ceph-handler/tasks/handler_exporter.yml new file mode 100644 index 0000000000..d846d69c49 --- /dev/null +++ b/roles/ceph-handler/tasks/handler_exporter.yml @@ -0,0 +1,18 @@ +--- +- name: Set _exporter_handler_called before restart + ansible.builtin.set_fact: + _exporter_handler_called: true + +- name: Restart the ceph-exporter service # noqa: ignore-errors + ansible.builtin.systemd: + name: ceph-exporter@{{ ansible_facts['hostname'] }} + state: restarted + enabled: true + masked: false + daemon_reload: true + ignore_errors: true + when: hostvars[inventory_hostname]['_exporter_handler_called'] | default(False) | bool + +- name: Set _exporter_handler_called after restart + ansible.builtin.set_fact: + _exporter_handler_called: false diff --git a/roles/ceph-handler/tasks/main.yml b/roles/ceph-handler/tasks/main.yml index 4ea3bef6d6..c963b0115f 100644 --- a/roles/ceph-handler/tasks/main.yml +++ b/roles/ceph-handler/tasks/main.yml @@ -48,3 +48,14 @@ or inventory_hostname in groups.get(mds_group_name, []) or inventory_hostname in groups.get(rgw_group_name, []) or inventory_hostname in groups.get(rbdmirror_group_name, []) + +- name: Set_fact handler_exporter_status + ansible.builtin.set_fact: + handler_exporter_status: "{{ exporter_process.get('rc') == 0 if not containerized_deployment | bool else (ceph_exporter_container_stat.get('rc') == 0 and 
ceph_exporter_container_stat.get('stdout_lines', []) | length != 0) }}" + when: + - inventory_hostname in groups.get(mon_group_name, []) + or inventory_hostname in groups.get(mgr_group_name, []) + or inventory_hostname in groups.get(osd_group_name, []) + or inventory_hostname in groups.get(mds_group_name, []) + or inventory_hostname in groups.get(rgw_group_name, []) + or inventory_hostname in groups.get(rbdmirror_group_name, []) diff --git a/roles/ceph-mds/defaults/main.yml b/roles/ceph-mds/defaults/main.yml index cd1342f85c..be435e3e80 100644 --- a/roles/ceph-mds/defaults/main.yml +++ b/roles/ceph-mds/defaults/main.yml @@ -18,7 +18,6 @@ copy_admin_key: false # Resource limitation # For the whole list of limits you can apply see: docs.docker.com/engine/admin/resource_constraints # Default values are based from: https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/2/html/red_hat_ceph_storage_hardware_guide/minimum_recommendations -# These options can be passed using the 'ceph_mds_docker_extra_env' variable. ceph_mds_docker_memory_limit: "{{ ansible_facts['memtotal_mb'] }}m" ceph_mds_docker_cpu_limit: 4 diff --git a/site-container.yml.sample b/site-container.yml.sample index 3935f9b20d..7031621c10 100644 --- a/site-container.yml.sample +++ b/site-container.yml.sample @@ -468,6 +468,46 @@ status: "Complete" end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" +- hosts: + - mons + - osds + - mdss + - rgws + - rbdmirrors + - mgrs + + gather_facts: false + become: True + any_errors_fatal: true + pre_tasks: + - name: set ceph exporter install 'In Progress' + run_once: true + set_stats: + data: + installer_phase_ceph_exporter: + status: "In Progress" + start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + + tasks: + - import_role: + name: ceph-defaults + - import_role: + name: ceph-facts + tasks_from: container_binary.yml + - import_role: + name: ceph-handler + - import_role: + name: ceph-exporter + + post_tasks: + - name: set ceph exporter install 'Complete' + run_once: true + set_stats: + data: + installer_phase_ceph_exporter: + status: "Complete" + end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}" + - hosts: mons[0] gather_facts: false become: True diff --git a/tests/conftest.py b/tests/conftest.py index 9a12727dc5..07e35e2654 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -171,6 +171,9 @@ def node(host, request): if request.node.get_closest_marker('ceph_crash') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]: pytest.skip('Not a valid test for nfs or client nodes') + if request.node.get_closest_marker('ceph_exporter') and sanitized_group_names in [['nfss'], ['clients'], ['monitoring']]: + pytest.skip('Not a valid test for nfs or client nodes') + if request.node.get_closest_marker("no_docker") and docker: pytest.skip( "Not a valid test for containerized deployments or atomic hosts") diff --git a/tests/functional/tests/test_install.py b/tests/functional/tests/test_install.py index 69f38c41d8..38d87c2fd5 100644 --- a/tests/functional/tests/test_install.py +++ b/tests/functional/tests/test_install.py @@ -45,3 +45,12 @@ def test_ceph_crash_service_enabled_and_running_container(self, node, host): s = host.service("ceph-crash@{hostname}".format(hostname=node["vars"]["inventory_hostname"])) assert s.is_enabled assert s.is_running + + +class TestCephExporter(object): + @pytest.mark.docker + @pytest.mark.ceph_exporter + def test_ceph_exporter_service_enabled_and_running_container(self, node, host): + s = 
host.service("ceph-exporter@{hostname}".format(hostname=node["vars"]["inventory_hostname"])) + assert s.is_enabled + assert s.is_running diff --git a/tests/pytest.ini b/tests/pytest.ini index 61620b9ebd..d4c15634be 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -2,6 +2,7 @@ # dir really is. [pytest] markers = + ceph_exporter: environment with ceph exporter enabled ceph_crash: environment with ceph crash enabled dashboard: environment with dashboard enabled no_docker: environment without containers From 2aca2a1e98e1941e68d2e971f8dcb4526dbe883b Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Mon, 4 Mar 2024 14:09:44 +0100 Subject: [PATCH 2/2] tests: add ceph exporter to collect logs Signed-off-by: Seena Fallah --- tests/functional/collect-logs.yml | 55 +++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/tests/functional/collect-logs.yml b/tests/functional/collect-logs.yml index 9b314dfaa4..e17a1716df 100644 --- a/tests/functional/collect-logs.yml +++ b/tests/functional/collect-logs.yml @@ -1,28 +1,30 @@ --- -- hosts: all - become: yes +- name: Collect logs + hosts: all + become: true tasks: - - name: import_role ceph-defaults - import_role: + - name: Import_role ceph-defaults + ansible.builtin.import_role: name: ceph-defaults - - name: import_role ceph-facts - import_role: + - name: Import_role ceph-facts + ansible.builtin.import_role: name: ceph-facts tasks_from: container_binary.yml - - import_role: + - name: Import set_radosgw_address.yml + ansible.builtin.import_role: name: ceph-facts tasks_from: set_radosgw_address.yml - - name: set_fact ceph_cmd - set_fact: + - name: Set_fact ceph_cmd + ansible.builtin.set_fact: ceph_cmd: "{{ container_binary + ' run --rm --net=host -v /etc/ceph:/etc/ceph:z -v /var/lib/ceph:/var/lib/ceph:z -v /var/run/ceph:/var/run/ceph:z --entrypoint=ceph ' + ceph_docker_registry + '/' + ceph_docker_image + ':' + ceph_docker_image_tag if containerized_deployment | bool else 'ceph' }}" - - name: get some ceph status outputs - command: "{{ ceph_cmd }} --connect-timeout 10 --cluster {{ cluster }} {{ item }}" + - name: Get some ceph status outputs + ansible.builtin.command: "{{ ceph_cmd }} --connect-timeout 10 --cluster {{ cluster }} {{ item }}" register: ceph_status - run_once: True + run_once: true delegate_to: mon0 failed_when: false changed_when: false @@ -34,30 +36,35 @@ - "versions" - "health detail -f json-pretty" - - name: save ceph status to file - copy: + - name: Save ceph status to file + ansible.builtin.copy: content: "{{ item.stdout }}" dest: "{{ archive_path }}/{{ item.item | regex_replace(' ', '_') }}.log" delegate_to: localhost - run_once: True + run_once: true with_items: "{{ ceph_status.results }}" - - name: get mgr log - shell: journalctl -l -u ceph-mgr@{{ ansible_facts['hostname'] }} > /var/log/ceph/ceph-mgr.{{ ansible_facts['hostname'] }}.log + - name: Get ceph-exporter logs # noqa: ignore-errors + ansible.builtin.shell: journalctl -l -u ceph-exporter@{{ ansible_facts['hostname'] }} | tee -a /var/log/ceph/ceph-exporter.{{ ansible_facts['hostname'] }}.log + changed_when: false + ignore_errors: true + + - name: Get mgr log + ansible.builtin.shell: journalctl -l -u ceph-mgr@{{ ansible_facts['hostname'] }} > /var/log/ceph/ceph-mgr.{{ ansible_facts['hostname'] }}.log changed_when: false when: - inventory_hostname in groups.get(mgr_group_name, []) or (groups.get(mgr_group_name, []) | length == 0 and inventory_hostname in groups.get(mon_group_name, [])) - - name: get rgw log - shell: journalctl -l -u 
ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }} > /var/log/ceph/ceph-radosgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}.log + - name: Get rgw log + ansible.builtin.shell: journalctl -l -u ceph-radosgw@rgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }} > /var/log/ceph/ceph-radosgw.{{ ansible_facts['hostname'] }}.{{ item.instance_name }}.log changed_when: false with_items: "{{ rgw_instances | default([]) }}" when: inventory_hostname in groups.get(rgw_group_name, []) - - name: find ceph config file and logs - find: + - name: Find ceph config file and logs + ansible.builtin.find: paths: - /etc/ceph - /var/log/ceph @@ -66,9 +73,9 @@ - "*.log" register: results - - name: collect ceph config file and logs - fetch: + - name: Collect ceph config file and logs + ansible.builtin.fetch: src: "{{ item.path }}" dest: "{{ archive_path }}/{{ inventory_hostname }}/" - flat: yes + flat: true with_items: "{{ results.files }}"
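
Reviewer aside, illustrative only and not part of the patch: the generated group_vars/exporters.yml.sample documents only the default "args" entry of ceph_exporter_container_params, but the ceph-exporter.service.j2 template introduced above also reads optional "volumes" and "envs" keys from the same variable. A minimal sketch of a user override that keeps the role's default arguments (from roles/ceph-exporter/defaults/main.yml) while adding one extra bind mount and one extra environment variable; the mount path and the TZ variable below are examples of the mechanism, not defaults shipped by the role:

# group_vars/exporters.yml (hypothetical user override)
ceph_exporter_container_params:
  args:
    - -f
    - -n=client.ceph-exporter
    - --sock-dir=/var/run/ceph
    - --addrs={{ ceph_exporter_addr }}
    - --port={{ ceph_exporter_port }}
    - --stats-period={{ ceph_exporter_stats_period }}
    - --prio-limit={{ ceph_exporter_prio_limit }}
  volumes:
    - /etc/pki/ca-trust:/etc/pki/ca-trust:ro   # example extra bind mount, rendered by the volumes loop in the unit template
  envs:
    TZ: "UTC"                                  # example extra environment variable, merged with ceph_common_container_params['envs']

Because the template concatenates ceph_common_container_params with this variable rather than merging per-key defaults, an override must restate the default args it still wants, which is why the sample file's comment asks users to retain the existing ones when adding new parameters.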