-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
scylla-ansible-roles: Adds example playbook "kernel_version_enforcer" #402
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
--- | ||
|
||
# Play: enforce a given kernel image version (or the latest one) on the hosts
# of the "scylla" group, delegating the actual work to ubuntu/main.yml.
- name: Kernel Version Enforcer
  hosts: scylla
  # Process a single host at a time.
  serial: 1
  gather_facts: true
  vars:
    # Scylla REST API endpoint and retry/timeout settings.
    api_address: "127.0.0.1"
    api_port: 10000
    api_retries: 360
    api_delay: 10
    api_timeout: 300

    # Timeouts and retry settings for waiting on CQL, reboots, systemd units
    # and the forced kill of the Scylla process.
    cql_timeout: 86400
    reboot_timeout: 600
    systemd_unit_retries: 5
    systemd_unit_delay: 30
    pid_kill_retries: 5
    pid_kill_delay: 12
    pause_time: 15

    # Kernel image to enforce and the packages that go with it.
    grub_config_file: "/boot/grub/grub.cfg"
    image_package_prefix: "linux-image"
    image_version: "5.15.0-1051-gcp"
    kernel_related_packages:
      - linux-gcp
      - linux-image-gcp
      - linux-headers-gcp

    # Behaviour switches, all disabled by default.
    pin_kernel_version: false
    purge_older_images: false
    upgrade_all_packages: false
    upgrade_latest_kernel: false
  tasks:
    # Only Ubuntu hosts are handled by this play.
    - name: Enforce kernel version for Ubuntu
      when: ansible_distribution == "Ubuntu"
      ansible.builtin.include_tasks: ubuntu/main.yml
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
--- | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Procedure below doesn't seem to be standard. In gist you need to change |
||
|
||
# Inspect the GRUB configuration file so that a missing file can be reported
# with an explicit message instead of a later command failure.
- name: Get {{ grub_config_file }} metadata
  register: grub_config
  ansible.builtin.stat:
    path: "{{ grub_config_file }}"

# Abort when the stat result above says the file is absent.
- name: Fail if GRUB config file doesn't exist
  when: not grub_config.stat.exists
  ansible.builtin.fail:
    msg: "{{ grub_config_file }} doesn't exist"
|
||
# Collect every indented "menuentry" line of the GRUB configuration; the
# result is registered as grub_entries for the index lookup that follows.
# The command only reads the file, so it must never be reported as a change.
- name: Get GRUB entries
  ansible.builtin.command:
    cmd: grep -E "^\smenuentry" {{ grub_config_file }}
  register: grub_entries
  changed_when: false
|
||
# Walk the GRUB entries and remember the position of the first one that
# mentions image_version and is not a "recovery mode" entry. The
# "is not defined" guard keeps the first match and skips later iterations.
- name: Get GRUB index for '{{ image_package_prefix }}-{{ image_version }}'
  ansible.builtin.set_fact:
    target_grub_index: "{{ grub_index }}"
  when:
    - image_version in item
    - "'recovery mode' not in item"
    - target_grub_index is not defined
  loop: "{{ grub_entries.stdout_lines }}"
  loop_control:
    index_var: grub_index
|
||
# Stop with a readable message when no GRUB entry matched image_version;
# otherwise the command below would fail on an undefined variable.
- name: Fail if no GRUB entry was found for '{{ image_version }}'
  ansible.builtin.fail:
    msg: "No GRUB menu entry matching '{{ image_version }}' was found in {{ grub_config_file }}"
  when: target_grub_index is not defined

# Select the entry for the next boot only (grub-reboot).
# NOTE(review): the "1>" prefix presumably addresses the entry inside the
# second top-level GRUB item (the submenu) - confirm against the host layout.
- name: Set index '1>{{ target_grub_index }}' to be used in the next reboot
  ansible.builtin.command:
    cmd: grub-reboot "1>{{ target_grub_index }}"
  become: true
  changed_when: true
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
--- | ||
|
||
# Remove (with purge) every installed kernel image package whose version is
# greater than final_image_version. Versions come from vmlinuz_versions.
- name: Purge all kernel images newer than '{{ final_image_version }}'
  become: true
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when: item is version(final_image_version, '>')
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent

# Delete the files under /boot that end in "-<version>" for those same
# newer versions. A shell is needed for the glob expansion.
- name: Erase all kernel images related files newer than '{{ final_image_version }}'
  become: true
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when: item is version(final_image_version, '>')
  ansible.builtin.shell:
    cmd: rm -f /boot/*-{{ item }}
|
||
# Same clean-up as for newer images, but for versions lower than
# final_image_version, and only when purge_older_images is enabled.
- name: Purge all kernel images older than '{{ final_image_version }}'
  become: true
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when:
    - purge_older_images
    - item is version(final_image_version, '<')
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent

# Delete the matching "-<version>" files under /boot for those older versions.
- name: Erase all kernel images related files older than '{{ final_image_version }}'
  become: true
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when:
    - purge_older_images
    - item is version(final_image_version, '<')
  ansible.builtin.shell:
    cmd: rm -f /boot/*-{{ item }}
|
||
# Run apt's autoclean.
- name: Remove useless packages from the cache
  become: true
  ansible.builtin.apt:
    autoclean: true

# Re-run the package configuration of the enforced kernel image
# non-interactively, asking only critical-priority questions.
- name: Reconfigure '{{ image_package_prefix }}-{{ final_image_version }}' package
  become: true
  ansible.builtin.command:
    cmd: dpkg-reconfigure {{ image_package_prefix }}-{{ final_image_version }} -f noninteractive -p critical
|
||
# Read the metadata of /boot/vmlinuz so the symlink target can be verified.
- name: Get /boot/vmlinuz metadata
  ansible.builtin.stat:
    path: /boot/vmlinuz
  register: vmlinuz

# Fail when /boot/vmlinuz is missing, is not a symlink, or resolves to a file
# other than the enforced kernel. The conditions are joined with "or": a
# "when" list would AND them and let a symlink to the wrong kernel pass.
# default(false) covers a missing file, where stat returns no "islnk" key.
- name: Fail if /boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}"
  when: >-
    not (vmlinuz.stat.islnk | default(false))
    or vmlinuz.stat.lnk_source != ('/boot/vmlinuz-' ~ final_image_version)
|
||
# Read the metadata of /boot/initrd.img so the symlink target can be verified.
- name: Get /boot/initrd.img metadata
  ansible.builtin.stat:
    path: /boot/initrd.img
  register: initrd

# Fail when /boot/initrd.img is missing, is not a symlink, or resolves to a
# file other than the enforced initrd. The conditions are joined with "or":
# a "when" list would AND them and let a symlink to the wrong image pass.
# default(false) covers a missing file, where stat returns no "islnk" key.
- name: Fail if /boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}"
  when: >-
    not (initrd.stat.islnk | default(false))
    or initrd.stat.lnk_source != ('/boot/initrd.img-' ~ final_image_version)
|
||
# Final clean-up: autoclean plus autoremove, forcing the use of apt-get.
- name: Clean all non-required packages
  become: true
  ansible.builtin.apt:
    force_apt_get: true
    autoremove: true
    autoclean: true
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
--- | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this functionality should be inside a playbook. We will want to re-use this functionality when setting up a ScyllaDB node. |
||
|
||
# Ask the host which kernel release is currently running. The command only
# reads state, so it is never reported as a change.
- name: Get current kernel image version
  ansible.builtin.command:
    cmd: uname --kernel-release
  register: uname_pre_output
  changed_when: false

# Keep the first output line as detected_image_version.
- name: Save kernel image version
  ansible.builtin.set_fact:
    detected_image_version: "{{ uname_pre_output.stdout_lines | first }}"
|
||
# kernel_image_required is true when the running kernel (detected_image_version)
# differs from image_version, or when upgrade_latest_kernel is enabled.
# NOTE(review): PR reviewer comment on this task (its inline code fragments
# were lost in extraction): "This task checks if the [...] The above is
# definitely not what the "name" claims because the package with the [...]"
- name: Define if the kernel image should be installed
  ansible.builtin.set_fact:
    kernel_image_required: "{{ (image_version is version(detected_image_version, 'ne') or upgrade_latest_kernel) | bool }}"
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you have a "or upgrade_latest_kernel" part in the |
||
|
||
# Set the dpkg selection of every kernel-related package to "install" when a
# kernel image has to be installed.
- name: Mark to unhold kernel-related packages
  become: true
  when: kernel_image_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    selection: install
    name: "{{ item }}"
|
||
# Install the exact kernel image package "<prefix>-<version>", but only when
# an install is required and the "latest kernel" path is not in use.
- name: Ensure kernel image '{{ image_package_prefix }}-{{ image_version }}' is installed
  become: true
  when:
    - kernel_image_required
    - not upgrade_latest_kernel
  ansible.builtin.apt:
    state: present
    name: "{{ image_package_prefix }}-{{ image_version }}"
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did you use |
||
|
||
# Bring all kernel-related packages to their latest available version when
# upgrade_latest_kernel is enabled. The whole list is passed to "name" so
# apt resolves it in a single transaction instead of one module run (with
# its own cache update, autoclean and autoremove) per package.
- name: Upgrade kernel-related packages to the latest version available
  ansible.builtin.apt:
    name: "{{ kernel_related_packages }}"
    state: latest
    update_cache: true
    autoclean: true
    autoremove: true
    force_apt_get: true
  become: true
  when: upgrade_latest_kernel
|
||
# Set the dpkg selection of every kernel-related package to "hold" when
# pin_kernel_version is enabled.
- name: Mark to hold kernel-related packages
  become: true
  when: pin_kernel_version
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    selection: hold
    name: "{{ item }}"
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
# Optional full upgrade of every package (name "*"), enabled by
# upgrade_all_packages; the cache is refreshed first.
- name: Upgrade all upgradable packages
  become: true
  when: upgrade_all_packages
  ansible.builtin.apt:
    name: "*"
    state: latest
    force_apt_get: true
    update_cache: true
    autoremove: true
    autoclean: true
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do you add this to this playbook, which allegedly takes care of the Linux kernel only? |
||
|
||
# List the version suffix of every /boot/vmlinuz-* file, one per line.
# The sed expression is anchored and strips exactly the "/boot/vmlinuz-"
# prefix. The task only reads the filesystem, so it never reports a change.
- name: Get all vmlinuz files available
  ansible.builtin.shell:
    cmd: ls /boot/vmlinuz-* | sed 's|^/boot/vmlinuz-||'
  register: vmlinuz_versions
  changed_when: false

# More than one vmlinuz file means several kernels are present and a
# reconfiguration is needed.
- name: Define if reconfiguration is required due to the presence of several vmlinuz files
  ansible.builtin.set_fact:
    reconfiguration_required: "{{ (vmlinuz_versions.stdout_lines | length) > 1 }}"
|
||
# Set the dpkg selection of every kernel-related package to "install" again
# when a reconfiguration is required.
- name: Mark to unhold kernel-related packages
  become: true
  when: reconfiguration_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    selection: install
    name: "{{ item }}"
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need to unhold kernel related packages? |
||
|
||
# Include the GRUB tasks only when a reconfiguration is required and a
# specific image_version (not the latest kernel) is being enforced.
- name: Prepare GRUB modifications
  when:
    - reconfiguration_required
    - not upgrade_latest_kernel
  ansible.builtin.include_tasks: grub.yml

# Include the stop/reboot/start sequence whenever a reconfiguration is required.
- name: Stop, reboot and start each node (if required)
  when: reconfiguration_required
  ansible.builtin.include_tasks: stop_reboot_start.yml
|
||
# final_image_version is the version the clean-up tasks compare against.
# When a specific version is enforced it is simply image_version.
# NOTE(review): PR reviewer comment on this task: "I don't understand why all
# tasks below this one are required to begin with."
- name: Set final kernel image version if '{{ image_version }}' was installed
  ansible.builtin.set_fact:
    final_image_version: "{{ image_version }}"
  when: not upgrade_latest_kernel

# When the latest kernel was installed, use target_image_version instead.
- name: Set final kernel image version if the latest one was installed
  ansible.builtin.set_fact:
    final_image_version: "{{ target_image_version }}"
  when:
    - reconfiguration_required
    - upgrade_latest_kernel
|
||
# Include the clean-up tasks when a reconfiguration is required.
- name: Enforce kernel version '{{ final_image_version }}' usage
  when: reconfiguration_required
  ansible.builtin.include_tasks: kernel_enforce_cleanup.yml
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not using |
||
|
||
# Hold the kernel-related packages again after a reconfiguration, if
# pin_kernel_version is enabled.
- name: Mark to hold kernel-related packages
  become: true
  when:
    - pin_kernel_version
    - reconfiguration_required
  loop: "{{ kernel_related_packages }}"
  ansible.builtin.dpkg_selections:
    selection: hold
    name: "{{ item }}"

# Wait pause_time seconds when a kernel image install was required.
- name: Make a pause of {{ pause_time }} seconds
  when: kernel_image_required
  ansible.builtin.wait_for:
    timeout: "{{ pause_time | int }}"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
--- | ||
# Gather the service list into ansible_facts.services.
- name: Populate service facts
  ansible.builtin.service_facts:

# Scylla counts as installed when the scylla-server.service unit is known.
- name: Check if Scylla is installed
  ansible.builtin.set_fact:
    scylla_installation: "{{ ansible_facts.services['scylla-server.service'] is defined }}"
|
||
# Stop Scylla before the reboot: mask the unit, drain the node through the
# REST API, stop the manager agent and the server. If any step fails, the
# rescue section kills the Scylla process; the unit is always unmasked again.
# Runs only when Scylla is installed and a kernel image install is required.
# NOTE(review): PR reviewer comments on this block:
#   "We should stop duplicating "Scylla Rolling Restart" versions and have one
#   that is used everywhere. We should add the missing "rescue" block to
#   https://github.com/scylladb/scylla-ansible-roles/blob/master/example-playbooks/rolling_ops/rolling_restart.yml
#   and then it would be good to be used everywhere."
#   "This stop and start functionality, is indeed preferably implemented only
#   once (DRY principle)."
- name: Stop Scylla
  block:
    # NOTE(review): PR reviewer asked about this task: '"masked"? Why?'
    - name: Mask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: true
      become: true

    # "until" is required for "retries"/"delay" to take effect on
    # ansible-core releases before 2.16.
    - name: Drain node
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/drain"
        method: POST
      register: node_drain_request
      until: node_drain_request is succeeded
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      timeout: "{{ api_timeout }}"

    # Poll the operation mode until it reports DRAINED; the task fails if it
    # still does not after the last attempt.
    - name: Check if the node is fully drained
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/operation_mode"
        method: GET
      register: node_drain_status
      until: "'DRAINED' in (node_drain_status.json | default(''))"
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      timeout: "{{ api_timeout }}"
      failed_when: "'DRAINED' not in (node_drain_status.json | default(''))"

    - name: Stop scylla-manager-agent service (if exists)
      ansible.builtin.systemd:
        name: scylla-manager-agent
        enabled: true
        state: stopped
      become: true
      when: ansible_facts.services['scylla-manager-agent.service'] is defined

    - name: Stop scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: stopped
      become: true
      when: "'DRAINED' in node_drain_status.json"
  rescue:
    # Repeats the kill until the command exits with rc 2.
    # NOTE(review): presumably rc 2 is what the shell's "kill" returns once
    # pidof finds no process and kill receives no PID - confirm.
    - name: Send a SIGKILL to Scylla PID
      ansible.builtin.shell: kill -9 $(pidof scylla)
      register: scylla_kill_pid
      retries: "{{ pid_kill_retries }}"
      delay: "{{ pid_kill_delay }}"
      until: scylla_kill_pid.rc == 2
      failed_when: scylla_kill_pid.rc != 2
      become: true
  always:
    - name: Unmask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: false
      become: true
  when:
    - scylla_installation
    - kernel_image_required
|
||
# Reboot the host, record the kernel release that came up as
# target_image_version, and fail if it is not the enforced image_version
# (unless the "latest kernel" path is in use). Runs only when a kernel image
# install was required.
- name: Reboot and post-reboot checks
  block:
    - name: Reboot the node
      ansible.builtin.reboot:
        reboot_timeout: "{{ reboot_timeout }}"
      become: true

    # Plain command (no shell features needed); read-only, so never "changed".
    - name: Get current kernel image version
      ansible.builtin.command:
        cmd: uname --kernel-release
      register: uname_post_output
      changed_when: false

    - name: Save kernel image version
      ansible.builtin.set_fact:
        target_image_version: "{{ uname_post_output.stdout_lines | first }}"

    - name: Fail if kernel image version '{{ image_version }}' is not currently in use
      ansible.builtin.fail:
        msg: "'{{ image_version }}' is not currently used"
      when:
        - target_image_version is version(image_version, 'ne')
        - not upgrade_latest_kernel
  when: kernel_image_required
|
||
# Bring Scylla back after the reboot and wait until the node is serving CQL
# and shows up as "UN" in nodetool status. Runs only when Scylla is installed
# and a kernel image install was required.
- name: Start Scylla
  block:
    # Read listen_address from scylla.yaml; read-only, so never "changed".
    - name: Get listen address
      ansible.builtin.shell:
        cmd: grep '^listen_address:' /etc/scylla/scylla.yaml | awk '{ print $2 }'
      register: listen_address
      changed_when: false

    # Started explicitly only when the service facts gathered earlier report
    # the unit as "disabled". "until" is required for "retries"/"delay" to
    # take effect on ansible-core releases before 2.16.
    - name: Start scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: started
      register: scylla_server_start
      until: scylla_server_start is succeeded
      retries: "{{ systemd_unit_retries }}"
      delay: "{{ systemd_unit_delay }}"
      become: true
      when:
        - ansible_facts.services['scylla-server.service'] is defined
        - ansible_facts.services['scylla-server.service'].status == "disabled"

    - name: Wait for CQL port on {{ listen_address.stdout }}
      ansible.builtin.wait_for:
        port: 9042
        host: "{{ listen_address.stdout }}"
        timeout: "{{ cql_timeout }}"

    # Poll nodetool until this node's line starts with "UN"; read-only.
    - name: Wait for the cluster to become healthy
      ansible.builtin.shell:
        cmd: nodetool status | grep "{{ listen_address.stdout }}" | grep '^UN'
      register: node_status
      until: node_status.rc == 0
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      changed_when: false
  when:
    - scylla_installation
    - kernel_image_required
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about `linux-image-generic`, `linux-image-aws`, and the corresponding `headers` packages?