From 7271d15b39283fd7c0e5208cf4b3458a8064d75b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Tue, 27 Feb 2024 14:57:54 +0000 Subject: [PATCH 01/44] Use cirun for testing --- .cirun.yml | 15 +++++++++++++++ .github/workflows/kvm-test.yaml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 .cirun.yml diff --git a/.cirun.yml b/.cirun.yml new file mode 100644 index 00000000..45d806a8 --- /dev/null +++ b/.cirun.yml @@ -0,0 +1,15 @@ +runners: + - name: cirun-aws-runner + # Cloud Provider: AWS + cloud: aws + # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance + instance_type: t3a.2xlarge + machine_image: ami-0a388df278199ff52 + # Region: Oregon + region: us-west-2 + # Use Spot Instances for cost savings + preemptible: + - true + - false + labels: + - cirun-runner \ No newline at end of file diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index ea8fa2c8..9560f25b 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -11,7 +11,7 @@ jobs: # https://github.com/jonashackt/vagrant-github-actions test-kvm: name: KVM Test - runs-on: macos-latest + runs-on: "cirun-runner--${{ github.run_id }}" steps: - uses: actions/checkout@v2 From 819c21941c940aad96bf57d81d6be9cc10a51aed Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Tue, 27 Feb 2024 14:58:49 +0000 Subject: [PATCH 02/44] trigger run for cirun runner --- .github/workflows/kvm-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 9560f25b..586b51ee 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -5,7 +5,8 @@ on: pull_request: push: branches: - - main + - "*" + jobs: # https://github.com/jonashackt/vagrant-github-actions From 01fb47bbd12b24a88b2312ebae5281d095418aad Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 13:44:29 -0300 Subject: [PATCH 03/44] Re-enable deployment 
run --- .github/workflows/kvm-test.yaml | 52 ++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 586b51ee..67d2148e 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -7,6 +7,9 @@ on: branches: - "*" +env: + # Prevents Vagrant from creating symlinks + VAGRANT_DISABLE_VBOXSYMLINKCREATE: "1" jobs: # https://github.com/jonashackt/vagrant-github-actions @@ -24,6 +27,10 @@ jobs: restore-keys: | ${{ runner.os }}-vagrant- + # - name: Install vagrant + # run: | + # sudo apt install -y virtualbox + - name: Install test dependencies. run: sudo pip3 install ansible @@ -36,7 +43,44 @@ jobs: - name: Show Vagrant version run: vagrant --version -# Disabled until we fix it -# - name: Run vagrant up -# working-directory: tests/ubuntu2004-singlenode -# run: vagrant up + - name: Check Libvirt status + run: | + virsh --version + + - name: Run vagrant up + working-directory: tests/ubuntu2004-singlenode + run: vagrant up --provider=libvirt + + - name: Check Vagrant status + run: | + vagrant status + + - name: SSH into Vagrant VM + run: | + vagrant ssh -c "echo 'Hello from Vagrant VM'" + + - name: Collect System Info and Metrics for Debugging + if: success() + run: | + vagrant ssh -c " + echo 'System Information:' && uname -a + echo 'Available Disk Space:' && df -h + echo 'Memory Usage:' && free -m + echo 'CPU Information:' && lscpu + echo 'Network Configuration:' && ifconfig + echo 'Firewall Status:' && sudo ufw status + echo 'System Logs:' && journalctl --no-pager | tail -n 100 + " + + - name: Copy inventory files to .vagrant provider directory + run: | + cp -r inventory.template/* tests/ubuntu2004-singlenode/.vagrant/provisioners/ansible/inventory/ + + - name: Run vagrant provision + working-directory: tests/ubuntu2004-singlenode + run: vagrant provision + + - name: Cleanup + if: always() + run: | + vagrant destroy -f From 
24b9efd9c28b1eed8ea2a2d8fd2e46ca11ed5118 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 13:48:43 -0300 Subject: [PATCH 04/44] restrict trigger criteria for linting dirs --- .github/workflows/lint.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 8ad0e24e..160b8458 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -3,7 +3,17 @@ name: Ansible Lint on: push: + paths: + - 'roles/**' + - 'tasks/**' + - '.github/workflows/ansible-lint.yml' + pull_request: + paths: + - 'roles/**' + - 'tasks/**' + - '.github/workflows/ansible-lint.yml' + jobs: build: name: Ansible Lint From 3e97e35d6f71fb8505b2af7a7abefec571044be4 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 13:51:18 -0300 Subject: [PATCH 05/44] include python setup --- .github/workflows/kvm-test.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 67d2148e..d0e8ca4e 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -17,7 +17,7 @@ jobs: name: KVM Test runs-on: "cirun-runner--${{ github.run_id }}" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Vagrant boxes uses: actions/cache@v2 @@ -31,6 +31,11 @@ jobs: # run: | # sudo apt install -y virtualbox + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + - name: Install test dependencies. 
run: sudo pip3 install ansible From 50b998db30bf9336697ae7c15f3af692266941cd Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 13:57:37 -0300 Subject: [PATCH 06/44] add concurrency group and fix pip calls --- .github/workflows/kvm-test.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index d0e8ca4e..5544017e 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -3,9 +3,14 @@ name: Vagrant (KVM) Tests on: pull_request: + types: [opened, reopened] push: branches: - "*" +# Cancel concurrent runs +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true env: # Prevents Vagrant from creating symlinks @@ -37,7 +42,7 @@ jobs: cache: 'pip' - name: Install test dependencies. - run: sudo pip3 install ansible + run: pip install ansible - name: Install Ansible Dependencies working-directory: tests/ubuntu2004-singlenode From cf1604fce836777ba682e09a5916cad6964638c1 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:01:56 -0300 Subject: [PATCH 07/44] disable concurrent group and restrict trigger dirs --- .github/workflows/kvm-test.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 5544017e..eef760ce 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -2,15 +2,15 @@ name: Vagrant (KVM) Tests on: - pull_request: - types: [opened, reopened] push: branches: - "*" -# Cancel concurrent runs -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true + paths: + - "roles/**" + - "tasks/**" + - "tests/**" + - "Vagrantfile" + - ".github/workflows/kvm-test.yaml" env: # Prevents Vagrant from creating symlinks From d897096c430d42418230ee60d6f5311f0cba1c2f Mon Sep 17 
00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:08:26 -0300 Subject: [PATCH 08/44] add install vagrant step --- .github/workflows/kvm-test.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index eef760ce..88611353 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -32,9 +32,14 @@ jobs: restore-keys: | ${{ runner.os }}-vagrant- - # - name: Install vagrant - # run: | - # sudo apt install -y virtualbox + - name: Install vagrant + run: | + curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - + sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main" + sudo apt update && sudo apt install -y vagrant + + - name: Show Vagrant version + run: vagrant --version - uses: actions/setup-python@v5 with: @@ -50,9 +55,6 @@ jobs: ansible-galaxy collection install community.general ansible-galaxy collection install ansible.posix - - name: Show Vagrant version - run: vagrant --version - - name: Check Libvirt status run: | virsh --version From 8ee1e4485a309caf814f1c2670329ee64ed172e9 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:10:05 -0300 Subject: [PATCH 09/44] tmp replace ci runner for github hosted for fast test response --- .github/workflows/kvm-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 88611353..a2c93eeb 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -20,7 +20,8 @@ jobs: # https://github.com/jonashackt/vagrant-github-actions test-kvm: name: KVM Test - runs-on: "cirun-runner--${{ github.run_id }}" + # runs-on: "cirun-runner--${{ github.run_id }}" + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From b6792b13cceb617d7aa6f0ef7a91a32b1050576d Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 
14:15:20 -0300 Subject: [PATCH 10/44] libvirt installation and validation --- .github/workflows/kvm-test.yaml | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index a2c93eeb..93912dc9 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -25,6 +25,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Check virtualization support + run: | + egrep -c '(vmx|svm)' /proc/cpuinfo + sudo kvm-ok + + - name: Install KVM essentials + run: | + sudo apt install qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils + sudo adduser $(whoami) libvirt && sudo adduser $(whoami) kvm + - name: Cache Vagrant boxes uses: actions/cache@v2 with: @@ -39,6 +49,12 @@ jobs: sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main" sudo apt update && sudo apt install -y vagrant + - name: Check Libvirt status + run: | + virsh --version + virsh list --all + sudo systemctl status libvirtd + - name: Show Vagrant version run: vagrant --version @@ -56,10 +72,6 @@ jobs: ansible-galaxy collection install community.general ansible-galaxy collection install ansible.posix - - name: Check Libvirt status - run: | - virsh --version - - name: Run vagrant up working-directory: tests/ubuntu2004-singlenode run: vagrant up --provider=libvirt From 4b1dc815f87b95ec4cc26f09fc9fc06973e6b8ac Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:17:52 -0300 Subject: [PATCH 11/44] libvirt installation and validation --- .github/workflows/kvm-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 93912dc9..63af3c94 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -28,6 +28,7 @@ jobs: - name: Check virtualization support run: | egrep -c '(vmx|svm)' /proc/cpuinfo + sudo apt install cpu-checker sudo kvm-ok - 
name: Install KVM essentials From cd313edd25ebdea6da2ce9c24b88430073b63274 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:20:31 -0300 Subject: [PATCH 12/44] libvirt installation and validation --- .github/workflows/kvm-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 63af3c94..f72bd7b0 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -33,6 +33,7 @@ jobs: - name: Install KVM essentials run: | + sudo apt update sudo apt install qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils sudo adduser $(whoami) libvirt && sudo adduser $(whoami) kvm From e068359cc778b78295911c25adce419970a4798f Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:24:55 -0300 Subject: [PATCH 13/44] include libvirt plugin setup --- .github/workflows/kvm-test.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index f72bd7b0..9a3086c1 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -31,10 +31,11 @@ jobs: sudo apt install cpu-checker sudo kvm-ok + # https://vagrant-libvirt.github.io/vagrant-libvirt/#installation - name: Install KVM essentials run: | sudo apt update - sudo apt install qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils + sudo apt-get install -y qemu libvirt-daemon-system ebtables libguestfs-tools vagrant ruby-fog-libvirt sudo adduser $(whoami) libvirt && sudo adduser $(whoami) kvm - name: Cache Vagrant boxes @@ -60,6 +61,9 @@ jobs: - name: Show Vagrant version run: vagrant --version + - name: Install vagrant libvirt plugin + run: vagrant plugin install vagrant-libvirt + - uses: actions/setup-python@v5 with: python-version: '3.10' From 338e0599e71d27a04a33a79e08f457ae9f20fb25 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:30:26 -0300 Subject: [PATCH 14/44] fix bug with 
deps --- .github/workflows/kvm-test.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 9a3086c1..7988e090 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -36,7 +36,7 @@ jobs: run: | sudo apt update sudo apt-get install -y qemu libvirt-daemon-system ebtables libguestfs-tools vagrant ruby-fog-libvirt - sudo adduser $(whoami) libvirt && sudo adduser $(whoami) kvm + sudo adduser $USER libvirt && sudo adduser $USER kvm - name: Cache Vagrant boxes uses: actions/cache@v2 @@ -46,11 +46,11 @@ jobs: restore-keys: | ${{ runner.os }}-vagrant- - - name: Install vagrant - run: | - curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - - sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main" - sudo apt update && sudo apt install -y vagrant + # - name: Install vagrant + # run: | + # curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - + # sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main" + # sudo apt update && sudo apt install -y vagrant - name: Check Libvirt status run: | From cbdc942ad7ef14150a31e8583b557dd71a35bf8a Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 4 Mar 2024 14:33:34 -0300 Subject: [PATCH 15/44] re-enable cirun runner --- .github/workflows/kvm-test.yaml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 7988e090..ecb4dedb 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -20,8 +20,8 @@ jobs: # https://github.com/jonashackt/vagrant-github-actions test-kvm: name: KVM Test - # runs-on: "cirun-runner--${{ github.run_id }}" - runs-on: ubuntu-latest + runs-on: "cirun-runner--${{ github.run_id }}" + # runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ 
-46,12 +46,6 @@ jobs: restore-keys: | ${{ runner.os }}-vagrant- - # - name: Install vagrant - # run: | - # curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add - - # sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main" - # sudo apt update && sudo apt install -y vagrant - - name: Check Libvirt status run: | virsh --version From 70612882adc9e16b57a57c78b9fb6ef064793d27 Mon Sep 17 00:00:00 2001 From: "Vinicius D. Cerutti" <51954708+viniciusdc@users.noreply.github.com> Date: Tue, 5 Mar 2024 13:13:04 -0300 Subject: [PATCH 16/44] Update kvm-test.yaml Update kvm-test.yaml --- .github/workflows/kvm-test.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index ecb4dedb..8b7f9b7b 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -25,17 +25,17 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Check virtualization support - run: | - egrep -c '(vmx|svm)' /proc/cpuinfo - sudo apt install cpu-checker - sudo kvm-ok + # - name: Check virtualization support + # run: | + # egrep -c '(vmx|svm)' /proc/cpuinfo + # sudo apt install cpu-checker + # sudo kvm-ok # https://vagrant-libvirt.github.io/vagrant-libvirt/#installation - name: Install KVM essentials run: | sudo apt update - sudo apt-get install -y qemu libvirt-daemon-system ebtables libguestfs-tools vagrant ruby-fog-libvirt + sudo apt-get install -y qemu libvirt-daemon-system ebtables libguestfs-tools vagrant sudo adduser $USER libvirt && sudo adduser $USER kvm - name: Cache Vagrant boxes From 6f8df3e81016e381d723c5a40edc4f6283f10bfa Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Wed, 6 Mar 2024 10:33:58 -0300 Subject: [PATCH 17/44] reorganize GitHub Actions workflow for Vagrant (KVM) tests --- .github/inventory.ini | 7 ++ .github/workflows/kvm-test.yaml | 154 +++++++++----------------------- 2 files changed, 51 
insertions(+), 110 deletions(-) create mode 100644 .github/inventory.ini diff --git a/.github/inventory.ini b/.github/inventory.ini new file mode 100644 index 00000000..433e0cb4 --- /dev/null +++ b/.github/inventory.ini @@ -0,0 +1,7 @@ +hpc01-test + +[hpc_master] +hpc01-test + +[hpc_worker] +hpc01-test diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 8b7f9b7b..dc454556 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -1,111 +1,45 @@ --- -name: Vagrant (KVM) Tests - -on: - push: - branches: - - "*" - paths: - - "roles/**" - - "tasks/**" - - "tests/**" - - "Vagrantfile" - - ".github/workflows/kvm-test.yaml" - -env: - # Prevents Vagrant from creating symlinks - VAGRANT_DISABLE_VBOXSYMLINKCREATE: "1" - -jobs: - # https://github.com/jonashackt/vagrant-github-actions - test-kvm: - name: KVM Test - runs-on: "cirun-runner--${{ github.run_id }}" - # runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - # - name: Check virtualization support - # run: | - # egrep -c '(vmx|svm)' /proc/cpuinfo - # sudo apt install cpu-checker - # sudo kvm-ok - - # https://vagrant-libvirt.github.io/vagrant-libvirt/#installation - - name: Install KVM essentials - run: | - sudo apt update - sudo apt-get install -y qemu libvirt-daemon-system ebtables libguestfs-tools vagrant - sudo adduser $USER libvirt && sudo adduser $USER kvm - - - name: Cache Vagrant boxes - uses: actions/cache@v2 - with: - path: ~/.vagrant.d/boxes - key: ${{ runner.os }}-vagrant-${{ hashFiles('Vagrantfile') }} - restore-keys: | - ${{ runner.os }}-vagrant- - - - name: Check Libvirt status - run: | - virsh --version - virsh list --all - sudo systemctl status libvirtd - - - name: Show Vagrant version - run: vagrant --version - - - name: Install vagrant libvirt plugin - run: vagrant plugin install vagrant-libvirt - - - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - - - name: Install test dependencies. 
- run: pip install ansible - - - name: Install Ansible Dependencies - working-directory: tests/ubuntu2004-singlenode - run: | - ansible-galaxy collection install community.general - ansible-galaxy collection install ansible.posix - - - name: Run vagrant up - working-directory: tests/ubuntu2004-singlenode - run: vagrant up --provider=libvirt - - - name: Check Vagrant status - run: | - vagrant status - - - name: SSH into Vagrant VM - run: | - vagrant ssh -c "echo 'Hello from Vagrant VM'" - - - name: Collect System Info and Metrics for Debugging - if: success() - run: | - vagrant ssh -c " - echo 'System Information:' && uname -a - echo 'Available Disk Space:' && df -h - echo 'Memory Usage:' && free -m - echo 'CPU Information:' && lscpu - echo 'Network Configuration:' && ifconfig - echo 'Firewall Status:' && sudo ufw status - echo 'System Logs:' && journalctl --no-pager | tail -n 100 - " - - - name: Copy inventory files to .vagrant provider directory - run: | - cp -r inventory.template/* tests/ubuntu2004-singlenode/.vagrant/provisioners/ansible/inventory/ - - - name: Run vagrant provision - working-directory: tests/ubuntu2004-singlenode - run: vagrant provision - - - name: Cleanup - if: always() - run: | - vagrant destroy -f + name: Vagrant (KVM) Tests + + on: + pull_request: + push: + branches: + - main + + jobs: + test-kvm: + name: KVM Test + runs-on: "cirun-runner--${{ github.run_id }}" + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Install dependencies + run: | + pip install ansible + + - name: Install Ansible dependencies + run: | + ansible-galaxy collection install -r requirements.yaml + + - name: Create deploy folder and move inventory files + run: | + mkdir deploy + cp -r inventory.template/* deploy/ + cp .github/inventory.ini deploy/inventory.ini + + - name: Update firwall_internal_ip_range + run: | + echo "firewall_internal_ip_range: $(ip addr show eth0 | grep "inet\b" | 
awk '{print $2}' | cut -d/ -f1)" >> deploy/group_vars/all.yaml + + - name: Run ansible playbook + run: | + cd deploy + ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -v + env: + ANSIBLE_FORCE_COLOR: True From 3d757bf2997aa5503fe8226e15c6af0119857daf Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Wed, 6 Mar 2024 10:41:57 -0300 Subject: [PATCH 18/44] fix: rename host_vars file from hpc01-test.yaml to localhost.yaml for consistency --- .github/inventory.ini | 6 +++--- .github/workflows/kvm-test.yaml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/inventory.ini b/.github/inventory.ini index 433e0cb4..846b7bd5 100644 --- a/.github/inventory.ini +++ b/.github/inventory.ini @@ -1,7 +1,7 @@ -hpc01-test +localhost [hpc_master] -hpc01-test +localhost [hpc_worker] -hpc01-test +localhost diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index dc454556..024cd635 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -32,6 +32,8 @@ mkdir deploy cp -r inventory.template/* deploy/ cp .github/inventory.ini deploy/inventory.ini + # Rename host_vars guest name + mv deploy/host_vars/hpc01-test.yaml deploy/host_vars/localhost.yaml - name: Update firwall_internal_ip_range run: | From fe8836eabd5d632591e340e16a886d18a347f389 Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Thu, 7 Mar 2024 11:59:53 -0300 Subject: [PATCH 19/44] Refactor local testing --- roles/apt_packages/tasks/main.yml | 1 + roles/backups/tasks/backup.yaml | 1 + roles/cifs/tasks/client.yaml | 1 + roles/cifs/tasks/server.yaml | 1 + roles/grafana/tasks/grafana.yaml | 1 + roles/keycloak/tasks/keycloak.yaml | 1 + roles/mysql/defaults/main.yml | 9 +++ roles/mysql/tasks/mysql.yaml | 92 ++++++++++++++++++++++---- roles/nfs/tasks/client.yaml | 1 + roles/nfs/tasks/server.yaml | 4 +- roles/openldap/tasks/client.yaml | 1 + roles/openldap/tasks/openldap.yaml | 1 + 
roles/postgresql/tasks/postgresql.yaml | 1 + roles/slurm/tasks/main.yaml | 1 + roles/slurm/tasks/munge.yaml | 1 + roles/slurm/tasks/slurmctld.yaml | 1 + roles/slurm/tasks/slurmdbd.yaml | 1 + 17 files changed, 104 insertions(+), 15 deletions(-) diff --git a/roles/apt_packages/tasks/main.yml b/roles/apt_packages/tasks/main.yml index 74722cc9..3a00a5ef 100644 --- a/roles/apt_packages/tasks/main.yml +++ b/roles/apt_packages/tasks/main.yml @@ -1,6 +1,7 @@ --- - name: Ensure apt packages are installed become: true + timeout: 300 ansible.builtin.apt: name: "{{ installed_packages }}" state: latest diff --git a/roles/backups/tasks/backup.yaml b/roles/backups/tasks/backup.yaml index 49aab111..c6affbdf 100644 --- a/roles/backups/tasks/backup.yaml +++ b/roles/backups/tasks/backup.yaml @@ -1,6 +1,7 @@ --- - name: Ensure restic installed become: true + timeout: 300 ansible.builtin.apt: name: restic state: latest diff --git a/roles/cifs/tasks/client.yaml b/roles/cifs/tasks/client.yaml index 5524ad08..21a1d309 100644 --- a/roles/cifs/tasks/client.yaml +++ b/roles/cifs/tasks/client.yaml @@ -2,6 +2,7 @@ - name: Install cifs become: true ansible.builtin.apt: + timeout: 300 state: latest cache_valid_time: 3600 name: diff --git a/roles/cifs/tasks/server.yaml b/roles/cifs/tasks/server.yaml index 8427455c..41bca8ad 100644 --- a/roles/cifs/tasks/server.yaml +++ b/roles/cifs/tasks/server.yaml @@ -1,6 +1,7 @@ --- - name: Install samba become: true + timeout: 300 ansible.builtin.apt: state: latest cache_valid_time: 3600 diff --git a/roles/grafana/tasks/grafana.yaml b/roles/grafana/tasks/grafana.yaml index 00214a52..b7291305 100644 --- a/roles/grafana/tasks/grafana.yaml +++ b/roles/grafana/tasks/grafana.yaml @@ -12,6 +12,7 @@ - name: Install grafana become: true + timeout: 300 ansible.builtin.apt: name: grafana{{ grafana_version }} state: "{% if grafana_version %}present{% else %}latest{% endif %}" diff --git a/roles/keycloak/tasks/keycloak.yaml b/roles/keycloak/tasks/keycloak.yaml index 
917991ae..7e7ae20d 100644 --- a/roles/keycloak/tasks/keycloak.yaml +++ b/roles/keycloak/tasks/keycloak.yaml @@ -2,6 +2,7 @@ - name: Install openjdk and python requirements become: true ansible.builtin.apt: + timeout: 300 state: latest cache_valid_time: 3600 name: diff --git a/roles/mysql/defaults/main.yml b/roles/mysql/defaults/main.yml index 8931fe34..2446dffa 100644 --- a/roles/mysql/defaults/main.yml +++ b/roles/mysql/defaults/main.yml @@ -1,5 +1,7 @@ --- mysql_enabled: false +mysql_config_file: /etc/mysql/my.cnf + mysql_databases: - slurm - conda-store @@ -14,3 +16,10 @@ mysql_users: - username: conda-store password: eIbmUditL4RbQm0YPeLozRme privileges: "*.*:ALL" + +# Define a custom list of packages to install +mysql_packages: + - mysql-server + - mysql-common + +mysql_python_package: python3-mysqldb diff --git a/roles/mysql/tasks/mysql.yaml b/roles/mysql/tasks/mysql.yaml index 19f565f0..7f28a88a 100644 --- a/roles/mysql/tasks/mysql.yaml +++ b/roles/mysql/tasks/mysql.yaml @@ -1,14 +1,33 @@ --- -- name: Install mysql +- name: Check if MySQL is already installed. + ansible.builtin.stat: + path: "{{ mysql_config_file }}" + register: mysql_installed + +- name: Update apt cache if MySQL is not yet installed. + ansible.builtin.apt: + update_cache: yes + changed_when: False + when: not mysql_installed.stat.exists + +- name: Ensure MySQL Python libraries are installed. + become: true + timeout: 300 + ansible.builtin.apt: + name: "{{ mysql_python_package }}" + state: present + +- name: Ensure MySQL packages are installed. become: true + timeout: 300 ansible.builtin.apt: - name: - - mysql-server - - python3 - - python3-pip - - python3-mysqldb - state: latest - cache_valid_time: 3600 + name: "{{ mysql_packages }}" + state: present + register: mysql_install_packages + +- name: Check if MySQL packages were installed. 
+ ansible.builtin.set_fact: + mysql_install_packages: "{{ mysql_install_packages is defined and mysql_install_packages.changed }}" - name: Ensure mysql settings in file become: true @@ -17,20 +36,66 @@ section: mysqld option: "{{ item.key }}" value: "{{ item.value }}" - mode: "0644" + mode: "0655" backup: true with_dict: "{{ mysql_config }}" - notify: restart services mysql + notify: Restart services mysql -- name: Create mysql database +- name: Ensure MySQL is running and enabled become: true + ansible.builtin.service: + name: mysql + state: started + enabled: true + register: mysql_service + +- name: Check if the debian.cnf file exists + ansible.builtin.stat: + path: /etc/mysql/debian.cnf + register: debian_cnf_file + +- name: Grab the debian.cnf content + become: true + ansible.builtin.slurp: + src: /etc/mysql/debian.cnf + register: debian_cnf_content + when: debian_cnf_file.stat.exists + +- name: Set facts from debian.cnf content + set_fact: + debian_sys_maint_user: "debian-sys-maint" + debian_sys_maint_password: "{{ (debian_cnf_content['content'] | b64decode).split('\n') | select('match', '^password = ') | first | split('=') | last | trim }}" + when: debian_cnf_file.stat.exists + +- name: Create mysql databases using debian-sys-maint credentials community.mysql.mysql_db: + login_user: "{{ debian_sys_maint_user | default(omit) }}" + login_password: "{{ debian_sys_maint_password | default(omit) }}" name: "{{ item }}" state: present with_items: "{{ mysql_databases }}" + when: debian_cnf_file.stat.exists -- name: Create mysql users - become: true +- name: Create mysql databases as root (if debian.cnf does not exist) + community.mysql.mysql_db: + name: "{{ item }}" + state: present + with_items: "{{ mysql_databases }}" + when: not debian_cnf_file.stat.exists + +- name: Create mysql users using debian-sys-maint credentials + community.mysql.mysql_user: + login_user: "{{ debian_sys_maint_user | default(omit) }}" + login_password: "{{ debian_sys_maint_password | 
default(omit) }}" + name: "{{ item.username }}" + password: "{{ item.password }}" + priv: "{{ item.privileges }}" + state: present + with_items: "{{ mysql_users }}" + no_log: true # Avoid logging user creds + when: debian_cnf_file.stat.exists + +- name: Create mysql users as root (if debian.cnf does not exist) community.mysql.mysql_user: name: "{{ item.username }}" password: "{{ item.password }}" @@ -38,3 +103,4 @@ state: present with_items: "{{ mysql_users }}" no_log: true # Avoid logging user creds + when: not debian_cnf_file.stat.exists diff --git a/roles/nfs/tasks/client.yaml b/roles/nfs/tasks/client.yaml index ac6300d4..9e0dde7c 100644 --- a/roles/nfs/tasks/client.yaml +++ b/roles/nfs/tasks/client.yaml @@ -2,6 +2,7 @@ - name: Install nfs become: true ansible.builtin.apt: + timeout: 300 state: latest cache_valid_time: 3600 name: diff --git a/roles/nfs/tasks/server.yaml b/roles/nfs/tasks/server.yaml index 41b91418..eada1eb1 100644 --- a/roles/nfs/tasks/server.yaml +++ b/roles/nfs/tasks/server.yaml @@ -1,6 +1,6 @@ --- -- name: Install nfs - become: true +- name: Install nfs packages + timeout: 300 ansible.builtin.apt: state: latest cache_valid_time: 3600 diff --git a/roles/openldap/tasks/client.yaml b/roles/openldap/tasks/client.yaml index 80503277..465bbb92 100644 --- a/roles/openldap/tasks/client.yaml +++ b/roles/openldap/tasks/client.yaml @@ -1,6 +1,7 @@ --- - name: Install ldap client packages become: true + timeout: 300 ansible.builtin.apt: name: - libpam-ldapd diff --git a/roles/openldap/tasks/openldap.yaml b/roles/openldap/tasks/openldap.yaml index 3719f3ce..511ff3d1 100644 --- a/roles/openldap/tasks/openldap.yaml +++ b/roles/openldap/tasks/openldap.yaml @@ -2,6 +2,7 @@ - name: Install openldap packages become: true ansible.builtin.apt: + timeout: 300 name: - slapd - ldap-utils diff --git a/roles/postgresql/tasks/postgresql.yaml b/roles/postgresql/tasks/postgresql.yaml index 6a4fe492..410f3fa2 100644 --- a/roles/postgresql/tasks/postgresql.yaml +++ 
b/roles/postgresql/tasks/postgresql.yaml @@ -1,6 +1,7 @@ --- - name: Install PostgreSQL become: true + timeout: 300 ansible.builtin.apt: name: - postgresql diff --git a/roles/slurm/tasks/main.yaml b/roles/slurm/tasks/main.yaml index 091f8f11..d3aff375 100644 --- a/roles/slurm/tasks/main.yaml +++ b/roles/slurm/tasks/main.yaml @@ -5,6 +5,7 @@ - name: Install slurm client packages become: true + timeout: 300 ansible.builtin.apt: state: latest cache_valid_time: 3600 diff --git a/roles/slurm/tasks/munge.yaml b/roles/slurm/tasks/munge.yaml index 63d89761..a8d69e40 100644 --- a/roles/slurm/tasks/munge.yaml +++ b/roles/slurm/tasks/munge.yaml @@ -38,6 +38,7 @@ - name: Install munge controller packages become: true + timeout: 300 ansible.builtin.apt: state: latest cache_valid_time: 3600 diff --git a/roles/slurm/tasks/slurmctld.yaml b/roles/slurm/tasks/slurmctld.yaml index 0eae5e4a..1b9a7a57 100644 --- a/roles/slurm/tasks/slurmctld.yaml +++ b/roles/slurm/tasks/slurmctld.yaml @@ -55,6 +55,7 @@ - name: Install slurm controller packages become: true ansible.builtin.apt: + timeout: 300 state: latest cache_valid_time: 3600 name: diff --git a/roles/slurm/tasks/slurmdbd.yaml b/roles/slurm/tasks/slurmdbd.yaml index 3cf1f7e7..da2e3806 100644 --- a/roles/slurm/tasks/slurmdbd.yaml +++ b/roles/slurm/tasks/slurmdbd.yaml @@ -55,6 +55,7 @@ - name: Install slurm controller packages become: true + timeout: 300 ansible.builtin.apt: state: latest cache_valid_time: 3600 From dcabe9bd3a6d81f6e99321de0fdfb2b24d7ccb80 Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Thu, 7 Mar 2024 12:00:50 -0300 Subject: [PATCH 20/44] update CI workflow to run on ubuntu-latest and improve network info extract --- .github/inventory.ini | 6 +++--- .github/workflows/kvm-test.yaml | 26 +++++++++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/inventory.ini b/.github/inventory.ini index 846b7bd5..09b05b72 100644 --- a/.github/inventory.ini +++ b/.github/inventory.ini 
@@ -1,7 +1,7 @@ -localhost +hpc01-test connection=local ansible_ssh_host=127.0.0.1 [hpc_master] -localhost +hpc01-test [hpc_worker] -localhost +hpc01-test diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 024cd635..005a9a95 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -10,7 +10,9 @@ jobs: test-kvm: name: KVM Test - runs-on: "cirun-runner--${{ github.run_id }}" + # Disable ci-run untill addressing apt-get lock issue + # runs-on: "cirun-runner--${{ github.run_id }}" + runs-on: "ubuntu-latest" steps: - uses: actions/checkout@v4 @@ -32,12 +34,26 @@ mkdir deploy cp -r inventory.template/* deploy/ cp .github/inventory.ini deploy/inventory.ini - # Rename host_vars guest name - mv deploy/host_vars/hpc01-test.yaml deploy/host_vars/localhost.yaml - - name: Update firwall_internal_ip_range + - name: Check network adapter run: | - echo "firewall_internal_ip_range: $(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)" >> deploy/group_vars/all.yaml + ip a + + - name: Check hosts + run: | + cat /etc/hosts + + - name: Extract Network Information + run: | + chmod +x .github/scripts/extract_network_info.sh + ./.github/scripts/extract_network_info.sh + echo "adapter_name=$(cat network_info.txt | head -1 | awk '{print $2}')" >> $GITHUB_ENV + echo "ip_range=$(cat network_info.txt | awk 'NR > 1 && $3 {print $3}')" >> $GITHUB_ENV + + - name: Update Firewall IP range and Host network adapter + run: | + echo "firewall_internal_ip_range: $ip_range" >> deploy/group_vars/all.yaml + echo "internal_interface: $adapter_name" >> deploy/group_vars/all.yaml - name: Run ansible playbook run: | From 3b81ba50e454491fdd6be23e0be9a218c4c9d9dd Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 12:16:10 -0300 Subject: [PATCH 21/44] fix: update script to use 'ip address show' for broader compatibility and store network interface and IP range in network_info.txt --- 
.github/scripts/extract_network_info.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/scripts/extract_network_info.sh diff --git a/.github/scripts/extract_network_info.sh b/.github/scripts/extract_network_info.sh new file mode 100644 index 00000000..4cce4cf0 --- /dev/null +++ b/.github/scripts/extract_network_info.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +IFS=$'\n' # Split output into lines based on newline + +# Query `ip -br -4 address show` (brief, IPv4-only) and keep the first UP eth*/ens*/enp* interface +readarray -t lines <<< "$(ip -br -4 address show | grep UP)" +for line in "${lines[@]}"; do + if [[ $line =~ (eth[0-9]|ens[0-9]+|enp[0-9].*) ]]; then + INTERFACE=$(echo "$line" | awk '{print $1}') + IP_RANGE=$(echo "$line" | awk '{print $3}') + break + fi +done + +# Write variables into network_info.txt (line 1: interface name, line 2: address/prefix read by the workflow) +echo "Interface: $INTERFACE" > network_info.txt +echo "IP Range: $IP_RANGE" >> network_info.txt From d6b802aaeef13ddd1bb10d295db00bcb5cf819a5 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 12:20:51 -0300 Subject: [PATCH 22/44] fix: 'become: true' was mistakenly removed --- roles/nfs/tasks/server.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/nfs/tasks/server.yaml b/roles/nfs/tasks/server.yaml index eada1eb1..d96cc02d 100644 --- a/roles/nfs/tasks/server.yaml +++ b/roles/nfs/tasks/server.yaml @@ -1,6 +1,7 @@ --- - name: Install nfs packages timeout: 300 + become: true ansible.builtin.apt: state: latest cache_valid_time: 3600 From 7921c6bca3c829c8439f118afd3f759766d2000f Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 12:26:30 -0300 Subject: [PATCH 23/44] chore: fix indentation and update timeout value for nfs wait_for task to 10 minutes --- roles/nfs/tasks/client.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/roles/nfs/tasks/client.yaml b/roles/nfs/tasks/client.yaml index 9e0dde7c..46e842b1 100644 --- a/roles/nfs/tasks/client.yaml +++ b/roles/nfs/tasks/client.yaml @@ -2,7 +2,6
@@ - name: Install nfs become: true ansible.builtin.apt: - timeout: 300 state: latest cache_valid_time: 3600 name: @@ -12,7 +11,7 @@ ansible.builtin.wait_for: host: "{{ item.host }}" port: 2049 - timeout: 600 + timeout: 600 # Set a timeout of 10 minutes with_items: "{{ nfs_client_mounts }}" - name: Ensure nfs mounted directories exist From 104cbb7478ee2cc91b24b95d5369a17403a969ba Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 12:33:00 -0300 Subject: [PATCH 24/44] chore: remove redundant 'cmd' attribute and update notification messages for service restarts --- roles/cifs/handlers/main.yaml | 1 - roles/cifs/tasks/server.yaml | 2 +- roles/dask_gateway/handlers/main.yaml | 1 - roles/dask_gateway/tasks/dask_gateway.yaml | 4 ++-- roles/jupyterhub/handlers/main.yaml | 3 --- roles/mysql/handlers/main.yaml | 1 - roles/openldap/handlers/main.yaml | 1 - roles/openldap/tasks/client.yaml | 8 ++++---- roles/postgresql/handlers/main.yaml | 1 - roles/traefik/handlers/main.yaml | 1 - roles/traefik/tasks/traefik.yaml | 10 +++++----- 11 files changed, 12 insertions(+), 21 deletions(-) diff --git a/roles/cifs/handlers/main.yaml b/roles/cifs/handlers/main.yaml index fcd0b3cc..c66ca20a 100644 --- a/roles/cifs/handlers/main.yaml +++ b/roles/cifs/handlers/main.yaml @@ -5,6 +5,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - smbd diff --git a/roles/cifs/tasks/server.yaml b/roles/cifs/tasks/server.yaml index 41bca8ad..0750f3c9 100644 --- a/roles/cifs/tasks/server.yaml +++ b/roles/cifs/tasks/server.yaml @@ -23,4 +23,4 @@ owner: root group: root mode: "0644" - notify: restart services samba + notify: Restart services samba diff --git a/roles/dask_gateway/handlers/main.yaml b/roles/dask_gateway/handlers/main.yaml index 7c083370..9ecb4de8 100644 --- a/roles/dask_gateway/handlers/main.yaml +++ b/roles/dask_gateway/handlers/main.yaml @@ -5,6 +5,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - dask-gateway diff --git 
a/roles/dask_gateway/tasks/dask_gateway.yaml b/roles/dask_gateway/tasks/dask_gateway.yaml index fb364c30..6ce9110b 100644 --- a/roles/dask_gateway/tasks/dask_gateway.yaml +++ b/roles/dask_gateway/tasks/dask_gateway.yaml @@ -49,7 +49,7 @@ owner: dask group: dask mode: "0644" - notify: restart services dask-gateway + notify: Restart services dask-gateway - name: Copy the dask-gateway systemd service file become: true @@ -77,7 +77,7 @@ owner: root group: root mode: "0644" - notify: restart services dask-gateway + notify: Restart services dask-gateway - name: Ensure dask-gateway is enabled on boot become: true diff --git a/roles/jupyterhub/handlers/main.yaml b/roles/jupyterhub/handlers/main.yaml index c27dcd6d..07a5fd09 100644 --- a/roles/jupyterhub/handlers/main.yaml +++ b/roles/jupyterhub/handlers/main.yaml @@ -5,7 +5,6 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - jupyterhub @@ -15,7 +14,6 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - jupyterhub-proxy @@ -25,6 +23,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - jupyterhub-ssh diff --git a/roles/mysql/handlers/main.yaml b/roles/mysql/handlers/main.yaml index 96781a56..a89efe3b 100644 --- a/roles/mysql/handlers/main.yaml +++ b/roles/mysql/handlers/main.yaml @@ -5,6 +5,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - mysql diff --git a/roles/openldap/handlers/main.yaml b/roles/openldap/handlers/main.yaml index bca5e5b4..ca5c23bf 100644 --- a/roles/openldap/handlers/main.yaml +++ b/roles/openldap/handlers/main.yaml @@ -6,7 +6,6 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - nscd - nslcd diff --git a/roles/openldap/tasks/client.yaml b/roles/openldap/tasks/client.yaml index 465bbb92..5e99d9f1 100644 --- a/roles/openldap/tasks/client.yaml +++ b/roles/openldap/tasks/client.yaml @@ -16,7 +16,7 @@ regexp: pam_mkhomedir\.so line: session required pam_mkhomedir.so 
skel=/etc/skel/ umask=0022 state: present - notify: restart services ldap + notify: Restart services ldap - name: LDAP Authentication | Query ldap in nsswitch.conf become: true @@ -29,7 +29,7 @@ - passwd - shadow - group - notify: restart services ldap + notify: Restart services ldap - name: LDAP Authentication | no cache for ldap in nscd.conf become: true @@ -41,7 +41,7 @@ with_items: - passwd - group - notify: restart services ldap + notify: Restart services ldap - name: LDAP Authentication | Configure /etc/nslcd.conf become: true @@ -49,4 +49,4 @@ src: nslcd.conf.j2 dest: /etc/nslcd.conf mode: "0600" - notify: restart services ldap + notify: Restart services ldap diff --git a/roles/postgresql/handlers/main.yaml b/roles/postgresql/handlers/main.yaml index e59eec69..393c4b69 100644 --- a/roles/postgresql/handlers/main.yaml +++ b/roles/postgresql/handlers/main.yaml @@ -5,6 +5,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - postgresql diff --git a/roles/traefik/handlers/main.yaml b/roles/traefik/handlers/main.yaml index c52eac80..2ff770f8 100644 --- a/roles/traefik/handlers/main.yaml +++ b/roles/traefik/handlers/main.yaml @@ -5,6 +5,5 @@ name: "{{ item }}" enabled: "yes" state: restarted - cmd: "" with_items: - traefik diff --git a/roles/traefik/tasks/traefik.yaml b/roles/traefik/tasks/traefik.yaml index 48329849..b3e303ab 100644 --- a/roles/traefik/tasks/traefik.yaml +++ b/roles/traefik/tasks/traefik.yaml @@ -89,7 +89,7 @@ owner: traefik group: traefik when: traefik_tls_certificate is defined - notify: restart services traefik + notify: Restart services traefik register: _traefik_tls_certificate - name: Copy TLS key if provided @@ -102,7 +102,7 @@ owner: traefik group: traefik when: traefik_tls_key is defined - notify: restart services traefik + notify: Restart services traefik register: _traefik_tls_key - name: Copy traefik configuration @@ -113,7 +113,7 @@ mode: "0600" owner: traefik group: traefik - notify: restart services 
traefik + notify: Restart services traefik - name: Copy traefik dynamic configuration become: true @@ -123,7 +123,7 @@ mode: "0600" owner: traefik group: traefik - notify: restart services traefik + notify: Restart services traefik - name: Copy the traefik systemd service file become: true @@ -155,7 +155,7 @@ owner: root group: root mode: "0644" - notify: restart services traefik + notify: Restart services traefik - name: Ensure Traefik is enabled on boot become: true From 534ecccddf1265384435a1952f3e18820ff2a355 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 12:44:55 -0300 Subject: [PATCH 25/44] fix: incorrect placements of timeouts --- roles/cifs/tasks/client.yaml | 2 +- roles/keycloak/tasks/keycloak.yaml | 5 +++-- roles/openldap/tasks/openldap.yaml | 2 +- roles/slurm/tasks/slurmctld.yaml | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/roles/cifs/tasks/client.yaml b/roles/cifs/tasks/client.yaml index 21a1d309..1ef2a4ef 100644 --- a/roles/cifs/tasks/client.yaml +++ b/roles/cifs/tasks/client.yaml @@ -1,8 +1,8 @@ --- - name: Install cifs become: true - ansible.builtin.apt: timeout: 300 + ansible.builtin.apt: state: latest cache_valid_time: 3600 name: diff --git a/roles/keycloak/tasks/keycloak.yaml b/roles/keycloak/tasks/keycloak.yaml index 7e7ae20d..5d242ac0 100644 --- a/roles/keycloak/tasks/keycloak.yaml +++ b/roles/keycloak/tasks/keycloak.yaml @@ -1,8 +1,8 @@ --- - name: Install openjdk and python requirements become: true - ansible.builtin.apt: timeout: 300 + ansible.builtin.apt: state: latest cache_valid_time: 3600 name: @@ -65,7 +65,8 @@ - name: Ensure Keycloak admin user exists become: true - ansible.builtin.command: /opt/keycloak-{{ keycloak_version }}/bin/add-user-keycloak.sh -r master -u "{{ keycloak_admin_username }}" -p "{{ keycloak_admin_password + ansible.builtin.command: + /opt/keycloak-{{ keycloak_version }}/bin/add-user-keycloak.sh -r master -u "{{ keycloak_admin_username }}" -p "{{ keycloak_admin_password }}" 
args: creates: /opt/keycloak-{{ keycloak_version }}/standalone/configuration/keycloak-add-user.json diff --git a/roles/openldap/tasks/openldap.yaml b/roles/openldap/tasks/openldap.yaml index 511ff3d1..b800a19f 100644 --- a/roles/openldap/tasks/openldap.yaml +++ b/roles/openldap/tasks/openldap.yaml @@ -1,8 +1,8 @@ --- - name: Install openldap packages become: true - ansible.builtin.apt: timeout: 300 + ansible.builtin.apt: name: - slapd - ldap-utils diff --git a/roles/slurm/tasks/slurmctld.yaml b/roles/slurm/tasks/slurmctld.yaml index 1b9a7a57..a45a0f35 100644 --- a/roles/slurm/tasks/slurmctld.yaml +++ b/roles/slurm/tasks/slurmctld.yaml @@ -54,8 +54,8 @@ - name: Install slurm controller packages become: true - ansible.builtin.apt: timeout: 300 + ansible.builtin.apt: state: latest cache_valid_time: 3600 name: From 8d5c6d354c81bb92d9f4f8a7b8f3ca780321fe85 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 13:13:51 -0300 Subject: [PATCH 26/44] chore: re-enable ci-run for KVM Test job --- .github/workflows/kvm-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 005a9a95..e4a8fcac 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -11,8 +11,8 @@ test-kvm: name: KVM Test # Disable ci-run untill addressing apt-get lock issue - # runs-on: "cirun-runner--${{ github.run_id }}" - runs-on: "ubuntu-latest" + runs-on: "cirun-runner--${{ github.run_id }}" + # runs-on: "ubuntu-latest" steps: - uses: actions/checkout@v4 From 4ad5007135dbd175165f03f462a70d430d72c02e Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 13:23:29 -0300 Subject: [PATCH 27/44] fix: update CI job to address /var/lib/dpkg/lock-frontend issue and kill any process holding the lock --- .github/workflows/kvm-test.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml 
index e4a8fcac..7f6d279b 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -10,7 +10,7 @@ jobs: test-kvm: name: KVM Test - # Disable ci-run untill addressing apt-get lock issue + # Disable ci-run untill addressing /var/lib/dpkg/lock-frontend issue runs-on: "cirun-runner--${{ github.run_id }}" # runs-on: "ubuntu-latest" steps: @@ -42,6 +42,10 @@ - name: Check hosts run: | cat /etc/hosts + - name: Kill any process that helds the lock + run: | + sudo lsof /var/lib/dpkg/lock-frontend + sudo kill -9 $(sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}') - name: Extract Network Information run: | From cb28bc084dd5a4a50674ccfe5a4d7b3f5359bf1f Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 13:59:26 -0300 Subject: [PATCH 28/44] chore: simplify killing process holding lock by using 'killall' command --- .github/workflows/kvm-test.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 7f6d279b..1bb887d0 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -44,8 +44,7 @@ cat /etc/hosts - name: Kill any process that helds the lock run: | - sudo lsof /var/lib/dpkg/lock-frontend - sudo kill -9 $(sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}') + sudo killall apt apt-get - name: Extract Network Information run: | From d1fe0dfd348c454d116bd4ed33907ec2439d295c Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 16:44:31 -0300 Subject: [PATCH 29/44] chore: update file permissions and paths for Slurm configuration files and directories. 
--- .github/workflows/kvm-test.yaml | 3 --- roles/slurm/defaults/main.yml | 2 ++ roles/slurm/tasks/main.yaml | 8 ++++---- roles/slurm/tasks/slurm_exporter.yaml | 1 + roles/slurm/tasks/slurmctld.yaml | 12 +++++++++--- roles/slurm/tasks/slurmd.yaml | 8 +++++++- roles/slurm/tasks/slurmdbd.yaml | 12 +++++++++--- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 1bb887d0..0141dcd5 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -42,9 +42,6 @@ - name: Check hosts run: | cat /etc/hosts - - name: Kill any process that helds the lock - run: | - sudo killall apt apt-get - name: Extract Network Information run: | diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml index 5fa83f95..25a5363f 100644 --- a/roles/slurm/defaults/main.yml +++ b/roles/slurm/defaults/main.yml @@ -6,6 +6,8 @@ slurmd_enabled: false slurmctld_enabled: false slurmdbd_enabled: false +SlurmConfigFileDIr: /etc/slurm-llnl + slurm_config: ClusterName: cluster # slurmctld options diff --git a/roles/slurm/tasks/main.yaml b/roles/slurm/tasks/main.yaml index d3aff375..8c613a43 100644 --- a/roles/slurm/tasks/main.yaml +++ b/roles/slurm/tasks/main.yaml @@ -16,7 +16,7 @@ - name: Ensure that slurm configuration directory exists become: true ansible.builtin.file: - path: /etc/slurm + path: "{{ SlurmConfigFileDIr }}" state: directory mode: "0755" owner: root @@ -26,10 +26,10 @@ become: true ansible.builtin.template: src: templates/slurm.conf - dest: /etc/slurm/slurm.conf + dest: "{{ SlurmConfigFileDIr }}/slurm.conf" owner: root group: root - mode: "0444" + mode: "0755" register: _slurm_config - name: Install extra execution host configs @@ -40,7 +40,7 @@ ConstrainCores=yes ConstrainRAMSpace=yes ConstrainSwapSpace=yes - dest: /etc/slurm/cgroup.conf + dest: "{{ SlurmConfigFileDIr }}/cgroup.conf" owner: root group: root mode: "0444" diff --git 
a/roles/slurm/tasks/slurm_exporter.yaml b/roles/slurm/tasks/slurm_exporter.yaml index dd595cc9..da14ae97 100644 --- a/roles/slurm/tasks/slurm_exporter.yaml +++ b/roles/slurm/tasks/slurm_exporter.yaml @@ -1,6 +1,7 @@ --- - name: Install golang ansible.builtin.include_tasks: golang.yaml + - name: Check that the slurm exporter binary exists ansible.builtin.stat: path: /usr/local/bin/prometheus_slurm_exporter diff --git a/roles/slurm/tasks/slurmctld.yaml b/roles/slurm/tasks/slurmctld.yaml index a45a0f35..b990f5f7 100644 --- a/roles/slurm/tasks/slurmctld.yaml +++ b/roles/slurm/tasks/slurmctld.yaml @@ -1,22 +1,28 @@ --- +# Must be writable by user SlurmUser. +# The file must be accessible by the primary and backup control machines. - name: Ensure slurm state directory exists become: true ansible.builtin.file: path: "{{ slurm_config.StateSaveLocation }}" state: directory - mode: "0700" + mode: "0755" owner: slurm group: slurm +# Must be writable by user SlurmUser. +# The file must be accessible by the primary and backup control machines. - name: Ensure slurm log directory exists become: true ansible.builtin.file: path: "{{ slurm_config.SlurmctldLogFile | dirname }}" state: directory - mode: "0700" + mode: "0755" owner: slurm group: slurm +# Must be writable by user root. Preferably writable and removable by SlurmUser. +# The file must be accessible by the primary and backup control machines. - name: Ensure slurm pid directory exists become: true ansible.builtin.file: @@ -33,7 +39,7 @@ [Unit] Description=Slurm controller daemon After=network.target munge.service - ConditionPathExists=/etc/slurm/slurm.conf + ConditionPathExists={{ SlurmConfigFileDIr }}/slurm.conf [Service] Type=forking diff --git a/roles/slurm/tasks/slurmd.yaml b/roles/slurm/tasks/slurmd.yaml index 7105c146..b44ec6b0 100644 --- a/roles/slurm/tasks/slurmd.yaml +++ b/roles/slurm/tasks/slurmd.yaml @@ -1,4 +1,6 @@ --- +# Permissions must be set to 755 so that job scripts can be executed from this directory. 
+# A distinct file must exist on each compute node. - name: Create slurm spool directory become: true ansible.builtin.file: @@ -8,6 +10,8 @@ mode: "0755" state: directory +# Must be writable by user root. +# A distinct file must exist on each compute node. - name: Create slurm log directory become: true ansible.builtin.file: @@ -17,6 +21,8 @@ mode: "0755" state: directory +# Must be writable by user root. +# A distinct file must exist on each compute node. - name: Ensure slurm pid directory exists become: true ansible.builtin.file: @@ -33,7 +39,7 @@ [Unit] Description=Slurm node daemon After=network.target munge.service remote-fs.target - ConditionPathExists=/etc/slurm/slurm.conf + ConditionPathExists={{ SlurmConfigFileDIr }}/slurm.conf [Service] Type=forking diff --git a/roles/slurm/tasks/slurmdbd.yaml b/roles/slurm/tasks/slurmdbd.yaml index da2e3806..14aba571 100644 --- a/roles/slurm/tasks/slurmdbd.yaml +++ b/roles/slurm/tasks/slurmdbd.yaml @@ -1,13 +1,15 @@ --- +# Must be writable by user SlurmUser. - name: Ensure slurmdbd log directory exists become: true ansible.builtin.file: path: "{{ slurmdbd_config.LogFile | dirname }}" state: directory - mode: "0700" + mode: "0755" owner: slurm group: slurm +# Must be writable by user SlurmUser. - name: Ensure slurm pid directory exists become: true ansible.builtin.file: @@ -17,11 +19,15 @@ owner: slurm group: slurm +# This file should be only on the computer where SlurmDBD executes +# and should only be readable by the user which executes SlurmDBD (e.g. "slurm"). 
+# This file should be protected from unauthorized access since +# it contains a database password - name: Install slurmdbd.conf become: true ansible.builtin.template: src: templates/slurmdbd.conf - dest: /etc/slurm/slurmdbd.conf + dest: "{{ SlurmConfigFileDIr }}/slurmdbd.conf" owner: slurm group: slurm mode: "0600" @@ -34,7 +40,7 @@ [Unit] Description=Slurm DBD accounting daemon After=network.target munge.service - ConditionPathExists=/etc/slurm/slurmdbd.conf + ConditionPathExists={{ SlurmConfigFileDIr }}/slurmdbd.conf [Service] Type=forking From 01d4a2364125508e3ef51c50c8fabca6b9f78051 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 7 Mar 2024 16:46:58 -0300 Subject: [PATCH 30/44] chore: re-enable CI to run on ubuntu-latest --- .github/workflows/kvm-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 0141dcd5..243988d5 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -11,8 +11,8 @@ test-kvm: name: KVM Test # Disable ci-run untill addressing /var/lib/dpkg/lock-frontend issue - runs-on: "cirun-runner--${{ github.run_id }}" - # runs-on: "ubuntu-latest" + # runs-on: "cirun-runner--${{ github.run_id }}" + runs-on: "ubuntu-latest" steps: - uses: actions/checkout@v4 From e94366caef92889d2a892cfc7686c7185e03a5e0 Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Wed, 13 Mar 2024 13:55:40 -0300 Subject: [PATCH 31/44] test new config --- .github/inventory.ini | 7 ----- .github/scripts/gen_inventory.sh | 29 ++++++++++++++++++++ .github/workflows/kvm-test.yaml | 46 ++++++++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 9 deletions(-) delete mode 100644 .github/inventory.ini create mode 100644 .github/scripts/gen_inventory.sh diff --git a/.github/inventory.ini b/.github/inventory.ini deleted file mode 100644 index 09b05b72..00000000 --- a/.github/inventory.ini +++ /dev/null @@ -1,7 +0,0 @@ -hpc01-test 
connection=local ansible_ssh_host=127.0.0.1 - -[hpc_master] -hpc01-test - -[hpc_worker] -hpc01-test diff --git a/.github/scripts/gen_inventory.sh b/.github/scripts/gen_inventory.sh new file mode 100644 index 00000000..2d625478 --- /dev/null +++ b/.github/scripts/gen_inventory.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Check if the correct number of arguments was provided +if [ $# -ne 2 ]; then + echo "Usage: $0 HOSTNAME OUTPUT_PATH" >&2 + exit 1 +fi + +# Get the hostname from the first argument +HOSTNAME=$1 + +# Get the output path from the second argument +OUTPUT_PATH=$2 + +# Ensure the directory exists (quoted so paths containing spaces survive) +mkdir -p "$(dirname "$OUTPUT_PATH")" + +# Create the inventory.ini file at the specified output path with dynamic content (heredoc ends at EOF) +cat > "$OUTPUT_PATH" <<EOF +${HOSTNAME} connection=local ansible_ssh_host=127.0.0.1 + +[hpc_master] +${HOSTNAME} + +[hpc_worker] +${HOSTNAME} +EOF + +echo "inventory.ini file has been created at $OUTPUT_PATH." diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 243988d5..0770dabc 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -33,7 +33,9 @@ run: | mkdir deploy cp -r inventory.template/* deploy/ - cp .github/inventory.ini deploy/inventory.ini + + chmod +x .github/scripts/gen_inventory.sh + ./.github/scripts/gen_inventory.sh $(hostname -s) deploy/inventory.ini - name: Check network adapter run: | @@ -50,10 +52,50 @@ echo "adapter_name=$(cat network_info.txt | head -1 | awk '{print $2}')" >> $GITHUB_ENV echo "ip_range=$(cat network_info.txt | awk 'NR > 1 && $3 {print $3}')" >> $GITHUB_ENV - - name: Update Firewall IP range and Host network adapter + - name: Update group vars run: | + echo "Updating group vars for firewall and internal network" echo "firewall_internal_ip_range: $ip_range" >> deploy/group_vars/all.yaml echo "internal_interface: $adapter_name" >> deploy/group_vars/all.yaml + echo "SlurmConfigFileDIr: /etc/slurm" >> deploy/group_vars/all.yaml + + echo "Replace hpc01-test with $(hostname -s) in
group_vars/hpc_worker.yaml file" + sed -i "s/hpc01-test/$(hostname -s)/g" deploy/group_vars/hpc_worker.yaml + + - name: Disable unattended-upgrades + run: | + # Ensure all commands are non-interactive by setting DEBIAN_FRONTEND to noninteractive + export DEBIAN_FRONTEND=noninteractive + + # Check if unattended-upgrades service is active and stop it if it is + if systemctl is-active --quiet unattended-upgrades; then + echo "Stopping unattended-upgrades service..." + sudo systemctl stop unattended-upgrades + else + echo "unattended-upgrades service is not active. Skipping stop command." + fi + + # Proceed with killing any running APT processes without manual confirmation + echo "Checking and killing running APT processes if necessary..." + sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n +2 | while read PID; do + if [ ! -z "$PID" ]; then + echo "Killing PID $PID" + sudo kill -9 $PID + fi + done + + # Configure any packages that are in an unclean state non-interactively + echo "Configuring any packages in an unclean state..." + sudo dpkg --configure -a + + # Remove unattended-upgrades to avoid automatic background updates during script execution + echo "Disabling unattended upgrades..." 
+ sudo apt-get remove --purge unattended-upgrades -y || true + + # - name: Run ssh session + # uses: mxschmitt/action-tmate@v3 + # with: + # detached: true - name: Run ansible playbook run: | From 4c640d7990f115779b408962f2b764048ca7473b Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Wed, 13 Mar 2024 14:04:00 -0300 Subject: [PATCH 32/44] ensure systemd task is executed by root --- roles/postgresql/tasks/postgresql.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/postgresql/tasks/postgresql.yaml b/roles/postgresql/tasks/postgresql.yaml index 410f3fa2..e40ed47b 100644 --- a/roles/postgresql/tasks/postgresql.yaml +++ b/roles/postgresql/tasks/postgresql.yaml @@ -12,6 +12,7 @@ cache_valid_time: 3600 - name: Ensure PostgreSQL service is running + become: true ansible.builtin.systemd: name: postgresql state: started From 058b519c29a89f07ce0dc088bd1a487c96cc68fd Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Fri, 15 Mar 2024 10:25:49 -0300 Subject: [PATCH 33/44] update group_vars LDAP server url --- .github/workflows/kvm-test.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 0770dabc..6e065391 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -11,8 +11,8 @@ test-kvm: name: KVM Test # Disable ci-run untill addressing /var/lib/dpkg/lock-frontend issue - # runs-on: "cirun-runner--${{ github.run_id }}" - runs-on: "ubuntu-latest" + runs-on: "cirun-runner--${{ github.run_id }}" + # runs-on: "ubuntu-latest" steps: - uses: actions/checkout@v4 @@ -54,6 +54,8 @@ - name: Update group vars run: | + cp deploy/group_vars/all.yaml deploy/group_vars/all.yaml.bak + echo "Updating group vars for firewall and internal network" echo "firewall_internal_ip_range: $ip_range" >> deploy/group_vars/all.yaml echo "internal_interface: $adapter_name" >> deploy/group_vars/all.yaml @@ -62,6 +64,11 @@ echo "Replace 
hpc01-test with $(hostname -s) in group_vars/hpc_worker.yaml file" sed -i "s/hpc01-test/$(hostname -s)/g" deploy/group_vars/hpc_worker.yaml + echo "Replace LDAP server URI with $(hostname -s) in group_vars/all.yaml file" + sed -i "s|ldap://hpc01-test:389|ldap://$(hostname -s):389|g" deploy/group_vars/all.yaml + + diff deploy/group_vars/all.yaml.bak deploy/group_vars/all.yaml || true + - name: Disable unattended-upgrades run: | # Ensure all commands are non-interactive by setting DEBIAN_FRONTEND to noninteractive From 2dd43177fd3a2bbc0973edd94eae9a697d8fd3de Mon Sep 17 00:00:00 2001 From: vinicius douglas cerutti Date: Fri, 15 Mar 2024 10:44:25 -0300 Subject: [PATCH 34/44] increase MySQL packages install time --- roles/mysql/tasks/mysql.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/mysql/tasks/mysql.yaml b/roles/mysql/tasks/mysql.yaml index 7f28a88a..eeebca87 100644 --- a/roles/mysql/tasks/mysql.yaml +++ b/roles/mysql/tasks/mysql.yaml @@ -19,7 +19,7 @@ - name: Ensure MySQL packages are installed. become: true - timeout: 300 + timeout: 600 ansible.builtin.apt: name: "{{ mysql_packages }}" state: present From 458130947b0267772c37193ed2275410e1c09e52 Mon Sep 17 00:00:00 2001 From: "Vinicius D. Cerutti" <51954708+viniciusdc@users.noreply.github.com> Date: Thu, 21 Mar 2024 13:49:33 -0300 Subject: [PATCH 35/44] Increase instance size --- .cirun.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirun.yml b/.cirun.yml index 45d806a8..cdc9c43a 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -3,7 +3,7 @@ runners: # Cloud Provider: AWS cloud: aws # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance - instance_type: t3a.2xlarge + instance_type: t3.2xlarge machine_image: ami-0a388df278199ff52 # Region: Oregon region: us-west-2 From de458a86ae22e91480ef03e8f0f0298fed74c68b Mon Sep 17 00:00:00 2001 From: "Vinicius D. 
Cerutti" <51954708+viniciusdc@users.noreply.github.com> Date: Wed, 27 Mar 2024 21:06:13 -0300 Subject: [PATCH 36/44] Split partial/full deployment ansible runs --- .github/workflows/kvm-test.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 6e065391..494e4a1a 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -103,10 +103,26 @@ # uses: mxschmitt/action-tmate@v3 # with: # detached: true + - name: Move hpc variables from group_vars to temp location + run: | + mv deploy/group_vars/hpc_*.yaml /tmp + + - name: Run ansible playbook (partial) + run: | + cd deploy + ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -v + env: + ANSIBLE_FORCE_COLOR: True + ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" + + - name: Move hpc variables back to group_vars + run: | + mv /tmp/hpc_*.yaml deploy/group_vars/ - - name: Run ansible playbook + - name: Run ansible playbook (Full) run: | cd deploy ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -v env: ANSIBLE_FORCE_COLOR: True + ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" From acaf5a6e3739261dc007c6402a8530bae0329466 Mon Sep 17 00:00:00 2001 From: "Vinicius D.
Cerutti" <51954708+viniciusdc@users.noreply.github.com> Date: Wed, 27 Mar 2024 21:36:45 -0300 Subject: [PATCH 37/44] Update AMI us-east-1-Jellyfish-22.04-amd64 --- .cirun.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cirun.yml b/.cirun.yml index cdc9c43a..e297948b 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -4,7 +4,8 @@ runners: cloud: aws # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance instance_type: t3.2xlarge - machine_image: ami-0a388df278199ff52 + machine_image: ami-053053586808c3e70 + # ami-0a388df278199ff52 # Region: Oregon region: us-west-2 # Use Spot Instances for cost savings From f9e3cb4b7f99d3b018aa8acbee57c9aa0144dd45 Mon Sep 17 00:00:00 2001 From: "Vinicius D. Cerutti" <51954708+viniciusdc@users.noreply.github.com> Date: Wed, 27 Mar 2024 22:07:20 -0300 Subject: [PATCH 38/44] Ubuntu Server 22.04 LTS (HVM) x86 --- .cirun.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirun.yml b/.cirun.yml index e297948b..9e9b8be7 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -4,7 +4,7 @@ runners: cloud: aws # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance instance_type: t3.2xlarge - machine_image: ami-053053586808c3e70 + machine_image: ami-0b8b44ec9a8f90422 # ami-0a388df278199ff52 # Region: Oregon region: us-west-2 From afcd1b55b2819d99e12d3403176f6fdf8f3535a9 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 15 Apr 2024 12:31:52 -0300 Subject: [PATCH 39/44] Update AMI instance Ubuntu Server 20.04 --- .cirun.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.cirun.yml b/.cirun.yml index 9e9b8be7..d23f28a8 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -4,7 +4,7 @@ runners: cloud: aws # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance instance_type: t3.2xlarge - machine_image: ami-0b8b44ec9a8f90422 + machine_image: ami-0eb199b995e2bc4e3 # ami-0a388df278199ff52 # Region: Oregon region: us-west-2 @@ -13,4 +13,4 @@ 
runners: - true - false labels: - - cirun-runner \ No newline at end of file + - cirun-runner From 91eebfc0e3a1e6bf066c8fdf78fc41d2ed420028 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 15 Apr 2024 12:51:06 -0300 Subject: [PATCH 40/44] double timeout for postgresql packages install --- roles/postgresql/tasks/postgresql.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/postgresql/tasks/postgresql.yaml b/roles/postgresql/tasks/postgresql.yaml index e40ed47b..ddc9fe81 100644 --- a/roles/postgresql/tasks/postgresql.yaml +++ b/roles/postgresql/tasks/postgresql.yaml @@ -1,7 +1,7 @@ --- - name: Install PostgreSQL become: true - timeout: 300 + timeout: 600 ansible.builtin.apt: name: - postgresql From d9dda481b5572011a42af5eb14951cfa95f29e4f Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 15 Apr 2024 13:29:07 -0300 Subject: [PATCH 41/44] move long demanding installs to pre-install --- .github/workflows/kvm-test.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 494e4a1a..1c1bd97a 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -99,14 +99,15 @@ echo "Disabling unattended upgrades..." 
sudo apt-get remove --purge unattended-upgrades -y || true - # - name: Run ssh session - # uses: mxschmitt/action-tmate@v3 - # with: - # detached: true - name: Move hpc variables from group_vars to temp location run: | mv deploy/group_vars/hpc_*.yaml /tmp - + + - name: Includes long demanding package installs + run: | + echo "mysql_enabled: true" >> deploy/group_vars/hpc_master.yaml + echo "postgresql_enabled: true" >> deploy/group_vars/hpc_master.yaml + - name: Run ansible playbook (partial) run: | cd deploy @@ -114,7 +115,11 @@ env: ANSIBLE_FORCE_COLOR: True ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" - + + - name: Remove custom changes + run: | + rm -rf deploy/group_vars/hpc_master.yaml + - name: Move hpc variables back to group_vars run: | mv /tmp/hpc_*.yaml deploy/group_vars/ From 93b8d1a40049260cc5f1fa6bcbe4c161be5839bb Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Mon, 15 Apr 2024 13:50:40 -0300 Subject: [PATCH 42/44] addess postgresql install --- .github/workflows/kvm-test.yaml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index 1c1bd97a..c6c0a800 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -103,10 +103,10 @@ run: | mv deploy/group_vars/hpc_*.yaml /tmp - - name: Includes long demanding package installs - run: | - echo "mysql_enabled: true" >> deploy/group_vars/hpc_master.yaml - echo "postgresql_enabled: true" >> deploy/group_vars/hpc_master.yaml + # - name: Includes long demanding package installs + # run: | + # echo "mysql_enabled: true" >> deploy/group_vars/hpc_master.yaml + # echo "postgresql_enabled: true" >> deploy/group_vars/hpc_master.yaml - name: Run ansible playbook (partial) run: | @@ -116,13 +116,10 @@ ANSIBLE_FORCE_COLOR: True ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" - - name: Remove custom changes - run: | - rm -rf deploy/group_vars/hpc_master.yaml - - name: Move hpc variables back to group_vars run: | 
mv /tmp/hpc_*.yaml deploy/group_vars/ + echo "postgresql_enabled: true" >> deploy/group_vars/hpc_master.yaml - name: Run ansible playbook (Full) run: | From 660f24f39ad710d7b66913b8edffb674a988cc37 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Tue, 16 Apr 2024 15:47:21 -0300 Subject: [PATCH 43/44] add debug session --- .github/workflows/kvm-test.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/kvm-test.yaml b/.github/workflows/kvm-test.yaml index c6c0a800..0ef02afd 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -99,6 +99,10 @@ echo "Disabling unattended upgrades..." sudo apt-get remove --purge unattended-upgrades -y || true + # Use tmate + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + - name: Move hpc variables from group_vars to temp location run: | mv deploy/group_vars/hpc_*.yaml /tmp @@ -111,7 +115,7 @@ - name: Run ansible playbook (partial) run: | cd deploy - ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -v + ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -vvv env: ANSIBLE_FORCE_COLOR: True ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" @@ -119,12 +123,11 @@ - name: Move hpc variables back to group_vars run: | mv /tmp/hpc_*.yaml deploy/group_vars/ - echo "postgresql_enabled: true" >> deploy/group_vars/hpc_master.yaml - name: Run ansible playbook (Full) run: | cd deploy - ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -v + ansible-playbook ../playbook.yaml -i inventory.ini --connection=local -vvv env: ANSIBLE_FORCE_COLOR: True ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" From 36b0e8e6a56fe7b3a367fbcbc63b37d3a3ea720d Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Tue, 16 Apr 2024 16:16:55 -0300 Subject: [PATCH 44/44] test disk usage --- .github/workflows/kvm-test.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/kvm-test.yaml 
b/.github/workflows/kvm-test.yaml index 0ef02afd..7fe6e964 100644 --- a/.github/workflows/kvm-test.yaml +++ b/.github/workflows/kvm-test.yaml @@ -99,6 +99,11 @@ echo "Disabling unattended upgrades..." sudo apt-get remove --purge unattended-upgrades -y || true + - name: List disk space + if: success() || failure() + run: | + df -h -l + # Use tmate - name: Setup tmate session uses: mxschmitt/action-tmate@v3 @@ -124,6 +129,11 @@ run: | mv /tmp/hpc_*.yaml deploy/group_vars/ + - name: List disk space + if: success() || failure() + run: | + df -h -l + - name: Run ansible playbook (Full) run: | cd deploy @@ -131,3 +141,8 @@ env: ANSIBLE_FORCE_COLOR: True ANSIBLE_CALLBACKS_ENABLED: "profile_tasks" + + - name: List disk space + if: success() || failure() + run: | + df -h -l