From fbbbc6809fa179c38b3beebb791af947a13e62b2 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Fri, 24 Jul 2020 21:51:33 +0530 Subject: [PATCH 01/13] Enabling threading for cassandra backup Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 076379a9dd..77812ffc57 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -26,6 +26,7 @@ from sys import exit from tempfile import mkdtemp from time import strftime +import concurrent.futures parser = ArgumentParser(description="Create a snapshot and create tar ball inside tardirectory") parser.add_argument("-d", "--datadirectory", metavar="datadir", default='/var/lib/cassandra/data', @@ -41,6 +42,9 @@ tmpdir = mkdtemp() makedirs(tmpdir+sep+"cassandra_backup") +def customCopy(root, root_target_dir): + print("copying {} to {}".format(root, root_target_dir)) + copytree(src=root, dst=root_target_dir, ignore=ignore_patterns('.*')) def copy(): ''' @@ -48,17 +52,27 @@ def copy(): ''' root_levels = args.datadirectory.rstrip('/').count(sep) ignore_list = compile(tmpdir+sep+"cassandra_backup"+sep+'(system|system|systemtauth|system_traces|system_schema|system_distributed)') - + # List of the threds running in background + futures = [] try: - for root, dirs, files in walk(args.datadirectory): - root_target_dir = tmpdir+sep+"cassandra_backup"+sep+sep.join(root.split(sep)[root_levels+1:-2]) - if match(ignore_list, root_target_dir): - continue - if root.split(sep)[-1] == args.snapshotname: - copytree(src=root, dst=root_target_dir, ignore=ignore_patterns('.*')) + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + for root, dirs, files in walk(args.datadirectory): + root_target_dir = tmpdir+sep+"cassandra_backup"+sep+sep.join(root.split(sep)[root_levels+1:-2]) + if match(ignore_list, root_target_dir): + continue + if 
root.split(sep)[-1] == args.snapshotname: + # Keeping copy operation in background with threads + tmp_arr = [root, root_target_dir] + futures.append( executor.submit( lambda p: customCopy(*p), tmp_arr)) except Exception as e: print(e) - + # Checking status of the copy operation + for future in concurrent.futures.as_completed(futures): + try: + print("Task completed for ...") + print(future.result()) + except Exception as e: + print(e) # Creating schema command = "cqlsh -e 'DESC SCHEMA' > {}/cassandra_backup/db_schema.cql".format(tmpdir) @@ -82,3 +96,4 @@ def copy(): # Cleaning up backup directory rmtree(tmpdir) print("Cassandra backup completed and stored in {}/{}.tar.gz".format(args.tardirectory, args.snapshotname)) + From 08d4982afc10914095ec8305f8e304e8dbf9d73c Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Fri, 24 Jul 2020 22:21:40 +0530 Subject: [PATCH 02/13] Taking snapshot optional for cassandra backup Signed-off-by: Rajesh Rajendran --- ansible/roles/cassandra-backup/tasks/main.yml | 2 +- deploy/cassandra_backup.py | 20 +++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ansible/roles/cassandra-backup/tasks/main.yml b/ansible/roles/cassandra-backup/tasks/main.yml index 8d6da3b6d2..6a8a2c082f 100755 --- a/ansible/roles/cassandra-backup/tasks/main.yml +++ b/ansible/roles/cassandra-backup/tasks/main.yml @@ -14,7 +14,7 @@ - name: run the backup script become: yes - shell: python cassandra_backup.py --snapshotname "{{ cassandra_backup_gzip_file_name }}" + shell: python3 cassandra_backup.py --snapshotname "{{ cassandra_backup_gzip_file_name }}" args: chdir: /data/cassandra/backup async: 7200 diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 77812ffc57..6c4851facf 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -36,6 +36,8 @@ help="Name with which snapshot to be taken. 
Default {}".format("cassandra_backup-"+strftime("%Y-%m-%d"))) parser.add_argument("-t", "--tardirectory", metavar="tardir", default=getcwd(), help="Path to create the tarball. Default {}".format(getcwd())) +parser.add_argument("--disablesnapshot", action="store_true", + help="disable taking snapshot, snapshot name can be given via -s flag") args = parser.parse_args() # Create temporary directory to copy data @@ -81,14 +83,20 @@ def copy(): print("Couldn't backup schema, exiting...") exit(1) print("Schema backup completed. saved in {}/cassandra_backup/db_schema.sql".format(tmpdir)) -# Cleaning all old snapshots -command = "nodetool clearsnapshot" -system(command) +# Default value for snapshot +rc = 0 + # Creating snapshots -command = "nodetool snapshot -t {}".format(args.snapshotname) -rc = system(command) +if not args.disablesnapshot: + # Cleaning all old snapshots + command = "nodetool clearsnapshot" + system(command) + # Taking new snapshot + command = "nodetool snapshot -t {}".format(args.snapshotname) + rc = system(command) if rc == 0: - print("Snapshot taken.") + if not args.disablesnapshot: + print("Snapshot taken.") copy() print("Making a tarball: {}.tar.gz".format(args.snapshotname)) command = "cd {} && tar -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) From 94c020f9ea0c0a9676a588ece10c63af73566de4 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Fri, 24 Jul 2020 22:50:08 +0530 Subject: [PATCH 03/13] Taking cassandra snapshot serially via ansible Signed-off-by: Rajesh Rajendran --- ansible/cassandra-backup.yml | 20 +++++++++++++++++-- ansible/roles/cassandra-backup/tasks/main.yml | 7 +++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/ansible/cassandra-backup.yml b/ansible/cassandra-backup.yml index d8a054b580..87fadd0bfa 100644 --- a/ansible/cassandra-backup.yml +++ b/ansible/cassandra-backup.yml @@ -1,7 +1,23 @@ +# This task will take snapshot serially for all cassandra nodes +# If we club both taking 
snapshot and upload the data, then the +# interval of snapshots b/w nodes will be very high; Which might lead +# to data discrepancies - hosts: cassandra become: yes - serial: 1 + serial: true + tasks: + - name: taking cassandra snapshots + shell: | + nodetool clearsnapshot + nodetool snapshot -t "cassandra-backup-{{ lookup('pipe', 'date +%Y%m%d') }}-{{ ansible_hostname }}" + +# Once snaphot is done, +# We can take the snapshot and compress it and upload it +# This will take some cpu and memory in the nodes +# Because of that we're running it serially, so that it won't affect the perfomance +- hosts: cassandra + serial: true vars_files: - ['{{ inventory_dir }}/secrets.yml'] roles: - - cassandra-backup + - {name: cassandra-backup, vars: [ additional_arguments: "--disablesnapshot"]} diff --git a/ansible/roles/cassandra-backup/tasks/main.yml b/ansible/roles/cassandra-backup/tasks/main.yml index 6a8a2c082f..9d154af074 100755 --- a/ansible/roles/cassandra-backup/tasks/main.yml +++ b/ansible/roles/cassandra-backup/tasks/main.yml @@ -4,7 +4,10 @@ - name: copy the backup script become: yes - template: src=cassandra_backup.j2 dest=/data/cassandra/backup/cassandra_backup.py mode=0755 + template: + src: ../../../../deploy/cassandra_backup.py + dest: /data/cassandra/backup/cassandra_backup.py + mode: 0755 - set_fact: cassandra_backup_gzip_file_name: "cassandra-backup-{{ lookup('pipe', 'date +%Y%m%d') }}-{{ ansible_hostname }}" @@ -14,7 +17,7 @@ - name: run the backup script become: yes - shell: python3 cassandra_backup.py --snapshotname "{{ cassandra_backup_gzip_file_name }}" + shell: python3 cassandra_backup.py --snapshotname "{{ cassandra_backup_gzip_file_name }}" "{{additional_arguments|d('')}}" args: chdir: /data/cassandra/backup async: 7200 From 6b7092a740896fe6307e88340bb52dbdd2feda79 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Fri, 24 Jul 2020 23:38:38 +0530 Subject: [PATCH 04/13] Install azure cli if not present Signed-off-by: Rajesh Rajendran --- 
ansible/roles/azure-cli/tasks/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/azure-cli/tasks/main.yml b/ansible/roles/azure-cli/tasks/main.yml index cf29bcb106..798d80188c 100644 --- a/ansible/roles/azure-cli/tasks/main.yml +++ b/ansible/roles/azure-cli/tasks/main.yml @@ -1,2 +1,3 @@ - name: install azure cli - shell: curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + shell: + which az || curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash From 2036ce754c342d1e0817c78864e168dbb0e1356b Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Fri, 24 Jul 2020 23:43:20 +0530 Subject: [PATCH 05/13] root permission for backup Signed-off-by: Rajesh Rajendran --- ansible/cassandra-backup.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/cassandra-backup.yml b/ansible/cassandra-backup.yml index 87fadd0bfa..9fa6688894 100644 --- a/ansible/cassandra-backup.yml +++ b/ansible/cassandra-backup.yml @@ -17,6 +17,7 @@ # Because of that we're running it serially, so that it won't affect the perfomance - hosts: cassandra serial: true + become: yes vars_files: - ['{{ inventory_dir }}/secrets.yml'] roles: From 1061dd6414838a565d05d2b37a5e26a4a23a6f09 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 14:21:14 +0530 Subject: [PATCH 06/13] waiting between snapshots Signed-off-by: Rajesh Rajendran --- ansible/cassandra-backup.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ansible/cassandra-backup.yml b/ansible/cassandra-backup.yml index 9fa6688894..14d79f0283 100644 --- a/ansible/cassandra-backup.yml +++ b/ansible/cassandra-backup.yml @@ -10,6 +10,11 @@ shell: | nodetool clearsnapshot nodetool snapshot -t "cassandra-backup-{{ lookup('pipe', 'date +%Y%m%d') }}-{{ ansible_hostname }}" + - name: sleeping 1 min b/w snapshots + pause: + echo: true + minutes: 1 # A positive number of minutes to pause for. + prompt: "Pausing after snapshot" # Optional text to use for the prompt message.
# Once snaphot is done, # We can take the snapshot and compress it and upload it From b9f4bc554b4536b482d58986823b4422914e9a4b Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 14:21:41 +0530 Subject: [PATCH 07/13] Disabling serial upload of snapshots Signed-off-by: Rajesh Rajendran --- ansible/cassandra-backup.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/cassandra-backup.yml b/ansible/cassandra-backup.yml index 14d79f0283..73ef0fcbf1 100644 --- a/ansible/cassandra-backup.yml +++ b/ansible/cassandra-backup.yml @@ -21,7 +21,6 @@ # This will take some cpu and memory in the nodes # Because of that we're running it serially, so that it won't affect the perfomance - hosts: cassandra - serial: true become: yes vars_files: - ['{{ inventory_dir }}/secrets.yml'] From 4adde58f5b7c784faf1d025163d9c0523059afd9 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 15:54:27 +0530 Subject: [PATCH 08/13] Print cassandra backup background thread task status Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 6c4851facf..962f32ae0f 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -71,8 +71,7 @@ def copy(): # Checking status of the copy operation for future in concurrent.futures.as_completed(futures): try: - print("Task completed for ...") - print(future.result()) + print("Task completed. 
Result: {}".format(future.result())) except Exception as e: print(e) From 8f643e973d4d49eb4d11791ccad10d79d5667008 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 15:55:49 +0530 Subject: [PATCH 09/13] change the worker process in to automatic Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 962f32ae0f..cf7a82cb8e 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -57,7 +57,7 @@ def copy(): # List of the threds running in background futures = [] try: - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: for root, dirs, files in walk(args.datadirectory): root_target_dir = tmpdir+sep+"cassandra_backup"+sep+sep.join(root.split(sep)[root_levels+1:-2]) if match(ignore_list, root_target_dir): From 4eac924e27acfd17780b8ef6993ceba03acd8cea Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 16:02:20 +0530 Subject: [PATCH 10/13] Automatic cpu core detection for defining number of workers Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index cf7a82cb8e..74efd50433 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -19,7 +19,7 @@ for help ./cassandra_backup.py -h ''' -from os import walk, sep, system, getcwd, makedirs +from os import walk, sep, system, getcwd, makedirs, cpu_count from argparse import ArgumentParser from shutil import rmtree, ignore_patterns, copytree from re import match, compile @@ -36,6 +36,8 @@ help="Name with which snapshot to be taken. 
Default {}".format("cassandra_backup-"+strftime("%Y-%m-%d"))) parser.add_argument("-t", "--tardirectory", metavar="tardir", default=getcwd(), help="Path to create the tarball. Default {}".format(getcwd())) +parser.add_argument("-w", "--workers", metavar="workers", + default=cpu_count(), help="Number of workers to use. Default same as cpu cores {}".format(cpu_count())) parser.add_argument("--disablesnapshot", action="store_true", help="disable taking snapshot, snapshot name can be given via -s flag") args = parser.parse_args() @@ -57,7 +59,7 @@ def copy(): # List of the threds running in background futures = [] try: - with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor: for root, dirs, files in walk(args.datadirectory): root_target_dir = tmpdir+sep+"cassandra_backup"+sep+sep.join(root.split(sep)[root_levels+1:-2]) if match(ignore_list, root_target_dir): From 552f77e6317cc90456db890d5ef71f987498579b Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Sat, 25 Jul 2020 18:13:08 +0530 Subject: [PATCH 11/13] Check tar creation succeeded or not Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 74efd50433..26796b920f 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -101,7 +101,10 @@ def copy(): copy() print("Making a tarball: {}.tar.gz".format(args.snapshotname)) command = "cd {} && tar -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) - system(command) + rc = system(command) + if rc != 0: + print("Creation of tar failed") + exit(1) # Cleaning up backup directory rmtree(tmpdir) print("Cassandra backup completed and stored in {}/{}.tar.gz".format(args.tardirectory, args.snapshotname)) From 7de7a33653eebe589ce16ef0cc702298355faa36 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran 
Date: Sun, 26 Jul 2020 23:28:35 +0530 Subject: [PATCH 12/13] Remove files after adding them to tar Else it'll take 2x size of the backup till the backup completes Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 26796b920f..5f1e6e85e5 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -100,7 +100,7 @@ def copy(): print("Snapshot taken.") copy() print("Making a tarball: {}.tar.gz".format(args.snapshotname)) - command = "cd {} && tar -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) + command = "cd {} && tar --remove-files -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) rc = system(command) if rc != 0: print("Creation of tar failed") From a2f77c36bec9626747fc17a5dc8a2a2bb5b8c1b7 Mon Sep 17 00:00:00 2001 From: Rajesh Rajendran Date: Mon, 27 Jul 2020 11:11:36 +0530 Subject: [PATCH 13/13] feat: Updated the if condition with proper context Signed-off-by: Rajesh Rajendran --- deploy/cassandra_backup.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/deploy/cassandra_backup.py b/deploy/cassandra_backup.py index 5f1e6e85e5..bdabf66373 100755 --- a/deploy/cassandra_backup.py +++ b/deploy/cassandra_backup.py @@ -84,8 +84,6 @@ def copy(): print("Couldn't backup schema, exiting...") exit(1) print("Schema backup completed. 
saved in {}/cassandra_backup/db_schema.sql".format(tmpdir)) -# Default value for snapshot -rc = 0 # Creating snapshots if not args.disablesnapshot: @@ -95,17 +93,21 @@ def copy(): # Taking new snapshot command = "nodetool snapshot -t {}".format(args.snapshotname) rc = system(command) -if rc == 0: - if not args.disablesnapshot: - print("Snapshot taken.") - copy() - print("Making a tarball: {}.tar.gz".format(args.snapshotname)) - command = "cd {} && tar --remove-files -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) - rc = system(command) if rc != 0: - print("Creation of tar failed") + print("Backup failed") exit(1) - # Cleaning up backup directory - rmtree(tmpdir) - print("Cassandra backup completed and stored in {}/{}.tar.gz".format(args.tardirectory, args.snapshotname)) + print("Snapshot taken.") + +# Copying the snapshot to proper folder structure +copy() +# Creating tarball +print("Making a tarball: {}.tar.gz".format(args.snapshotname)) +command = "cd {} && tar --remove-files -czvf {}/{}.tar.gz *".format(tmpdir, args.tardirectory, args.snapshotname) +rc = system(command) +if rc != 0: + print("Creation of tar failed") + exit(1) +# Cleaning up backup directory +rmtree(tmpdir) +print("Cassandra backup completed and stored in {}/{}.tar.gz".format(args.tardirectory, args.snapshotname))