diff --git a/release/scripts/CopyHarmonizedScoringFilesPOS.py b/release/scripts/CopyHarmonizedScoringFilesPOS.py index 28cded92..5a9e45b2 100644 --- a/release/scripts/CopyHarmonizedScoringFilesPOS.py +++ b/release/scripts/CopyHarmonizedScoringFilesPOS.py @@ -1,4 +1,4 @@ -import sys, os, shutil, stat, glob, re +import sys, os, shutil, stat, glob, re, pwd from os import path import requests import argparse @@ -18,12 +18,13 @@ class CopyHarmonizedScoringFilesPOS: for gb in genebuilds: log_msg[type][gb] = [] - def __init__(self, new_ftp_scores_dir, staged_harmonized_files_dir, harmonized_files_dir,md5_sql_filepath): + def __init__(self, new_ftp_scores_dir, staged_harmonized_files_dir, harmonized_files_dir, md5_sql_filepath, username): self.new_ftp_scores_dir = new_ftp_scores_dir self.harmonized_files_staged_dir = staged_harmonized_files_dir self.harmonized_files_prod_dir = harmonized_files_dir self.scores_list_file_path = new_ftp_scores_dir+'/'+self.scores_list_file self.md5_sql_filepath = md5_sql_filepath + self.username = username if not os.path.exists(new_ftp_scores_dir): print(f'Error: The path to the data directory can\'t be found ({new_ftp_scores_dir}).') @@ -104,13 +105,23 @@ def copy_harmonized_files_to_production(self): self.create_directory(f'{self.harmonized_files_prod_dir}/{pgs_id}/') self.create_directory(harmonized_file_prod_dir) if os.path.isdir(harmonized_file_prod_dir): - shutil.copy2(harmonized_file_staged, harmonized_file_prod) - # Change chmod to allow group write access try: - os.chmod(harmonized_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH) - print(copy_msg) - except: - print(f">>>>> ERROR! Can't change the read/write access of the file '{harmonized_file}'!") + shutil.copyfile(harmonized_file_staged, harmonized_file_prod) + # If there is any permission issue + except PermissionError as e: + print(f'>>>>> ERROR! File \'{harmonized_file}\' (Permission issue) - {e}') + except IOError as e: + print(f'>>>>> ERROR! File \'{harmonized_file}\' couldn\'t be copied to production: "{self.harmonized_files_prod_dir}"!') + print(e) + + # Change chmod to allow group write access + file_owner = pwd.getpwuid(os.stat(harmonized_file_prod).st_uid).pw_name + if self.username == file_owner: + try: + os.chmod(harmonized_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH) + print(copy_msg) + except: + print(f">>>>> ERROR! Can't change the read/write access of the file '{harmonized_file}'!") file_info = { 'genebuild': gb, 'name': harmonized_file, 'status': copy_type } if not pgs_id in self.harmonized_files_to_copy: @@ -140,7 +151,7 @@ def copy_harmonized_files_to_metadata(self): """ Copy the new/updated scoring files to the metadata directory (temporary FTP) """ print("\n***** Step 2 - Copy the new/updated scoring files to the metadata directory (temporary FTP) *****") - md5_sql_file = open(self.md5_sql_filepath,'a') + # md5_sql_file = open(self.md5_sql_filepath,'a') for score_id in sorted(self.harmonized_files_to_copy.keys()): @@ -169,11 +180,11 @@ def copy_harmonized_files_to_metadata(self): shutil.copy2(harmonized_file_prod, harmonized_file_release) self.log_msg[harmonized_status][harmonized_gb].append(score_id) - # md5 checksum SQL commands - sql_cmd = f"UPDATE {self.sql_table} SET hmpos_{harmonized_gb}_md5='{harmonized_file_md5}' WHERE score_id={id};\n" - md5_sql_file.write(sql_cmd) + # # md5 checksum SQL commands + # sql_cmd = f"UPDATE {self.sql_table} SET hmpos_{harmonized_gb}_md5='{harmonized_file_md5}' WHERE score_id={id};\n" + # md5_sql_file.write(sql_cmd) - md5_sql_file.close() + # md5_sql_file.close() # Copied PGS Scoring files self.print_log_msg('new', 'New PGS Scoring files') diff --git a/release/scripts/CopyScoringFiles.py b/release/scripts/CopyScoringFiles.py index b5d68546..ea644260 100644 --- a/release/scripts/CopyScoringFiles.py +++ b/release/scripts/CopyScoringFiles.py @@ -1,4 +1,4 @@ -import sys, os, shutil, stat, glob, re +import sys, os, shutil, stat, glob, re, pwd from os import path import requests import argparse @@ -10,18 +10,18 @@ class CopyScoringFiles: ftp_std_scoringfile_suffix = '.txt.gz' scores_list_file = 'pgs_scores_list.txt' sql_table = 'catalog_scorefilemd5' - log_msg = { 'new': [], 'updated': [], 'skipped': [] } - def __init__(self, new_ftp_scores_dir, staged_scores_dir, scoring_files_dir,md5_sql_filepath): + def __init__(self, new_ftp_scores_dir, staged_scores_dir, scoring_files_dir, md5_sql_filepath, username): self.new_ftp_scores_dir = new_ftp_scores_dir self.new_scoringfiles_dir = staged_scores_dir self.scoring_files_dir = scoring_files_dir self.md5_sql_filepath = md5_sql_filepath + self.username = username if not os.path.exists(new_ftp_scores_dir): print(f'Error: The path to the data directory can\'t be found ({new_ftp_scores_dir}).') @@ -128,7 +128,7 @@ def copy_scoring_files_to_production(self): if copy_msg != '': # Copy file try: - shutil.copy2(scoring_file_ftp_priv, scoring_file_prod) + shutil.copyfile(scoring_file_ftp_priv, scoring_file_prod) print(copy_msg) if copy_type == 'update': count_updated_pgs += 1 @@ -136,15 +136,20 @@ def copy_scoring_files_to_production(self): count_new_pgs += 1 else: print(f'>>>>> ERROR! Can\'t determine whether the copy of \'{scoring_file}\' was due to the very first version of the scoring file or an updated version of the file') + # If there is any permission issue + except PermissionError as e: + print(f'>>>>> ERROR! File \'{scoring_file}\' (Permission issue) - {e}') except IOError as e: - print(f'>>>>> ERROR! File \'{scoring_file}\' couldn\'t be copied to "{self.scoring_files_dir}"!') + print(f'>>>>> ERROR! File \'{scoring_file}\' couldn\'t be copied to production: "{self.scoring_files_dir}"!') print(e) # Change chmod to allow group write access if os.path.isfile(scoring_file_prod): - try: - os.chmod(scoring_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH) - except: - print(f">>>>> ERROR! Can't change the read/write access of the file '{scoring_file}'!") + file_owner = pwd.getpwuid(os.stat(scoring_file_prod).st_uid).pw_name + if self.username == file_owner: + try: + os.chmod(scoring_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH) + except: + print(f">>>>> ERROR! Can't change the read/write access of the file '{scoring_file}'!") total_count = count_new_pgs + count_updated_pgs print(f'Number of PGS files successfully copied: {total_count} (New: {count_new_pgs} | Updated: {count_updated_pgs} | Skipped: {count_skipped_pgs})') @@ -153,7 +158,7 @@ def copy_scoring_files_to_metadata(self): """ Copy the new/updated scoring files to the metadata directory (temporary FTP) """ print("\n***** Step 2 - Copy the new/updated scoring files to the metadata directory (temporary FTP) *****") - md5_sql_file = open(self.md5_sql_filepath,'w') + # md5_sql_file = open(self.md5_sql_filepath,'w') for score_id in sorted(os.listdir(self.new_ftp_scores_dir+'/scores/')): score_release_dir = self.new_ftp_scores_dir+'/scores/'+score_id+'/ScoringFiles/' @@ -211,15 +216,15 @@ def copy_scoring_files_to_metadata(self): if not score_id in self.log_msg['updated']: self.log_msg['new'].append(score_id) - # md5 checksum SQL commands - id = re.sub(r'PGS0+(.+)', r'\1', score_id) - if is_updated: - sql_cmd = f"UPDATE {self.sql_table} SET score_md5='{new_score_md5}' WHERE score_id={id};\n" - else: - sql_cmd = f"INSERT INTO {self.sql_table} (score_id,score_md5) VALUES ({id},'{new_score_md5}');\n" - md5_sql_file.write(sql_cmd) + # # md5 checksum SQL commands + # id = re.sub(r'PGS0+(.+)', r'\1', score_id) + # if is_updated: + # sql_cmd = f"UPDATE {self.sql_table} SET score_md5='{new_score_md5}' WHERE score_id={id};\n" + # else: + # sql_cmd = f"INSERT INTO {self.sql_table} (score_id,score_md5) VALUES ({id},'{new_score_md5}');\n" + # md5_sql_file.write(sql_cmd) - md5_sql_file.close() + # md5_sql_file.close() # Copied PGS Scoring files diff --git a/release/scripts/run_copy_scoring_files.py b/release/scripts/run_copy_scoring_files.py index 3f44e7a3..2174347c 100644 --- a/release/scripts/run_copy_scoring_files.py +++ b/release/scripts/run_copy_scoring_files.py @@ -5,17 +5,17 @@ from release.scripts.CopyHarmonizedScoringFilesPOS import CopyHarmonizedScoringFilesPOS -def copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath): +def copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username): print("\n#### Copy the new formatted scoring files ####") - pgs_scoring_files = CopyScoringFiles(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath) + pgs_scoring_files = CopyScoringFiles(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username) pgs_scoring_files.get_previous_release() pgs_scoring_files.get_list_of_scores() pgs_scoring_files.copy_scoring_files_to_production() pgs_scoring_files.copy_scoring_files_to_metadata() -def copy_hmpos_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath): +def copy_hmpos_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username): print("\n#### Copy the new harmonized position scoring files ####") - pgs_harmonized_files = CopyHarmonizedScoringFilesPOS(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath) + pgs_harmonized_files = CopyHarmonizedScoringFilesPOS(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username) pgs_harmonized_files.copy_harmonized_files_to_production() pgs_harmonized_files.copy_harmonized_files_to_metadata() @@ -42,7 +42,7 @@ def main(): argparser.add_argument("--scores_dir", type=str, help='The path to the scoring files directory (Production)', required=False) argparser.add_argument("--hm_staged_scores_dir", type=str, help='The path to the harmonized Position staged files directory', required=True) argparser.add_argument("--hm_scores_dir", type=str, help='The path to the harmonized scoring files directory (Production)', required=False) - + argparser.add_argument("--username", type=str, help='Linux/Unix username', required=True) args = argparser.parse_args() @@ -51,6 +51,7 @@ def main(): scores_dir = args.scores_dir hm_staged_scores_dir = args.hm_staged_scores_dir hm_scores_dir = args.hm_scores_dir + username = args.username release_date_file = f'{new_ftp_dir}/release_date.txt' new_release_date = get_new_release_date(release_date_file) @@ -58,9 +59,9 @@ def main(): md5_sql_filename = f'scores_md5_{new_release_date}.sql' md5_sql_filepath = f'{new_ftp_dir}/{md5_sql_filename}' - copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath) + copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username) - copy_hmpos_scoring_files(new_ftp_dir,hm_staged_scores_dir,hm_scores_dir,md5_sql_filepath) + copy_hmpos_scoring_files(new_ftp_dir,hm_staged_scores_dir,hm_scores_dir,md5_sql_filepath,username) # Move/remove temporary files if os.path.isfile(release_date_file):