From 37e925793148a10523500b3bf490cd5dd91c800c Mon Sep 17 00:00:00 2001 From: luc Date: Fri, 10 May 2024 16:28:33 -0400 Subject: [PATCH 1/2] update recipe change --- deliver_pipeline_dag.py | 1 + scripts/cellranger_config.py | 24 ++------ scripts/deliver_pipeline.py | 106 +++++++++++++---------------------- stats_by_project_dag.py | 4 +- 4 files changed, 47 insertions(+), 88 deletions(-) diff --git a/deliver_pipeline_dag.py b/deliver_pipeline_dag.py index e3fe37b..dd1fde7 100644 --- a/deliver_pipeline_dag.py +++ b/deliver_pipeline_dag.py @@ -25,6 +25,7 @@ def deliver(ds, **kwargs): project = kwargs["params"]["project"] pi = kwargs["params"]["pi"] + # recipe here is actually request name recipe = kwargs["params"]["recipe"] print("Delivering the pipeline output and/or .bams for {} {} {}".format(project, pi, recipe)) diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py index 235488f..e4e6105 100644 --- a/scripts/cellranger_config.py +++ b/scripts/cellranger_config.py @@ -18,20 +18,6 @@ "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 " } }, - "atac_count": { - "tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ", - "genome": { - "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ", - "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 " - } - }, - "cnv": { - "tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ", - "genome": { - "Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ", - "Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 " - } - }, "multi": { "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi " }, @@ -62,12 +48,10 @@ ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200" # 10X recipe list for different pipelines -COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"] -VDJ_FLAVORS = ["10X_Genomics_VDJ"] -ATAC_FLAVORS = ["10X_Genomics_ATAC"] -CNV_FLAVORS = ["10X_Genomics_CNV"] -ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"] -SPATIAL_FLAVORS = ["10X_Genomics_Visium"] +COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"] +VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"] +ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"] +SPATIAL_FLAVORS = ["ST_Visium"] # we do not want to PROCESS SAIL (15500) or SCRI (12437) projects SCRI = "12437" diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py index 26e8af8..895dc63 100644 --- a/scripts/deliver_pipeline.py +++ b/scripts/deliver_pipeline.py @@ -27,70 +27,59 @@ PICARD = "java -jar /igo/home/igo/resources/picard2.23.2/picard.jar " NGS_STATS_FASTQ_ENDPOINT = "http://igodb.mskcc.org:8080/ngs-stats/permissions/getRequestPermissions/" -def deliver_pipeline_output(project, pi, recipe): - if not project or not pi or not recipe: +def deliver_pipeline_output(project, pi, requestName): + if not project or not pi or not requestName: return "Project, pi and recipe are all required arguments." # change pi to all lowercase pi = pi.lower() delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline" - if recipe.startswith("RNASeq"): + if requestName == "RNALibraryPrep": print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe)) bamdict = find_bams(project, STATS_DIR) bsub_commands = write_bams_to_share(bamdict, delivery_folder) reconcile_bam_fastq_list(project, bamdict) return "Completed RNA bams delivery" - # if is missionbio recipe, find tapestri pipelie output and copy all sample folders - elif recipe == "MissionBio": - tapestri_path = "/igo/staging/stats/MissionBio/Project_" + project - if not os.path.exists(tapestri_path): - print("No tapestri result available") - else: - tapestri_delivery_folder = delivery_folder + "/Tapestri" - if not os.path.exists(tapestri_delivery_folder): - print("Creating pipeline delivery folder {}".format(tapestri_delivery_folder)) - os.makedirs(tapestri_delivery_folder) - - # copy each sample folder to the delivery folder - tapestri_path = tapestri_path + "/" - sample_list = os.listdir(tapestri_path) - for sample in sample_list: - sample_folder = tapestri_path + sample - destination = tapestri_delivery_folder + "/" + sample - print("copy {}".format(sample_folder)) - shutil.copytree(sample_folder, destination, symlinks=True) - - # if recipe is CRISPRSeq or GeoMx, go to pipeline folder and find output, if exists the copy - # add cellranger multi output for featurebarcoding project here for now - elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding": - pipeline_path = "/igo/staging/PIPELINE/Project_" + project - if not os.path.exists(pipeline_path): - print("No pipeline result available") - else: - if not os.path.exists(delivery_folder): - print("Creating pipeline delivery folder {}".format(delivery_folder)) - os.makedirs(delivery_folder) - - # copy each sample folder to the delivery folder - pipeline_path = pipeline_path + "/" - sample_list = os.listdir(pipeline_path) - for sample in sample_list: - sample_path = pipeline_path + sample - destination = delivery_folder + "/" + sample - print("copy {}".format(sample_path)) - if os.path.isdir(sample_path): - shutil.copytree(sample_path, destination, symlinks=True) - else: - cmd = "cp {} {}".format(sample_path, destination) - print(cmd) - call(cmd, shell=True) - - # if 10X recipe or SCRI project starting with 12437, copy cell ranger result to project folder - elif recipe.startswith("10XGenomics") or project.startswith("12437_"): + # TCR seq only need deliver manifest, those files located under viale lab drive + # example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv + elif requestName == "TCRSeq": + pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq" + TCR_delivery_folder = delivery_folder + "/Manifest" + if not os.path.exists(TCR_delivery_folder): + print("Creating pipeline delivery folder {}".format(TCR_delivery_folder)) + os.makedirs(TCR_delivery_folder) + + cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder) + print(cmd) + call(cmd, shell=True) + + # For all other projects, check CELLRANGER folder first then PIPELINE folder + else: folder_list = scripts.deliver_cellranger.find_cellranger(project) if len(folder_list) == 0: - print("No cellranger result available") + # check PIPELINE folder + pipeline_path = "/igo/staging/PIPELINE/Project_" + project + if not os.path.exists(pipeline_path): + print("No cellranger/pipeline result available") + else: + if not os.path.exists(delivery_folder): + print("Creating pipeline delivery folder {}".format(delivery_folder)) + os.makedirs(delivery_folder) + + # copy each sample folder to the delivery folder + pipeline_path = pipeline_path + "/" + sample_list = os.listdir(pipeline_path) + for sample in sample_list: + sample_path = pipeline_path + sample + destination = delivery_folder + "/" + sample + print("copy {}".format(sample_path)) + if os.path.isdir(sample_path): + shutil.copytree(sample_path, destination, symlinks=True) + else: + cmd = "cp {} {}".format(sample_path, destination) + print(cmd) + call(cmd, shell=True) else: # create pipeline folder if not exists cellranger_delivery_folder = delivery_folder + "/cellranger" @@ -105,21 +94,6 @@ def deliver_pipeline_output(project, pi, recipe): print("copy {}".format(folder)) shutil.copytree(folder, sample_delivery_name, symlinks=True) - # TCR seq only need deliver manifest, those files located under viale lab drive - # example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv - elif recipe == "TCRSeq-IGO": - pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq" - TCR_delivery_folder = delivery_folder + "/Manifest" - if not os.path.exists(TCR_delivery_folder): - print("Creating pipeline delivery folder {}".format(TCR_delivery_folder)) - os.makedirs(TCR_delivery_folder) - - cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder) - print(cmd) - call(cmd, shell=True) - - else: - print("Pipeline delivery is not needed for recipe {} and project {}".format(recipe, project)) return "Completed pipeline delivery" def find_bams(project, stats_base_dir): diff --git a/stats_by_project_dag.py b/stats_by_project_dag.py index b99dc9e..ddb104d 100644 --- a/stats_by_project_dag.py +++ b/stats_by_project_dag.py @@ -57,9 +57,9 @@ def run_stats(ds, **kwargs): print(cmd) subprocess.run(cmd, shell=True) - elif "10X_" in recipe: + elif "SC_Chromium" in recipe: scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species) - elif "ONT" in recipe: + elif "Nanopore" in recipe: cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory) print(cmd) subprocess.run(cmd, shell=True) From dc946b440818b2b07f2d944e6bbb487ff7b3ca02 Mon Sep 17 00:00:00 2001 From: luc Date: Fri, 10 May 2024 16:38:19 -0400 Subject: [PATCH 2/2] update recipe --- SampleSheet.py | 4 ++-- demux_run_dag.py | 14 +++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/SampleSheet.py b/SampleSheet.py index b41f43d..7907ef8 100644 --- a/SampleSheet.py +++ b/SampleSheet.py @@ -99,7 +99,7 @@ def split_sample_sheet(self): if sample sheet recipes have mixed DLP and other all DLP need to go on a separate sample sheet named "_DLP" """ # if 10x DRAGEN demux add to header CreateFastqForIndexReads,1,,,,,,, - if any("10X_" in s for s in self.recipe_set): + if any("SC_Chromium" in s for s in self.recipe_set): print("Adding CreateFastqForIndexReads,1 to sample sheet header since 10X samples are present") self.df_ss_header.loc[len(self.df_ss_header.index)-1] = ["CreateFastqForIndexReads",1,"","","","","","",""] self.df_ss_header.loc[len(self.df_ss_header.index)] = ["[Data]","","","","","","","",""] @@ -111,7 +111,7 @@ def split_sample_sheet(self): split_ss_list = [ss_copy, self] was_split = False - if "DLP" in self.recipe_set and len(self.recipe_set) > 1: + if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1: print("Copying all DLP samples to a new sample sheet") # copy all DLP rows to a new sample sheet dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy() diff --git a/demux_run_dag.py b/demux_run_dag.py index 12e3733..aa026bb 100644 --- a/demux_run_dag.py +++ b/demux_run_dag.py @@ -66,7 +66,7 @@ def demux(ds, **kwargs): # check if the sample sheet contains DLP project is_DLP = False - if "DLP" in sample_sheet.recipe_set: + if "SC_DLP" in sample_sheet.recipe_set: is_DLP = True dragen_demux = True @@ -214,7 +214,7 @@ def stats(ds, **kwargs): def fingerprinting(ds, **kwargs): # read in sample sheet as arguments, filter out projects that need to run fingerprinting - recipe_list_for_fp = [".*IMPACT*", ".*Heme*", "IDT_Exome*", "WholeExomeSequencing", "Twist_Exome", "MSK-ACCESS*", "CMO-CH", "HumanWholeGenome"] + recipe_list_for_fp = ["PED-PEG", "WGS_Deep", "HC_IMPACT", "HC_IMPACT-Heme", "HC_ACCESS", "WES_Human", "HC_CMOCH"] # call fingerprinting_dag.py for each project samplesheet_path = kwargs["params"]["samplesheet"] @@ -228,13 +228,9 @@ def fingerprinting(ds, **kwargs): project_list_to_run = [] for project, recipe in sample_sheet.project_dict.items(): # fingerprinting only support human - if project_genome_dict[project] == "Human": - for recipe_list_item in recipe_list_for_fp: - print(project, recipe) - expr = re.compile(recipe_list_item) - if expr.match(recipe): - project_list_to_run.append(project) - break + if project_genome_dict[project] == "Human" and recipe in recipe_list_for_fp: + project_list_to_run.append(project) + print("Projects need to run fp: {}".format(project_list_to_run)) if len(project_list_to_run) == 0: return "No project need to run fingerprinting"