mskcc · CuijieLu · May 20, 2024 · May 10, 2024 · May 10, 2024
diff --git a/SampleSheet.py b/SampleSheet.py
@@ -99,7 +99,7 @@ def split_sample_sheet(self):
          if sample sheet recipes have mixed DLP and other all DLP need to go on a separate sample sheet named "_DLP"
         """
         # if 10x DRAGEN demux add to header CreateFastqForIndexReads,1,,,,,,,
-        if any("10X_" in s for s in self.recipe_set):
+        if any("SC_Chromium" in s for s in self.recipe_set):
             print("Adding CreateFastqForIndexReads,1 to sample sheet header since 10X samples are present")
             self.df_ss_header.loc[len(self.df_ss_header.index)-1] = ["CreateFastqForIndexReads",1,"","","","","","",""]
             self.df_ss_header.loc[len(self.df_ss_header.index)] = ["[Data]","","","","","","","",""]
@@ -111,7 +111,7 @@ def split_sample_sheet(self):
         split_ss_list = [ss_copy, self]  
 
         was_split = False
-        if "DLP" in self.recipe_set and len(self.recipe_set) > 1:
+        if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1:
             print("Copying all DLP samples to a new sample sheet")
             # copy all DLP rows to a new sample sheet
             dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy()

diff --git a/deliver_pipeline_dag.py b/deliver_pipeline_dag.py
@@ -25,6 +25,7 @@
     def deliver(ds, **kwargs):
         project = kwargs["params"]["project"]
         pi = kwargs["params"]["pi"]
+        # NOTE: despite its name, the "recipe" param actually carries the request name
         recipe = kwargs["params"]["recipe"]
         print("Delivering the pipeline output and/or .bams for {} {} {}".format(project, pi, recipe))
 

diff --git a/demux_run_dag.py b/demux_run_dag.py
@@ -66,7 +66,7 @@ def demux(ds, **kwargs):
 
         # check if the sample sheet contains DLP project
         is_DLP = False
-        if "DLP" in sample_sheet.recipe_set:
+        if "SC_DLP" in sample_sheet.recipe_set:
             is_DLP = True
             dragen_demux = True
 
@@ -214,7 +214,7 @@ def stats(ds, **kwargs):
 
     def fingerprinting(ds, **kwargs):
         # read in sample sheet as arguments, filter out projects that need to run fingerprinting
-        recipe_list_for_fp = [".*IMPACT*", ".*Heme*", "IDT_Exome*", "WholeExomeSequencing", "Twist_Exome", "MSK-ACCESS*", "CMO-CH", "HumanWholeGenome"]
+        recipe_list_for_fp = ["PED-PEG", "WGS_Deep", "HC_IMPACT", "HC_IMPACT-Heme", "HC_ACCESS", "WES_Human", "HC_CMOCH"]
         # call fingerprinting_dag.py for each project
         samplesheet_path = kwargs["params"]["samplesheet"]
 
@@ -228,13 +228,9 @@ def fingerprinting(ds, **kwargs):
         project_list_to_run = []        
         for project, recipe in sample_sheet.project_dict.items():
             # fingerprinting only support human
-            if project_genome_dict[project] == "Human":
-                for recipe_list_item in recipe_list_for_fp:
-                    print(project, recipe)
-                    expr = re.compile(recipe_list_item)
-                    if expr.match(recipe):
-                        project_list_to_run.append(project)
-                        break
+            if project_genome_dict[project] == "Human" and recipe in recipe_list_for_fp:
+                project_list_to_run.append(project)
+
         print("Projects need to run fp: {}".format(project_list_to_run))
         if len(project_list_to_run) == 0:
             return "No project need to run fingerprinting"

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
@@ -18,20 +18,6 @@
             "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
         }
     },
-    "atac_count": {
-        "tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
-            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
-        }
-    },
-    "cnv": {
-        "tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
-            "Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
-        }
-    },
     "multi": {
         "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
     },
@@ -62,12 +48,10 @@
 ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
 
 # 10X recipe list for different pipelines
-COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
-VDJ_FLAVORS = ["10X_Genomics_VDJ"]
-ATAC_FLAVORS = ["10X_Genomics_ATAC"]
-CNV_FLAVORS = ["10X_Genomics_CNV"]
-ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
-SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
+COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"]
+VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"]
+ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"]
+SPATIAL_FLAVORS = ["ST_Visium"]
 
 # we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
 SCRI = "12437"

diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py
@@ -27,70 +27,59 @@
 PICARD = "java -jar /igo/home/igo/resources/picard2.23.2/picard.jar "
 NGS_STATS_FASTQ_ENDPOINT = "http://igodb.mskcc.org:8080/ngs-stats/permissions/getRequestPermissions/"
 
-def deliver_pipeline_output(project, pi, recipe):
-    if not project or not pi or not recipe:
+def deliver_pipeline_output(project, pi, requestName):
+    if not project or not pi or not requestName:
         return "Project, pi and recipe are all required arguments."
     # change pi to all lowercase
     pi = pi.lower()
     delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline"
 
-    if recipe.startswith("RNASeq"):
-        print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe))
+    if requestName == "RNALibraryPrep":
+        print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, requestName))
         bamdict = find_bams(project, STATS_DIR)
         bsub_commands =  write_bams_to_share(bamdict, delivery_folder)
         reconcile_bam_fastq_list(project, bamdict)
         return "Completed RNA bams delivery"
 
-    # if is missionbio recipe, find tapestri pipelie output and copy all sample folders
-    elif recipe == "MissionBio":
-        tapestri_path = "/igo/staging/stats/MissionBio/Project_" + project
-        if not os.path.exists(tapestri_path):
-            print("No tapestri result available")
-        else:
-            tapestri_delivery_folder = delivery_folder + "/Tapestri"
-            if not os.path.exists(tapestri_delivery_folder):
-                print("Creating pipeline delivery folder {}".format(tapestri_delivery_folder))
-                os.makedirs(tapestri_delivery_folder)
-
-            # copy each sample folder to the delivery folder
-            tapestri_path = tapestri_path + "/"
-            sample_list = os.listdir(tapestri_path)
-            for sample in sample_list:
-                sample_folder = tapestri_path + sample
-                destination = tapestri_delivery_folder + "/" + sample
-                print("copy {}".format(sample_folder))
-                shutil.copytree(sample_folder, destination, symlinks=True)
-
-    # if recipe is CRISPRSeq or GeoMx, go to pipeline folder and find output, if exists the copy
-    # add cellranger multi output for featurebarcoding project here for now
-    elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
-        pipeline_path = "/igo/staging/PIPELINE/Project_" + project
-        if not os.path.exists(pipeline_path):
-            print("No pipeline result available")
-        else:
-            if not os.path.exists(delivery_folder):
-                print("Creating pipeline delivery folder {}".format(delivery_folder))
-                os.makedirs(delivery_folder)
-
-            # copy each sample folder to the delivery folder
-            pipeline_path = pipeline_path + "/"
-            sample_list = os.listdir(pipeline_path)
-            for sample in sample_list:
-                sample_path = pipeline_path + sample
-                destination = delivery_folder + "/" + sample
-                print("copy {}".format(sample_path))
-                if os.path.isdir(sample_path):
-                    shutil.copytree(sample_path, destination, symlinks=True)
-                else:
-                    cmd = "cp {} {}".format(sample_path, destination)
-                    print(cmd)
-                    call(cmd, shell=True)
-
-    # if 10X recipe or SCRI project starting with 12437, copy cell ranger result to project folder
-    elif recipe.startswith("10XGenomics") or project.startswith("12437_"):
+    # TCRseq requests only need the manifest delivered; the manifest files live under the lab drive path built below
+    # example file: /rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
+    elif requestName == "TCRSeq":
+        pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
+        TCR_delivery_folder = delivery_folder + "/Manifest"
+        if not os.path.exists(TCR_delivery_folder):
+                print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
+                os.makedirs(TCR_delivery_folder)
+
+        cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
+        print(cmd)
+        call(cmd, shell=True)
+
+    # For all other projects, check CELLRANGER folder first then PIPELINE folder
+    else:
         folder_list = scripts.deliver_cellranger.find_cellranger(project)
         if len(folder_list) == 0:
-            print("No cellranger result available")
+            # check PIPELINE folder 
+            pipeline_path = "/igo/staging/PIPELINE/Project_" + project
+            if not os.path.exists(pipeline_path):
+                print("No cellranger/pipeline result available")
+            else:
+                if not os.path.exists(delivery_folder):
+                    print("Creating pipeline delivery folder {}".format(delivery_folder))
+                    os.makedirs(delivery_folder)
+
+                # copy each sample folder to the delivery folder
+                pipeline_path = pipeline_path + "/"
+                sample_list = os.listdir(pipeline_path)
+                for sample in sample_list:
+                    sample_path = pipeline_path + sample
+                    destination = delivery_folder + "/" + sample
+                    print("copy {}".format(sample_path))
+                    if os.path.isdir(sample_path):
+                        shutil.copytree(sample_path, destination, symlinks=True)
+                    else:
+                        cmd = "cp {} {}".format(sample_path, destination)
+                        print(cmd)
+                        call(cmd, shell=True)
         else:
             # create pipeline folder if not exists
             cellranger_delivery_folder = delivery_folder + "/cellranger"
@@ -105,21 +94,6 @@ def deliver_pipeline_output(project, pi, recipe):
                 print("copy {}".format(folder))
                 shutil.copytree(folder, sample_delivery_name, symlinks=True)
 
-    # TCR seq only need deliver manifest, those files located under viale lab drive
-    # example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
-    elif recipe == "TCRSeq-IGO":
-        pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
-        TCR_delivery_folder = delivery_folder + "/Manifest"
-        if not os.path.exists(TCR_delivery_folder):
-                print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
-                os.makedirs(TCR_delivery_folder)
-
-        cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
-        print(cmd)
-        call(cmd, shell=True)
-
-    else:
-        print("Pipeline delivery is not needed for recipe {} and project {}".format(recipe, project))
     return "Completed pipeline delivery"
 
 def find_bams(project, stats_base_dir):

diff --git a/stats_by_project_dag.py b/stats_by_project_dag.py
@@ -57,9 +57,9 @@ def run_stats(ds, **kwargs):
                 print(cmd)
                 subprocess.run(cmd, shell=True)
 
-        elif "10X_" in recipe:
+        elif "SC_Chromium" in recipe:
             scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species)
-        elif "ONT" in recipe:
+        elif "Nanopore" in recipe:
             cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
             print(cmd)
             subprocess.run(cmd, shell=True)