Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update recipe #83

Merged
merged 2 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions SampleSheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def split_sample_sheet(self):
if the sample sheet recipes mix DLP with other recipes, all DLP samples need to go on a separate sample sheet named "_DLP"
"""
# if 10x DRAGEN demux add to header CreateFastqForIndexReads,1,,,,,,,
if any("10X_" in s for s in self.recipe_set):
if any("SC_Chromium" in s for s in self.recipe_set):
print("Adding CreateFastqForIndexReads,1 to sample sheet header since 10X samples are present")
self.df_ss_header.loc[len(self.df_ss_header.index)-1] = ["CreateFastqForIndexReads",1,"","","","","","",""]
self.df_ss_header.loc[len(self.df_ss_header.index)] = ["[Data]","","","","","","","",""]
Expand All @@ -111,7 +111,7 @@ def split_sample_sheet(self):
split_ss_list = [ss_copy, self]

was_split = False
if "DLP" in self.recipe_set and len(self.recipe_set) > 1:
if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1:
print("Copying all DLP samples to a new sample sheet")
# copy all DLP rows to a new sample sheet
dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy()
Expand Down
1 change: 1 addition & 0 deletions deliver_pipeline_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
def deliver(ds, **kwargs):
project = kwargs["params"]["project"]
pi = kwargs["params"]["pi"]
# recipe here is actually request name
recipe = kwargs["params"]["recipe"]
print("Delivering the pipeline output and/or .bams for {} {} {}".format(project, pi, recipe))

Expand Down
14 changes: 5 additions & 9 deletions demux_run_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def demux(ds, **kwargs):

# check if the sample sheet contains DLP project
is_DLP = False
if "DLP" in sample_sheet.recipe_set:
if "SC_DLP" in sample_sheet.recipe_set:
is_DLP = True
dragen_demux = True

Expand Down Expand Up @@ -214,7 +214,7 @@ def stats(ds, **kwargs):

def fingerprinting(ds, **kwargs):
# read in sample sheet as arguments, filter out projects that need to run fingerprinting
recipe_list_for_fp = [".*IMPACT*", ".*Heme*", "IDT_Exome*", "WholeExomeSequencing", "Twist_Exome", "MSK-ACCESS*", "CMO-CH", "HumanWholeGenome"]
recipe_list_for_fp = ["PED-PEG", "WGS_Deep", "HC_IMPACT", "HC_IMPACT-Heme", "HC_ACCESS", "WES_Human", "HC_CMOCH"]
# call fingerprinting_dag.py for each project
samplesheet_path = kwargs["params"]["samplesheet"]

Expand All @@ -228,13 +228,9 @@ def fingerprinting(ds, **kwargs):
project_list_to_run = []
for project, recipe in sample_sheet.project_dict.items():
# fingerprinting only supports human projects
if project_genome_dict[project] == "Human":
for recipe_list_item in recipe_list_for_fp:
print(project, recipe)
expr = re.compile(recipe_list_item)
if expr.match(recipe):
project_list_to_run.append(project)
break
if project_genome_dict[project] == "Human" and recipe in recipe_list_for_fp:
project_list_to_run.append(project)

print("Projects need to run fp: {}".format(project_list_to_run))
if len(project_list_to_run) == 0:
return "No project need to run fingerprinting"
Expand Down
24 changes: 4 additions & 20 deletions scripts/cellranger_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,6 @@
"Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
}
},
"atac_count": {
"tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
"genome": {
"Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
"Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
}
},
"cnv": {
"tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
"genome": {
"Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
"Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
}
},
"multi": {
"tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
},
Expand Down Expand Up @@ -62,12 +48,10 @@
ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"

# 10X recipe list for different pipelines
COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
VDJ_FLAVORS = ["10X_Genomics_VDJ"]
ATAC_FLAVORS = ["10X_Genomics_ATAC"]
CNV_FLAVORS = ["10X_Genomics_CNV"]
ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"]
VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"]
ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"]
SPATIAL_FLAVORS = ["ST_Visium"]

# we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
SCRI = "12437"
Expand Down
106 changes: 40 additions & 66 deletions scripts/deliver_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,70 +27,59 @@
PICARD = "java -jar /igo/home/igo/resources/picard2.23.2/picard.jar "
NGS_STATS_FASTQ_ENDPOINT = "http://igodb.mskcc.org:8080/ngs-stats/permissions/getRequestPermissions/"

def deliver_pipeline_output(project, pi, recipe):
if not project or not pi or not recipe:
def deliver_pipeline_output(project, pi, requestName):
if not project or not pi or not requestName:
return "Project, pi and recipe are all required arguments."
# change pi to all lowercase
pi = pi.lower()
delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline"

if recipe.startswith("RNASeq"):
if requestName == "RNALibraryPrep":
print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe))
bamdict = find_bams(project, STATS_DIR)
bsub_commands = write_bams_to_share(bamdict, delivery_folder)
reconcile_bam_fastq_list(project, bamdict)
return "Completed RNA bams delivery"

# if it is a MissionBio recipe, find the tapestri pipeline output and copy all sample folders
elif recipe == "MissionBio":
tapestri_path = "/igo/staging/stats/MissionBio/Project_" + project
if not os.path.exists(tapestri_path):
print("No tapestri result available")
else:
tapestri_delivery_folder = delivery_folder + "/Tapestri"
if not os.path.exists(tapestri_delivery_folder):
print("Creating pipeline delivery folder {}".format(tapestri_delivery_folder))
os.makedirs(tapestri_delivery_folder)

# copy each sample folder to the delivery folder
tapestri_path = tapestri_path + "/"
sample_list = os.listdir(tapestri_path)
for sample in sample_list:
sample_folder = tapestri_path + sample
destination = tapestri_delivery_folder + "/" + sample
print("copy {}".format(sample_folder))
shutil.copytree(sample_folder, destination, symlinks=True)

# if recipe is CRISPRSeq or GeoMx, go to the pipeline folder and look for output; if it exists, then copy it
# add cellranger multi output for featurebarcoding project here for now
elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
pipeline_path = "/igo/staging/PIPELINE/Project_" + project
if not os.path.exists(pipeline_path):
print("No pipeline result available")
else:
if not os.path.exists(delivery_folder):
print("Creating pipeline delivery folder {}".format(delivery_folder))
os.makedirs(delivery_folder)

# copy each sample folder to the delivery folder
pipeline_path = pipeline_path + "/"
sample_list = os.listdir(pipeline_path)
for sample in sample_list:
sample_path = pipeline_path + sample
destination = delivery_folder + "/" + sample
print("copy {}".format(sample_path))
if os.path.isdir(sample_path):
shutil.copytree(sample_path, destination, symlinks=True)
else:
cmd = "cp {} {}".format(sample_path, destination)
print(cmd)
call(cmd, shell=True)

# if 10X recipe or SCRI project starting with 12437, copy cell ranger result to project folder
elif recipe.startswith("10XGenomics") or project.startswith("12437_"):
# TCR seq only needs the manifest delivered; those files are located under the viale lab drive
# example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
elif requestName == "TCRSeq":
pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
TCR_delivery_folder = delivery_folder + "/Manifest"
if not os.path.exists(TCR_delivery_folder):
print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
os.makedirs(TCR_delivery_folder)

cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
print(cmd)
call(cmd, shell=True)

# For all other projects, check CELLRANGER folder first then PIPELINE folder
else:
folder_list = scripts.deliver_cellranger.find_cellranger(project)
if len(folder_list) == 0:
print("No cellranger result available")
# check PIPELINE folder
pipeline_path = "/igo/staging/PIPELINE/Project_" + project
if not os.path.exists(pipeline_path):
print("No cellranger/pipeline result available")
else:
if not os.path.exists(delivery_folder):
print("Creating pipeline delivery folder {}".format(delivery_folder))
os.makedirs(delivery_folder)

# copy each sample folder to the delivery folder
pipeline_path = pipeline_path + "/"
sample_list = os.listdir(pipeline_path)
for sample in sample_list:
sample_path = pipeline_path + sample
destination = delivery_folder + "/" + sample
print("copy {}".format(sample_path))
if os.path.isdir(sample_path):
shutil.copytree(sample_path, destination, symlinks=True)
else:
cmd = "cp {} {}".format(sample_path, destination)
print(cmd)
call(cmd, shell=True)
else:
# create pipeline folder if not exists
cellranger_delivery_folder = delivery_folder + "/cellranger"
Expand All @@ -105,21 +94,6 @@ def deliver_pipeline_output(project, pi, recipe):
print("copy {}".format(folder))
shutil.copytree(folder, sample_delivery_name, symlinks=True)

# TCR seq only needs the manifest delivered; those files are located under the viale lab drive
# example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
elif recipe == "TCRSeq-IGO":
pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
TCR_delivery_folder = delivery_folder + "/Manifest"
if not os.path.exists(TCR_delivery_folder):
print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
os.makedirs(TCR_delivery_folder)

cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
print(cmd)
call(cmd, shell=True)

else:
print("Pipeline delivery is not needed for recipe {} and project {}".format(recipe, project))
return "Completed pipeline delivery"

def find_bams(project, stats_base_dir):
Expand Down
4 changes: 2 additions & 2 deletions stats_by_project_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def run_stats(ds, **kwargs):
print(cmd)
subprocess.run(cmd, shell=True)

elif "10X_" in recipe:
elif "SC_Chromium" in recipe:
scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species)
elif "ONT" in recipe:
elif "Nanopore" in recipe:
cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
print(cmd)
subprocess.run(cmd, shell=True)
Expand Down
Loading