diff --git a/Snakefile b/Snakefile index 4792d36..3f3cf95 100644 --- a/Snakefile +++ b/Snakefile @@ -48,7 +48,7 @@ def _get_config(sample, item): return CHEMISTRY[DEFAULTS["chemistry"]][item] except KeyError: return DEFAULTS[item] - + # assemble outputs for rule all SAMPLE_OUTS = [] for x in SAMPLES: diff --git a/chemistry.yaml b/chemistry.yaml index c5ffe95..08b6207 100644 --- a/chemistry.yaml +++ b/chemistry.yaml @@ -30,24 +30,28 @@ chromiumV2: STAR_R1: --soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 STAR_R2: --soloUMIlen 10 dropseq: + bc_whitelist: None illumina: filter_R1_length: 50 STAR_R1: --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 microwellseq: + bc_whitelist: None bc_cut: CGACTCACTACAGGG...TCGGTGACACGATCG illumina: filter_R1_length: 54 STAR_R1: --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 bd: + bc_whitelist: None bc_cut: ACTGGCCTGCGA...GGTAGCGGTGACA illumina: filter_R1_length: 53 STAR_R1: --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 indrop: + bc_whitelist: None illumina: filter_R1_length: 32 STAR_R1: --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 - STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 \ No newline at end of file + STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 diff --git a/config.yaml b/config.yaml index 51911c0..d908349 100644 --- a/config.yaml +++ b/config.yaml @@ -14,15 +14,15 @@ STAR: STAR_INDEX: # path to STAR genome index - "index/cr2020A_star" + "/pl/active/amc_heme/ref/cr2020A_star" WHITELIST_V3: # path to Chromium V3 whitelist - "whitelist/3M-february-2018.txt" + "/pl/active/amc_heme/ref/3M-february-2018.txt" WHITELIST_V2: # path to Chromium V2 whitelist - "whitelist/737K-august-2016.txt" + "/pl/active/amc_heme/ref/737K-august-2016.txt" POLYA_SITES: # polya_db 3 on GRCh38 with 5 bases upstream and 3 bases @@ -53,7 +53,7 @@ SAMPLES: # star_args (STAR_R1, STAR_R2, STAR_paired) # extra star_args (STAR_R1_extra_args, STAR_R2_extra_args, STAR_paired_extra_args) test: - basename: sample + basename: sample- platform: illumina chemistry: chromiumV2 test2: @@ -73,4 +73,4 @@ report_section_order: star: order: 100 featureCounts: - order: -1000 \ No newline at end of file + order: -1000 diff --git a/rules/count.snake b/rules/count.snake index 12156b2..d3e7ffa 100644 --- a/rules/count.snake +++ b/rules/count.snake @@ -147,9 +147,10 @@ rule filter_R1: mem_mb = 8000 shell: """ - if [[ {params.length} == 'False') ]]; then + if [[ {params.length} == 'False' ]]; then ln -s {input} {output} else + samtools index {input} samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp} set +e python3 inst/scripts/filter_bam_correct.py -i {params.temp} -o {params.temp2} -l {params.length} -s -c 20 @@ -183,6 +184,7 @@ rule filter_R2: mem_mb = 8000 shell: """ + samtools index {input} samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp} samtools index {params.temp} python3 inst/scripts/filter_bam.py -i {params.temp} -o {output} @@ -210,9 +212,10 @@ rule filter_paired: mem_mb = 8000 shell: """ - if [[ {params.length} == 'False') ]]; then + if [[ {params.length} == 'False' ]]; then ln -s {input} {output} else + samtools index {input} samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp} set +e python3 inst/scripts/filter_bam_correct.py -i {params.temp} -o {params.temp2} -l {params.length} diff --git a/rules/cutadapt_star.snake b/rules/cutadapt_star.snake index 44956f8..342006c 100644 --- a/rules/cutadapt_star.snake +++ b/rules/cutadapt_star.snake @@ -7,7 +7,10 @@ import pandas as pd """ Extract per-sample fastq paths """ def _get_fq_paths(wildcards): - fqs = map(lambda x: os.path.join(DATA, x + "*R1*"), SAMPLES[wildcards.sample]["basename"]) + basename = SAMPLES[wildcards.sample]["basename"] + if isinstance(basename, str): + basename = [basename] + fqs = map(lambda x: os.path.join(DATA, x + "*R1*"), basename) fqs = map(lambda x: glob.glob(x), fqs) fqs = list(chain.from_iterable(fqs))