Skip to content

Commit

Permalink
Correct fastq basename list/str handling and add whitelist info (bc_whitelist: None) for non-10X chemistries
Browse files Browse the repository at this point in the history
  • Loading branch information
agillen committed Oct 14, 2024
1 parent 146b095 commit e8743e8
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _get_config(sample, item):
return CHEMISTRY[DEFAULTS["chemistry"]][item]
except KeyError:
return DEFAULTS[item]

# assemble outputs for rule all
SAMPLE_OUTS = []
for x in SAMPLES:
Expand Down
6 changes: 5 additions & 1 deletion chemistry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,28 @@ chromiumV2:
STAR_R1: --soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 10
dropseq:
bc_whitelist: None
illumina:
filter_R1_length: 50
STAR_R1: --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13
microwellseq:
bc_whitelist: None
bc_cut: CGACTCACTACAGGG...TCGGTGACACGATCG
illumina:
filter_R1_length: 54
STAR_R1: --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19
bd:
bc_whitelist: None
bc_cut: ACTGGCCTGCGA...GGTAGCGGTGACA
illumina:
filter_R1_length: 53
STAR_R1: --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28
indrop:
bc_whitelist: None
illumina:
filter_R1_length: 32
STAR_R1: --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9
STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9
10 changes: 5 additions & 5 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ STAR:

STAR_INDEX:
# path to STAR genome index
"index/cr2020A_star"
"/pl/active/amc_heme/ref/cr2020A_star"

WHITELIST_V3:
# path to Chromium V3 whitelist
"whitelist/3M-february-2018.txt"
"/pl/active/amc_heme/ref/3M-february-2018.txt"

WHITELIST_V2:
# path to Chromium V2 whitelist
"whitelist/737K-august-2016.txt"
"/pl/active/amc_heme/ref/737K-august-2016.txt"

POLYA_SITES:
# polya_db 3 on GRCh38 with 5 bases upstream and 3 bases
Expand Down Expand Up @@ -53,7 +53,7 @@ SAMPLES:
# star_args (STAR_R1, STAR_R2, STAR_paired)
# extra star_args (STAR_R1_extra_args, STAR_R2_extra_args, STAR_paired_extra_args)
test:
basename: sample
basename: sample-
platform: illumina
chemistry: chromiumV2
test2:
Expand All @@ -73,4 +73,4 @@ report_section_order:
star:
order: 100
featureCounts:
order: -1000
order: -1000
7 changes: 5 additions & 2 deletions rules/count.snake
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@ rule filter_R1:
mem_mb = 8000
shell:
"""
if [[ {params.length} == 'False') ]]; then
if [[ {params.length} == 'False' ]]; then
ln -s {input} {output}
else
samtools index {input}
samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp}
set +e
python3 inst/scripts/filter_bam_correct.py -i {params.temp} -o {params.temp2} -l {params.length} -s -c 20
Expand Down Expand Up @@ -183,6 +184,7 @@ rule filter_R2:
mem_mb = 8000
shell:
"""
samtools index {input}
samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp}
samtools index {params.temp}
python3 inst/scripts/filter_bam.py -i {params.temp} -o {output}
Expand Down Expand Up @@ -210,9 +212,10 @@ rule filter_paired:
mem_mb = 8000
shell:
"""
if [[ {params.length} == 'False') ]]; then
if [[ {params.length} == 'False' ]]; then
ln -s {input} {output}
else
samtools index {input}
samtools view -h {input} | grep -v 'CB:Z:-\|UB:Z:-' | samtools view -b - > {params.temp}
set +e
python3 inst/scripts/filter_bam_correct.py -i {params.temp} -o {params.temp2} -l {params.length}
Expand Down
5 changes: 4 additions & 1 deletion rules/cutadapt_star.snake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ import pandas as pd

""" Extract per-sample fastq paths """
def _get_fq_paths(wildcards):
fqs = map(lambda x: os.path.join(DATA, x + "*R1*"), SAMPLES[wildcards.sample]["basename"])
basename = SAMPLES[wildcards.sample]["basename"]
if isinstance(basename, str):
basename = [basename]
fqs = map(lambda x: os.path.join(DATA, x + "*R1*"), basename)
fqs = map(lambda x: glob.glob(x), fqs)
fqs = list(chain.from_iterable(fqs))

Expand Down

0 comments on commit e8743e8

Please sign in to comment.