-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseekdeep_prevalences.smk
47 lines (42 loc) · 1.41 KB
/
seekdeep_prevalences.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
'''
Takes mutations of interest, a metadata sheet, and coverage and alternate-count
thresholds as input, and turns them into prevalence tables.
This version is somewhat special-purpose because the input comes from many
separate runs.
'''
# Workflow settings. The rules in this file read these keys from `config`:
# seekdeep_graphing_runs, interesting_mutations, replicate_suffix,
# metadata_sheet, coverage_threshold, alternate_threshold, summarize_by,
# sample_column.
configfile: "seekdeep_prevalences.yaml"
# Target rule: asks only for the final prevalence table, so Snakemake schedules
# whichever rules are needed to produce that file.
rule all:
    input:
        prevalences="output_files/prevalences.tsv"
rule generate_counts_table:
    """
    Build the per-sample, per-mutation counts table from several sequencing
    runs.

    Inputs are the run files listed under config["seekdeep_graphing_runs"] and
    the config["interesting_mutations"] file; config["replicate_suffix"] is
    forwarded as a parameter. The work itself is done by
    scripts/generate_counts_table.py, which is expected to write a YAML counts
    table plus separate coverage and alternate-count CSVs.
    """
    output:
        counts_yaml="output_files/mutation_counts_table.yaml",
        coverage_csv="output_files/AA_coverage_counts.csv",
        alternate_csv="output_files/AA_alternate_counts.csv"
    input:
        # expand() over a single placeholder yields the configured run paths
        # as a list, one entry per run.
        input_count_files=expand(
            "{counts_file}",
            counts_file=config["seekdeep_graphing_runs"],
        ),
        interesting_mutations=config["interesting_mutations"]
    params:
        replicate_suffix=config["replicate_suffix"]
    script:
        "scripts/generate_counts_table.py"
rule calculate_prevalences:
    """
    Turn the mutation counts table into prevalence summaries.

    Consumes output_files/mutation_counts_table.yaml and the metadata sheet
    named in the config, and passes the coverage/alternate thresholds, the
    summarize_by setting, and the sample column name to
    scripts/calculate_prevalences.py, which is expected to write
    output_files/prevalences.tsv.
    """
    output:
        prevalence_table="output_files/prevalences.tsv"
    input:
        counts_yaml="output_files/mutation_counts_table.yaml",
        metadata_sheet=config["metadata_sheet"]
    params:
        # Thresholds and grouping options come straight from the config file.
        coverage_threshold=config["coverage_threshold"],
        alternate_threshold=config["alternate_threshold"],
        summarize_by=config["summarize_by"],
        sample_column=config["sample_column"]
    script:
        "scripts/calculate_prevalences.py"