-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile_conf_gen
77 lines (64 loc) · 2.64 KB
/
Snakefile_conf_gen
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import json
import pandas as pd
import ase.io
# Read the molecule table and derive the final workflow targets.
# data/input.smi is read as a header-less, tab-separated file whose two
# columns are named hmdb_id and smiles here.
df = pd.read_csv('data/input.smi', delimiter='\t', names=['hmdb_id', 'smiles'])
ids = df['hmdb_id'].tolist()

# One compressed conformer archive is requested for every entry found under
# results/<hmdb_id>/ (the level the rules below call {adduct}).
# os.listdir returns entries in arbitrary order, so `targets` is unordered.
targets = [
    '/'.join(('results', mol_id, charge_dir, 'generated_conformers', 'model0.tar.gz'))
    for mol_id in ids
    for charge_dir in os.listdir('results/' + mol_id)
]
# Load the two JSON configuration files from the working directory.
# `paths` is read by rule quick_sp (key 'quick_path'); `args` is read by
# rule generate_confs (key 'generate_up_to_n_confomers').
with open('paths.json', 'r') as paths_handle:
    paths = json.loads(paths_handle.read())

with open('arguments.json', 'r') as args_handle:
    args = json.loads(args_handle.read())
# Count the "*.in" files present in each site_screen directory.
# ionized_models[hmdb_id][adduct] -> number of ".in" files; rule
# site_screen_energy uses this count to enumerate the 0.out .. (n-1).out
# files it waits for.
ionized_models = {}
for mol_id in ids:
    ionized_models[mol_id] = {}
    for charge_dir in os.listdir('results/' + mol_id):
        screen_dir = 'results/' + mol_id + '/' + charge_dir + '/site_screen/'
        # Match the full ".in" extension: comparing only the last two
        # characters would also count unrelated names such as "origin".
        ionized_models[mol_id][charge_dir] = sum(
            1 for entry in os.listdir(screen_dir) if entry.endswith('.in')
        )
# Snakemake rules
# Default rule: requests every archive path collected in `targets`.
rule all:
    input:
        targets
def _site_screen_outputs(wildcards):
    """Return the {num}.out paths required for one hmdb_id/adduct pair.

    The number of files comes from ionized_models, so the paths are
    numbered 0 .. count-1.
    """
    count = ionized_models[wildcards.hmdb_id][wildcards.adduct]
    return expand(
        "results/{hmdb_id}/{adduct}/site_screen/{num}.out",
        hmdb_id=wildcards.hmdb_id,
        adduct=wildcards.adduct,
        num=range(count),
    )


# Runs the energy-gathering script on a site_screen directory once all of its
# expected .out files exist.
# NOTE(review): energy.csv is not passed on the command line; presumably the
# script writes it into the directory it is given -- confirm.
rule site_screen_energy:
    input:
        _site_screen_outputs
    output:
        "results/{hmdb_id}/{adduct}/site_screen/energy.csv"
    shell:
        """
        src/gather_quick_energy.py results/{wildcards.hmdb_id}/{wildcards.adduct}/site_screen/
        """
# Runs charged_model_smiles.py on the site-screen energy table.
# NOTE(review): the declared output is charged_model.smi, but the path handed
# to the script is model.smi. Whether model.smi is a second input and the
# script writes charged_model.smi on its own cannot be seen from this file --
# confirm, otherwise Snakemake will report the output as missing.
rule charged_models:
    input:
        "results/{hmdb_id}/{adduct}/site_screen/energy.csv"
    output:
        "results/{hmdb_id}/{adduct}/charged_model.smi"
    shell:
        """
        src/charged_model_smiles.py {input} results/{wildcards.hmdb_id}/{wildcards.adduct}/model.smi
        """
# Runs generate_conformers.py on the charged-model SMILES file.
# Only model0/0.mol is tracked as output. conf_num is taken from
# arguments.json (the key spelling 'confomers' must match that file).
# NOTE(review): the meaning of the literal 10 passed to the script is not
# visible here -- confirm against src/generate_conformers.py.
rule generate_confs:
    input:
        "results/{hmdb_id}/{adduct}/charged_model.smi"
    output:
        "results/{hmdb_id}/{adduct}/generated_conformers/model0/0.mol"
    params:
        conf_num=args['generate_up_to_n_confomers']
    shell:
        """
        mkdir -p results/{wildcards.hmdb_id}/{wildcards.adduct}/generated_conformers
        src/generate_conformers.py {input} 10 {params.conf_num}
        """
# Runs quick.cuda on one site-screen input file ({num}.in -> {num}.out).
# The environment is prepared by loading the CUDA/10.0.130 module and sourcing
# the script named by paths['quick_path']; the command is executed from inside
# the site_screen directory, so the input is referenced by bare file name.
rule quick_sp:
    input:
        "results/{hmdb_id}/{adduct}/site_screen/{num}.in"
    output:
        "results/{hmdb_id}/{adduct}/site_screen/{num}.out"
    params:
        quick_path=paths['quick_path']
    shell:
        """
        module load CUDA/10.0.130
        source {params.quick_path}
        cd results/{wildcards.hmdb_id}/{wildcards.adduct}/site_screen/
        quick.cuda {wildcards.num}.in
        """
# Archives each entry of generated_conformers/ as <entry>.tar.gz.
# Only model0.tar.gz is declared as the tracked output; the rule is triggered
# by the presence of model0/0.mol.
rule compress:
    input:
        "results/{hmdb_id}/{adduct}/generated_conformers/model0/0.mol"
    output:
        "results/{hmdb_id}/{adduct}/generated_conformers/model0.tar.gz"
    shell:
        """
        cd results/{wildcards.hmdb_id}/{wildcards.adduct}/generated_conformers/
        for d in *; do
            # Skip archives left behind by an earlier run; otherwise they
            # would be packed again as <name>.tar.gz.tar.gz.
            case "$d" in
                *.tar.gz) continue ;;
            esac
            # Quote the name so an entry containing spaces stays one argument.
            tar -zcvf "$d".tar.gz "$d"
        done
        """