forked from npschafer/openawsem
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmm_create_project.py
executable file
·199 lines (170 loc) · 9.72 KB
/
mm_create_project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#!/usr/bin/env python3
import os
import argparse
import sys
import openmmawsem
import helperFunctions.myFunctions
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
__author__ = 'Wei Lu'
parser = argparse.ArgumentParser(
description="The goal of this python3 code is to automatically create\
the project template as fast as possible. Written by Wei Lu."
)
parser.add_argument("protein", help="The name of the protein(1r69 for example): \
python3 ~/OPENAWSEM_LOCATION/mm_create_project.py 1r69 or you can specify the target pdb file you want\
to simulation. for example do: python3 ~/OPENAWSEM_LOCATION/mm_create_project.py YOUR_PDB_LOCATION/1r69.pdb")
parser.add_argument("-c", "--chain", default="-1", help="chains to be simulated, could be for example 'abc'.")
parser.add_argument("-d", "--debug", action="store_true", default=False)
parser.add_argument("--frag", action="store_true", default=False, help="Generate fragment memories")
parser.add_argument("--extended", action="store_true", default=False, help="feature in development, Start from an extended structure generated using pymol (please ensure it is installed), only support single chain.")
parser.add_argument("--membrane", action="store_true", default=False)
parser.add_argument("--hybrid", action="store_true", default=False)
parser.add_argument("--verbose", action="store_true", default=False)
parser.add_argument("--predict_ssweight_from_fasta", action="store_true", default=False)
parser.add_argument("--keepIds", action="store_true", default=False, help="Set to True if you want to preserve the chain and residue index. default will rename chains from 'A', and index from 1")
parser.add_argument("--keepLigands", action="store_true", default=False)
args = parser.parse_args()
# Print if in debug
if args.debug:
do = print
cd = print
else:
do = os.system
cd = os.chdir
# Log the command to a file
with open('create_project_commandline_args.txt', 'w') as f:
f.write(' '.join(sys.argv))
f.write('\n')
# if you provide the pdb then we will use it for the project(move to folder named original_pdbs to prevent from overwritten), otherwise we download it online.
if args.protein[-4:] == '.pdb':
if not os.path.exists(args.protein):
print("ERROR: the pdb you specified is not exist")
exit()
name = os.path.basename(args.protein)[:-4]
pdb = os.path.basename(args.protein)
do("mkdir -p original_pdbs")
do(f"cp {args.protein} original_pdbs/")
elif args.protein[-6:] == ".fasta":
print("Creating simulation folder from fasta file.")
name = os.path.basename(args.protein)[:-6]
# use pymol to generate the initial pdb.
do(f"python3 {__location__}/helperFunctions/fasta2pdb.py {name} -f {args.protein}")
helperFunctions.myFunctions.add_chain_to_pymol_pdb(f"{name}.pdb") # only work for one chain only now
do("mkdir -p original_fasta")
do("mkdir -p original_pdbs")
do(f"cp {args.protein} original_fasta/")
do(f"cp {name}.pdb crystal_structure.pdb") # treat as crystal_structure.
pdb = f"{name}.pdb"
else:
# If the file does not exist download it from the server
name = args.protein
pdb = f"{name}.pdb"
pdb_list = [name]
helperFunctions.myFunctions.downloadPdb(pdb_list)
removeHeterogens = False if args.keepLigands is True else True
chain = args.chain
if not os.path.exists(f"crystal_structure.pdb"):
helperFunctions.myFunctions.cleanPdb([name], chain=chain, toFolder="cleaned_pdbs", verbose=args.verbose, keepIds=True, removeHeterogens=removeHeterogens)
do(f"cp cleaned_pdbs/{pdb} crystal_structure.pdb")
# If the chain is not specified then select all the chains
if chain == "-1":
chain = helperFunctions.myFunctions.getAllChains("crystal_structure.pdb", removeDNAchains=True)
print("Chains info read from crystal_structure.pdb, chains to simulate: ", chain)
# for compute Q
input_pdb_filename, cleaned_pdb_filename = openmmawsem.prepare_pdb("crystal_structure.pdb", chain, use_cis_proline=False, keepIds=args.keepIds, removeHeterogens=removeHeterogens)
openmmawsem.ensure_atom_order(input_pdb_filename)
# get fasta, pdb, seq file ready
chain = helperFunctions.myFunctions.getAllChains("crystal_structure-cleaned.pdb")
openmmawsem.getSeqFromCleanPdb(input_pdb_filename, chains=chain, writeFastaFile=True)
do(f"cp crystal_structure.fasta {name}.fasta")
if args.extended:
print("Trying to create the extended structure extended.pdb using pymol, please ensure that pymol is installed and callable using 'pymol' in terminal.")
do(f"python3 {__location__}/helperFunctions/fasta2pdb.py extended -f {name}.fasta")
# print("If you has multiple chains, please use other methods to generate the extended structures.")
helperFunctions.myFunctions.add_chain_to_pymol_pdb("extended.pdb") # only work for one chain only now
input_pdb_filename, cleaned_pdb_filename = openmmawsem.prepare_pdb("extended.pdb", "A", use_cis_proline=False, keepIds=args.keepIds, removeHeterogens=removeHeterogens)
openmmawsem.ensure_atom_order(input_pdb_filename)
do(f"cp crystal_structure-cleaned.pdb {pdb}")
if args.keepLigands:
# cleaned_pdb_filename = f"{name}-cleaned.pdb"
# input_pdb_filename = f"{name}-openmmawsem.pdb"
# do(f"grep 'ATOM' {input_pdb_filename} > tmp.pdb")
# do(f"grep 'HETATM' {cleaned_pdb_filename} >> tmp.pdb")
# do(f"mv tmp.pdb {input_pdb_filename}")
do(f"cp crystal_structure-cleaned.pdb {name}-cleaned.pdb")
do(f"grep 'ATOM' crystal_structure-openmmawsem.pdb > tmp.pdb")
do(f"grep 'HETATM' crystal_structure-cleaned.pdb >> tmp.pdb")
do(f"mv tmp.pdb {name}-openmmawsem.pdb")
else:
input_pdb_filename, cleaned_pdb_filename = openmmawsem.prepare_pdb(pdb, chain, keepIds=args.keepIds, removeHeterogens=removeHeterogens)
openmmawsem.ensure_atom_order(input_pdb_filename)
os.system(f"cp {__location__}/parameters/burial_gamma.dat .")
os.system(f"cp {__location__}/parameters/gamma.dat .")
os.system(f"cp {__location__}/parameters/membrane_gamma.dat .")
os.system(f"cp {__location__}/parameters/anti_* .")
os.system(f"cp {__location__}/parameters/para_* .")
for c in chain:
# print(f"convert chain {c} of crystal structure to Gro file")
do(f"python {__location__}/helperFunctions/Pdb2Gro.py crystal_structure-cleaned.pdb {name}_{c}.gro {c}")
## ssweight
do("stride crystal_structure.pdb > ssweight.stride")
do(f"python {__location__}/helperFunctions/stride2ssweight.py > ssweight")
protein_length = helperFunctions.myFunctions.getFromTerminal("wc ssweight").split()[0]
if int(protein_length) == 0:
seq = helperFunctions.myFunctions.read_fasta(f"{name}.fasta")
# print(len(seq))
protein_length = len(seq)
print("impose no secondary bias.")
print("you might want to install Predict_Property and use the predict_ssweight_from_fasta option.")
with open("ssweight", "w") as out:
for i in range(protein_length):
out.write("0.0 0.0\n")
print(f"protein: {name}, length: {protein_length}")
if args.predict_ssweight_from_fasta:
# another option for secondary prediction bias generation is using "Predict_Property.sh -i {name}.fasta" to predict from fasta file.
# but you need install it from https://github.com/realbigws/Predict_Property.
# after installation, you can do the following to generate ssweight.
# for me I put 'export Predict_Property="/Users/weilu/Research/Build/Predict_Property"' inside ~/.bash_profile file.
do(f"$Predict_Property/Predict_Property.sh -i {name}.fasta")
from_secondary = f"{name}_PROP/{name}.ss3"
toPre = "."
to_ssweight = f"{toPre}/ssweight"
print("convert ssweight")
import pandas as pd
data = pd.read_csv(from_secondary, comment="#", names=["i", "Res", "ss3", "Helix", "Sheet", "Coil"], sep="\s+")
# print(data)
with open(to_ssweight, "w") as out:
for i, line in data.iterrows():
if line["ss3"] == "H":
out.write("1.0 0.0\n")
if line["ss3"] == "E":
out.write("0.0 1.0\n")
if line["ss3"] == "C":
out.write("0.0 0.0\n")
seq_data = helperFunctions.myFunctions.seq_length_from_pdb("crystal_structure-cleaned.pdb", chain)
with open("single_frags.mem", "w") as out:
out.write("[Target]\nquery\n\n[Memories]\n")
for (chain_name, chain_start_residue_index, seq_length) in seq_data:
# print(f"write chain {chain_name}")
out.write(f"{name}_{chain_name}.gro {chain_start_residue_index} 1 {seq_length} 20\n") # residue index in Gro always start at 1.
# below used for zim and zimPosition file
if args.membrane or args.hybrid:
do("grep -E 'CB|CA GLY' crystal_structure-cleaned.pdb > cbs.data")
do("""awk '{if($9>15) print "1"; else if($9<-15) print "3"; else print "2"}' cbs.data > zimPosition""")
helperFunctions.myFunctions.create_zim(f"crystal_structure.fasta", tableLocation=f"{__location__}/helperFunctions")
if args.frag:
do(f"cp crystal_structure.fasta {name}.fasta")
do(f"cp {__location__}/database/cullpdb_pc80_* .")
do(f"python {__location__}/helperFunctions/MultCha_prepFrags_index.py \
cullpdb_pc80_res3.0_R1.0_d160504_chains29712 %s.fasta 20 1 9 > logfile" % name)
helperFunctions.myFunctions.check_and_correct_fragment_memory("frags.mem")
helperFunctions.myFunctions.relocate(fileLocation="frags.mem", toLocation="fraglib")
# print(f"{__location__}//Gros/")
helperFunctions.myFunctions.replace(f"frags.mem", f"{__location__}//Gros/", "./fraglib/")
do("cp frags.mem frag_memory.mem")
do(f"cp {__location__}/mm_run.py .")
do(f"cp {__location__}/mm_analysis.py .")
# do(f"cp {__location__}/params.py .")
do(f"cp {__location__}/forces_setup.py .")
print(f"{args.protein} project folder created")
print("please modify the force_steps.py if we want to change what energy terms to be used.")