-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmultiego.py
257 lines (223 loc) · 10 KB
/
multiego.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import argparse
import sys
import os
import pandas as pd
import time
import gc
from src.multiego import ensemble
from src.multiego import io
from tools.face_generator import generate_face
from src.multiego.resources.type_definitions import parse_json
from src.multiego.arguments import args_dict
def _exit_with_error(message):
    """Print ``message`` and terminate the process with a non-zero exit status."""
    print(message)
    sys.exit(1)


def _check_epsilon(label, value, epsilon_min):
    """Abort unless the epsilon ``value`` for ``label`` is set and not below ``epsilon_min``."""
    if value is None:
        # Comparing None with a float would raise TypeError; report it cleanly instead.
        _exit_with_error("ERROR: epsilon value for " + label + " is not set")
    if value < epsilon_min:
        _exit_with_error("ERROR: epsilon value for " + label + " is less than epsilon_min")


def meGO_parsing():
    """
    Parse and validate the multi-eGO command-line arguments.

    The argument parser is built from ``args_dict``. When ``--config`` is given, the
    configuration file is read through ``io.read_config`` and merged into the parsed
    arguments with ``io.combine_configurations``. The function then validates the
    arguments, initialises the ensemble via ``ensemble.init_meGO_ensemble`` and fills in
    the per-molecule epsilon dictionaries that were not provided explicitly.

    Side effects:
        - removes the ``"type"`` entry of boolean flags from the shared ``args_dict``;
        - prints progress, warning and error messages to stdout;
        - terminates the process with exit status 1 on any validation error or when
          the script is invoked without arguments.

    Returns:
        tuple: ``(args, mego_ensemble, custom_dict)`` where ``args`` is the completed
        ``argparse.Namespace``, ``mego_ensemble`` is the object returned by
        ``ensemble.init_meGO_ensemble`` and ``custom_dict`` is the parsed custom
        dictionary (an empty dict when ``--custom_dict`` is not given).
    """
    # NOTE(review): the epilog examples use "--egos rc", while main() only handles the
    # "production" and "mg" modes - confirm that the help text is still up to date.
    parser = argparse.ArgumentParser(
        prog="multiego.py",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""\
Generates a multi-eGO model based on one or more training simulations
and their corresponding reference simulations. In most cases one single
parameter is required, --epsilon, that sets the maximum interaction energy
for a contact pair.
""",
        epilog="""\
example usage:
1) generate a random coil prior model to generate the reference data for a single domain intramolecular interactions
> python multiego.py --system GB1 --egos rc
2) generate a production simulation using the reference data in the reference folder and the training data in the md_monomer folder
interaction energy is set to 0.3 kJ/mol
> python multiego.py --system GB1 --egos production --train md_monomer --epsilon 0.3
""",
    )

    for arg, arg_dict in args_dict.items():
        # necessary for the boolean flags: argparse does not accept a ``type`` keyword
        # for store_true/store_false actions. The key is removed from the shared
        # dictionary as before; the default keeps a missing key (e.g. on a second
        # call) from raising KeyError.
        if arg_dict.get("action") in ("store_true", "store_false"):
            arg_dict.pop("type", None)
        parser.add_argument(arg, **arg_dict)

    args, remaining = parser.parse_known_args()
    args.root_dir = os.path.dirname(os.path.abspath(__file__))

    # Check if no arguments are provided
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    if args.config:
        config_yaml = io.read_config(args.config, args_dict)
        # check if yaml file is empty
        if not config_yaml:
            print("WARNING: Configuration file was parsed, but the dictionary is empty")
        else:
            args = io.combine_configurations(config_yaml, args, args_dict)

    # check if the configuration file is provided or if system, and egos rc are provided
    # or if system, egos production, train and epsilon are provided
    if not args.system:
        _exit_with_error("ERROR: No system name found! Please provide a system name.")
    if not args.egos:
        _exit_with_error("ERROR: No egos mode found! Please provide an egos mode.")
    if args.egos == "production" and not args.train:
        _exit_with_error("ERROR: No training simulations found! Please provide a list of training simulations.")
    if args.egos == "production" and not (
        args.epsilon or args.multi_epsilon_intra or args.multi_epsilon_inter or args.inter_epsilon
    ):
        _exit_with_error("ERROR: No epsilon value found! Please provide an epsilon value.")

    if args.p_to_learn < 0.9:
        print("WARNING: --p_to_learn should be large enough (suggested value is 0.9995)")
    if args.epsilon_min <= 0.0:
        _exit_with_error("--epsilon_min (" + str(args.epsilon_min) + ") must be greater than 0.")

    # True when at least one per-molecule epsilon dictionary was supplied by the user
    multi_flag = bool(args.multi_epsilon_intra or args.multi_epsilon_inter_domain or args.multi_epsilon_inter)

    custom_dict = {}
    if args.custom_dict:
        custom_dict = parse_json(args.custom_dict)
        if custom_dict is None:
            _exit_with_error("ERROR: Custom dictionary was parsed, but the dictionary is empty")

    print(f"Running Multi-eGO: {args.egos}\n")
    print("- Processing Multi-eGO topology")
    mego_ensemble = ensemble.init_meGO_ensemble(args, custom_dict)
    topol_names = list(mego_ensemble["topology"].molecules)

    # Collect every molecule name mentioned in the multi-epsilon dictionaries
    multi_names = set(args.multi_epsilon_intra)
    multi_names.update(args.multi_epsilon_inter_domain)
    for first_name, partners in args.multi_epsilon_inter.items():
        multi_names.add(first_name)
        multi_names.update(partners)
    args.names = list(multi_names)

    if multi_flag:
        if sorted(args.names) != sorted(topol_names):
            _exit_with_error("ERROR: The names of the molecules in the topology and the multi-epsilon files are different")
    else:
        # without multi-epsilon input the molecule names come from the topology
        args.names = topol_names

    if args.egos == "production" and not args.reference:
        args.reference = ["reference"]

    # --epsilon is the fallback for the inter-molecular and inter-domain values
    if args.epsilon and not args.inter_epsilon:
        args.inter_epsilon = args.epsilon
    if args.epsilon and not args.inter_domain_epsilon:
        args.inter_domain_epsilon = args.epsilon

    # Fill in the per-molecule dictionaries that were not provided explicitly
    if not args.multi_epsilon_intra:
        args.multi_epsilon_intra = dict.fromkeys(args.names, args.epsilon)
    if not args.multi_epsilon_inter_domain:
        if args.inter_domain_epsilon:
            args.multi_epsilon_inter_domain = dict.fromkeys(args.names, args.inter_domain_epsilon)
        else:
            # intentionally the same dictionary object as the intra-molecular one
            args.multi_epsilon_inter_domain = args.multi_epsilon_intra
    if not args.multi_epsilon_inter and args.inter_epsilon:
        args.multi_epsilon_inter = {k1: {k2: args.inter_epsilon for k2 in args.names} for k1 in args.names}

    # check all epsilons are set and greater than epsilon_min
    if args.egos == "production":
        for mol_name, eps in args.multi_epsilon_intra.items():
            _check_epsilon(mol_name, eps, args.epsilon_min)
        for mol_name, eps in args.multi_epsilon_inter_domain.items():
            _check_epsilon(mol_name, eps, args.epsilon_min)
        for first_name, partners in args.multi_epsilon_inter.items():
            for second_name, eps in partners.items():
                _check_epsilon(first_name + "-" + second_name, eps, args.epsilon_min)

    if args.symmetry_file and args.symmetry:
        _exit_with_error("ERROR: Both symmetry file and symmetry list provided. Please provide only one.")
    if args.symmetry_file:
        args.symmetry = io.read_symmetry_file(args.symmetry_file)
    elif args.symmetry:
        args.symmetry = io.parse_symmetry_list(args.symmetry)

    # The custom c12 file is only validated here; the parsed table is not returned
    custom_c12_dict = pd.DataFrame()
    if args.custom_c12 is not None:
        custom_c12_dict = io.read_custom_c12_parameters(args.custom_c12)
        if custom_c12_dict is None or custom_c12_dict.empty:
            _exit_with_error("ERROR: Custom c12 paramter file was parsed, but the dictionary is empty")

    if remaining:
        print("Unknown arguments provided: " + str(remaining))
        parser.print_usage()
        sys.exit(1)

    return args, mego_ensemble, custom_dict
def _report_elapsed(since):
    """Print the wall-clock time elapsed since ``since`` and return the current timestamp."""
    now = time.time()
    print("- Done in:", now - since, "seconds")
    return now


def main():
    """
    Generate a multi-eGO model from the command-line configuration.

    Parses the arguments with ``meGO_parsing``, checks the input files, generates the
    bonded interactions and the 1-4 data, builds the LJ parameters according to
    ``args.egos`` ("production" or "mg"), finalizes pairs and exclusions and writes the
    model with ``io.write_model``. The time spent in each stage is printed to stdout.

    Exits with status 1 when ``args.egos`` is not one of the handled modes.
    """
    bt = time.time()
    generate_face.print_welcome()
    args, meGO_ensembles, custom_dict = meGO_parsing()
    st = _report_elapsed(bt)

    print("- Checking for input files and folders")
    io.check_files_existence(args)
    if args.egos == "production":
        io.check_matrix_format(args)
    print("\t- Generating bonded interactions")
    meGO_ensembles = ensemble.generate_bonded_interactions(meGO_ensembles)
    print("\t- Generating 1-4 data")
    pairs14, exclusion_bonds14 = ensemble.generate_14_data(meGO_ensembles)
    st = _report_elapsed(st)

    if args.egos == "production":
        print("- Processing Multi-eGO contact matrices")
        meGO_ensembles, matrices = ensemble.init_meGO_matrices(meGO_ensembles, args, custom_dict)
        st = _report_elapsed(st)

        print("- Initializing LJ dataset")
        train_dataset = ensemble.init_LJ_datasets(meGO_ensembles, matrices, pairs14, exclusion_bonds14, args)
        # force memory cleaning to decrease footprint in case of large dataset
        del matrices
        gc.collect()
        st = _report_elapsed(st)

        print("- Generate LJ dataset")
        meGO_LJ, meGO_LJ_14 = ensemble.generate_LJ(meGO_ensembles, train_dataset, args)
        # force memory cleaning to decrease footprint in case of large dataset
        del train_dataset
        gc.collect()
        st = _report_elapsed(st)
    elif args.egos == "mg":
        print("- Generate the LJ dataset")
        meGO_LJ = ensemble.generate_OO_LJ(meGO_ensembles)
        io.print_stats(meGO_LJ)
        meGO_LJ_14 = pairs14
        st = _report_elapsed(st)
    else:
        # Without this branch meGO_LJ and meGO_LJ_14 would be unbound below and the
        # run would end with a NameError instead of a readable message.
        print(f"ERROR: Unsupported egos mode '{args.egos}'. Supported modes are 'production' and 'mg'.")
        sys.exit(1)

    print("- Finalize pairs and exclusions")
    meGO_LJ_14 = ensemble.make_pairs_exclusion_topology(meGO_ensembles, meGO_LJ_14, args)
    st = _report_elapsed(st)

    print("- Writing Multi-eGO model")
    meGO_LJ = ensemble.sort_LJ(meGO_ensembles, meGO_LJ)
    io.write_model(meGO_ensembles, meGO_LJ, meGO_LJ_14, args)
    et = _report_elapsed(st)
    # total run time, measured from the very start of main()
    print("- Ran in:", et - bt, "seconds")
    generate_face.print_goodbye()
# Run the model generation only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()