-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmapping_sequence.py
executable file
·137 lines (107 loc) · 5.48 KB
/
mapping_sequence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
def reorder_atoms(mol_pdb_fname, mol_repeat_pdb, mol_match_pdb):
"""
Fucntion reorders the atom sequence of a suprecell pdb saved from Mercury similar to the single molecule pdb which has been used to generate the suprecell.
mol_pdb_fname : str
pdb file of the single molecule, used to generate suprecell
mol_repeat_pdb : str
pdb file of the supercell saved from Mercury
mol_match_pdb : str
pdb file of the reordered suprecell
"""
elements = []
with open(mol_pdb_fname, 'r') as pdb_file:
for line in pdb_file:
if line.startswith('HETATM'):
columns = line.split()
element = columns[2]
elements.append(element)
data = {}
current_key = 0
with open(mol_repeat_pdb, 'r') as f:
for line in f:
if line.startswith('HETATM'):
line_data = line.strip().split()
if len(line_data[0]) > 6:
atom_name = line_data[1]
if current_key not in data:
data[current_key] = {}
# Appending a unique index for duplicate entries
if atom_name in data[current_key]:
atom_name += str(len(data[current_key]))
values = {
'x_cord': float(line_data[4]),
'y_cord': float(line_data[5]),
'z_cord': float(line_data[6])
}
data[current_key][atom_name] = values
else:
atom_name = line_data[2]
if current_key not in data:
data[current_key] = {}
# Appending a unique index for duplicate entries
if atom_name in data[current_key]:
atom_name += str(len(data[current_key]))
values = {
'x_cord': float(line_data[5]),
'y_cord': float(line_data[6]),
'z_cord': float(line_data[7])
}
data[current_key][atom_name] = values
if len(data[current_key]) == len(elements):
current_key += 1
### Make sure the supercell pdb file save from Mercury has the similar atom order (can be different name) with the single compund PBD file, if not manually create the list the matches the atom order.
###keys_mapping = manually generated list
keys_mapping = elements
#keys_mapping = ['C1', 'C1D', 'C2', 'C2D', 'C3', 'C3D', 'H1', 'H1D', 'H2', 'H2D', 'H3', 'H3D' ]
# Replace the keys in each dictionary
for key, val in data.items():
temp = {}
for old_key, new_key in zip(list(val), keys_mapping):
temp[new_key] = val[old_key]
val.clear()
val.update(temp)
# if your key_mapping is similar to the elements, you mightn't have to run next three lines, but anyway it does not make any difference
new_data = {}
for key, entry in data.items():
new_data[key] = {atom: entry.get(atom, {}) for atom in elements}
with open(mol_match_pdb, 'w') as outfile, open(mol_repeat_pdb, 'r') as infile:
for line in infile:
if line.startswith('HETATM'):
break
outfile.write(line)
# Initialize the global atom_counter
atom_counter = 1
# Iterate over each dictionary in the data
for idx, atom_data in enumerate(new_data.values(), start=1):
for atom_name, coordinates in atom_data.items():
outfile.write('{ATOM:<6}{serial_number:>5} {atom_name:<4}{alt_loc_indicator:<1}{res_name:<3} {chain_id:<1}{res_seq_number:>4}{insert_code:<1} {x:8.3f}{y:8.3f}{z:8.3f}{occupancy:6.2f}{temp_factor:6.2f}\n'.format(
ATOM='HETATM',
serial_number=atom_counter,
atom_name=atom_name,
alt_loc_indicator=' ',
res_name="UNK",
chain_id='',
res_seq_number=idx,
insert_code=' ',
x= coordinates['x_cord'],
y= coordinates['y_cord'],
z= coordinates['z_cord'],
occupancy=1.0,
temp_factor=0.0
))
# Increment the global atom_counter
atom_counter += 1
print("END", file=outfile)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='match the connectivity of two PDBs for same molecule',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--template', metavar='pdb file', default='in.pdb',
help='(in) file name of the original PDB file save from cif file')
parser.add_argument('--input', metavar='pdb file', default='template.pdb',
help='(in) file name of the supercell pdb file save from pymatgen cif file')
parser.add_argument('--output', metavar='pdb file', default='out.pdb',
help='(out) file name for the reordered supercell molecule ')
args = parser.parse_args()
reorder_atoms(args.template, args.input, args.output)