From 94290e0b113e2438b16fcce294d75516cee023c2 Mon Sep 17 00:00:00 2001 From: "Anderson, Amos" Date: Thu, 4 Oct 2018 09:20:02 -0400 Subject: [PATCH 1/3] allow alternative approaches to finding missing residues --- pdbfixer/pdbfixer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index d1ac03e..9c2c92c 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -510,7 +510,7 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi # Create the new residue. - newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000)) + newResidue = chain.topology.addResidue(residueName, chain, "%d" % (firstIndex+i)) fraction = (i+1.0)/(numResidues+1.0) translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection templateAtoms = list(template.topology.atoms()) @@ -569,7 +569,7 @@ def removeChains(self, chainIndices=None, chainIds=None): return - def findMissingResidues(self): + def findMissingResidues(self,chainWithGapsOverride=None): """Find residues that are missing from the structure. The results are stored into the missingResidues field, which is a dict. Each key is a tuple consisting of @@ -590,6 +590,11 @@ def findMissingResidues(self): # Find the sequence of each chain, with gaps for missing residues. for chain in chains: + if chainWithGapsOverride: + if chain.id in chainWithGapsOverride: + chainWithGaps[chain] = chainWithGapsOverride[chain.id] + continue + minResidue = min(int(r.id) for r in chain.residues()) maxResidue = max(int(r.id) for r in chain.residues()) residues = [None]*(maxResidue-minResidue+1) @@ -607,6 +612,8 @@ def findMissingResidues(self): continue if chain in chainSequence: continue + if chain not in chainWithGaps: + continue for offset in range(len(sequence.residues)-len(chainWithGaps[chain])+1): if all(a == b or b == None for a,b in zip(sequence.residues[offset:], chainWithGaps[chain])): chainSequence[chain] = sequence From d82462eab404b3237cf67199e9b333174c2fd55c Mon Sep 17 00:00:00 2001 From: Amos Anderson Date: Thu, 4 Oct 2018 09:47:30 -0400 Subject: [PATCH 2/3] better handling of ligands in findMissingResidues --- pdbfixer/pdbfixer.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index 9c2c92c..bb8fb7c 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -587,6 +587,12 @@ def findMissingResidues(self,chainWithGapsOverride=None): chains = [c for c in self.topology.chains() if len(list(c.residues())) > 0] chainWithGaps = {} + # This is PDBFixer's best guess for what might appear in a SEQRES + + knownResidues = set(self.templates.keys()) | set(substitutions.keys()) + for s in self.sequences: + knownResidues.update(s.residues) + # Find the sequence of each chain, with gaps for missing residues. for chain in chains: @@ -595,10 +601,19 @@ def findMissingResidues(self,chainWithGapsOverride=None): chainWithGaps[chain] = chainWithGapsOverride[chain.id] continue - minResidue = min(int(r.id) for r in chain.residues()) - maxResidue = max(int(r.id) for r in chain.residues()) - residues = [None]*(maxResidue-minResidue+1) + seqresResidues = [] for r in chain.residues(): + if r.name in knownResidues: + seqresResidues.append(r) + else: + # assume that everything that follows is a ligand/water + break + + if len(seqresResidues) == 0: continue + minResidue = min(int(r.id) for r in seqresResidues) + maxResidue = max(int(r.id) for r in seqresResidues) + residues = [None]*(maxResidue-minResidue+1) + for r in seqresResidues: residues[int(r.id)-minResidue] = r.name chainWithGaps[chain] = residues From 4e889e9b89c4625a5e9dfbf967f69cb0158683a0 Mon Sep 17 00:00:00 2001 From: Amos Anderson Date: Thu, 4 Oct 2018 12:13:17 -0400 Subject: [PATCH 3/3] _addAtomsToTopology needs to preserve insertion codes --- pdbfixer/pdbfixer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index bb8fb7c..1e376fc 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -389,7 +389,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): # Create the new residue and add existing heavy atoms. - newResidue = newTopology.addResidue(residue.name, newChain, residue.id) + newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode) for atom in residue.atoms(): if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen): if atom.name == 'OXT' and (chain.index, indexInChain+1) in self.missingResidues: