Minor refactoring and update

wfondrie · May 8, 2024 · b75ac45 · b75ac45
1 parent e603993
commit b75ac45
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 17 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [v0.4.6]
+### Added
+- Added support for unsigned modification masses that don't quite conform to the Proforma standard.
+
 ## [v0.4.5]
 ### Changed
 - The `scan_id` column for parsed spectra is now a string instead of an integer. This is less space efficient, but we ran into issues with Sciex indexing when trying to use only an integer.

diff --git a/depthcharge/primitives.py b/depthcharge/primitives.py
@@ -17,6 +17,7 @@
 from rdkit.Chem import Draw
 from spectrum_utils.spectrum import MsmsSpectrum
 
+from . import utils
 from .constants import PROTON
 
 MSKB_TO_UNIMOD = {
@@ -72,22 +73,7 @@ def __post_init__(self) -> None:
             if mod is None:
                 continue
 
-            try:
-                try:
-                    mass = mod[0].value
-                except (IndexError, AttributeError):
-                    mass = mod
-
-                mod = [MassModification(float(mass))]
-            except ValueError:
-                try:
-                    mod = [GenericModification(mod)]
-                except (AttributeError, TypeError):
-                    pass
-            except TypeError:
-                pass
-
-            parsed[idx] = mod
+            parsed[idx] = [_resolve_mod(m) for m in utils.listify(mod)]
 
         self.modifications = parsed
         n_mod = self.modifications[0]
@@ -449,3 +435,30 @@ def to_tensor(self) -> torch.tensor:
 
         """
         return torch.tensor(np.vstack([self.mz, self.intensity]).T)
+
+
+def _resolve_mod(
+    mod: MassModification | GenericModification | str | float,
+) -> MassModification | GenericModification:
+    """Resolve the type of a modification.
+
+    An object exposing a ``value`` attribute is unwrapped to that value
+    first. A value that ``float()`` accepts becomes a ``MassModification``;
+    anything else becomes a ``GenericModification`` built from its string
+    form.
+
+    Parameters
+    ----------
+    mod : MassModification, GenericModification, str, or float
+        The modification to resolve.
+
+    Returns
+    -------
+    MassModification or GenericModification
+        The best modification for the input type.
+
+    """
+    # Plain strings and numbers have no ``value`` attribute and are used
+    # as given; modification objects are unwrapped.
+    value = getattr(mod, "value", mod)
+
+    try:
+        return MassModification(float(value))
+    except (TypeError, ValueError):
+        # ValueError: a string that does not parse as a number.
+        # TypeError: a value float() cannot accept at all. Without this
+        # the exception would propagate to the caller instead of the
+        # value resolving to a modification.
+        return GenericModification(str(value))
diff --git a/tests/unit_tests/test_primitives.py b/tests/unit_tests/test_primitives.py
@@ -53,7 +53,7 @@ def test_peptide_init():
 def test_almost_proforma():
     """Test a peptide lacking an explicit sign."""
     parsed = Peptide.from_proforma("LES[79.0]LIEK")
-    assert parsed.split() == ["L", "E", "S[79.000000]", "L", "I", "E", "K"]
+    assert parsed.split() == ["L", "E", "S[+79.000000]", "L", "I", "E", "K"]
 
 
 def test_peptide_from_proforma():

diff --git a/tests/unit_tests/test_tokenizers/test_peptides.py b/tests/unit_tests/test_tokenizers/test_peptides.py
@@ -109,3 +109,9 @@ def test_single_peptide():
     ion = tokenizer.calculate_precursor_ions("LESLIEK", 2)
     expected = mass.fast_mass("LESLIEK", charge=2, ion_type="M")
     torch.testing.assert_close(ion, torch.tensor([expected]))
+
+
+def test_almost_compliant_proform():
+    """Test that an unsigned mass is tokenized with an explicit sign."""
+    residues = PeptideTokenizer.from_proforma("[10]-EDITHR").residues
+    assert "[+10.000000]-" in residues