Skip to content

Commit

Permalink
Minor refactoring and update
Browse files Browse the repository at this point in the history
  • Loading branch information
wfondrie committed May 8, 2024
1 parent e603993 commit b75ac45
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [v0.4.6]
### Added
- Added support for unsigned modification masses that don't quite conform to the Proforma standard.

## [v0.4.5]
### Changed
- The `scan_id` column for parsed spectra is now a string instead of an integer. This is less space efficient, but we ran into issues with Sciex indexing when trying to use only an integer.
Expand Down
45 changes: 29 additions & 16 deletions depthcharge/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from rdkit.Chem import Draw
from spectrum_utils.spectrum import MsmsSpectrum

from . import utils
from .constants import PROTON

MSKB_TO_UNIMOD = {
Expand Down Expand Up @@ -72,22 +73,7 @@ def __post_init__(self) -> None:
if mod is None:
continue

try:
try:
mass = mod[0].value
except (IndexError, AttributeError):
mass = mod

mod = [MassModification(float(mass))]
except ValueError:
try:
mod = [GenericModification(mod)]
except (AttributeError, TypeError):
pass
except TypeError:
pass

parsed[idx] = mod
parsed[idx] = [_resolve_mod(m) for m in utils.listify(mod)]

self.modifications = parsed
n_mod = self.modifications[0]
Expand Down Expand Up @@ -449,3 +435,30 @@ def to_tensor(self) -> torch.tensor:
"""
return torch.tensor(np.vstack([self.mz, self.intensity]).T)


def _resolve_mod(
    mod: MassModification | GenericModification | str | float,
) -> MassModification | GenericModification:
    """Convert a modification into a mass or generic modification.

    Parameters
    ----------
    mod : MassModification, GenericModification, str, or float
        The modification to resolve. If it exposes a ``value``
        attribute, that value is used in place of the object itself.

    Returns
    -------
    MassModification or GenericModification
        A ``MassModification`` when the value can be converted to a
        float, otherwise a ``GenericModification`` built from its
        string form.
    """
    # Objects carrying a ``value`` attribute are unwrapped; anything
    # else (e.g. a plain str or float) is used as-is.
    raw = getattr(mod, "value", mod)

    try:
        resolved = MassModification(float(raw))
    except ValueError:
        # Not interpretable as a number, so keep it as a named modification.
        resolved = GenericModification(str(raw))

    return resolved
2 changes: 1 addition & 1 deletion tests/unit_tests/test_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_peptide_init():
def test_almost_proforma():
    """Test a peptide lacking an explicit sign.

    The input gives the modification mass as ``79.0`` with no leading
    sign; the split peptide is expected to render it with an explicit
    ``+``.
    """
    parsed = Peptide.from_proforma("LES[79.0]LIEK")
    # Only the signed form is asserted: asserting the unsigned form on the
    # same value as well would make the two expectations contradictory.
    assert parsed.split() == ["L", "E", "S[+79.000000]", "L", "I", "E", "K"]


def test_peptide_from_proforma():
Expand Down
6 changes: 6 additions & 0 deletions tests/unit_tests/test_tokenizers/test_peptides.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,9 @@ def test_single_peptide():
ion = tokenizer.calculate_precursor_ions("LESLIEK", 2)
expected = mass.fast_mass("LESLIEK", charge=2, ion_type="M")
torch.testing.assert_close(ion, torch.tensor([expected]))


def test_almost_compliant_proform():
    """Test initializing with a peptide without an explicit mass sign."""
    # The N-terminal mass is written as "[10]" with no sign in the input.
    residues = PeptideTokenizer.from_proforma("[10]-EDITHR").residues
    assert "[+10.000000]-" in residues

0 comments on commit b75ac45

Please sign in to comment.