Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Precursor m/z value fix #283

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 5 additions & 22 deletions src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,36 +85,19 @@ def _load(self) -> None:

# Load the spectrum
spectrum_id: str = spec["params"]["scans"]
# calculate precursor m/z from precursor mass and charge
precursor_mass = spec["params"]["pepmass"][0]
precursor_charge = self._get_precursor_charge(spec["params"]["charge"])
precursor_mz: float = precursor_mass / abs(precursor_charge)
# The "pepmass" field in an MGF file holds the precursor m/z, not the peptide mass.
# See: https://www.matrixscience.com/help/obsolete_data_file_formats.html
precursor_mz: float = spec["params"]["pepmass"][0]
CunliangGeng marked this conversation as resolved.
Show resolved Hide resolved
precursor_charge: int = spec["params"]["charge"][0]
rt = spec["params"].get("rtinseconds", 0)

spectrum = Spectrum(
id=spectrum_id,
mz=list(spec["m/z array"]),
intensity=list(spec["intensity array"]),
precursor_mz=precursor_mz,
precursor_charge=precursor_charge,
rt=rt,
metadata=spec["params"],
)
self._spectra.append(spectrum)

def _get_precursor_charge(self, charges: list[int]) -> int:
    """Return the precursor charge taken from the first entry of `charges`.

    A charge of 0 is treated as invalid: a warning that includes
    `self._file` is logged and 1 is returned instead.

    Args:
        charges: list of charge values; only the first element is used.

    Returns:
        The first charge value, or 1 when that value is 0.
    """
    charge = charges[0]
    if charge != 0:
        return charge

    # Lazy %-style arguments: the message is only formatted if the warning
    # is actually emitted, and the resulting text is unchanged.
    logger.warning(
        "Invalid precursor charge value 0. Assuming charge is 1 for spectrum '%s'.",
        self._file,
    )
    return 1
CunliangGeng marked this conversation as resolved.
Show resolved Hide resolved
19 changes: 15 additions & 4 deletions src/nplinker/metabolomics/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ class Spectrum:
id: the spectrum ID.
mz: the list of m/z values.
intensity: the list of intensity values.
precursor_mz: the m/z value of the precursor.
precursor_mz: the m/z value of the precursor ion.
precursor_charge: the charge of the precursor ion.
rt: the retention time in seconds.
metadata: the metadata of the spectrum, i.e. the header information in the MGF
file.
metadata: the metadata of the spectrum, i.e. the header information in the MGF file.
gnps_annotations: the GNPS annotations of the spectrum.
gnps_id: the GNPS ID of the spectrum.
strains: the strains that this spectrum belongs to.
Expand All @@ -34,6 +34,7 @@ def __init__(
mz: list[float],
intensity: list[float],
precursor_mz: float,
precursor_charge: int,
rt: float = 0,
metadata: dict | None = None,
) -> None:
Expand All @@ -44,6 +45,7 @@ def __init__(
mz: the list of m/z values.
intensity: the list of intensity values.
precursor_mz: the precursor m/z.
precursor_charge: the charge of the precursor ion.
rt: the retention time in seconds. Defaults to 0.
metadata: the metadata of the spectrum, i.e. the header information
in the MGF file.
Expand All @@ -52,6 +54,7 @@ def __init__(
self.mz = mz
self.intensity = intensity
self.precursor_mz = precursor_mz
self.precursor_charge = precursor_charge
self.rt = rt
self.metadata = metadata or {}

Expand All @@ -78,7 +81,15 @@ def __reduce__(self) -> tuple:
"""Reduce function for pickling."""
return (
self.__class__,
(self.id, self.mz, self.intensity, self.precursor_mz, self.rt, self.metadata),
(
self.id,
self.mz,
self.intensity,
self.precursor_mz,
self.precursor_charge,
self.rt,
self.metadata,
),
self.__dict__,
)

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/metabolomics/test_molecular_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
@pytest.fixture()
def spectrum1():
    """Yield a Spectrum with ID "spec001" that belongs to strain "strain001"."""
    # `precursor_charge` is a required constructor argument, so it is always passed.
    spec = Spectrum(
        id="spec001",
        mz=[1.0],
        intensity=[1.0],
        precursor_mz=100.0,
        precursor_charge=1,
    )
    spec.strains = StrainCollection()
    spec.strains.add(Strain("strain001"))
    yield spec
Expand All @@ -17,7 +17,7 @@ def spectrum1():
@pytest.fixture()
def spectrum2():
    """Yield a Spectrum with ID "spec002" that belongs to strain "strain002"."""
    # `precursor_charge` is a required constructor argument, so it is always passed.
    spec = Spectrum(
        id="spec002",
        mz=[1.0],
        intensity=[1.0],
        precursor_mz=100.0,
        precursor_charge=1,
    )
    spec.strains = StrainCollection()
    spec.strains.add(Strain("strain002"))
    yield spec
Expand Down
17 changes: 9 additions & 8 deletions tests/unit/metabolomics/test_spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
)
def test_init(rt, metadata, expected_metadata):
"""Test the initialization of the Spectrum class."""
spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, rt, metadata)
spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, rt, metadata)

assert spec.id == "spec1"
assert spec.mz == [100, 200]
assert spec.intensity == [0.1, 0.2]
assert spec.precursor_mz == 150
assert spec.precursor_charge == 1
assert spec.rt == rt
assert spec.metadata == expected_metadata

Expand All @@ -32,36 +33,36 @@ def test_init(rt, metadata, expected_metadata):

def test_str_repr():
    """Check that __str__ and __repr__ both report the ID and the strain count."""
    # Positional order: id, mz, intensity, precursor_mz, precursor_charge.
    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1)
    expected = "Spectrum(id=spec1, #strains=0)"
    assert str(spec) == expected
    assert repr(spec) == expected


def test_eq():
    """Check __eq__: same arguments compare equal, a different ID does not."""
    # Positional order: id, mz, intensity, precursor_mz, precursor_charge, rt, metadata.
    spec1 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"})
    spec2 = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"})
    spec3 = Spectrum("spec2", [100, 200], [0.1, 0.2], 150, 1, 0, {"info": "test"})

    assert spec1 == spec2
    assert spec1 != spec3


def test_hash():
    """Check that the hash of a Spectrum equals the hash of (id, precursor_mz)."""
    # Positional order: id, mz, intensity, precursor_mz, precursor_charge.
    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1)
    assert hash(spec) == hash(("spec1", 150))


def test_peaks():
    """Check that `peaks` pairs each m/z value with its intensity, one row per peak."""
    # Positional order: id, mz, intensity, precursor_mz, precursor_charge.
    spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1)
    expected = np.array([[100, 0.1], [200, 0.2]])
    assert np.array_equal(spec.peaks, expected)


def test_has_strain():
"""Test the has_strain method."""
spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150)
spec = Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1)
strain1 = Strain("strain1")
strain2 = Strain("strain2")

Expand Down
6 changes: 3 additions & 3 deletions tests/unit/metabolomics/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ def spectra():
"""Fixture for a list of Spectrum objects."""
# The order of the spectra is important for the tests.
return [
Spectrum("spec0", [100, 200], [0.1, 0.2], 150),
Spectrum("spec1", [100, 200], [0.1, 0.2], 150),
Spectrum("spec2", [100, 200], [0.1, 0.2], 150),
Spectrum("spec0", [100, 200], [0.1, 0.2], 150, 1),
Spectrum("spec1", [100, 200], [0.1, 0.2], 150, 1),
Spectrum("spec2", [100, 200], [0.1, 0.2], 150, 1),
]


Expand Down
6 changes: 3 additions & 3 deletions tests/unit/scoring/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def gcfs(strains_list) -> tuple[GCF, GCF, GCF]:

@fixture(scope="session")
def spectra(strains_list) -> tuple[Spectrum, Spectrum, Spectrum]:
    """Build three spectra with different strain memberships.

    Args:
        strains_list: indexable collection of strains; elements 0 and 1 are used.

    Returns:
        A tuple (spectrum1, spectrum2, spectrum3) where spectrum1 holds
        strains_list[0], spectrum2 holds strains_list[1] and spectrum3 holds both.
    """
    # Positional order: id, mz, intensity, precursor_mz, precursor_charge.
    spectrum1 = Spectrum("spectrum1", [1], [1], 10.0, 1)
    spectrum1.strains.add(strains_list[0])

    spectrum2 = Spectrum("spectrum2", [1], [1], 10.0, 1)
    spectrum2.strains.add(strains_list[1])

    spectrum3 = Spectrum("spectrum3", [1], [1], 10.0, 1)
    spectrum3.strains.add(strains_list[0])
    spectrum3.strains.add(strains_list[1])

    return spectrum1, spectrum2, spectrum3
Expand Down
Loading