5
5
from . import ConsensusFeature as _ConsensusFeature
6
6
from . import FeatureMap as _FeatureMap
7
7
from . import Feature as _Feature
8
+ from . import MRMFeature as _MRMFeature
8
9
from . import MSExperiment as _MSExperiment
10
+ from . import PeakMap as _PeakMap
9
11
from . import PeptideIdentification as _PeptideIdentification
10
12
from . import ControlledVocabulary as _ControlledVocabulary
11
13
from . import File as _File
12
14
from . import IonSource as _IonSource
15
+ from . import MSSpectrum as _MSSpectrum
16
+ from . import MSChromatogram as _MSChromatogram
17
+ from . import MRMTransitionGroupCP as _MRMTransitionGroupCP
13
18
14
19
import pandas as _pd
15
20
import numpy as _np
21
+ from enum import Enum as _Enum
16
22
17
23
class _ConsensusMapDF (_ConsensusMap ):
18
24
def __init__ (self , * args , ** kwargs ):
@@ -130,7 +136,11 @@ def get_df(self):
130
136
"""
131
137
return _pd .concat ([self .get_metadata_df (), self .get_intensity_df ()], axis = 1 )
132
138
139
# fix class module and name to show up correctly in readthedocs page generated with sphinx autodoc
# needs to link back to rst page of original class, which is pyopenms.ConsensusMap, NOT pyopenms._dataframes._ConsensusMapDF (wh)
ConsensusMap = _ConsensusMapDF  # public name is bound to the DataFrame-enabled subclass
ConsensusMap.__module__ = _ConsensusMap.__module__  # report the module of the wrapped base class
ConsensusMap.__name__ = 'ConsensusMap'  # report the public name instead of '_ConsensusMapDF'
134
144
135
145
# TODO tell the advanced user that they could change this, in case they have different needs.
136
146
# TODO check if type could be inferred in the first pass
@@ -147,7 +157,12 @@ def get_df(self):
147
157
b'num_of_masstraces' : 'i' ,
148
158
b'masstrace_intensity' : 'f' , # TODO this is actually a DoubleList. Think about what to do here. For _np.fromiter we would need to set the length of the array.
149
159
b'Group' : 'U50' ,
150
- b'is_ungrouped_monoisotopic' : 'i' # TODO this sounds very boolean to me
160
+ b'is_ungrouped_monoisotopic' : 'i' , # TODO this sounds very boolean to me
161
+ b'left_width' : 'f' ,
162
+ b'right_width' : 'f' ,
163
+ b'total_xic' : 'f' ,
164
+ b'PeptideRef' : 'U100' ,
165
+ b'peak_apices_sum' : 'f'
151
166
}
152
167
"""Global dict to define which autoconversion to numpy types is tried for certain metavalues.
153
168
@@ -294,6 +309,8 @@ def get_assigned_peptide_identifications(self):
294
309
return result
295
310
296
311
# Bind the public name to _FeatureMapDF and make the class report the module of
# _FeatureMap and the name 'FeatureMap' instead of its private definition name.
FeatureMap = _FeatureMapDF
FeatureMap.__module__ = _FeatureMap.__module__
FeatureMap.__name__ = 'FeatureMap'
297
314
298
315
299
316
class _MSExperimentDF (_MSExperiment ):
@@ -319,7 +336,7 @@ def get_df(self, long : bool = False):
319
336
cols = ["RT" , "mzarray" , "intarray" ]
320
337
321
338
return _pd .DataFrame (data = ((spec .getRT (), * spec .get_peaks ()) for spec in self ), columns = cols )
322
-
339
+
323
340
def get_ion_df (self ):
324
341
"""Generates a pandas DataFrame with all peaks and the ionic mobility in the MSExperiment
325
342
@@ -476,8 +493,13 @@ def _get_ion_spec_arrays(mslevel):
476
493
477
494
return ms1_df , ms2_df
478
495
479
- MSExperiment = _MSExperimentDF
480
496
# PeakMap and MSExperiment are two public names for the SAME class object (_MSExperimentDF).
# NOTE(review): because both groups of assignments below write to that one object, the
# MSExperiment values assigned last win; afterwards PeakMap.__module__ / PeakMap.__name__
# read the same as MSExperiment's. Confirm that this is the intended outcome.
PeakMap = _MSExperimentDF
PeakMap.__module__ = _PeakMap.__module__
PeakMap.__name__ = 'PeakMap'

MSExperiment = _MSExperimentDF
MSExperiment.__module__ = _MSExperiment.__module__
MSExperiment.__name__ = 'MSExperiment'
481
503
482
504
483
505
# TODO think about the best way for such top-level functions. IMHO in Python, encapsulation in a stateless class is unnecessary.
@@ -608,5 +630,260 @@ def update_scores_from_df(peps: List[_PeptideIdentification], df : _pd.DataFrame
608
630
609
631
return rets
610
632
633
def _add_meta_values(df: _pd.DataFrame, object: any) -> _pd.DataFrame:
    """
    Adds metavalues from given object to given DataFrame.

    Every meta value is broadcast into a constant column holding one entry per
    row of ``df``. The column type is derived from the value:

    * integer values become an integer column,
    * floating point values become a ``double`` column (never truncated),
    * ``str``/``bytes`` values are parsed as integer first, then as float, and
      are stored as text only if both attempts fail,
    * any other value (e.g. a list) is stored as its ``str()`` representation.

    Args:
        df (pd.DataFrame): DataFrame to which metavalues will be added (modified in place).
        object (any): Object from which metavalues will be extracted. It has to
            provide ``getKeys(list)`` and ``getMetaValue(key)``.

    Returns:
        pd.DataFrame: DataFrame with added meta values.
    """
    keys = []
    object.getKeys(keys)  # getKeys fills the list that is handed in
    n_rows = df.shape[0]

    for key in keys:
        value = object.getMetaValue(key)

        if isinstance(value, (float, _np.floating)):
            # int() happily accepts a float and drops the fraction (int(3.7) == 3),
            # so floating point values must never be sent through the int() probe.
            dtype = 'double'
        elif isinstance(value, (int, _np.integer)):
            value = int(value)
            dtype = int
        else:
            try:
                value = int(value)
                dtype = int
            except (TypeError, ValueError):
                try:
                    value = float(value)
                    dtype = 'double'
                except (TypeError, ValueError):
                    # Not numeric: store as text. Non-string values (e.g. lists)
                    # would otherwise abort the export with a TypeError.
                    if isinstance(value, bytes):
                        value = value.decode()
                    elif not isinstance(value, str):
                        value = str(value)
                    dtype = f'U{max(len(value), 1)}'

        column = key.decode() if isinstance(key, bytes) else key
        df[column] = _np.full(n_rows, value, dtype=_np.dtype(dtype))

    return df
611
662
663
class _MSSpectrumDF(_MSSpectrum):
    """MSSpectrum subclass that adds a pandas DataFrame export (``get_df``)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_df(self, export_ion_mobility: bool = True, export_meta_values: bool = True,
               export_peptide_identifications: bool = True) -> _pd.DataFrame:
        """
        Returns a DataFrame representation of the MSSpectrum.

        The frame has one row per peak; spectrum-level values are repeated on every row.

        mz: The mass-to-charge ratio (m/z) values of the peaks in the mass spectrum.
        intensity: The intensity (abundance) of the peaks in the mass spectrum.
        ion_mobility: The ion mobility values (NaN if the spectrum contains no ion mobility data).
        ion_mobility_unit: The ion mobility unit.
        ms_level: The MS level of the mass spectrum (1 for MS1, 2 for MS2, etc.).
        precursor_mz: The mass-to-charge of the first precursor ion (0.0 if there is no precursor).
        precursor_charge: The charge of the first precursor ion (0 if there is no precursor).
        native_id: The native identifier of the spectrum.
        sequence: The sequence of the first hit of the first peptide identification
            (empty string if there is none).

        ``ion_mobility``/``ion_mobility_unit`` are only present if ``export_ion_mobility`` is set,
        ``sequence`` only if ``export_peptide_identifications`` is set. With ``export_meta_values``
        one additional column per meta value of the spectrum is appended.

        Args:
            export_ion_mobility (bool): Whether to export ion mobility data.
            export_meta_values (bool): Whether to export meta values.
            export_peptide_identifications (bool): Whether to export peptide identifications.

        Returns:
            pd.DataFrame: DataFrame representation of the MSSpectrum.
        """
        mzs, intensities = self.get_peaks()
        df = _pd.DataFrame({'mz': mzs, 'intensity': intensities})
        cnt = df.shape[0]

        if export_ion_mobility:
            if self.containsIMData():
                # NOTE(review): takes the first float data array as the ion mobility
                # array - confirm that the IM data is always stored at index 0.
                df['ion_mobility'] = _np.array(list(self.getFloatDataArrays()[0]))
            else:
                df['ion_mobility'] = _np.nan
            df['ion_mobility_unit'] = _np.full(cnt, self.getDriftTimeUnitAsString(), dtype=_np.dtype('U20'))

        df['ms_level'] = _np.full(cnt, self.getMSLevel(), dtype=_np.dtype('uint16'))

        # Only the first precursor is exported; 0.0 / 0 stand in for "no precursor".
        precs = self.getPrecursors()
        prec_mz = precs[0].getMZ() if precs else 0.0
        prec_charge = precs[0].getCharge() if precs else 0
        df['precursor_mz'] = _np.full(cnt, prec_mz, dtype=_np.dtype('double'))
        # NOTE(review): uint16 cannot hold a negative charge - confirm getCharge() is never negative here.
        df['precursor_charge'] = _np.full(cnt, prec_charge, dtype=_np.dtype('uint16'))

        df['native_id'] = _np.full(cnt, self.getNativeID(), dtype=_np.dtype('U100'))

        if export_peptide_identifications:
            # Best effort: sequence of the first hit of the first identification, else ''.
            seq = ''
            peps = self.getPeptideIdentifications()
            if peps:
                hits = peps[0].getHits()
                if hits:
                    seq = hits[0].getSequence().toString()
            df['sequence'] = _np.full(cnt, seq, dtype=_np.dtype(f'U{len(seq)}'))

        if export_meta_values:
            df = _add_meta_values(df, self)

        return df


# Bind the public name to the DataFrame-enabled subclass and make the class report the
# module of the wrapped class and the public name instead of '_MSSpectrumDF'.
MSSpectrum = _MSSpectrumDF
MSSpectrum.__module__ = _MSSpectrum.__module__
MSSpectrum.__name__ = 'MSSpectrum'
724
+
725
class _ChromatogramType(_Enum):
    """Readable names for integer chromatogram type codes.

    _MSChromatogramDF.get_df looks up the integer returned by
    getChromatogramType() in this enum and writes the member name into the
    'chromatogram_type' column.
    """
    # NOTE(review): the numbering presumably mirrors the chromatogram type enum of the
    # wrapped library - confirm, and keep both in sync when types are added.
    MASS_CHROMATOGRAM = 0
    TOTAL_ION_CURRENT_CHROMATOGRAM = 1
    SELECTED_ION_CURRENT_CHROMATOGRAM = 2
    BASEPEAK_CHROMATOGRAM = 3
    SELECTED_ION_MONITORING_CHROMATOGRAM = 4
    SELECTED_REACTION_MONITORING_CHROMATOGRAM = 5
    ELECTROMAGNETIC_RADIATION_CHROMATOGRAM = 6
    ABSORPTION_CHROMATOGRAM = 7
    EMISSION_CHROMATOGRAM = 8
735
+
736
class _MSChromatogramDF(_MSChromatogram):
    """MSChromatogram subclass that adds a pandas DataFrame export (``get_df``)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_df(self, export_meta_values: bool = True) -> _pd.DataFrame:
        """
        Returns a DataFrame representation of the MSChromatogram.

        The frame has one row per data point; chromatogram-level values are repeated on every row.

        time: The retention time (in seconds) of the chromatographic peaks.
        intensity: The intensity (abundance) of the signal at each time point.
        chromatogram_type: The type of chromatogram.
        precursor_mz: The mass-to-charge of the precursor ion.
        precursor_charge: The charge of the precursor ion.
        product_mz: The mass-to-charge of the product ion.
        comment: A comment assigned to the chromatogram.
        native_id: The chromatogram native identifier.

        With ``export_meta_values`` one additional column per meta value of the
        chromatogram is appended.

        Args:
            export_meta_values (bool): Whether to export meta values.

        Returns:
            pd.DataFrame: DataFrame representation of the MSChromatogram.
        """
        # Fetch the peak arrays once; they provide both the row count and the data.
        rts, intys = self.get_peaks()
        cnt = len(rts)

        # NOTE(review): intensities are stored as uint64, so any fractional part returned by
        # get_peaks() is dropped - confirm this is intended.
        dtypes = [('time', _np.dtype('double')), ('intensity', _np.dtype('uint64'))]
        arr = _np.fromiter(zip(rts, intys), dtype=dtypes, count=cnt)

        df = _pd.DataFrame(arr)

        # Translate the integer type code into its enum member name.
        df['chromatogram_type'] = _np.full(cnt, _ChromatogramType(self.getChromatogramType()).name, dtype=_np.dtype('U100'))

        precursor = self.getPrecursor()
        df['precursor_mz'] = _np.full(cnt, precursor.getMZ(), dtype=_np.dtype('double'))
        # NOTE(review): uint16 cannot hold a negative charge - confirm getCharge() is never negative here.
        df['precursor_charge'] = _np.full(cnt, precursor.getCharge(), dtype=_np.dtype('uint16'))

        df['product_mz'] = _np.full(cnt, self.getProduct().getMZ(), dtype=_np.dtype('double'))

        df['comment'] = _np.full(cnt, self.getComment(), dtype=_np.dtype('U100'))

        df['native_id'] = _np.full(cnt, self.getNativeID(), dtype=_np.dtype('U100'))

        if export_meta_values:
            df = _add_meta_values(df, self)

        return df


# Bind the public name to the DataFrame-enabled subclass and make the class report the
# module of the wrapped class and the public name instead of '_MSChromatogramDF'.
MSChromatogram = _MSChromatogramDF
MSChromatogram.__module__ = _MSChromatogram.__module__
MSChromatogram.__name__ = 'MSChromatogram'
790
+
791
class _MRMTransitionGroupCPDF(_MRMTransitionGroupCP):
    """MRMTransitionGroupCP subclass that adds pandas DataFrame exports."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_chromatogram_df(self, export_meta_values: bool = True) -> _pd.DataFrame:
        """
        Returns a DataFrame representation of the Chromatograms stored in MRMTransitionGroupCP.

        The per-chromatogram frames produced by ``_MSChromatogramDF.get_df`` are concatenated,
        so the columns are:

        time: The retention time of each data point.
        intensity: The intensity of the signal at each time point.
        chromatogram_type: The type of chromatogram.
        precursor_mz: The mass-to-charge ratio of the precursor ion.
        precursor_charge: The charge of the precursor ion.
        product_mz: The mass-to-charge ratio of the product ion.
        comment: A comment assigned to the chromatogram.
        native_id: The native identifier of the chromatogram.

        Args:
            export_meta_values (bool): Whether to export meta values.

        Returns:
            pd.DataFrame: DataFrame representation of the chromatograms stored in MRMTransitionGroupCP
            (an empty DataFrame if the group holds no chromatograms).
        """
        frames = [_MSChromatogramDF(c).get_df(export_meta_values=export_meta_values)
                  for c in self.getChromatograms()]
        if not frames:
            # pd.concat raises ValueError when given nothing to concatenate.
            return _pd.DataFrame()
        return _pd.concat(frames)

    def get_feature_df(self, meta_values: Union[None, List[str], str] = None) -> _pd.DataFrame:
        """
        Returns a DataFrame representation of the Features stored in MRMTransitionGroupCP.

        The frame is indexed by ``feature_id`` and has one row per feature:

        feature_id: The unique id of the feature (index).
        RT: The retention time of the feature.
        intensity: The intensity of the feature.
        quality: The overall quality of the feature.

        followed by one column per requested meta value. Meta values listed in
        ``common_meta_value_types`` use the type given there, all others are stored as text.
        A feature that lacks a requested meta value gets NaN for it.

        Args:
            meta_values (None, 'all', a single key, or a list of keys): Meta values to export.
                ``None`` (or an empty list) exports none, ``'all'`` exports every key found on
                any feature. Keys may be given as ``str`` or ``bytes``.

        Returns:
            pd.DataFrame: DataFrame representation of the Features stored in MRMTransitionGroupCP.
        """
        features = self.getFeatures()

        if meta_values == 'all':
            # Collect every key that occurs on any feature; sorted so that the
            # column order does not depend on set iteration order.
            found = set()
            for f in features:
                keys = []
                f.getKeys(keys)
                found.update(keys)
            meta_values = sorted(found)
        elif not meta_values:  # None or empty: export no meta values
            meta_values = []
        elif isinstance(meta_values, (str, bytes)):
            # A single key; iterating it directly would yield its characters.
            meta_values = [meta_values]
        else:
            meta_values = list(meta_values)

        mddtypes = [('feature_id', _np.dtype('uint64')), ('RT', 'f'), ('intensity', 'f'), ('quality', 'f')]

        for meta_value in meta_values:
            # common_meta_value_types is looked up by bytes key, column names are str.
            key = meta_value if isinstance(meta_value, bytes) else meta_value.encode()
            mddtypes.append((key.decode(), common_meta_value_types.get(key, 'U50')))

        def rows():
            """Yields one tuple per feature: id, RT, intensity, quality and the requested meta values."""
            for f in features:
                # NOTE(review): NaN marks a missing meta value; presumably this cannot be
                # stored in an integer-typed column - confirm how that case should behave.
                vals = [f.getMetaValue(m) if f.metaValueExists(m) else _np.nan for m in meta_values]
                yield (f.getUniqueId(), f.getRT(), f.getIntensity(), f.getOverallQuality(), *vals)

        mdarr = _np.fromiter(rows(), dtype=mddtypes, count=len(features))

        return _pd.DataFrame(mdarr).set_index('feature_id')


# fix class module and name to show up correctly in readthedocs page generated with sphinx autodoc
# needs to link back to rst page of original class, which is pyopenms.MRMTransitionGroupCP, NOT pyopenms._dataframes._MRMTransitionGroupCPDF (wh)
MRMTransitionGroupCP = _MRMTransitionGroupCPDF
MRMTransitionGroupCP.__module__ = _MRMTransitionGroupCP.__module__
MRMTransitionGroupCP.__name__ = 'MRMTransitionGroupCP'
0 commit comments