From e220205ee1ec8bc1e3614d638ce6ad68468659a7 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:59:48 +0100 Subject: [PATCH 01/16] add anndata dependency --- requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..e69de29b From b9661453df9584f7d001415e03c494536402fe15 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:39:16 +0100 Subject: [PATCH 02/16] filter DIANN by extra FDR values --- .../constants/const_files/psm_reader.yaml | 8 ++++++- alphabase/psm_reader/dia_psm_reader.py | 21 +++++++++++++++++++ alphabase/psm_reader/keys.py | 18 +++++++++++----- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml index 8b0aeb9a..12bc0e7b 100644 --- a/alphabase/constants/const_files/psm_reader.yaml +++ b/alphabase/constants/const_files/psm_reader.yaml @@ -185,7 +185,7 @@ diann: # 1.8.1 rt_unit: minute fixed_C57: False column_mapping: - 'raw_name': 'Run' # File.Name? + 'raw_name': 'Run' 'sequence': 'Stripped.Sequence' 'charge': 'Precursor.Charge' 'rt': 'RT' @@ -200,6 +200,12 @@ diann: # 1.8.1 'score': 'CScore' 'fdr': 'Q.Value' 'intensity': "PG.MaxLFQ" + # extra columns for performing FDR cutoff, not propagated to the output + '_fdr2': 'Global.Q.Value' # first search + '_fdr3': 'Global.PG.Q.Value' # first search + '_fdr4': 'Lib.Q.Value' # second search + '_fdr5': 'Lib.PG.Q.Value' # second search + mod_seq_columns: - "Modified.Sequence" modification_mapping_type: 'maxquant' diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 1a586d80..76c85787 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -47,8 +47,29 @@ def _post_process(self, origin_df: pd.DataFrame) -> None: columns={PsmDfCols.SPEC_IDX: PsmDfCols.DIANN_SPEC_INDEX}, inplace=True ) + self._perform_additional_fdr() + super()._post_process(origin_df) + def _perform_additional_fdr(self) -> None: + """Filter PSMs based on additional FDR columns and drop the temporary columns.""" + mask = np.ones(len(self._psm_df), dtype=bool) + + extra_fdr_columns = [ + PsmDfCols.FDR2, + PsmDfCols.FDR3, + PsmDfCols.FDR4, + PsmDfCols.FDR5, + ] + for col in extra_fdr_columns: + if col in self._psm_df.columns: + mask &= self._psm_df[PsmDfCols.FDR] <= self._keep_fdr + + if not all(mask): + self._psm_df = self._psm_df[mask] + + self._psm_df = self._psm_df.drop(columns=extra_fdr_columns, errors="ignore") + class SpectronautReportReader(ModifiedSequenceReader): """Reader for Spectronaut's report TSV/CSV.""" diff --git a/alphabase/psm_reader/keys.py b/alphabase/psm_reader/keys.py index c2349519..3c87bc1e 100644 --- a/alphabase/psm_reader/keys.py +++ b/alphabase/psm_reader/keys.py @@ -44,7 +44,6 @@ class PsmDfCols(metaclass=ConstantsClass): MOBILITY = "mobility" PEPTIDE_FDR = "peptide_fdr" PROTEIN_FDR = "protein_fdr" - INTENSITY = "intensity" RAW_NAME = "raw_name" CHARGE = "charge" @@ -54,10 +53,19 @@ class PsmDfCols(metaclass=ConstantsClass): PRECURSOR_MZ = "precursor_mz" DIANN_SPEC_INDEX = "diann_spec_idx" - # part of the output, but not directly referenced - _UNIPROT_IDS = "uniprot_ids" - _GENES = "genes" - _QUERY_ID = "query_id" + # part of the output, but not directly referenced in code + UNIPROT_IDS = "uniprot_ids" + GENES = "genes" + QUERY_ID = "query_id" + + INTENSITY = "intensity" + + # part of psm_reader.yaml, but not part of output + # extra FDR columns for DIANN + FDR2 = "_fdr2" # first search + FDR3 = "_fdr3" # first search + FDR4 = "_fdr4" # second search + FDR5 = "_fdr5" # second search class LibPsmDfCols(metaclass=ConstantsClass): From 4f0879d41b7264e1b6b1b28522b8c6faa68ca52f Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:52:18 +0100 Subject: [PATCH 03/16] add tests --- tests/integration/test_psm_readers.py | 2 + tests/unit/psm_reader/test_dia_psm_reader.py | 52 ++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tests/unit/psm_reader/test_dia_psm_reader.py diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index dabd88a6..7f429c24 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -46,7 +46,9 @@ F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636 Q9UH36 Q9UH36 SRRD 3296.49 3428.89 3428.89 3296.49 3428.89 3428.89 3428.89 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 3.99074e-05 1.96448e-05 0.000159821 0.000159821 0.000146135 0.000161212 0 1 3296.49 3428.89 3296.49 0.852479 19.9208 19.8731 19.9685 123.9 19.8266 128.292 0 0.960106 5308.05 1.96902 0.683134 0.362287 0.999997 1.23691 3.43242e-05 1212.01;2178.03;1390.01;1020.01;714.008;778.008; 1212.01;1351.73;887.591;432.92;216.728;732.751; 0.956668;0.757581;0.670497;0.592489;0.47072;0.855203; 30053 1.19708 1.19328 1.19453 1.19469 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642 Q9UH36 Q9UH36 SRRD 2365 2334.05 2334.05 2365 2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000184434 1.96448e-05 0.000596659 0.000596659 0.000146135 0.000604961 0 1 2365 2334.05 2365 0.922581 19.905 19.8573 19.9527 123.9 19.782 128.535 0 0.940191 4594.04 1.31068 0.758988 0 0.995505 0.28633 2.12584e-06 1209.02;1210.02;1414.02;1051.01;236.003;130.002; 1209.02;1109.89;732.154;735.384;0;46.0967; 0.919244;0.937624;0.436748;0.639369;0.296736;0.647924; 30029 1.195 1.19328 1.19381 1.19339 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648 Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 1.96448e-05 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 + F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 """ +# last line is made up to not pass extra FDR check TEST_DATA_SPECTRONAUT = """ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName 202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843 2 _ALVAT[+80]PGK_ True _ALVAT[Phospho (STY)]PGK_ ALVATPGK -5.032703 0.758 -5.032703 P19338 False _ALVAT[+80]PGK_ _ALVAT[Phospho (STY)]PGK_ 418.717511324722 0 10352 noloss 3 y 1 301.187031733932 53.1991 False sp P19338 P19338 NUCL_HUMAN Nucleolin Homo sapiens NCL 1 3 MCT_human_UP000005640_9606 diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py new file mode 100644 index 00000000..b1054882 --- /dev/null +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -0,0 +1,52 @@ +import pandas as pd + +from alphabase.psm_reader.dia_psm_reader import DiannReader + + +def test_perform_additional_fdr_columns_above_threshold(): + """Test that PSMs are filtered based on additional FDR columns.""" + reader = DiannReader() + reader._psm_df = pd.DataFrame( + { + "name": ["p1", "p2", "p3", "p4", "p5"], + "_fdr2": [0.01, 0.06, 0.01, 0.01, 0.01], + "_fdr3": [0.01, 0.01, 0.06, 0.01, 0.01], + "_fdr4": [0.01, 0.01, 0.01, 0.06, 0.01], + "_fdr5": [0.01, 0.01, 0.01, 0.01, 0.06], + "intensity": [1, 2, 3, 4, 5], + } + ) + reader._keep_fdr = 0.05 + + # when + reader._perform_additional_fdr() + + pd.testing.assert_frame_equal( + reader._psm_df, pd.DataFrame({"name": ["p1"], "intensity": [1]}) + ) + + +def test_perform_additional_fdr_columns_above_threshold_missing_columns(): + """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" + reader = DiannReader() + reader._psm_df = pd.DataFrame( + { + "name": ["p1", "p2", "p3"], + "_fdr2": [0.01, 0.06, 0.01], + "_fdr3": [0.01, 0.01, 0.06], + # '_fdr4', '_fdr5' missing + "intensity": [ + 1, + 2, + 3, + ], + } + ) + reader._keep_fdr = 0.05 + + # when + reader._perform_additional_fdr() + + pd.testing.assert_frame_equal( + reader._psm_df, pd.DataFrame({"name": ["p1"], "intensity": [1]}) + ) From 9adf036cbcbd30bd06ac3e83dd1661f553602d72 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:52:44 +0100 Subject: [PATCH 04/16] fix --- alphabase/psm_reader/dia_psm_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 76c85787..545f192a 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -63,7 +63,7 @@ def _perform_additional_fdr(self) -> None: ] for col in extra_fdr_columns: if col in self._psm_df.columns: - mask &= self._psm_df[PsmDfCols.FDR] <= self._keep_fdr + mask &= self._psm_df[col] <= self._keep_fdr if not all(mask): self._psm_df = self._psm_df[mask] From e018bacb9c29e5797d8736f6de2fb38c516160f0 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:18:17 +0100 Subject: [PATCH 05/16] add test for nan --- alphabase/anndata/anndata_factory.py | 2 +- tests/unit/anndata/test_anndata_factory.py | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/alphabase/anndata/anndata_factory.py b/alphabase/anndata/anndata_factory.py index ffcd56cc..efc95337 100644 --- a/alphabase/anndata/anndata_factory.py +++ b/alphabase/anndata/anndata_factory.py @@ -57,7 +57,7 @@ def create_anndata(self) -> ad.AnnData: index=PsmDfCols.RAW_NAME, columns=PsmDfCols.PROTEINS, values=PsmDfCols.INTENSITY, - aggfunc="first", + aggfunc="first", # DataFrameGroupBy.first -> will skip NA fill_value=np.nan, dropna=False, ) diff --git a/tests/unit/anndata/test_anndata_factory.py b/tests/unit/anndata/test_anndata_factory.py index 52cfb030..1d94d7ab 100644 --- a/tests/unit/anndata/test_anndata_factory.py +++ b/tests/unit/anndata/test_anndata_factory.py @@ -119,3 +119,32 @@ def test_from_files(mock_reader): assert np.array_equal( adata.X, np.array([[100, 200], [300, np.nan]]), equal_nan=True ) + + +@patch("alphabase.psm_reader.psm_reader.psm_reader_provider.get_reader") +def test_from_files_nan(mock_reader): + df = pd.concat( + [ + pd.DataFrame( + { + PsmDfCols.RAW_NAME: ["raw2"], + PsmDfCols.PROTEINS: ["protein2"], + PsmDfCols.INTENSITY: [np.nan], + } + ), + _get_test_psm_df(), + ] + ) + mock_reader.return_value.load.return_value = df + + factory = AnnDataFactory.from_files(["file1", "file2"], reader_type="diann") + + # when + adata = factory.create_anndata() + + assert adata.shape == (2, 2) + assert adata.obs_names.tolist() == ["raw1", "raw2"] + assert adata.var_names.tolist() == ["protein1", "protein2"] + assert np.array_equal( + adata.X, np.array([[100, 200], [300, np.nan]]), equal_nan=True + ) From dac81e13373773acff514c6408d7ab18a7200687 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:10:21 +0100 Subject: [PATCH 06/16] adjust to refactored code --- alphabase/psm_reader/dia_psm_reader.py | 6 +++--- tests/unit/psm_reader/test_dia_psm_reader.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 545f192a..2eabc1bd 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -47,12 +47,12 @@ def _post_process(self, origin_df: pd.DataFrame) -> None: columns={PsmDfCols.SPEC_IDX: PsmDfCols.DIANN_SPEC_INDEX}, inplace=True ) - self._perform_additional_fdr() - super()._post_process(origin_df) - def _perform_additional_fdr(self) -> None: + def _filter_fdr(self) -> None: """Filter PSMs based on additional FDR columns and drop the temporary columns.""" + super()._filter_fdr() + mask = np.ones(len(self._psm_df), dtype=bool) extra_fdr_columns = [ diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index b1054882..8126029b 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -3,7 +3,7 @@ from alphabase.psm_reader.dia_psm_reader import DiannReader -def test_perform_additional_fdr_columns_above_threshold(): +def test_filter_fdr_columns_above_threshold(): """Test that PSMs are filtered based on additional FDR columns.""" reader = DiannReader() reader._psm_df = pd.DataFrame( @@ -19,14 +19,14 @@ def test_perform_additional_fdr_columns_above_threshold(): reader._keep_fdr = 0.05 # when - reader._perform_additional_fdr() + reader._filter_fdr() pd.testing.assert_frame_equal( reader._psm_df, pd.DataFrame({"name": ["p1"], "intensity": [1]}) ) -def test_perform_additional_fdr_columns_above_threshold_missing_columns(): +def test_filter_fdr_columns_above_threshold_missing_columns(): """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" reader = DiannReader() reader._psm_df = pd.DataFrame( @@ -45,7 +45,7 @@ def test_perform_additional_fdr_columns_above_threshold_missing_columns(): reader._keep_fdr = 0.05 # when - reader._perform_additional_fdr() + reader._filter_fdr() pd.testing.assert_frame_equal( reader._psm_df, pd.DataFrame({"name": ["p1"], "intensity": [1]}) From c06e05c4fb9b020b2ff61df68a8870f945c4b432 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:31:48 +0100 Subject: [PATCH 07/16] adjust to refactored code --- alphabase/psm_reader/dia_psm_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 2eabc1bd..50c29642 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -63,7 +63,7 @@ def _filter_fdr(self) -> None: ] for col in extra_fdr_columns: if col in self._psm_df.columns: - mask &= self._psm_df[col] <= self._keep_fdr + mask &= self._psm_df[col] <= self._fdr_threshold if not all(mask): self._psm_df = self._psm_df[mask] From 6185c6ffecb7d08517479695f880ac21e16ffda3 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:03:10 +0100 Subject: [PATCH 08/16] reorganize constructors --- alphabase/psm_reader/dia_psm_reader.py | 64 ++++++++++++++++++------- alphabase/psm_reader/maxquant_reader.py | 63 +++++++++++++++++------- tests/integration/test_psm_readers.py | 3 +- 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 50c29642..a505f6f5 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -1,5 +1,7 @@ """Readers for Spectronaut's output library and reports, Swath data and DIANN data.""" +from typing import List, Optional + import numpy as np import pandas as pd @@ -42,6 +44,48 @@ class DiannReader(ModifiedSequenceReader): _add_unimod_to_mod_mapping = True _min_max_rt_norm = False + def __init__( # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions + self, + *, + column_mapping: Optional[dict] = None, + modification_mapping: Optional[dict] = None, + mod_seq_columns: Optional[List[str]] = None, + fdr: float = 0.01, + keep_decoy: bool = False, + rt_unit: Optional[str] = None, + # DIANN reader-specific + filter_first_search_fdr: bool = False, + filter_second_search_fdr: bool = False, + **kwargs, + ): + """Reader for MaxQuant-like data (in terms of modification loading and decoy translation). + + See documentation of `PSMReaderBase` for more information. + + Parameters + ---------- + filter_first_search_fdr : bool, optional + If true, the FDR filtering will be also to the first search (columns _fdr2 and _fdr3) + + filter_second_search_fdr : bool, optional + If true, the FDR filtering will be also to the second search (columns _fdr4 and _fdr5) + + See documentation of `PSMReaderBase` for the rest of parameters. + + """ + super().__init__( + column_mapping=column_mapping, + modification_mapping=modification_mapping, + mod_seq_columns=mod_seq_columns, + fdr=fdr, + keep_decoy=keep_decoy, + rt_unit=rt_unit, + **kwargs, + ) + + self._filter_first_search_fdr = filter_first_search_fdr + self._filter_second_search_fdr = filter_second_search_fdr + def _post_process(self, origin_df: pd.DataFrame) -> None: self._psm_df.rename( columns={PsmDfCols.SPEC_IDX: PsmDfCols.DIANN_SPEC_INDEX}, inplace=True @@ -53,22 +97,10 @@ def _filter_fdr(self) -> None: """Filter PSMs based on additional FDR columns and drop the temporary columns.""" super()._filter_fdr() - mask = np.ones(len(self._psm_df), dtype=bool) - - extra_fdr_columns = [ - PsmDfCols.FDR2, - PsmDfCols.FDR3, - PsmDfCols.FDR4, - PsmDfCols.FDR5, - ] - for col in extra_fdr_columns: - if col in self._psm_df.columns: - mask &= self._psm_df[col] <= self._fdr_threshold - - if not all(mask): - self._psm_df = self._psm_df[mask] - - self._psm_df = self._psm_df.drop(columns=extra_fdr_columns, errors="ignore") + self._psm_df = self._psm_df.drop( + columns=[PsmDfCols.FDR2, PsmDfCols.FDR3, PsmDfCols.FDR4, PsmDfCols.FDR5], + errors="ignore", + ) class SpectronautReportReader(ModifiedSequenceReader): diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index 6bab794b..6cf8c176 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -129,7 +129,7 @@ class ModifiedSequenceReader(PSMReaderBase, ABC): _add_unimod_to_mod_mapping = True - def __init__( # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions + def __init__( # noqa: PLR0913 # too many arguments in function definition, missing argument descriptions self, *, column_mapping: Optional[dict] = None, @@ -138,23 +138,13 @@ def __init__( # noqa: PLR0913, D417 # too many arguments in function definition fdr: float = 0.01, keep_decoy: bool = False, rt_unit: Optional[str] = None, - # MaxQuant reader-specific - fixed_C57: Optional[bool] = None, # noqa: N803 TODO: make this *,fixed_c57 (breaking) **kwargs, ): """Reader for MaxQuant-like data (in terms of modification loading and decoy translation). See documentation of `PSMReaderBase` for more information. - Parameters - ---------- - fixed_C57 : bool, optional - If true, the search engine will not show `Carbamidomethyl` - in the modified sequences. - by default read from psm_reader_yaml key `fixed_C57`. - - See documentation of `PSMReaderBase` for the rest of parameters. - + See documentation of `PSMReaderBase` for the parameters. """ super().__init__( column_mapping=column_mapping, @@ -166,11 +156,7 @@ def __init__( # noqa: PLR0913, D417 # too many arguments in function definition **kwargs, ) - self.fixed_C57 = ( - fixed_C57 - if fixed_C57 is not None - else psm_reader_yaml[self._reader_type]["fixed_C57"] - ) + self.fixed_C57 = False def _translate_decoy(self) -> None: if PsmDfCols.DECOY in self._psm_df.columns: @@ -207,6 +193,49 @@ class MaxQuantReader(ModifiedSequenceReader): _reader_type = "maxquant" + def __init__( # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions + self, + *, + column_mapping: Optional[dict] = None, + modification_mapping: Optional[dict] = None, + mod_seq_columns: Optional[List[str]] = None, + fdr: float = 0.01, + keep_decoy: bool = False, + rt_unit: Optional[str] = None, + # MaxQuant reader-specific + fixed_C57: Optional[bool] = None, # noqa: N803 TODO: make this *,fixed_c57 (breaking) + **kwargs, + ): + """Reader for MaxQuant-like data (in terms of modification loading and decoy translation). + + See documentation of `PSMReaderBase` for more information. + + Parameters + ---------- + fixed_C57 : bool, optional + If true, the search engine will not show `Carbamidomethyl` + in the modified sequences. + by default read from psm_reader_yaml key `fixed_C57`. + + See documentation of `PSMReaderBase` for the rest of parameters. + + """ + super().__init__( + column_mapping=column_mapping, + modification_mapping=modification_mapping, + mod_seq_columns=mod_seq_columns, + fdr=fdr, + keep_decoy=keep_decoy, + rt_unit=rt_unit, + **kwargs, + ) + + self.fixed_C57 = ( + fixed_C57 + if fixed_C57 is not None + else psm_reader_yaml[self._reader_type]["fixed_C57"] + ) + def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: """MaxQuant-specific preprocessing of output data.""" df = df[~pd.isna(df["Retention time"])] diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index 7f429c24..0b075581 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -46,8 +46,9 @@ F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636 Q9UH36 Q9UH36 SRRD 3296.49 3428.89 3428.89 3296.49 3428.89 3428.89 3428.89 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 3.99074e-05 1.96448e-05 0.000159821 0.000159821 0.000146135 0.000161212 0 1 3296.49 3428.89 3296.49 0.852479 19.9208 19.8731 19.9685 123.9 19.8266 128.292 0 0.960106 5308.05 1.96902 0.683134 0.362287 0.999997 1.23691 3.43242e-05 1212.01;2178.03;1390.01;1020.01;714.008;778.008; 1212.01;1351.73;887.591;432.92;216.728;732.751; 0.956668;0.757581;0.670497;0.592489;0.47072;0.855203; 30053 1.19708 1.19328 1.19453 1.19469 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642 Q9UH36 Q9UH36 SRRD 2365 2334.05 2334.05 2365 2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000184434 1.96448e-05 0.000596659 0.000596659 0.000146135 0.000604961 0 1 2365 2334.05 2365 0.922581 19.905 19.8573 19.9527 123.9 19.782 128.535 0 0.940191 4594.04 1.31068 0.758988 0 0.995505 0.28633 2.12584e-06 1209.02;1210.02;1414.02;1051.01;236.003;130.002; 1209.02;1109.89;732.154;735.384;0;46.0967; 0.919244;0.937624;0.436748;0.639369;0.296736;0.647924; 30029 1.195 1.19328 1.19381 1.19339 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648 Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 1.96448e-05 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 - F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 """ +# F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 +# # last line is made up to not pass extra FDR check TEST_DATA_SPECTRONAUT = """ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName From adc3cbd55eb070a917ba5992136d57a8288f544a Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:09:02 +0100 Subject: [PATCH 09/16] temporarily skip tests --- tests/unit/psm_reader/test_dia_psm_reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index 8126029b..c2db8ccc 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -1,8 +1,11 @@ +from unittest import skip + import pandas as pd from alphabase.psm_reader.dia_psm_reader import DiannReader +@skip def test_filter_fdr_columns_above_threshold(): """Test that PSMs are filtered based on additional FDR columns.""" reader = DiannReader() @@ -26,6 +29,7 @@ def test_filter_fdr_columns_above_threshold(): ) +@skip def test_filter_fdr_columns_above_threshold_missing_columns(): """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" reader = DiannReader() From 242fdb96f332c2c2be4236c4537df89c253585ac Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Thu, 23 Jan 2025 21:04:09 +0100 Subject: [PATCH 10/16] add new logic and new test references --- alphabase/psm_reader/dia_psm_reader.py | 16 ++++++++++++++++ .../reference_ad_diann_181.parquet | Bin 13185 -> 11848 bytes .../reference_ad_diann_190.parquet | Bin 5764 -> 4486 bytes .../reference_diann_1.8.1_tsv.parquet | Bin 22262 -> 21420 bytes .../reference_diann_1.9.0_tsv.parquet | Bin 16054 -> 15659 bytes tests/integration/test_psm_readers.py | 3 +-- tests/unit/psm_reader/test_dia_psm_reader.py | 4 ---- 7 files changed, 17 insertions(+), 6 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index a505f6f5..9369cd99 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -97,6 +97,22 @@ def _filter_fdr(self) -> None: """Filter PSMs based on additional FDR columns and drop the temporary columns.""" super()._filter_fdr() + extra_fdr_columns = [] + + if self._filter_first_search_fdr is not None: + extra_fdr_columns += [PsmDfCols.FDR2, PsmDfCols.FDR3] + + if self._filter_second_search_fdr is not None: + extra_fdr_columns += [PsmDfCols.FDR4, PsmDfCols.FDR5] + + mask = np.ones(len(self._psm_df), dtype=bool) + for col in extra_fdr_columns: + if col in self._psm_df.columns: + mask &= self._psm_df[col] <= self._fdr_threshold + + if not all(mask): + self._psm_df = self._psm_df[mask] + self._psm_df = self._psm_df.drop( columns=[PsmDfCols.FDR2, PsmDfCols.FDR3, PsmDfCols.FDR4, PsmDfCols.FDR5], errors="ignore", diff --git a/tests/integration/reference_data/reference_ad_diann_181.parquet b/tests/integration/reference_data/reference_ad_diann_181.parquet index ba33f97af97ba091e54c2e1e45f61144e629df59..5ff764c967df547bb0a0b9d4e3126612d9cbd3a0 100644 GIT binary patch delta 1367 zcmb7?O>Em_7{?Q*w%auHBf+WNx+zIIM@y>a!^x%~vFcsYw61EqNs}e1kZ6;1X}UB? zDOo;jYu4N=n0Jpp-D^}CWJTuM-Xs8z%(K3v^h`}3EqSC z|G$3z&m;dHU&-Hp{!XR~B>WP#Z-NIni+^b3_6FfPJ^>@dJJ`!ZaF<~5Unp*uf;s98 z43Jl`hr@7#$_T%c+t`~H_=#fi$1Hb)g+8;I!QqZp4?lD_ zvG1&~+{5Azp5Y#j!IhrRuuo3FT{DZnH_m-v2ZMeB{xTceiQD=e?D9!)_p4r zzbY@6hYnss@3#Hk1WW0efQA@gXd)UswQ$-)VP=E*bva<5IQUt~~N7=|GH6xH(gVHoyJ5ZL+C70&U|^c zHD=d$5dtqZ({*q=hC5Pryx5iZ$YIcO3>g*I+uF));!SBi!NZty5f6p3Xv0~MAWDBVaWKQmL28p zm>ge>sDPGPiI|}(x6rlo|Mku0sR93x94T(x2 z3Z+%KqCUc^R|^p;F7R%*vSbuZDAh>qrku8HD`gNGL9=sfId8C>qwt&CPh9XN!r2MD zQ)C1QZvgj8;d}$$@&<_Yta3ly`rNyy#oJ@S!gJbwi1H3t=7}E7HH${GfMVXi0YO)j AlmGw# delta 2195 zcmb_aZERCj7{0w-Z@ad-kM?%ATiMpG+?BO#{oGa}tY>XE+U$1g)~)M8m|Z)zmbUAh zbF6|3#7{y5eF>UK$Rdgc&`A1k3O|e?{19V^3C8$`#TbH#2_`BKC7#=Mh-^PF;U@Ro z=Y7v}p7Z8D&y`&Vr#_n~TZ;G5a+;_n2=f;=a>`G8Us-lZub<9dtop>Y6E|A_Sa!{B zmtKz<^R5^1_KnDC7tecbJny0jT1D5I@cBxbqA3j{S=%WhUQ0z7Hs}%rAv2J8m4^gE z;p>-4_8WzUmZ}{DRjFpE0kgZ;Zl+0Brjta69dt)-~=SlD(p4e_%kY*a3R#Rq|{-oToJ6 z6Pws&jfR1P3I)8R*a9-~0rF=p92RRt*BaSh8=y~Yg6rb<$d%0?S87CO^z7Lt*s9zC z=L8k7D#?*=Zh_t98qvLG_C5=Zph@OX#Y*psbrQ)+@Gx*9H)SHN-EJbBXwGPy?dot0g+z*U(EM&+5xT)e|M>9ULOhwpvNTdbs+hb>;)VI*s3RAmOq`YtwCs^={h z){(-W2=-J*aei;X;h-}R33e3z?=6AqHvxO3ojvcs4zCy5P#!y6lTXV@lDy@D*{AD8 z$DUzVJM#liRS3XPQ(Z-ussE~W5DYaJo1RQZX&qdy@QDhs0&(SnOuCt5sNxW?zfvQp zdMov8{%++P0`5r%QK@GiFNIx?nZT}%)7P>-d4DPtk0!5ced0b>DjrQ-*Z)YHVxyE* z2OjwSr0iA-*^R7LsP9`;a#o@`V^9)=+Qd-VXeJ(=V@_7(Lp35{$@2?!iZb4PxWI(+ zqqTR1n~LV^OpUrdiOE!aJQGb%>b5oN3Vm=fAqdv5Y_jMM5R4oR4)%w)6{E!5X>8*k z3*x}guw57k`e3?2_q2P?jS%X_FY+(=^6xfC%cyS8U@o`-=4Q3nQ^EtyGm;>ejO7AJ zFLdeaL`5{N&x#lOJwZE{fZYa0g4T+RHX|AKaMnO5W-khrFczYMTrRec0|8qnDk3dA zD?t_xbNk)Aj<{4nT00{d4vj@eJl;htQ%jZLKyy|cFfUjJJrO(&BN<#P#`QB2uOpB` zHjY=SE#o1iv{@{Z>JRtC90i#Y9B7`Uh8<(Wp;%F+zf_FtGqQ|-I&N{!N5YmAKElXi z=^opt)iFI09)@wfS{e^KvJtCu@7NGr(lKIZBAOma#^HjtZfSIAVak^dERKbH%o8?W zqCe5y;Y;t!MzCfqvlrJW5!;!<*#AO&(KqLF`>kW6{#-QdocASXrhG}Sr7wx*xrE=@ z=f>RLp82ufp4_NyEEUTHvJ+N&L4!-!7yFXl#GW8;AL`{7%v~ywF&^X_y@*37Foj)q z&!8!UCXo*zFJGewU$-MKLVZ&>0d60{LSepk^R>s$pI_t`KPIB?86NiY3r|F-vu#gX z8~%287jEL!_$amTpTdK0!V~@b;|fizjBnrty652Q7FLDL81dS087=9#kw57KiBYe> zhD1hkdlB+YINRDf@fjZ+ZP`K<3`TIN#ZJY!z;MtCcUm0ERCbsP@pnFFERoR=+%~e} zVzdxsjCK)c<3b2NGIo?jEP))X8XctsPeHxOA+m9~$UYb~IVf*nXTXEXt#w!<%5 ZSwR(OTkFBxT7h4xocLQy5Cdq?@(+>^e-Qux diff --git a/tests/integration/reference_data/reference_ad_diann_190.parquet b/tests/integration/reference_data/reference_ad_diann_190.parquet index 7b828bd95ef6d70d62ce0abc43f53e80b59aa018..fce6d5882fe9f513138b7f8badaee5af61586b6b 100644 GIT binary patch delta 946 zcma)5&ubGw6y9vIo3z;`8k@LjqoSoNg=(`&gH6HK*)*veyPGC8%?}EaA9b6}&l=L0 zfEJ;Mcqr&V!HeKUJb0+|U+6(l&|G_vUOd@@7xAbwo2s>V@Uc9;`R2WEzBj{eTzWWU zmw#9ii~4Hh9vNYW z9EG<`Ht0624!vSr_FkutpiekiI_jVI_$KV6gK_NX9Q!(M%gnv)h2Kq6kfip==O^JO z>MZrr${qAU9bNA$`3RP-=T?@>8GA3>q}}ixea^7N{pD)%PuXblRv>m`K4Yx$O@=t(ET?M%Yo(A3l*Kc|9*R0#X$ z0;qO2)JV^U+DUJ^l&i?KtY=))kP73iIaMm$j^c{~vpAtzuU3q32ooNh!U{@46~hQl z0CxxyzIhCU82n+kpBM4YkIjyV|-E{iP;!6@yRD2_T77@Eo=~t#+%N$=bm%V zx!-rrN&3#v-iwVz5Owe!yp4Zh0c}JtL2$1=7Mjifw3U9dLp;#IEIqUT>s!<@3qPT)osT=kV}us|LSKMC(WCG& z*`N*#P|sNDy>9WI9*!|H4aAz`%?muGk!P*%ef@Og$`&>QOIL5sSkf@n;Uc%gn%v z?nmm2i|{*Ufc}aX_ssAQX7e)N(Ras12uvxxu~ta(CW6-wQ?0@~;nNO#2L=PJ^hoIT zIlcKmtugDwGmV)&zbjYIc4Rj6&jtIB^+IEUu$-ya?Acmv3q2Nt$XebvnF*qATwpen z#Z0m!e9pqxJHH>$M%6huZW~ApygEQae! zHC&f-3m%YLEvRbBA6-us=66wG+GCL_gFO2^y9}^)#qAP=jAZe8~f>| zcGb8|%b~oMw!ftv8E*--#7d?O`dMV~q3II*#%LZb#>&ZfxU%Y3w5L8k9wvq9>Yna+vEm%s%gXp24OQTwxb0NK!noMgMGlf_! zJ)u@oIb1bqH@xJmF65=$`cgG373Vfmh3H1w;htDs3D%QwcUjAkm&&1>b5|33c3w|gar<4Ef3xU z!HtZg*{h~m#*?7M2N&_IAgdh0LGU5hXI8wDBZ3aTO3Rm>lcAftVvZ;=fbovu0Yke7 z1nO{yd&%J6Muu0AMFLy!kh^A3PeQ#43LS2ELlfcUv=d?XoCG5b^BUp=Vh=EHjH!)% z4(wtJ(758^Z7o0Em+NUq$eYGs&`KaA0NJ$)p>wkc08CF4f^9Roy;X*CkM;T6R)YBJ MkLczI0{>V23HFHZ2><{9 diff --git a/tests/integration/reference_data/reference_diann_1.8.1_tsv.parquet b/tests/integration/reference_data/reference_diann_1.8.1_tsv.parquet index 83fc84f342427c6c3935afeb44e995ba133f7b49..63174d5ec6a9c579aacafe8136c308c64a98bd32 100644 GIT binary patch literal 21420 zcmdUX2V7Iv`*#8fBrJgv8Wa^NT%sZw48bb*Tvh-B29Q}5*((Vcwxe~mMWs}&qqVKu zYEfIN*4o;itJS)yxK~viRh&?3-Q)k9dlLvG{NDCo^?l#_k#p~v&vTyhJkR+)=iaEK z;-vzPV55cLCR-t}5wILttWV8bLlKL`l1XH~Qus?BM){@>FNn-Zr^2GB7%Dw7N0E^( zlSnjq>ERW{5^1_jsxHu^M-1?jq!$cJ&&Vyy(WXnJx5HSrVJsPai8`&oWl_OskF zb8?HdIZ*#BoW%}j$(Wn3fF(#1jPe$2U<>R876J>`0G5c-vEcVlS~+|3g3A#mzZNl@cy5fpNlsr083!Tg2WzfEEeSJxPmR_ z3PCSgA$~Elw}N7d%eI$?Q}NaUJ9|nIM-6r9<}^G>p@_F}=;ah1E{_RU_OurcSB57k z;%yz=++wKkxbOrj-p;|nm6B6(Wq3@Iiz6pa&Su+$E8`s;JR}Je6wBp`BuYM1;2=QI zj7pFzlyYSf6(?5=brHBkQu45bu!Q6=DvXLqND2>ACdCVSD#PRA;v!NN3WYpYp-51M z#U>@ich?MJafbGA>7`7P%j4pd331ABg_2Ur6_g@uXit~kJc?5G5eE*Zl*5Np6#Yk0 zaWHSKprWJZIZ*gAWKYl>_?p-aw%DolhOOUrM0<{^J%49thC;zFQZVhqzaq=wEyB-awn!i+V@wc1IIP|4}~_h>(i^aB<#8BGkHa*U*7`L@3(E ztpEGYeNp`%hhs8ZMX2!HnCW$4A~ZfjmMF3EL{Gki3U^Ob(e#HkH3uTh3){m_8oo}-K=u3A$BKz9T$vz7!PP0N{MC@RI4vi%u+uIAy2Y}ew$3$n#PME4-D_at#*B5rZdWE= zZ)oCo51ReBFMPb$E+-_Be|Y|klK?r*t3s#Xe%}I%Re&Upr?o!63y{ow?jJ4d1?Ur7 z*`D2}1ZdE;373k%m|HH)z7)$BpalnFE#z=djIl58z1Rtrq+1?vaqvRkIW8Ad26~~I ztdYM({6rwUl%C%bsLI+y^mZ+QzS(+n;a~-UB5tfIoV-eaMz~RbWVyPd{1Jinv;Eys zYTT3p;V5^sG<@TqPp7&g*CVgZt7>vbm+IHGOkeDdp1yzg%n3?_ZpLlu^=^>}b&osU zWBd{kD*IOOc;9Ui`V4GPyIX{=MS?r)DME2Y3o1Xm=z)j??y}inq(gUG4*68zi4w5( zOFdD{HPc*&m@}1sVPc-L!(8j@@=$*%dN)AMNqUMx153nY>pPCPuG=FHD#sMw}$O z0qaJJ(e3fz-O|M9Tky%*^*)T#`N0Pb{!je8KCxm1rcruAD@LDeo(JyLhcSlVebDC< zi5>6#B1PNY!h)6}OT5hErRcrD<2%(;r6@2jte|+C6k#8HcfS->Un8VES3gu3^JiUx zlOH3u?tY9M9sSUoCE$ka{Lq*+GtT6*B#2c!iL>*d7&!z0CrM&-Z~obj-VYa}sSAIt zKPV8R)_3;|-`*@n*!NH*(1q-(Fe!4u6O&1i77PBo1O@#1X6&Ku5|n&XyxFTo!qA&c zAkT8M1-wrQgu5Rk2!yjeo_V4y>^)9;nl^I%B!oQuGGVJRE`1e7-Xv)^a-OyfNF3%}F>W)_5-0c29{00g-^i{~-7Ize|Iym{QSt4ZF>Kv*3QG|Ld9MyLp z?BC|tmjw4k6C<}y=qc=r4%EZOq!KavCYbbL);TkNv?o#woJNh#@kF)X7x5oX^hB_0 z!q)1E;=LA6YuN6|^sDt`#sL>V;tgBjh}20FzS;3C1PH5C{xE_n21~`?-D+LY{t^u+X5Je#z1dx4WeU`B&>o-4)?_yVlsEa=_==`a;6mXN1^ z!c_Fn?WIs+Ez$=d8`v!3=g{`(gQqUeEKn7cRWtM0!gk)pCcX+Go>4Fl|Rq6c9Jz@M8~qIF6XZB9-y!k%=)>EFW_53cqx&t(!GGCP;%QnM*gDP(^5L&u$P+ z4-N6-_hLu#y^?T{)6}l2rqYOgV z5^Zr=PJyPBk?&-#^F4E-K~B_#0g{0OgVr6XHE{=ac5dE(OsLjfL2mv`N`LV$w^`@Uw($f+7^_AQv9764LXw_}xLaauDAq;x zP+Ak&u8bbqI+>6#jQ)08!bqBf8q@eA;(Yy!o?Q-Mhm3xt2WEn~(TB&ZYa1YhU8EHBr(2_ICwyE;WR&x6Lc1yW}S!R+673WZa}j0 zDkR-5LGtEhNK*H5zvi*NJ_VHeABX(IZG0hTz#e$=+yP14PmoOb8IqNU`4X0|?L^Lu zG-!J!5sG5h^H*{DeGgA|yCAu~50c#7Fv}jcJkI$|(9HS=$hFu?IHh|bC*A-JPX7ny z7~y5koyJ)wfP7|mNa}h((w%@rD}vrX58z46<_mdOpR>8EIHVa&YW;n_{b2E3IBSJ#bt`iR zg)@$i56xr_u7tdY+vwIT?9+`)p{_%GjNIIW_Er`@v1cj0g|@$#JI+#w1pBxj9a07M zJZS0U_(7!S5&qoxwJ8HmSUZ38Yrg*(+xbUsRSh}cP3UCtZfNGAsxV^i(A*;rr$($u z=tol~j*m3hh)T0EH0hdhHRCAP*gEg#6Z768>W0HcWXxU{Oq*2X2CiTuS0PBEH=>hp zTE}JIjbpcxb#Jbi^4Ieg@|ty@t54yc_33cG4tT zR6UFT;5IqUW_8sYf3}eRBZrlZe9%HRTrHZnZ&wSsbffJuH^|Gztah{b;Rd;3&EsD} z?zWHzRLRX#a#~2xx20^ss*dL|jE=RELY}@GUOMAaD2Z!T>R^h63v#pJD20}@MLXPC z`wsNz-ihkoLNk_|)ixNfk!1;xq5*5Zyf15F_0lgf z-e3_6^V%^Rw=@f0v@i;G$I|(kC2=pCSe)L5bp8gHCKarSBe>2{2o$t}HJIHMv*#Ws znyq~tf(4g;%g!!n4!-a^E2VVUrr5YPkL`@y;axm zTJX#(1MUgF=uT!l&hW3FGk|fY-YkH9Nn_^czpI;lic5lVV8$WFi zE)=Y9&hEBXH|~buMH1WNtq;BlZrWDx)|=}s$wz_KpWC)x53W1EB>Ag1uZ+xJypp}<)(63}&rcmPG&73)d`KmK#f=Yw=TedG)3?_K z^R?fkE<9^Z2LJlO8t(bS!LRSw`TLPAe&i~4=Dge*HQAIla^uY>-AUs6vtt8F!%VA= zzOj&}7bjL7<5Cl-inPr+hwjw;!;J4djUwrFC`q~oe7J6WzX z6Okor`uqttJdwprtU4E7V@=HeARB{I7BQ!!U3JWJv$c%kEVXoAXGz>t6BqMAag4d? zf=APe!xwyPsStd|Y_Y8z8t!|05fq!p(>(R$@;Ek!VJ~>V1D9`C4}RViwlUs2u^P6> zX5!HXH8f)d?SdAQNG?3?9>C{GSl17uT)4j2eiHez*&CN*KMNyy*Pr7mdATRf!1FwxCU&3=i`D-z zWF$V1fT@{_V3Sw&d&0Hnl{|k6ydA?Bz}A+#i4z$N3{Fh3fhys0YiKLNTcz6tz8f!a zdvC6gcW15}jMI3P19cuMm{PMD^swK`trF zs*gSI)VqIi8+i-6-{UUkY`P7Nql=)$xl~A=s32*b14(xP6}((Ok~^9gay=M`aWn{s zZI}tfj;BH0#`D3zNaR*2looD=9#*TM^tQK*D-@OWmQnDE%6H1xc&g8x3 z-RxPztKD%GdL4h<3~6Z5749b9a{trTI_$mw@N8POzQJ-@#Cm_21jui};aF1+{i(m!ex{pRl{ zFc&Q1z_@nh<`-znax|rFQQ(g@&X4#+T{UrZjLtW9SYfyiTB9j3{uEZ#E5{)9c82waZ6d8k26h@JAFp3mn6uBFt$VQAJ6&OX@V-(qhQDh)Skux!hY{n=u5~IkUF^aUq zC~^=+ktsBa{0gJUei%hQz$nriqeu^oB9$0Lj>jnS3ydOV0N|!$6nPq>$QX!Xwpkd zal*uKnEwb(99fvpWwvd zCgyV4gDV{^Eo*ATm{u1Ym=p_m z%+po|arfPoW|1*HLKC+)lG8u8mW_MTNY1J6n=O0XNM5-Wv7haGgxqcR`eXj5hsgaQ ziv}&w9wN_rIYiseK17;_q$gT993huDKe_Sxokmjp{O!WTKO4z+`m1vtmo<_Vn&0fd z&$~!=3x6=T;{A){dxO_$-<)-soWA@!*TbF{Nr&fuKJ&J`NX`m<{c7on3#4bxhRHJu z4v|@2JIs`pXUV!#_HoJm&XOB`I%>5i?<_fZU~mtc9Zlq=Jr~xE7;uhko~qpV`_8kZ zV#Ej8UuIq+&-ebY_VA=jJd8245h(hYtJgO0Ns#RkLfH zg?F1spXy1jDbJz(ffT31Q}&Y{IqP<~u52QcW}Q%9s%auKm$Q4=hwdYrSVOyaPuWk_ zl$o=l9v&cV=Y{%yGkYJodF`O3=eF-76{jD^Q<^4nb=ooInFkli#!-FT&gGpXecyPp z^rP2LlD%)$E|@y3nY_WelRV42ykbU=YovM7wX(X$&E(mz1CO3R zXeK%H9sBmIZYGyz4}01;rI}nC_R+eHZ{8%QRqvWvZFi0IOzpdMRB|&pZ>gx_wJ(~< zL$}_^8y9<%ObZ`#<;sp5s(0Zrnc{BNq`O4OeyUnE9xAe@d!_8!Wvs-y~h1bZcv*jm7R^A{J zQ{&z{JLv{_;?RWbHLMoWyL?p=aq~Lq==Qu)u>CsuJ-70^E$kcQ{WHZ|pWeGbuDlu5 z^11azvgzzcQ+8~)K>BXG_lfJg3#4OAdicC?7ch)BK5@`9E9B|LkFCMD6iPa57XJ|S z7`%}eIa$3MV)De{hecq(`d9M!2wzjIZ9j4N(Uy4h8Jl=E3mdU9gI%y~(ugOT32NZ* z+l_W^6WOirG{sEJ`?~*l^p(>GZewpwK=;?JNvLt0h>p3t7lc$yL4lIXam#b3ps(%o zKPp`~6=_wGK_59!MI(PA7bV`AjD8(_x9P7tQ_=C>*Z19iJ_Dts-#!|Q2)nY z?$c3J*-vi{{G=8g&wJXvwrU1qjqYYgy*(W*^j`V)Ck-_SmZsx3T|%C|i;$;h4$siI z6eRKY4kf>=uV1iv^Je{LT+g0AhpxoZ6$Uc5DbeO+m1|3jwdv}yjQ6bLI4|H34a?X> z%@QnlV+Oil%cS7{*KbcIwlA;2JC%trgN|=cwyiRlj(Jv=?ueCnb5g&OO+5OdU40Bk z>SBCTvv7Ih?9UQflxF4Us51opSOObQv-S@sJy{AIC^CimPvi839B8Tgq>Lr7f((3k z!aTISoMy867g+FdBOYA;9!NK={RoNxWe>bJDCYp-0mn<}sw0a~I|r2hz+_ zd(^?O&y^Bdc=?CYFZ=BB)f^uY zuQIlO;So3SLgM6C5OWf*1{J?bAV|D?l;%8h0UKZh!E>&e#&%-CX_3&PBtw&(Q7T*@ z80TJ~$pSvp}PR4@ak)1&9;#_c#vZb6PJoBn}K`_K$u>1EX=n4UX)QIS=g&9G3e zE~##-D9$X*$tuIFsdLIQvNOsup!qm=_~}J)c7Y}j_e~rgF(A-=f&nufxq#U*Kkvb# zbl{~!&A;LZ3v7ZnFs7TiyB-7^(=@Dku{4>VZ5UH*FYiPT4%q|zaC>;kZ6NcuVgr#I zO;AfON+)sBWu1uRs){pAk};_G?}V5SSdHhU|Fjj?ybPkC@WvX0Kc{6XUNA|0xMNJy zu%;8ywx#-Cte`%iGA3wP(~00eGgKq3G5B)Im~5xY7fYG`_Q;rKTk(sfrjIs_2^!XP zqWNzYlm3pt6gBv~02g;6Wng<9HOHXhzZoTj%*MoYg)f*CNQHS5XiTs})e9!u@wJUH zZR7e*4E&qb#4lHjN$N{G5&D-V+4i=?Bthmy3a)ywRN-{Qn4Ye%6QzG6NZom=F)4$J zPUH+M;D2QrhR2DGj8;QsyJ`@pYfT653Gl@xzh_F^7x7{UR~G#?rsx zjl;j=g=;89`B5QxR0KukQK5NsSx5-{GMIj%>nrhNcw2oe?t}Ai(XHtED2l42pdZ}v zAfd&0EXY$-bTPCK85pHjm8Z#*{8D7G6`7GqWtp0of?)+AnK{+b+KgoXqNu{+yeLg{ zex^EG6;)7~7p3;k&rD9lZOXHgt4bB2R6&++e14WD4$7!XUA`hKG%6q}tVo|%G5Ls~ zlmtA6j+QE@BvT!)%8@6H#dMN=OA9j95oKu!&{ys+&6HP`MukSJGm?`^(;|Y(Q&~;Ior1Fg9*oriHVgcp_+LmQSs&caJn9|ZyZyfoFqw2j?d4QhvPYcJd(;&WkE$ScMUyvRZ?Pnfy6ILBkjn0yPVqi!g9)% z>iDE(l~$6LTve1J)5{EGrH=5`=V@8VBS8KUV^e(N{j-2)kYPb~vVRHY0rR{|icmZk1G=)Ls_bMuH%2aLYLzNe9-rGL z@6hP6X(a$G93)V^!Tc}%4)X*@X&;meJ_Y<$yB(^fVB@ONjFfnP zMh}hcFf$o!D7mU6JF=)t-d?y~MMhU-%acGKW4h9>$}Dw|BrRE0o>fiD)RdmR3>(B_ z(zH`#sv;e2Bfm@g)|8OyOyBt2jO6&Sv}|9z5AI@3vsIaD*eAO1t7)>N^0X92uBPl)vRqfo4#(&WD>=!`Q-1 z-)P2vn_4?{CwnvYr=9%V-{MJ~78%u*jN0l&i#oM2 zwRW9k@nU>_np_3A#vlBCh$>YYRFW2%*Olx5r$m&44vx)|ClzAb&0&18DPCW+o|OR( z=+Yj+zv}&nDPCTN55xTA;x*O#0TcVTQ5#b__cDA%W>G{)SH7#Q&ZIpW+P|@Wb=af+ z&fbXkWSM_Omby!yRhAcByrq=Q0_>S1Z zF84=uTb(JLe<8j?7e7^}B}V|Riqgd_RVAqqdt_;f==cHR9e4(;!C(kk8pt#wIjEeD zX9~lmibAS9rXU((1c*o4@N}9gOA}j?Df6Sph$--^Rusk*S8SjDoX4Z-;#K+ZK(ROwa^YcgkQ+iXK|kU)6y|{nDNOZ^p@J&Lip98R zUTBP1DHh`jstQsj2xj76I!kw=vLd4)9!#pvf_*bZCJmsgLbX8*y(p?usT(gYh9)Yf z8_~Ew~Km`9U_s zc)x*CnVud#u7ImVpV!GNk}hqRS15jz8|<$*A2zVRUf$A9{h3kQ`_oz>Pb1!aqR%PL(3@?QC{lj(qhlIq_qh{gMKtB{XkM|?c(F*X(fCXZx zY|QUKJ$*gB&iaArhqT8NbaiMRrsw;2<_r3vkD)LRRQPZ>q{8;6*UvCrUT=?`=ZmKj zg7*@LO@k__p_#h&QIryFlD5yT^aE4D_5`tGYD%;OXQ|ls{+a)W{`657^r!n#saTce z6x^9Biqf?8XV#l`I&DX5z%`zVJhm8UYtld?pg$N&9!L{AGTlX6a#$kFg!V68tRLMz ztNrYS?p%fGgFgbjhWb=IE^UaQta>Ha>jN!R1ASn|Yd=Gx$Afx(*C8ySTSHHs`!SU_@ literal 22262 zcmdUX2V7Iv+kXNHCNd0^U{F+~cO9*>T30RZRTVc*(7NaUIk^c&5`J%gt@?l8`_XgHe4gh#=Q+>$o^wx7P*D;- zi@(l{-@=shE%*!@24j|KOEAS?Fr;FsheRy(Od3piBn`<5NlT&vL#c2oDI`svoFo;C z)fq`aC3#{=l2oF~QYQuX^%N&%4Ngi)OV2AvOA<>EgCE53*c`-gOinLKD@YQ1Ap6hH z1_I^Vfegz)hUA4cv!^vf`XY$Igo=Cye>gwYjlY}8=kd+>W)9vASL)vi;UPd;5+46; zKeR$iKo#PjO(ERPWSVxZ-jyA{&-GVMc$YAzYt@T?o!M)+mvzjFi1W597Otv1TAkf% z{Ncrk_AQwM#lwGfmCpGY*3&g zHj3X(5fm928Jr-O%ViOAd9)%hA~rUvi@HC9HK?n7cSWpB78$9Cj#LE66_i3Ir{sZy zy4m;OP?Vx4DHuX2h76%7Hp_ydBD=fyWLPpiqbZqeP!Q&>hzN`ZrPMI_wanLT)0)DM zzRZH|nk>EGbvmHGkQSNNl-Qjhi8vIiXBCIC@aNbsoEo)DkS0)0kX)Tz$n)oXH)V2^ zZAbH*`n7h-2`i5!=KSt^b@s^SUpn-x;^7e@9$D#`SYg_T;xx5VYhvp;cE6et`z?qy z8XNluq8habN4~Qh8wz*{-T*8EesF*A16m^Zz4de?;OL4399?BQhO3cWX#HM@RxeC~ zVcN-?!S3*I0F&5kGxx>@VIhIOsp(YVeS<*0<|9v?Z?;A2(bx;W`M97yUL))HkN7CL%V%F# z_p(DlCtB`>CGb)3D3|nK;`zw0>EyB}e-g;#XSR)Pk_-JN`6&DBh%Y7vx}ufmuT5Ju z!3EunT;Kgua~Jd??ct^5^8})R{Pax(8cli)+nD8wN~Atv!-hJc@g7@dng%+d>wkQ_ zpn91TTF;!@BY3wHvidIU_wJncQJ95EpYe9RP|KoSgZk}pMY-I^bs1A!5%g;QrB9vE zk~@dOQ(9cnsAOr3xT6ro&Rtr6Vv7*PisI|hF$LiUBnZSH>OjvD*c)IB=rjyx-Wf)R5^LbEZ`0+x!9CRr*$6N^W5 zwXBh#-aow85I9|e=1=a=3_l`4IC-K`f@*t0jnfje^pB&lH$|Rk?+*X^EO$>7HgcRu zPI#g_%KbwJxq6~=jpkE+aPmY0-Hyz>G8*Jrp~#u}P=xs1_FN`y#psih1Iyz%V&qpI zzt~G9LIXP}C`tgJf<^<~T>5a(s zegq1h`oJvdgpeNhy+XA0CJfkAA#&dEZ2Y~ELIiUirpp0+k0{{iIxXPnh853t!(5o{ zsVz$Fbnno$vT`_b60VQv>i3cxv{}>vTv0fGLfG|G7i9Nz-|amY!B;e_PPo5;pnca=0zDsp@APrX6=i

~=+wIWtRVUvQ>vYPt|b<4GbBqCvYm44lOmA|jeSe=_)j zFD72P+oM2;K6%M3z}hhmxq@0nR~gs#u_z+2lbv%Z;fDvw%l9djLr^Ou>FKd zcT~2ay7|}K?g(Z|;qd|y8t_Y0ZO;f1x;+YEuRD@=1ss(TFj@II(Vu?HL+cvtR&af0jj}xJ-A7VvHPzP)lqa^4v zpJO{!RTAWr5tx;?Rf6!+ac{o_eOpOv|LivjD!)cZI1Zk)Ub=YF`m^yw+2MapjJEYe zALN4$W{e@=|NfBzuj;5IReyD8e_(k!O=jwO(=*kmSW7J+c} zKL`ThWY-r$q{L1Nd_=^-NgutqC`8!oI(H_}%T*)qu2`X66XpqNyUqol0p`aV(vI)Y zby~pD4J)37hPm(1cdYY#M0ea9_#&8#uI(gFS6*4>2i+yI;p)xykBxKO#Ti{WGlcr7Q@0XL=!gTa35C+k-XpM)bKPRI>b0@!VMFmD3`j= zS8Nl~?ZDG;@UlM$>S51#)S2$b1R+wzj$XbF(8`=%MZ=V()@bIsPtq3du||^`Y72WU z*Lsl2)~GEBckP5 zB0^5j9~3t}b4QvL!AnG=!zm(!7x@KS+|dfts}o%7MKo&Loh8g1Rqi3UP0ipauDW7XmzK zx%@p51_5FHJrM?np#wb8xX93NE4@8w{VfzB-!;2SHg^)EKDZ;UVpLYK&@I40jKrH< zI+;%sp)BlT(nPepDhV2Xxxwi)pb!me$TI?jqtJx<=c8EY2 z{Ct0%KrlMzuN4!tXALJ1_>=UbE(G$>JOoPDJOquh0F`La*c~A)&(A`1#u?z){B}G@ zYo|H7Va2o1F!vpLkc~mFJV*zP2g%FVhafloiAry9AcjJ=Bf3!s!sFY@`6imZ3&#O^ zr+EON+3~x)wi(kE2{^hK=Aw(g%7q+rSKVf8Hsy z602mySQ+%gkXAb!jP_%%DZkD{&Ue)GWc`*`+ zufKgai%`X2U#=Cih8GkX>=DN8#0arP2 zV0*u5)299Vf8Mm|^qDiaZr^_S@+H(KmPP38?DGrqiqf*wg|x=Evh8NFh(mH>au_~6 zYF7rMD&A_spWdqCI;OK3FjI(qJth5^9c>4Di~IHSg^0q#+pC`svm-CkHzGccZHwOt zk!)9=qC~T(ChI_HPf)|CorSQ{(G&Sh<>G2^I~-tR%}vM zs#dWJSay{xq9%$sF&HbhEq9~(oNdB?0_Uc*$xdX3ut3PjKt)ssct^_vu(>kd6GtiJ zp17;nL_*jdJ;7L`qdeG&}3&QRTIm^LKTS^EcDSE|1up(=7}(lNEQ1-GRgS;0c$*y7-XG;p}CYu(_PuPnc{D ztAfqtu@>K8cj7R{T5x%c;g(|7`o~-WBi=He6~W>}6U|l@~2fevs)*cbW?E66Q;ya)e?g7otpMoIt z2IO5k55c5!5cr;fVCPi`KDY!dJS+vQMTekTr;CvF<4Fh}Z{;?!M*j-2$Q=*}w?oi( z4+IH&A?S0A>%%%71FC-UCe6U1+lKXA?UpUf>XZ$VJk~VR>W%PZ@Q%`OSA?! zoLdiN<{Kfnz7KS?if_su!%FD_>4{w-=t4kH;0i%7?(M-E2o`<~LHR@o_W#CJu;LFv zjB^}<6-R(Yon;-XWgAe=;ekw7Eg+xl1i|qapn#{3ASPmR1)Qrdne4?Z(u6But$ql# zMfad&X$u4=??CYR0|_7oPq#lh*xGy*B0S8U7F8eL z_qe&;l;1PGPFsF+_*U7#bDadXR-f;jcd#swm^LW=@Z+lBh0)nZD|V)@{ZM@zuQJ5q zRK4}CP?@YwQWvXe?|aJ1Ze2&>R5Ecl0oFT1Dch0VsQqs>lq?!y}<#l11i7INxlUcp&Nw`}g+xi`FfZ29ThA5gzcwk!KD zND~Fy?4LbvCVzfkx%M%nH|I2ba~8^7NB6w+Go<6|<&PFZ+O)jn@v-sO$Zt2^O_61G#ZcRcyO!jC0s37SqG?fN9X7K+9>x>=R;E0?*>qF7}xki(%qXjW$)|d?qjSwb%jw zJT3N!>vpWL+#zG1YKgaTjs!lss`wblWtF2GhLUQ6R;ZK{Wab%1+f8}-yCOI+Y zqg5-O4j^l;^nJkpz6<$mq%djCCron5`iR9ZznS27!h3rk+htwJZ2sz|)J}W->i3om zy|H7PU-HvruO(Cal5@qD$66k()bd;7*SIz7!w*(>Ap1u2QkB1W=r{4)ccEu_Z1RbZ z`L~uW*ZoFK_-JL$El1LC`>v6__Dadg=c)z{N(m)1>lQJW-TKP!+kvIrg*U$Pn?{8= zeX(tVAGcs-!kjbaq~GsfEn}ZMgfiL;|lrI4l)=J!9`8DiY2XP9fm@DzfrW1Op;+$_Xk zcwB$y@FDE@{1r4x*vi)I za2|083Q&**tYIAWb0M3@a&v*z$Z83f=jL!sNd3Nl|9-bEM-h|5lLsC~9!w_ZJxC$W z?!Ica4~KF65R+$7*>i&mT7D7%i<$2(Hb-7LhE!8CXUv#kPn_25MZe#6f-PnpS-}-& z<{y2m*;dCrS&THtGac*D*W2&Ae)M|{TfjM9`VCVI2hM$-vMV{Oi!ZWy9LG^-*wZ;~ zOU|<6IkoPMYz`;=_-VEyN74Hk#B1h0g;4DNge~S+M4f^9`7fWt+G%OYUe9q|ZvlmZ z1?GTwD&wJFA%3t3I|A(B+@0pgc4SR-fT|x4fV9uH9&7>2ITxb5Eo>meGc|)&Ef`(Y z4TA+ce-aBSGyq0%YclCU;SKhjE)P&oG3Vr`_t|xp490#&PSn(fnt69OLxcMx{25zNT z+Tur@udrP{K4~=?+4FIA!hAFW4m6Z~R-+KecYjca@aK?Utm_f3Ci}F``1c4fP=@a= zH6`F^M+)hD^Xs`#O&?nPgYOZy*X_7f(7)@CSD!M+ZST1<^WO8yZQYxz<-Nbz|NURw zufi4`S#lq5XNj|)GG9#?ZkDE%Q3p{iP7~KjIK{Ef?zF1*9{n5@5bm_fYJ4HjIQMv zT{mKM9fHwy6GqoH7+w2dbp0z>E~`IA*HS>&-(z%bh0*nLjIJMHbUg~AYXwHv&KO<0 zVRRjj?Y}oh*QYSL{sN4aRe;g;ZH%tFV07IJqwA9xU0VXW4#(&^4bZhaM%QOCy6(6U zf@>IEuhgLHF66Rxm$!un8IFLV^Aac$?t*!X%ghx=;F6saeTtby8t|ni4kR zWvv){O-Ewt*GyvhcqYsPLvh>bgwc5*<$LR{EZ7gXhK^Fow4(A{M5)V@BKpF$Bo$T* zU^_Ry`1Ru*YlE5sT-9!f7R515x7spIyL&QBP5U$Km_+R~=$Y=y4@;G&J6LF7+MQq$ zHB(_)PhsNiu~EWCg}33`>4fJx;!TtoOJGb`ruaV#*YJ&=a9j{B3H*+3`(DGeo6aPz z*D{IqwHgUE)Qn2BmB}xY^933ZIGojy!DTYcxgA&x3rjqa@K%+>u!jG9_-_yYyTJdh zmYf0Iu;B1?r(d`a!}-JX6fWTCiUb_&D)EEI*)SIh#17gqU@O~Rr<7n{Nh`4>BwgdWJq||fS7F!Xb`wR1Gvt7NeqAlLH+$u3Lt`$IYjzyzxF_Qb`C0bFTh6~;B-JVY+aCYaNX}osu0w#|c`~7= zt4p!#Mbd9U)?wkFjpV60l8s$=H=Oc|k%50H(F zL0!7U?Skx1WribX;|@x{>sF|Jl4LollSr!+JWN%{W1RUcRfQ-0B)xA2?;z zx({xWX95pAdHJY`yc&!Ge;V0D*3NS+dGGrsvg3%Tr<0^N$sV^R%&Z#RM4HB4E1LMU zi4-RE+A=JziDZ3a)2mx~6FDz+@biXoP2?HQrG;O1y+$Smjkt1U`weoHF!?~xr5j{j zQn1s++c!v!wKH1COW~6~>YZ?tJb3GqjFAyQ-^YBX>&qrGHqL6o{U2|VQzMsW9R2tj z*~jEohE?`8a;52_mgIX)q{^f4^sPfpWK2TjXJ^LTAdep$ow|$xM+0Yyj}I-qLAn($ zjwNnhCv6;Gmh!htNq0Qf#UmzRLOc}R*&3V#e>w_5% zwdYBj@T8#Hk>|;2-j>Yh#N7qRKM>kw}b{XbEj{aiw58#;o^^lz@E92We>3Oyk{DSU&2>wE& z&MSCz(sSL4SpEZ(nEWmF4Tf^IBfU}IP-ntl3dv}nf%{bo!+$1NmW5R?jtO_3oK&t3MR3)9uJP8NbLw}ba4Lh2eispCV&qxX%k^o!u$0PXnWV{ zb;nex)SjBtAJ5jTVG>nqUYVFarFQ5W>xKO@J1>U7tWcSmrb_1bX7DY9Ca*3Y3mI~} zai%kMS4DN1EU2lyT*=^fgaq7_q9d)>9_Xa*8zx*1#>91>0<~my->GzDu>ks?S@iSi zj|aLO4K>|CE6s{N#iHZ*V1bUk+A+~0w84+GAR6eUBaMZ41$T70m?4G%ueyaNKAn2a z+_VC~kTKxqw!MS>Gyebio89mF-tarP#QT=-`4R8pDiHAwZVSAVPx)`>>oxI)zuppW z?px+N`C#<+&-mZ=VeY#+{371z5yKnb_7iVptNsqQyu`af&+ifp5^o==+1*>r1Smmp z?7paF+AaqQx4=o%&8z*oss6` z=IDffNd_6};@n(FX|AP0Qml#RhE8y2MwypN%TTP!FK;c#OUX`C7Gc>`X+_DY$wkRfeWVlo9w{$1OPzt+ z#taGW>*F+9Png%GfbP-1k;7D8ljXINe%%PsJ)O5^>#5wJg9VyFX`@a}UK=Lqxp~P(+2|GgXGZk>Xv2Q#ZcgKp zx4{%F-q2$3ZEd>X4YSnkc?@~#m$YHpI#mB)1a-SALx%b#Z5aM{6VbHg)LhLl#tm+4;OsW6^yc2{mZMkjpC%>jn+=(1 zGv6>P&)($Ic^j6u5#Zm9CjP`@$WoWvhS9&AlC58Cj54G@=HQ|? z%N34D4Ebp@+i?0fiqsyw8nV(WXv0o#2K=L`p?{`m$hU3j8)gp2NrpVNnQwrV2JaQ5 zB^TutXkb`wR!Tu~K{-ZY`Q^z41$m{Idr4YBVOE|R=SsZX#qLrkfJQp|(JPd?OWh?- zqZtAVDk^GlTz~q6QLx`tgr5pZ0Y7(-)c)js5Jh=X{uxv-MP*O{8Jb*w|JTY(aVGq- zURxePK^vTgM?*zZ9!gQA6tsgkCK8Z`X+fHz!V00jf4@*wZgHY4)-zrjQIZl8Ta=;> z&l;TNpORJ{R*)R$l@pqsml3KC%S=(F=7wgKW`wG|GE?GWaGm1RxUxcd0F|Zmh{{x| zBO#9})uv070ioWZfkCC=0nkPrAx?~^aCupJthg{VAS^pYS|&+}^Ge4&65}&-mE}-B zxCrDiq@SqD%}tR-rKhNZixQ)up3F;93o61U&hK)#!ixAWOk>sYzf`LXBF^oDe6;)#)--qD^Ccs{KK? zpg)PPI4#~^TPLcm& zU5fjn$2THZ86S)LrqxSgeqvl%PP|m7Zx}~uY*}htY;l5==BtzigKY#?=<6;u&MQAb z>YD?)#{B~ukd&sy>dCJ1%}Q3qW~T;d%8L>~uRueb&^m5qzVZNUcdz&+NHa5)t@dRo z*K6g*^aVDY8knFRPq2+P;|(?@j>!IRbQPwhWhe`_bB&D~(M_VNtRx{mQW`<`-H2`y z3sa=Q|Ftaf9ueY%`2QqRRJv{s{{N6I#e>r5#9*Gnd`J!Utw@#mmTKmj(Xnfz6GK~9 zrO13UUybkU^A+YsVM+-2l#tivVG+#n{1jQPIE}Vb!+EHZz#PggOpcH8YG1ZDZCC!~ zDIQU|V2>5F?=fOuAz>w{vev(R6f{MG5Gybz$t){qCDHxoh%pd zgBSRJ|J(!#p1T?E%+6Bce04s+sDIV@5o3CL8$Jxn13JqG|Jkm8HK=3E=H7;{P)l2=skwN z7sLT;C^fzyRfTn$TauE6_cNho5z(Hd^6bzoyhqU6e@GMK!zz;Fe2X^Fn(wlfx|12P~T zSc>pbY6SoK|2a@MjU>QQ<&_)0moezbYHzBNF*QDkxC1$@SL=p^4%; z6qbSV$NHyyODaSn+%h8|T%-_*Z~;{YA-xHv_rIE?rV-WFo-{=P1->-DP^wg+r572l zVJg#9&_p#cf6OZ+3^0)wth-_=w!}lrKd`Wsf2O8t3=TE>Z%tC8Gb$w~*q;ij(DI|` zp802M(?P*>e>JMm>u(OG1UMJ4zcd&2*SP;ws3=_|ic)C#N5&YJ7uS?YO29wk7!{7| zOLhE%V9?-!(Sk>hpC=^!bCTnNiviD6C}pwP7z3t3u2jblA6LMWuS;w76{5*~rLO=S zh5a@7>0&H<8cqlH>!Z`Rq)mI8>Z|q|tB@t;C^P(3q7u#c22hGt`L*_;u^&D8vFiP4 z?1M>#$y@miQuL$8D>78W#6VuCPK89YVm+Ls!UDDJwc{0Bp-t-gUzW_EN?#@e;vQJ_JR5PYp{A^TyC*aQyh?i`FZ@E{(^nz z_D5I-Drg8CQsMchv(G?nS~nlt_7``>zjgmd4NB4052X|^CpGi4J^R31@O;wldvTJA zc-21Te`v2c>Vo!~c2oj3Wf=u;`U$1ft?lXY);OKkqc@NmcSRPF2R>At2o?eDVWMP! zHnAhqKAM>v7y~`g_?LFZPg7s-Tt(Bk4D$zn1a=MO37D>CB7(8%j9h0A8l~#l15HmA z?4dok(a?i(UDJUKfn$Dh0sIBaqQ1)f{Jxq)N=NM>q$7Pi30Lpn3gf@|#~^yapSys+ Hc>DhV01uZQ diff --git a/tests/integration/reference_data/reference_diann_1.9.0_tsv.parquet b/tests/integration/reference_data/reference_diann_1.9.0_tsv.parquet index b48e669a99969d0f1f67997eb153b3236f4b9c0d..7a452d77b07441de1b5863dd37425fa3a13a29af 100644 GIT binary patch delta 3877 zcmb_e4Nw%<9e?lkKK3~7IN)}Xq;7^T*<#__8ThBQM>qKrDkRMMfuvD5~GHbXMe!f8!(EXKZN@4S+S z*}J`W@BP1jzyJT;myT}SS)xv15YO-}Wm)!UK>+~ZA*I>g{YFJ^`u1)GYExL5NJ}nI z8!cwKq3XrTx~g?GjDqFa-&5@FmiBqG-+;_!7eg~327of7jz|!-(CG4rWd?0j&H|UX z!nH!I5Z&d`0BDoBl_e$aii*;urOU-=?18b(R>ns3Ge$d}sra5WT9kGD=?r)YWUN8h#>+?t$z@i0*1oC;p?^X=0Sjeg$(# z;bKz9La`Hici<}u4zm=;4z@+V^}#PPg{+LjvAqW}g~3yXw(PBVIhCPh05g-ZlL+7) zR8C>s6S8WR#zxOtIO~i32QEGH@duje0G%^fnFya5iiW1K6nsgsw<*-BN4*gkD#hqf z11gjp{8apK>_nDAd!HvFxMWB#zK2reOx|0k)hzVgFNq2|cb%gga> z^IEDgoV-bY(K?*PNCaz=zRtct3@gL`+}km z)~KL&5uUF}!7pgISk5?(|HJafVL+G zRrdOZ`lg!Ny2i2GuVvSxE*omKVfVE7W>S4_X0acJT&kK_=g!W{%`bovMo(d$D_0+p zr}h*|XUuB(DmfYgwTsgO2I-!!tCei3tW8@j8`THdUWmR* zLEBO!xQ&yCubNoAEZQP_NXs5xOT)<#u{xfW9NLqHZly{NttAc(LS`3q5hEpLSx-mg z0TomZ<*GDFuuo5eGkRLt(xNqk`{c(z$nz(%u~*dEz`w{#&t9z&c=dGgw!Op+vJUya ziQ62*KV%#%>i7fzOfDIBN{unvKL7C-T3q7mmN?r*z3{A3tMM5Z@~7Ta5y+uH6t7$L>xJ2zHr zp}WL^$&2mA%m5-)jn(xHGC|4g&@}_y%^;;mWTu9r3RlIOV&&)$1)}e*$2vXNymaNw zTSL@UN9-v(9fuw`+TzaE-P?c1F+Ar7TOwE6oP7)Cfn`^ocD|X`iQ?uy<7|I__|Sqz z=qzc>jqlz%><}pGXNy$Roa2%}%=6zc(|#Qi>c;IpNmP?a1SKnFUd@ZJ^>BO7Cjec} z4upYPv%10GrH)bP9XmRnHI){eX-GB@7P(8@_Hj6V%8=B|clKq85*2i#0ReIg!PNmF zo2-gCe-}RP>FIg)>t3x{S$t7N(^(f%RRuKjm_Xi#y9&wYzGfTw9DLcNq9c{pEh2RD zsc$BTQbmsHEMjOyNdFK4qM7!|ZWg3Sglr3BXY!Kxv>*@*ZEamsOYt4c7U4GIDw!bCWP&*33^0yzUA>QKB(}KcKz$C0EdsRZBb9?9 zUNYyGWIu;!crH3Hm)sMLBw&w%s3V5M*JHTMqLpVlUK_paJm2bW8`1yM znQ_dnTkO5;luhuC2MC^#$j=EW{MM5f1_lNIy0tKnYz^tib@dJFf{MSV6m^#n2@a`C z2+Vwcm#}R*Zu*UFr#E+2H@iuhf9j5phJbMeW_aJI5L!FDKl8G;wk|&sJXokZA!?DA^y;0rs*#_b0 zo>iTjPHzyBcjr|NY{r|J;KAz8Oit0{Q2E9ah#pm4EjKcmJOAQFTbJU~WHz%U#dQ3X zT@BNCi`6p9OgR<{#sAMximyQ1^za5xA!+Kt(2<719NGlMB;%EXLf>tqu}u$egh`k^ z@njM1U=d592`7mJ#Uwr4u>^(NH^Xe3jPN~lphOS)9=Y33-6jf6!nFZ+eOh#d1I97WqpXCwF4&~zFq$i-gfy$Ywt^Tny@(?Nv{*jte`13X_5c6? delta 4315 zcmbVP3s4hh9^cK2O-KSE*-e5G0vA6w5i5l76tcU>qiTcz4tlgc5U}TzhY#$iQw5*r z-P9}aspnX#R zk;mn7&YS>b2#QQg&CAT2nVy!KojqM8RoggjR$7`fH#Z|QVc**kOvch6gKv}5`LaIvu8pWVpXo!8vIsBe7!Zw`7txzmTlv;5-91}921>#y# zVWpaJ>LLzK5}@A?`U@gpmNrO3ipUlLc}d_PEv^b3zjCF98 z$f7!EpY_V&c|L){k43gA&k3-B^ae;Qt(dp0qF_*%_toGL3>BxaX9VPVR7=`b%RP9{ z?z{J<{eF|q!YbM)R_{SiRcz9!ocm^0-OGKwu+PjkmD;L4n{Q!n{Ab${`4S7P5u3@s zKCWK#YhpCJuQm~%Q9p_OZ)j`zJovSEg1-2p`7?9A@CkkZW;z%qQNs|4KFym5w!tVd z-!g8!xZtZ~VqcxYRq5I>-bDOdiIgvPmAh6%?Gk}DJ-A01Jl?dhLKmcSBjkb)La>Em zyr%$uDG3kw+4RbsOlc?cNrgRWdUYr4l$h03t1Eu~XGaeseoelxhsnXS&Syi~&r)fA z_3g$!x`%lRA_g7dc|%7mbX~U@3ZsXyq7=OO4U43Hqc=aIw0yy$l0_9O-FPyKsCp4- z3I&%8&)E{*@te#J0GOXKOvl$wYII{5w_@Pf?|jB{@zU~&;zec4T-*GCS`0eF!MSjV z2ac^=L6&%^C&?s1PMp!PF>&#@Op_HC6B%d2Wr5iVv5pwA20b&goeGW3X^n~JS7s-q zWM@PU;S-FOWGC=zbP~4ih z&M62{O^uYs9wNhKbpH(*inF9f(L0=!fF05>iJL^AQf5o_X*mz+rn>dD`3(sjA+O3& z?ilzq!a$0iP-S63*}SslrLJDw5F)k&RTIDwGh8F%R6~ibMMei~kpFo3=G8mQJ7d04 zZ(q>`AIQww-mderOCQ}~#AjAW6c4+Z{BIu?h9!2xIDd2HoAkeGzxbwW7#Q!Lw4v_7 z<8DSrczp6O9%@BV`{XnnAvRdOFcvQ>EMMy8ex4JpB5*$vJer6wJk;~>`|%xCq2BC3 zGwkV2&mW;hPBZG(;PshADP!KYC4V1Pz+}$4+j~oR-L5`uCA8qNy-s_!?3Xj6B{N$sG^b zc)Mo#VKy4a=>>;ao`ageH5QL2u>xAh$EBuwv(5fc&p{(6qbYS)`XxQ80u=+4d8BxcF5#PU6Xs$HM#Gi z0sS1^yV2yHMT1(+Pi{XlTtfFTWGLb%cPBr&7nDyXw@+yc^ND-7$!+JLeT)GK6ZfEg z#DX?Ed_5pPX+$QfPxUg9sIH!dDq4B3P@EoH`7KjnaUO7NhIJ}S)K>qN zmd|qAnZ#L*NB`1)g*m^&S=X(&%DDIMVJr<+|7kr}yh8}PRf8JfrI}L?zFY6sT@9JA z%D^i_)BrR|_Z{H$G&nbitDH|3dCslKlZP~0jH^0Vvk3_&@5ZNBv3JjvEJ=*p$evh- z&j|PXGh0xep;Mmt6FWEPY|Gg>>v;({NCWE#&q^KP@x>!?s1+sSI?FUJRX<1&@5q4$ zb?`(hwM#&~F95wx(3^!YKGcVgXJ{E4GQ4iwrlys(f4^`uDd7!Phj#=g*TWNP3$>uN zsBh}QRczP0orTTcuVfGI%s>9-Kj5!wiz;kuY{9iPE4}&_zNE2nJ^=UD*Q;x{&-P)h z!a_Gv-NLp#5B!vil-Am@YPe>s`REl*5l&e327Ny{_zQ-Od-0EfF#OrlUmf~<)yUsG~E?XBGI0qpG+d36V;pF9I$n>i<*Tj$8l$ diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index 0b075581..7f429c24 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -46,9 +46,8 @@ F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636 Q9UH36 Q9UH36 SRRD 3296.49 3428.89 3428.89 3296.49 3428.89 3428.89 3428.89 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 3.99074e-05 1.96448e-05 0.000159821 0.000159821 0.000146135 0.000161212 0 1 3296.49 3428.89 3296.49 0.852479 19.9208 19.8731 19.9685 123.9 19.8266 128.292 0 0.960106 5308.05 1.96902 0.683134 0.362287 0.999997 1.23691 3.43242e-05 1212.01;2178.03;1390.01;1020.01;714.008;778.008; 1212.01;1351.73;887.591;432.92;216.728;732.751; 0.956668;0.757581;0.670497;0.592489;0.47072;0.855203; 30053 1.19708 1.19328 1.19453 1.19469 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642 Q9UH36 Q9UH36 SRRD 2365 2334.05 2334.05 2365 2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000184434 1.96448e-05 0.000596659 0.000596659 0.000146135 0.000604961 0 1 2365 2334.05 2365 0.922581 19.905 19.8573 19.9527 123.9 19.782 128.535 0 0.940191 4594.04 1.31068 0.758988 0 0.995505 0.28633 2.12584e-06 1209.02;1210.02;1414.02;1051.01;236.003;130.002; 1209.02;1109.89;732.154;735.384;0;46.0967; 0.919244;0.937624;0.436748;0.639369;0.296736;0.647924; 30029 1.195 1.19328 1.19381 1.19339 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648 Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 1.96448e-05 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 + F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 """ -# F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 -# # last line is made up to not pass extra FDR check TEST_DATA_SPECTRONAUT = """ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index c2db8ccc..8126029b 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -1,11 +1,8 @@ -from unittest import skip - import pandas as pd from alphabase.psm_reader.dia_psm_reader import DiannReader -@skip def test_filter_fdr_columns_above_threshold(): """Test that PSMs are filtered based on additional FDR columns.""" reader = DiannReader() @@ -29,7 +26,6 @@ def test_filter_fdr_columns_above_threshold(): ) -@skip def test_filter_fdr_columns_above_threshold_missing_columns(): """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" reader = DiannReader() From 08f62b4922b1672027c7d778c3febb0d2ff7f8bf Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:41:02 +0100 Subject: [PATCH 11/16] Revert "add new logic and new test references" This reverts commit 242fdb96f332c2c2be4236c4537df89c253585ac. --- alphabase/psm_reader/dia_psm_reader.py | 16 ---------------- .../reference_ad_diann_181.parquet | Bin 11848 -> 13185 bytes .../reference_ad_diann_190.parquet | Bin 4486 -> 5764 bytes .../reference_diann_1.8.1_tsv.parquet | Bin 21420 -> 22262 bytes .../reference_diann_1.9.0_tsv.parquet | Bin 15659 -> 16054 bytes tests/integration/test_psm_readers.py | 3 ++- tests/unit/psm_reader/test_dia_psm_reader.py | 4 ++++ 7 files changed, 6 insertions(+), 17 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 9369cd99..a505f6f5 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -97,22 +97,6 @@ def _filter_fdr(self) -> None: """Filter PSMs based on additional FDR columns and drop the temporary columns.""" super()._filter_fdr() - extra_fdr_columns = [] - - if self._filter_first_search_fdr is not None: - extra_fdr_columns += [PsmDfCols.FDR2, PsmDfCols.FDR3] - - if self._filter_second_search_fdr is not None: - extra_fdr_columns += [PsmDfCols.FDR4, PsmDfCols.FDR5] - - mask = np.ones(len(self._psm_df), dtype=bool) - for col in extra_fdr_columns: - if col in self._psm_df.columns: - mask &= self._psm_df[col] <= self._fdr_threshold - - if not all(mask): - self._psm_df = self._psm_df[mask] - self._psm_df = self._psm_df.drop( columns=[PsmDfCols.FDR2, PsmDfCols.FDR3, PsmDfCols.FDR4, PsmDfCols.FDR5], errors="ignore", diff --git a/tests/integration/reference_data/reference_ad_diann_181.parquet b/tests/integration/reference_data/reference_ad_diann_181.parquet index 5ff764c967df547bb0a0b9d4e3126612d9cbd3a0..ba33f97af97ba091e54c2e1e45f61144e629df59 100644 GIT binary patch delta 2195 zcmb_aZERCj7{0w-Z@ad-kM?%ATiMpG+?BO#{oGa}tY>XE+U$1g)~)M8m|Z)zmbUAh zbF6|3#7{y5eF>UK$Rdgc&`A1k3O|e?{19V^3C8$`#TbH#2_`BKC7#=Mh-^PF;U@Ro z=Y7v}p7Z8D&y`&Vr#_n~TZ;G5a+;_n2=f;=a>`G8Us-lZub<9dtop>Y6E|A_Sa!{B zmtKz<^R5^1_KnDC7tecbJny0jT1D5I@cBxbqA3j{S=%WhUQ0z7Hs}%rAv2J8m4^gE z;p>-4_8WzUmZ}{DRjFpE0kgZ;Zl+0Brjta69dt)-~=SlD(p4e_%kY*a3R#Rq|{-oToJ6 z6Pws&jfR1P3I)8R*a9-~0rF=p92RRt*BaSh8=y~Yg6rb<$d%0?S87CO^z7Lt*s9zC z=L8k7D#?*=Zh_t98qvLG_C5=Zph@OX#Y*psbrQ)+@Gx*9H)SHN-EJbBXwGPy?dot0g+z*U(EM&+5xT)e|M>9ULOhwpvNTdbs+hb>;)VI*s3RAmOq`YtwCs^={h z){(-W2=-J*aei;X;h-}R33e3z?=6AqHvxO3ojvcs4zCy5P#!y6lTXV@lDy@D*{AD8 z$DUzVJM#liRS3XPQ(Z-ussE~W5DYaJo1RQZX&qdy@QDhs0&(SnOuCt5sNxW?zfvQp zdMov8{%++P0`5r%QK@GiFNIx?nZT}%)7P>-d4DPtk0!5ced0b>DjrQ-*Z)YHVxyE* z2OjwSr0iA-*^R7LsP9`;a#o@`V^9)=+Qd-VXeJ(=V@_7(Lp35{$@2?!iZb4PxWI(+ zqqTR1n~LV^OpUrdiOE!aJQGb%>b5oN3Vm=fAqdv5Y_jMM5R4oR4)%w)6{E!5X>8*k z3*x}guw57k`e3?2_q2P?jS%X_FY+(=^6xfC%cyS8U@o`-=4Q3nQ^EtyGm;>ejO7AJ zFLdeaL`5{N&x#lOJwZE{fZYa0g4T+RHX|AKaMnO5W-khrFczYMTrRec0|8qnDk3dA zD?t_xbNk)Aj<{4nT00{d4vj@eJl;htQ%jZLKyy|cFfUjJJrO(&BN<#P#`QB2uOpB` zHjY=SE#o1iv{@{Z>JRtC90i#Y9B7`Uh8<(Wp;%F+zf_FtGqQ|-I&N{!N5YmAKElXi z=^opt)iFI09)@wfS{e^KvJtCu@7NGr(lKIZBAOma#^HjtZfSIAVak^dERKbH%o8?W zqCe5y;Y;t!MzCfqvlrJW5!;!<*#AO&(KqLF`>kW6{#-QdocASXrhG}Sr7wx*xrE=@ z=f>RLp82ufp4_NyEEUTHvJ+N&L4!-!7yFXl#GW8;AL`{7%v~ywF&^X_y@*37Foj)q z&!8!UCXo*zFJGewU$-MKLVZ&>0d60{LSepk^R>s$pI_t`KPIB?86NiY3r|F-vu#gX z8~%287jEL!_$amTpTdK0!V~@b;|fizjBnrty652Q7FLDL81dS087=9#kw57KiBYe> zhD1hkdlB+YINRDf@fjZ+ZP`K<3`TIN#ZJY!z;MtCcUm0ERCbsP@pnFFERoR=+%~e} zVzdxsjCK)c<3b2NGIo?jEP))X8XctsPeHxOA+m9~$UYb~IVf*nXTXEXt#w!<%5 ZSwR(OTkFBxT7h4xocLQy5Cdq?@(+>^e-Qux delta 1367 zcmb7?O>Em_7{?Q*w%auHBf+WNx+zIIM@y>a!^x%~vFcsYw61EqNs}e1kZ6;1X}UB? zDOo;jYu4N=n0Jpp-D^}CWJTuM-Xs8z%(K3v^h`}3EqSC z|G$3z&m;dHU&-Hp{!XR~B>WP#Z-NIni+^b3_6FfPJ^>@dJJ`!ZaF<~5Unp*uf;s98 z43Jl`hr@7#$_T%c+t`~H_=#fi$1Hb)g+8;I!QqZp4?lD_ zvG1&~+{5Azp5Y#j!IhrRuuo3FT{DZnH_m-v2ZMeB{xTceiQD=e?D9!)_p4r zzbY@6hYnss@3#Hk1WW0efQA@gXd)UswQ$-)VP=E*bva<5IQUt~~N7=|GH6xH(gVHoyJ5ZL+C70&U|^c zHD=d$5dtqZ({*q=hC5Pryx5iZ$YIcO3>g*I+uF));!SBi!NZty5f6p3Xv0~MAWDBVaWKQmL28p zm>ge>sDPGPiI|}(x6rlo|Mku0sR93x94T(x2 z3Z+%KqCUc^R|^p;F7R%*vSbuZDAh>qrku8HD`gNGL9=sfId8C>qwt&CPh9XN!r2MD zQ)C1QZvgj8;d}$$@&<_Yta3ly`rNyy#oJ@S!gJbwi1H3t=7}E7HH${GfMVXi0YO)j AlmGw# diff --git a/tests/integration/reference_data/reference_ad_diann_190.parquet b/tests/integration/reference_data/reference_ad_diann_190.parquet index fce6d5882fe9f513138b7f8badaee5af61586b6b..7b828bd95ef6d70d62ce0abc43f53e80b59aa018 100644 GIT binary patch delta 1717 zcmbtVT}&fY6rPzf!%*5fu$}I(bg==Mg$3OfTCp2}y|ksBhPF&$TFQ&*50rMNU7)n3 zbxk!vpVk<1_krm8VB&*MMBT(};>IjyV|-E{iP;!6@yRD2_T77@Eo=~t#+%N$=bm%V zx!-rrN&3#v-iwVz5Owe!yp4Zh0c}JtL2$1=7Mjifw3U9dLp;#IEIqUT>s!<@3qPT)osT=kV}us|LSKMC(WCG& z*`N*#P|sNDy>9WI9*!|H4aAz`%?muGk!P*%ef@Og$`&>QOIL5sSkf@n;Uc%gn%v z?nmm2i|{*Ufc}aX_ssAQX7e)N(Ras12uvxxu~ta(CW6-wQ?0@~;nNO#2L=PJ^hoIT zIlcKmtugDwGmV)&zbjYIc4Rj6&jtIB^+IEUu$-ya?Acmv3q2Nt$XebvnF*qATwpen z#Z0m!e9pqxJHH>$M%6huZW~ApygEQae! zHC&f-3m%YLEvRbBA6-us=66wG+GCL_gFO2^y9}^)#qAP=jAZe8~f>| zcGb8|%b~oMw!ftv8E*--#7d?O`dMV~q3II*#%LZb#>&ZfxU%Y3w5L8k9wvq9>Yna+vEm%s%gXp24OQTwxb0NK!noMgMGlf_! zJ)u@oIb1bqH@xJmF65=$`cgG373Vfmh3H1w;htDs3D%QwcUjAkm&&1>b5|33c3w|gar<4Ef3xU z!HtZg*{h~m#*?7M2N&_IAgdh0LGU5hXI8wDBZ3aTO3Rm>lcAftVvZ;=fbovu0Yke7 z1nO{yd&%J6Muu0AMFLy!kh^A3PeQ#43LS2ELlfcUv=d?XoCG5b^BUp=Vh=EHjH!)% z4(wtJ(758^Z7o0Em+NUq$eYGs&`KaA0NJ$)p>wkc08CF4f^9Roy;X*CkM;T6R)YBJ MkLczI0{>V23HFHZ2><{9 delta 946 zcma)5&ubGw6y9vIo3z;`8k@LjqoSoNg=(`&gH6HK*)*veyPGC8%?}EaA9b6}&l=L0 zfEJ;Mcqr&V!HeKUJb0+|U+6(l&|G_vUOd@@7xAbwo2s>V@Uc9;`R2WEzBj{eTzWWU zmw#9ii~4Hh9vNYW z9EG<`Ht0624!vSr_FkutpiekiI_jVI_$KV6gK_NX9Q!(M%gnv)h2Kq6kfip==O^JO z>MZrr${qAU9bNA$`3RP-=T?@>8GA3>q}}ixea^7N{pD)%PuXblRv>m`K4Yx$O@=t(ET?M%Yo(A3l*Kc|9*R0#X$ z0;qO2)JV^U+DUJ^l&i?KtY=))kP73iIaMm$j^c{~vpAtzuU3q32ooNh!U{@46~hQl z0CxxyzIhCU82n+kpBM4YkcO9*>T30RZRTVc*(7NaUIk^c&5`J%gt@?l8`_XgHe4gh#=Q+>$o^wx7P*D;- zi@(l{-@=shE%*!@24j|KOEAS?Fr;FsheRy(Od3piBn`<5NlT&vL#c2oDI`svoFo;C z)fq`aC3#{=l2oF~QYQuX^%N&%4Ngi)OV2AvOA<>EgCE53*c`-gOinLKD@YQ1Ap6hH z1_I^Vfegz)hUA4cv!^vf`XY$Igo=Cye>gwYjlY}8=kd+>W)9vASL)vi;UPd;5+46; zKeR$iKo#PjO(ERPWSVxZ-jyA{&-GVMc$YAzYt@T?o!M)+mvzjFi1W597Otv1TAkf% z{Ncrk_AQwM#lwGfmCpGY*3&g zHj3X(5fm928Jr-O%ViOAd9)%hA~rUvi@HC9HK?n7cSWpB78$9Cj#LE66_i3Ir{sZy zy4m;OP?Vx4DHuX2h76%7Hp_ydBD=fyWLPpiqbZqeP!Q&>hzN`ZrPMI_wanLT)0)DM zzRZH|nk>EGbvmHGkQSNNl-Qjhi8vIiXBCIC@aNbsoEo)DkS0)0kX)Tz$n)oXH)V2^ zZAbH*`n7h-2`i5!=KSt^b@s^SUpn-x;^7e@9$D#`SYg_T;xx5VYhvp;cE6et`z?qy z8XNluq8habN4~Qh8wz*{-T*8EesF*A16m^Zz4de?;OL4399?BQhO3cWX#HM@RxeC~ zVcN-?!S3*I0F&5kGxx>@VIhIOsp(YVeS<*0<|9v?Z?;A2(bx;W`M97yUL))HkN7CL%V%F# z_p(DlCtB`>CGb)3D3|nK;`zw0>EyB}e-g;#XSR)Pk_-JN`6&DBh%Y7vx}ufmuT5Ju z!3EunT;Kgua~Jd??ct^5^8})R{Pax(8cli)+nD8wN~Atv!-hJc@g7@dng%+d>wkQ_ zpn91TTF;!@BY3wHvidIU_wJncQJ95EpYe9RP|KoSgZk}pMY-I^bs1A!5%g;QrB9vE zk~@dOQ(9cnsAOr3xT6ro&Rtr6Vv7*PisI|hF$LiUBnZSH>OjvD*c)IB=rjyx-Wf)R5^LbEZ`0+x!9CRr*$6N^W5 zwXBh#-aow85I9|e=1=a=3_l`4IC-K`f@*t0jnfje^pB&lH$|Rk?+*X^EO$>7HgcRu zPI#g_%KbwJxq6~=jpkE+aPmY0-Hyz>G8*Jrp~#u}P=xs1_FN`y#psih1Iyz%V&qpI zzt~G9LIXP}C`tgJf<^<~T>5a(s zegq1h`oJvdgpeNhy+XA0CJfkAA#&dEZ2Y~ELIiUirpp0+k0{{iIxXPnh853t!(5o{ zsVz$Fbnno$vT`_b60VQv>i3cxv{}>vTv0fGLfG|G7i9Nz-|amY!B;e_PPo5;pnca=0zDsp@APrX6=i

~=+wIWtRVUvQ>vYPt|b<4GbBqCvYm44lOmA|jeSe=_)j zFD72P+oM2;K6%M3z}hhmxq@0nR~gs#u_z+2lbv%Z;fDvw%l9djLr^Ou>FKd zcT~2ay7|}K?g(Z|;qd|y8t_Y0ZO;f1x;+YEuRD@=1ss(TFj@II(Vu?HL+cvtR&af0jj}xJ-A7VvHPzP)lqa^4v zpJO{!RTAWr5tx;?Rf6!+ac{o_eOpOv|LivjD!)cZI1Zk)Ub=YF`m^yw+2MapjJEYe zALN4$W{e@=|NfBzuj;5IReyD8e_(k!O=jwO(=*kmSW7J+c} zKL`ThWY-r$q{L1Nd_=^-NgutqC`8!oI(H_}%T*)qu2`X66XpqNyUqol0p`aV(vI)Y zby~pD4J)37hPm(1cdYY#M0ea9_#&8#uI(gFS6*4>2i+yI;p)xykBxKO#Ti{WGlcr7Q@0XL=!gTa35C+k-XpM)bKPRI>b0@!VMFmD3`j= zS8Nl~?ZDG;@UlM$>S51#)S2$b1R+wzj$XbF(8`=%MZ=V()@bIsPtq3du||^`Y72WU z*Lsl2)~GEBckP5 zB0^5j9~3t}b4QvL!AnG=!zm(!7x@KS+|dfts}o%7MKo&Loh8g1Rqi3UP0ipauDW7XmzK zx%@p51_5FHJrM?np#wb8xX93NE4@8w{VfzB-!;2SHg^)EKDZ;UVpLYK&@I40jKrH< zI+;%sp)BlT(nPepDhV2Xxxwi)pb!me$TI?jqtJx<=c8EY2 z{Ct0%KrlMzuN4!tXALJ1_>=UbE(G$>JOoPDJOquh0F`La*c~A)&(A`1#u?z){B}G@ zYo|H7Va2o1F!vpLkc~mFJV*zP2g%FVhafloiAry9AcjJ=Bf3!s!sFY@`6imZ3&#O^ zr+EON+3~x)wi(kE2{^hK=Aw(g%7q+rSKVf8Hsy z602mySQ+%gkXAb!jP_%%DZkD{&Ue)GWc`*`+ zufKgai%`X2U#=Cih8GkX>=DN8#0arP2 zV0*u5)299Vf8Mm|^qDiaZr^_S@+H(KmPP38?DGrqiqf*wg|x=Evh8NFh(mH>au_~6 zYF7rMD&A_spWdqCI;OK3FjI(qJth5^9c>4Di~IHSg^0q#+pC`svm-CkHzGccZHwOt zk!)9=qC~T(ChI_HPf)|CorSQ{(G&Sh<>G2^I~-tR%}vM zs#dWJSay{xq9%$sF&HbhEq9~(oNdB?0_Uc*$xdX3ut3PjKt)ssct^_vu(>kd6GtiJ zp17;nL_*jdJ;7L`qdeG&}3&QRTIm^LKTS^EcDSE|1up(=7}(lNEQ1-GRgS;0c$*y7-XG;p}CYu(_PuPnc{D ztAfqtu@>K8cj7R{T5x%c;g(|7`o~-WBi=He6~W>}6U|l@~2fevs)*cbW?E66Q;ya)e?g7otpMoIt z2IO5k55c5!5cr;fVCPi`KDY!dJS+vQMTekTr;CvF<4Fh}Z{;?!M*j-2$Q=*}w?oi( z4+IH&A?S0A>%%%71FC-UCe6U1+lKXA?UpUf>XZ$VJk~VR>W%PZ@Q%`OSA?! zoLdiN<{Kfnz7KS?if_su!%FD_>4{w-=t4kH;0i%7?(M-E2o`<~LHR@o_W#CJu;LFv zjB^}<6-R(Yon;-XWgAe=;ekw7Eg+xl1i|qapn#{3ASPmR1)Qrdne4?Z(u6But$ql# zMfad&X$u4=??CYR0|_7oPq#lh*xGy*B0S8U7F8eL z_qe&;l;1PGPFsF+_*U7#bDadXR-f;jcd#swm^LW=@Z+lBh0)nZD|V)@{ZM@zuQJ5q zRK4}CP?@YwQWvXe?|aJ1Ze2&>R5Ecl0oFT1Dch0VsQqs>lq?!y}<#l11i7INxlUcp&Nw`}g+xi`FfZ29ThA5gzcwk!KD zND~Fy?4LbvCVzfkx%M%nH|I2ba~8^7NB6w+Go<6|<&PFZ+O)jn@v-sO$Zt2^O_61G#ZcRcyO!jC0s37SqG?fN9X7K+9>x>=R;E0?*>qF7}xki(%qXjW$)|d?qjSwb%jw zJT3N!>vpWL+#zG1YKgaTjs!lss`wblWtF2GhLUQ6R;ZK{Wab%1+f8}-yCOI+Y zqg5-O4j^l;^nJkpz6<$mq%djCCron5`iR9ZznS27!h3rk+htwJZ2sz|)J}W->i3om zy|H7PU-HvruO(Cal5@qD$66k()bd;7*SIz7!w*(>Ap1u2QkB1W=r{4)ccEu_Z1RbZ z`L~uW*ZoFK_-JL$El1LC`>v6__Dadg=c)z{N(m)1>lQJW-TKP!+kvIrg*U$Pn?{8= zeX(tVAGcs-!kjbaq~GsfEn}ZMgfiL;|lrI4l)=J!9`8DiY2XP9fm@DzfrW1Op;+$_Xk zcwB$y@FDE@{1r4x*vi)I za2|083Q&**tYIAWb0M3@a&v*z$Z83f=jL!sNd3Nl|9-bEM-h|5lLsC~9!w_ZJxC$W z?!Ica4~KF65R+$7*>i&mT7D7%i<$2(Hb-7LhE!8CXUv#kPn_25MZe#6f-PnpS-}-& z<{y2m*;dCrS&THtGac*D*W2&Ae)M|{TfjM9`VCVI2hM$-vMV{Oi!ZWy9LG^-*wZ;~ zOU|<6IkoPMYz`;=_-VEyN74Hk#B1h0g;4DNge~S+M4f^9`7fWt+G%OYUe9q|ZvlmZ z1?GTwD&wJFA%3t3I|A(B+@0pgc4SR-fT|x4fV9uH9&7>2ITxb5Eo>meGc|)&Ef`(Y z4TA+ce-aBSGyq0%YclCU;SKhjE)P&oG3Vr`_t|xp490#&PSn(fnt69OLxcMx{25zNT z+Tur@udrP{K4~=?+4FIA!hAFW4m6Z~R-+KecYjca@aK?Utm_f3Ci}F``1c4fP=@a= zH6`F^M+)hD^Xs`#O&?nPgYOZy*X_7f(7)@CSD!M+ZST1<^WO8yZQYxz<-Nbz|NURw zufi4`S#lq5XNj|)GG9#?ZkDE%Q3p{iP7~KjIK{Ef?zF1*9{n5@5bm_fYJ4HjIQMv zT{mKM9fHwy6GqoH7+w2dbp0z>E~`IA*HS>&-(z%bh0*nLjIJMHbUg~AYXwHv&KO<0 zVRRjj?Y}oh*QYSL{sN4aRe;g;ZH%tFV07IJqwA9xU0VXW4#(&^4bZhaM%QOCy6(6U zf@>IEuhgLHF66Rxm$!un8IFLV^Aac$?t*!X%ghx=;F6saeTtby8t|ni4kR zWvv){O-Ewt*GyvhcqYsPLvh>bgwc5*<$LR{EZ7gXhK^Fow4(A{M5)V@BKpF$Bo$T* zU^_Ry`1Ru*YlE5sT-9!f7R515x7spIyL&QBP5U$Km_+R~=$Y=y4@;G&J6LF7+MQq$ zHB(_)PhsNiu~EWCg}33`>4fJx;!TtoOJGb`ruaV#*YJ&=a9j{B3H*+3`(DGeo6aPz z*D{IqwHgUE)Qn2BmB}xY^933ZIGojy!DTYcxgA&x3rjqa@K%+>u!jG9_-_yYyTJdh zmYf0Iu;B1?r(d`a!}-JX6fWTCiUb_&D)EEI*)SIh#17gqU@O~Rr<7n{Nh`4>BwgdWJq||fS7F!Xb`wR1Gvt7NeqAlLH+$u3Lt`$IYjzyzxF_Qb`C0bFTh6~;B-JVY+aCYaNX}osu0w#|c`~7= zt4p!#Mbd9U)?wkFjpV60l8s$=H=Oc|k%50H(F zL0!7U?Skx1WribX;|@x{>sF|Jl4LollSr!+JWN%{W1RUcRfQ-0B)xA2?;z zx({xWX95pAdHJY`yc&!Ge;V0D*3NS+dGGrsvg3%Tr<0^N$sV^R%&Z#RM4HB4E1LMU zi4-RE+A=JziDZ3a)2mx~6FDz+@biXoP2?HQrG;O1y+$Smjkt1U`weoHF!?~xr5j{j zQn1s++c!v!wKH1COW~6~>YZ?tJb3GqjFAyQ-^YBX>&qrGHqL6o{U2|VQzMsW9R2tj z*~jEohE?`8a;52_mgIX)q{^f4^sPfpWK2TjXJ^LTAdep$ow|$xM+0Yyj}I-qLAn($ zjwNnhCv6;Gmh!htNq0Qf#UmzRLOc}R*&3V#e>w_5% zwdYBj@T8#Hk>|;2-j>Yh#N7qRKM>kw}b{XbEj{aiw58#;o^^lz@E92We>3Oyk{DSU&2>wE& z&MSCz(sSL4SpEZ(nEWmF4Tf^IBfU}IP-ntl3dv}nf%{bo!+$1NmW5R?jtO_3oK&t3MR3)9uJP8NbLw}ba4Lh2eispCV&qxX%k^o!u$0PXnWV{ zb;nex)SjBtAJ5jTVG>nqUYVFarFQ5W>xKO@J1>U7tWcSmrb_1bX7DY9Ca*3Y3mI~} zai%kMS4DN1EU2lyT*=^fgaq7_q9d)>9_Xa*8zx*1#>91>0<~my->GzDu>ks?S@iSi zj|aLO4K>|CE6s{N#iHZ*V1bUk+A+~0w84+GAR6eUBaMZ41$T70m?4G%ueyaNKAn2a z+_VC~kTKxqw!MS>Gyebio89mF-tarP#QT=-`4R8pDiHAwZVSAVPx)`>>oxI)zuppW z?px+N`C#<+&-mZ=VeY#+{371z5yKnb_7iVptNsqQyu`af&+ifp5^o==+1*>r1Smmp z?7paF+AaqQx4=o%&8z*oss6` z=IDffNd_6};@n(FX|AP0Qml#RhE8y2MwypN%TTP!FK;c#OUX`C7Gc>`X+_DY$wkRfeWVlo9w{$1OPzt+ z#taGW>*F+9Png%GfbP-1k;7D8ljXINe%%PsJ)O5^>#5wJg9VyFX`@a}UK=Lqxp~P(+2|GgXGZk>Xv2Q#ZcgKp zx4{%F-q2$3ZEd>X4YSnkc?@~#m$YHpI#mB)1a-SALx%b#Z5aM{6VbHg)LhLl#tm+4;OsW6^yc2{mZMkjpC%>jn+=(1 zGv6>P&)($Ic^j6u5#Zm9CjP`@$WoWvhS9&AlC58Cj54G@=HQ|? z%N34D4Ebp@+i?0fiqsyw8nV(WXv0o#2K=L`p?{`m$hU3j8)gp2NrpVNnQwrV2JaQ5 zB^TutXkb`wR!Tu~K{-ZY`Q^z41$m{Idr4YBVOE|R=SsZX#qLrkfJQp|(JPd?OWh?- zqZtAVDk^GlTz~q6QLx`tgr5pZ0Y7(-)c)js5Jh=X{uxv-MP*O{8Jb*w|JTY(aVGq- zURxePK^vTgM?*zZ9!gQA6tsgkCK8Z`X+fHz!V00jf4@*wZgHY4)-zrjQIZl8Ta=;> z&l;TNpORJ{R*)R$l@pqsml3KC%S=(F=7wgKW`wG|GE?GWaGm1RxUxcd0F|Zmh{{x| zBO#9})uv070ioWZfkCC=0nkPrAx?~^aCupJthg{VAS^pYS|&+}^Ge4&65}&-mE}-B zxCrDiq@SqD%}tR-rKhNZixQ)up3F;93o61U&hK)#!ixAWOk>sYzf`LXBF^oDe6;)#)--qD^Ccs{KK? zpg)PPI4#~^TPLcm& zU5fjn$2THZ86S)LrqxSgeqvl%PP|m7Zx}~uY*}htY;l5==BtzigKY#?=<6;u&MQAb z>YD?)#{B~ukd&sy>dCJ1%}Q3qW~T;d%8L>~uRueb&^m5qzVZNUcdz&+NHa5)t@dRo z*K6g*^aVDY8knFRPq2+P;|(?@j>!IRbQPwhWhe`_bB&D~(M_VNtRx{mQW`<`-H2`y z3sa=Q|Ftaf9ueY%`2QqRRJv{s{{N6I#e>r5#9*Gnd`J!Utw@#mmTKmj(Xnfz6GK~9 zrO13UUybkU^A+YsVM+-2l#tivVG+#n{1jQPIE}Vb!+EHZz#PggOpcH8YG1ZDZCC!~ zDIQU|V2>5F?=fOuAz>w{vev(R6f{MG5Gybz$t){qCDHxoh%pd zgBSRJ|J(!#p1T?E%+6Bce04s+sDIV@5o3CL8$Jxn13JqG|Jkm8HK=3E=H7;{P)l2=skwN z7sLT;C^fzyRfTn$TauE6_cNho5z(Hd^6bzoyhqU6e@GMK!zz;Fe2X^Fn(wlfx|12P~T zSc>pbY6SoK|2a@MjU>QQ<&_)0moezbYHzBNF*QDkxC1$@SL=p^4%; z6qbSV$NHyyODaSn+%h8|T%-_*Z~;{YA-xHv_rIE?rV-WFo-{=P1->-DP^wg+r572l zVJg#9&_p#cf6OZ+3^0)wth-_=w!}lrKd`Wsf2O8t3=TE>Z%tC8Gb$w~*q;ij(DI|` zp802M(?P*>e>JMm>u(OG1UMJ4zcd&2*SP;ws3=_|ic)C#N5&YJ7uS?YO29wk7!{7| zOLhE%V9?-!(Sk>hpC=^!bCTnNiviD6C}pwP7z3t3u2jblA6LMWuS;w76{5*~rLO=S zh5a@7>0&H<8cqlH>!Z`Rq)mI8>Z|q|tB@t;C^P(3q7u#c22hGt`L*_;u^&D8vFiP4 z?1M>#$y@miQuL$8D>78W#6VuCPK89YVm+Ls!UDDJwc{0Bp-t-gUzW_EN?#@e;vQJ_JR5PYp{A^TyC*aQyh?i`FZ@E{(^nz z_D5I-Drg8CQsMchv(G?nS~nlt_7``>zjgmd4NB4052X|^CpGi4J^R31@O;wldvTJA zc-21Te`v2c>Vo!~c2oj3Wf=u;`U$1ft?lXY);OKkqc@NmcSRPF2R>At2o?eDVWMP! zHnAhqKAM>v7y~`g_?LFZPg7s-Tt(Bk4D$zn1a=MO37D>CB7(8%j9h0A8l~#l15HmA z?4dok(a?i(UDJUKfn$Dh0sIBaqQ1)f{Jxq)N=NM>q$7Pi30Lpn3gf@|#~^yapSys+ Hc>DhV01uZQ literal 21420 zcmdUX2V7Iv`*#8fBrJgv8Wa^NT%sZw48bb*Tvh-B29Q}5*((Vcwxe~mMWs}&qqVKu zYEfIN*4o;itJS)yxK~viRh&?3-Q)k9dlLvG{NDCo^?l#_k#p~v&vTyhJkR+)=iaEK z;-vzPV55cLCR-t}5wILttWV8bLlKL`l1XH~Qus?BM){@>FNn-Zr^2GB7%Dw7N0E^( zlSnjq>ERW{5^1_jsxHu^M-1?jq!$cJ&&Vyy(WXnJx5HSrVJsPai8`&oWl_OskF zb8?HdIZ*#BoW%}j$(Wn3fF(#1jPe$2U<>R876J>`0G5c-vEcVlS~+|3g3A#mzZNl@cy5fpNlsr083!Tg2WzfEEeSJxPmR_ z3PCSgA$~Elw}N7d%eI$?Q}NaUJ9|nIM-6r9<}^G>p@_F}=;ah1E{_RU_OurcSB57k z;%yz=++wKkxbOrj-p;|nm6B6(Wq3@Iiz6pa&Su+$E8`s;JR}Je6wBp`BuYM1;2=QI zj7pFzlyYSf6(?5=brHBkQu45bu!Q6=DvXLqND2>ACdCVSD#PRA;v!NN3WYpYp-51M z#U>@ich?MJafbGA>7`7P%j4pd331ABg_2Ur6_g@uXit~kJc?5G5eE*Zl*5Np6#Yk0 zaWHSKprWJZIZ*gAWKYl>_?p-aw%DolhOOUrM0<{^J%49thC;zFQZVhqzaq=wEyB-awn!i+V@wc1IIP|4}~_h>(i^aB<#8BGkHa*U*7`L@3(E ztpEGYeNp`%hhs8ZMX2!HnCW$4A~ZfjmMF3EL{Gki3U^Ob(e#HkH3uTh3){m_8oo}-K=u3A$BKz9T$vz7!PP0N{MC@RI4vi%u+uIAy2Y}ew$3$n#PME4-D_at#*B5rZdWE= zZ)oCo51ReBFMPb$E+-_Be|Y|klK?r*t3s#Xe%}I%Re&Upr?o!63y{ow?jJ4d1?Ur7 z*`D2}1ZdE;373k%m|HH)z7)$BpalnFE#z=djIl58z1Rtrq+1?vaqvRkIW8Ad26~~I ztdYM({6rwUl%C%bsLI+y^mZ+QzS(+n;a~-UB5tfIoV-eaMz~RbWVyPd{1Jinv;Eys zYTT3p;V5^sG<@TqPp7&g*CVgZt7>vbm+IHGOkeDdp1yzg%n3?_ZpLlu^=^>}b&osU zWBd{kD*IOOc;9Ui`V4GPyIX{=MS?r)DME2Y3o1Xm=z)j??y}inq(gUG4*68zi4w5( zOFdD{HPc*&m@}1sVPc-L!(8j@@=$*%dN)AMNqUMx153nY>pPCPuG=FHD#sMw}$O z0qaJJ(e3fz-O|M9Tky%*^*)T#`N0Pb{!je8KCxm1rcruAD@LDeo(JyLhcSlVebDC< zi5>6#B1PNY!h)6}OT5hErRcrD<2%(;r6@2jte|+C6k#8HcfS->Un8VES3gu3^JiUx zlOH3u?tY9M9sSUoCE$ka{Lq*+GtT6*B#2c!iL>*d7&!z0CrM&-Z~obj-VYa}sSAIt zKPV8R)_3;|-`*@n*!NH*(1q-(Fe!4u6O&1i77PBo1O@#1X6&Ku5|n&XyxFTo!qA&c zAkT8M1-wrQgu5Rk2!yjeo_V4y>^)9;nl^I%B!oQuGGVJRE`1e7-Xv)^a-OyfNF3%}F>W)_5-0c29{00g-^i{~-7Ize|Iym{QSt4ZF>Kv*3QG|Ld9MyLp z?BC|tmjw4k6C<}y=qc=r4%EZOq!KavCYbbL);TkNv?o#woJNh#@kF)X7x5oX^hB_0 z!q)1E;=LA6YuN6|^sDt`#sL>V;tgBjh}20FzS;3C1PH5C{xE_n21~`?-D+LY{t^u+X5Je#z1dx4WeU`B&>o-4)?_yVlsEa=_==`a;6mXN1^ z!c_Fn?WIs+Ez$=d8`v!3=g{`(gQqUeEKn7cRWtM0!gk)pCcX+Go>4Fl|Rq6c9Jz@M8~qIF6XZB9-y!k%=)>EFW_53cqx&t(!GGCP;%QnM*gDP(^5L&u$P+ z4-N6-_hLu#y^?T{)6}l2rqYOgV z5^Zr=PJyPBk?&-#^F4E-K~B_#0g{0OgVr6XHE{=ac5dE(OsLjfL2mv`N`LV$w^`@Uw($f+7^_AQv9764LXw_}xLaauDAq;x zP+Ak&u8bbqI+>6#jQ)08!bqBf8q@eA;(Yy!o?Q-Mhm3xt2WEn~(TB&ZYa1YhU8EHBr(2_ICwyE;WR&x6Lc1yW}S!R+673WZa}j0 zDkR-5LGtEhNK*H5zvi*NJ_VHeABX(IZG0hTz#e$=+yP14PmoOb8IqNU`4X0|?L^Lu zG-!J!5sG5h^H*{DeGgA|yCAu~50c#7Fv}jcJkI$|(9HS=$hFu?IHh|bC*A-JPX7ny z7~y5koyJ)wfP7|mNa}h((w%@rD}vrX58z46<_mdOpR>8EIHVa&YW;n_{b2E3IBSJ#bt`iR zg)@$i56xr_u7tdY+vwIT?9+`)p{_%GjNIIW_Er`@v1cj0g|@$#JI+#w1pBxj9a07M zJZS0U_(7!S5&qoxwJ8HmSUZ38Yrg*(+xbUsRSh}cP3UCtZfNGAsxV^i(A*;rr$($u z=tol~j*m3hh)T0EH0hdhHRCAP*gEg#6Z768>W0HcWXxU{Oq*2X2CiTuS0PBEH=>hp zTE}JIjbpcxb#Jbi^4Ieg@|ty@t54yc_33cG4tT zR6UFT;5IqUW_8sYf3}eRBZrlZe9%HRTrHZnZ&wSsbffJuH^|Gztah{b;Rd;3&EsD} z?zWHzRLRX#a#~2xx20^ss*dL|jE=RELY}@GUOMAaD2Z!T>R^h63v#pJD20}@MLXPC z`wsNz-ihkoLNk_|)ixNfk!1;xq5*5Zyf15F_0lgf z-e3_6^V%^Rw=@f0v@i;G$I|(kC2=pCSe)L5bp8gHCKarSBe>2{2o$t}HJIHMv*#Ws znyq~tf(4g;%g!!n4!-a^E2VVUrr5YPkL`@y;axm zTJX#(1MUgF=uT!l&hW3FGk|fY-YkH9Nn_^czpI;lic5lVV8$WFi zE)=Y9&hEBXH|~buMH1WNtq;BlZrWDx)|=}s$wz_KpWC)x53W1EB>Ag1uZ+xJypp}<)(63}&rcmPG&73)d`KmK#f=Yw=TedG)3?_K z^R?fkE<9^Z2LJlO8t(bS!LRSw`TLPAe&i~4=Dge*HQAIla^uY>-AUs6vtt8F!%VA= zzOj&}7bjL7<5Cl-inPr+hwjw;!;J4djUwrFC`q~oe7J6WzX z6Okor`uqttJdwprtU4E7V@=HeARB{I7BQ!!U3JWJv$c%kEVXoAXGz>t6BqMAag4d? zf=APe!xwyPsStd|Y_Y8z8t!|05fq!p(>(R$@;Ek!VJ~>V1D9`C4}RViwlUs2u^P6> zX5!HXH8f)d?SdAQNG?3?9>C{GSl17uT)4j2eiHez*&CN*KMNyy*Pr7mdATRf!1FwxCU&3=i`D-z zWF$V1fT@{_V3Sw&d&0Hnl{|k6ydA?Bz}A+#i4z$N3{Fh3fhys0YiKLNTcz6tz8f!a zdvC6gcW15}jMI3P19cuMm{PMD^swK`trF zs*gSI)VqIi8+i-6-{UUkY`P7Nql=)$xl~A=s32*b14(xP6}((Ok~^9gay=M`aWn{s zZI}tfj;BH0#`D3zNaR*2looD=9#*TM^tQK*D-@OWmQnDE%6H1xc&g8x3 z-RxPztKD%GdL4h<3~6Z5749b9a{trTI_$mw@N8POzQJ-@#Cm_21jui};aF1+{i(m!ex{pRl{ zFc&Q1z_@nh<`-znax|rFQQ(g@&X4#+T{UrZjLtW9SYfyiTB9j3{uEZ#E5{)9c82waZ6d8k26h@JAFp3mn6uBFt$VQAJ6&OX@V-(qhQDh)Skux!hY{n=u5~IkUF^aUq zC~^=+ktsBa{0gJUei%hQz$nriqeu^oB9$0Lj>jnS3ydOV0N|!$6nPq>$QX!Xwpkd zal*uKnEwb(99fvpWwvd zCgyV4gDV{^Eo*ATm{u1Ym=p_m z%+po|arfPoW|1*HLKC+)lG8u8mW_MTNY1J6n=O0XNM5-Wv7haGgxqcR`eXj5hsgaQ ziv}&w9wN_rIYiseK17;_q$gT993huDKe_Sxokmjp{O!WTKO4z+`m1vtmo<_Vn&0fd z&$~!=3x6=T;{A){dxO_$-<)-soWA@!*TbF{Nr&fuKJ&J`NX`m<{c7on3#4bxhRHJu z4v|@2JIs`pXUV!#_HoJm&XOB`I%>5i?<_fZU~mtc9Zlq=Jr~xE7;uhko~qpV`_8kZ zV#Ej8UuIq+&-ebY_VA=jJd8245h(hYtJgO0Ns#RkLfH zg?F1spXy1jDbJz(ffT31Q}&Y{IqP<~u52QcW}Q%9s%auKm$Q4=hwdYrSVOyaPuWk_ zl$o=l9v&cV=Y{%yGkYJodF`O3=eF-76{jD^Q<^4nb=ooInFkli#!-FT&gGpXecyPp z^rP2LlD%)$E|@y3nY_WelRV42ykbU=YovM7wX(X$&E(mz1CO3R zXeK%H9sBmIZYGyz4}01;rI}nC_R+eHZ{8%QRqvWvZFi0IOzpdMRB|&pZ>gx_wJ(~< zL$}_^8y9<%ObZ`#<;sp5s(0Zrnc{BNq`O4OeyUnE9xAe@d!_8!Wvs-y~h1bZcv*jm7R^A{J zQ{&z{JLv{_;?RWbHLMoWyL?p=aq~Lq==Qu)u>CsuJ-70^E$kcQ{WHZ|pWeGbuDlu5 z^11azvgzzcQ+8~)K>BXG_lfJg3#4OAdicC?7ch)BK5@`9E9B|LkFCMD6iPa57XJ|S z7`%}eIa$3MV)De{hecq(`d9M!2wzjIZ9j4N(Uy4h8Jl=E3mdU9gI%y~(ugOT32NZ* z+l_W^6WOirG{sEJ`?~*l^p(>GZewpwK=;?JNvLt0h>p3t7lc$yL4lIXam#b3ps(%o zKPp`~6=_wGK_59!MI(PA7bV`AjD8(_x9P7tQ_=C>*Z19iJ_Dts-#!|Q2)nY z?$c3J*-vi{{G=8g&wJXvwrU1qjqYYgy*(W*^j`V)Ck-_SmZsx3T|%C|i;$;h4$siI z6eRKY4kf>=uV1iv^Je{LT+g0AhpxoZ6$Uc5DbeO+m1|3jwdv}yjQ6bLI4|H34a?X> z%@QnlV+Oil%cS7{*KbcIwlA;2JC%trgN|=cwyiRlj(Jv=?ueCnb5g&OO+5OdU40Bk z>SBCTvv7Ih?9UQflxF4Us51opSOObQv-S@sJy{AIC^CimPvi839B8Tgq>Lr7f((3k z!aTISoMy867g+FdBOYA;9!NK={RoNxWe>bJDCYp-0mn<}sw0a~I|r2hz+_ zd(^?O&y^Bdc=?CYFZ=BB)f^uY zuQIlO;So3SLgM6C5OWf*1{J?bAV|D?l;%8h0UKZh!E>&e#&%-CX_3&PBtw&(Q7T*@ z80TJ~$pSvp}PR4@ak)1&9;#_c#vZb6PJoBn}K`_K$u>1EX=n4UX)QIS=g&9G3e zE~##-D9$X*$tuIFsdLIQvNOsup!qm=_~}J)c7Y}j_e~rgF(A-=f&nufxq#U*Kkvb# zbl{~!&A;LZ3v7ZnFs7TiyB-7^(=@Dku{4>VZ5UH*FYiPT4%q|zaC>;kZ6NcuVgr#I zO;AfON+)sBWu1uRs){pAk};_G?}V5SSdHhU|Fjj?ybPkC@WvX0Kc{6XUNA|0xMNJy zu%;8ywx#-Cte`%iGA3wP(~00eGgKq3G5B)Im~5xY7fYG`_Q;rKTk(sfrjIs_2^!XP zqWNzYlm3pt6gBv~02g;6Wng<9HOHXhzZoTj%*MoYg)f*CNQHS5XiTs})e9!u@wJUH zZR7e*4E&qb#4lHjN$N{G5&D-V+4i=?Bthmy3a)ywRN-{Qn4Ye%6QzG6NZom=F)4$J zPUH+M;D2QrhR2DGj8;QsyJ`@pYfT653Gl@xzh_F^7x7{UR~G#?rsx zjl;j=g=;89`B5QxR0KukQK5NsSx5-{GMIj%>nrhNcw2oe?t}Ai(XHtED2l42pdZ}v zAfd&0EXY$-bTPCK85pHjm8Z#*{8D7G6`7GqWtp0of?)+AnK{+b+KgoXqNu{+yeLg{ zex^EG6;)7~7p3;k&rD9lZOXHgt4bB2R6&++e14WD4$7!XUA`hKG%6q}tVo|%G5Ls~ zlmtA6j+QE@BvT!)%8@6H#dMN=OA9j95oKu!&{ys+&6HP`MukSJGm?`^(;|Y(Q&~;Ior1Fg9*oriHVgcp_+LmQSs&caJn9|ZyZyfoFqw2j?d4QhvPYcJd(;&WkE$ScMUyvRZ?Pnfy6ILBkjn0yPVqi!g9)% z>iDE(l~$6LTve1J)5{EGrH=5`=V@8VBS8KUV^e(N{j-2)kYPb~vVRHY0rR{|icmZk1G=)Ls_bMuH%2aLYLzNe9-rGL z@6hP6X(a$G93)V^!Tc}%4)X*@X&;meJ_Y<$yB(^fVB@ONjFfnP zMh}hcFf$o!D7mU6JF=)t-d?y~MMhU-%acGKW4h9>$}Dw|BrRE0o>fiD)RdmR3>(B_ z(zH`#sv;e2Bfm@g)|8OyOyBt2jO6&Sv}|9z5AI@3vsIaD*eAO1t7)>N^0X92uBPl)vRqfo4#(&WD>=!`Q-1 z-)P2vn_4?{CwnvYr=9%V-{MJ~78%u*jN0l&i#oM2 zwRW9k@nU>_np_3A#vlBCh$>YYRFW2%*Olx5r$m&44vx)|ClzAb&0&18DPCW+o|OR( z=+Yj+zv}&nDPCTN55xTA;x*O#0TcVTQ5#b__cDA%W>G{)SH7#Q&ZIpW+P|@Wb=af+ z&fbXkWSM_Omby!yRhAcByrq=Q0_>S1Z zF84=uTb(JLe<8j?7e7^}B}V|Riqgd_RVAqqdt_;f==cHR9e4(;!C(kk8pt#wIjEeD zX9~lmibAS9rXU((1c*o4@N}9gOA}j?Df6Sph$--^Rusk*S8SjDoX4Z-;#K+ZK(ROwa^YcgkQ+iXK|kU)6y|{nDNOZ^p@J&Lip98R zUTBP1DHh`jstQsj2xj76I!kw=vLd4)9!#pvf_*bZCJmsgLbX8*y(p?usT(gYh9)Yf z8_~Ew~Km`9U_s zc)x*CnVud#u7ImVpV!GNk}hqRS15jz8|<$*A2zVRUf$A9{h3kQ`_oz>Pb1!aqR%PL(3@?QC{lj(qhlIq_qh{gMKtB{XkM|?c(F*X(fCXZx zY|QUKJ$*gB&iaArhqT8NbaiMRrsw;2<_r3vkD)LRRQPZ>q{8;6*UvCrUT=?`=ZmKj zg7*@LO@k__p_#h&QIryFlD5yT^aE4D_5`tGYD%;OXQ|ls{+a)W{`657^r!n#saTce z6x^9Biqf?8XV#l`I&DX5z%`zVJhm8UYtld?pg$N&9!L{AGTlX6a#$kFg!V68tRLMz ztNrYS?p%fGgFgbjhWb=IE^UaQta>Ha>jN!R1ASn|Yd=Gx$Afx(*C8ySTSHHs`!SU_@ diff --git a/tests/integration/reference_data/reference_diann_1.9.0_tsv.parquet b/tests/integration/reference_data/reference_diann_1.9.0_tsv.parquet index 7a452d77b07441de1b5863dd37425fa3a13a29af..b48e669a99969d0f1f67997eb153b3236f4b9c0d 100644 GIT binary patch delta 4315 zcmbVP3s4hh9^cK2O-KSE*-e5G0vA6w5i5l76tcU>qiTcz4tlgc5U}TzhY#$iQw5*r z-P9}aspnX#R zk;mn7&YS>b2#QQg&CAT2nVy!KojqM8RoggjR$7`fH#Z|QVc**kOvch6gKv}5`LaIvu8pWVpXo!8vIsBe7!Zw`7txzmTlv;5-91}921>#y# zVWpaJ>LLzK5}@A?`U@gpmNrO3ipUlLc}d_PEv^b3zjCF98 z$f7!EpY_V&c|L){k43gA&k3-B^ae;Qt(dp0qF_*%_toGL3>BxaX9VPVR7=`b%RP9{ z?z{J<{eF|q!YbM)R_{SiRcz9!ocm^0-OGKwu+PjkmD;L4n{Q!n{Ab${`4S7P5u3@s zKCWK#YhpCJuQm~%Q9p_OZ)j`zJovSEg1-2p`7?9A@CkkZW;z%qQNs|4KFym5w!tVd z-!g8!xZtZ~VqcxYRq5I>-bDOdiIgvPmAh6%?Gk}DJ-A01Jl?dhLKmcSBjkb)La>Em zyr%$uDG3kw+4RbsOlc?cNrgRWdUYr4l$h03t1Eu~XGaeseoelxhsnXS&Syi~&r)fA z_3g$!x`%lRA_g7dc|%7mbX~U@3ZsXyq7=OO4U43Hqc=aIw0yy$l0_9O-FPyKsCp4- z3I&%8&)E{*@te#J0GOXKOvl$wYII{5w_@Pf?|jB{@zU~&;zec4T-*GCS`0eF!MSjV z2ac^=L6&%^C&?s1PMp!PF>&#@Op_HC6B%d2Wr5iVv5pwA20b&goeGW3X^n~JS7s-q zWM@PU;S-FOWGC=zbP~4ih z&M62{O^uYs9wNhKbpH(*inF9f(L0=!fF05>iJL^AQf5o_X*mz+rn>dD`3(sjA+O3& z?ilzq!a$0iP-S63*}SslrLJDw5F)k&RTIDwGh8F%R6~ibMMei~kpFo3=G8mQJ7d04 zZ(q>`AIQww-mderOCQ}~#AjAW6c4+Z{BIu?h9!2xIDd2HoAkeGzxbwW7#Q!Lw4v_7 z<8DSrczp6O9%@BV`{XnnAvRdOFcvQ>EMMy8ex4JpB5*$vJer6wJk;~>`|%xCq2BC3 zGwkV2&mW;hPBZG(;PshADP!KYC4V1Pz+}$4+j~oR-L5`uCA8qNy-s_!?3Xj6B{N$sG^b zc)Mo#VKy4a=>>;ao`ageH5QL2u>xAh$EBuwv(5fc&p{(6qbYS)`XxQ80u=+4d8BxcF5#PU6Xs$HM#Gi z0sS1^yV2yHMT1(+Pi{XlTtfFTWGLb%cPBr&7nDyXw@+yc^ND-7$!+JLeT)GK6ZfEg z#DX?Ed_5pPX+$QfPxUg9sIH!dDq4B3P@EoH`7KjnaUO7NhIJ}S)K>qN zmd|qAnZ#L*NB`1)g*m^&S=X(&%DDIMVJr<+|7kr}yh8}PRf8JfrI}L?zFY6sT@9JA z%D^i_)BrR|_Z{H$G&nbitDH|3dCslKlZP~0jH^0Vvk3_&@5ZNBv3JjvEJ=*p$evh- z&j|PXGh0xep;Mmt6FWEPY|Gg>>v;({NCWE#&q^KP@x>!?s1+sSI?FUJRX<1&@5q4$ zb?`(hwM#&~F95wx(3^!YKGcVgXJ{E4GQ4iwrlys(f4^`uDd7!Phj#=g*TWNP3$>uN zsBh}QRczP0orTTcuVfGI%s>9-Kj5!wiz;kuY{9iPE4}&_zNE2nJ^=UD*Q;x{&-P)h z!a_Gv-NLp#5B!vil-Am@YPe>s`REl*5l&e327Ny{_zQ-Od-0EfF#OrlUmf~<)yUsG~E?XBGI0qpG+d36V;pF9I$n>i<*Tj$8l$ delta 3877 zcmb_e4Nw%<9e?lkKK3~7IN)}Xq;7^T*<#__8ThBQM>qKrDkRMMfuvD5~GHbXMe!f8!(EXKZN@4S+S z*}J`W@BP1jzyJT;myT}SS)xv15YO-}Wm)!UK>+~ZA*I>g{YFJ^`u1)GYExL5NJ}nI z8!cwKq3XrTx~g?GjDqFa-&5@FmiBqG-+;_!7eg~327of7jz|!-(CG4rWd?0j&H|UX z!nH!I5Z&d`0BDoBl_e$aii*;urOU-=?18b(R>ns3Ge$d}sra5WT9kGD=?r)YWUN8h#>+?t$z@i0*1oC;p?^X=0Sjeg$(# z;bKz9La`Hici<}u4zm=;4z@+V^}#PPg{+LjvAqW}g~3yXw(PBVIhCPh05g-ZlL+7) zR8C>s6S8WR#zxOtIO~i32QEGH@duje0G%^fnFya5iiW1K6nsgsw<*-BN4*gkD#hqf z11gjp{8apK>_nDAd!HvFxMWB#zK2reOx|0k)hzVgFNq2|cb%gga> z^IEDgoV-bY(K?*PNCaz=zRtct3@gL`+}km z)~KL&5uUF}!7pgISk5?(|HJafVL+G zRrdOZ`lg!Ny2i2GuVvSxE*omKVfVE7W>S4_X0acJT&kK_=g!W{%`bovMo(d$D_0+p zr}h*|XUuB(DmfYgwTsgO2I-!!tCei3tW8@j8`THdUWmR* zLEBO!xQ&yCubNoAEZQP_NXs5xOT)<#u{xfW9NLqHZly{NttAc(LS`3q5hEpLSx-mg z0TomZ<*GDFuuo5eGkRLt(xNqk`{c(z$nz(%u~*dEz`w{#&t9z&c=dGgw!Op+vJUya ziQ62*KV%#%>i7fzOfDIBN{unvKL7C-T3q7mmN?r*z3{A3tMM5Z@~7Ta5y+uH6t7$L>xJ2zHr zp}WL^$&2mA%m5-)jn(xHGC|4g&@}_y%^;;mWTu9r3RlIOV&&)$1)}e*$2vXNymaNw zTSL@UN9-v(9fuw`+TzaE-P?c1F+Ar7TOwE6oP7)Cfn`^ocD|X`iQ?uy<7|I__|Sqz z=qzc>jqlz%><}pGXNy$Roa2%}%=6zc(|#Qi>c;IpNmP?a1SKnFUd@ZJ^>BO7Cjec} z4upYPv%10GrH)bP9XmRnHI){eX-GB@7P(8@_Hj6V%8=B|clKq85*2i#0ReIg!PNmF zo2-gCe-}RP>FIg)>t3x{S$t7N(^(f%RRuKjm_Xi#y9&wYzGfTw9DLcNq9c{pEh2RD zsc$BTQbmsHEMjOyNdFK4qM7!|ZWg3Sglr3BXY!Kxv>*@*ZEamsOYt4c7U4GIDw!bCWP&*33^0yzUA>QKB(}KcKz$C0EdsRZBb9?9 zUNYyGWIu;!crH3Hm)sMLBw&w%s3V5M*JHTMqLpVlUK_paJm2bW8`1yM znQ_dnTkO5;luhuC2MC^#$j=EW{MM5f1_lNIy0tKnYz^tib@dJFf{MSV6m^#n2@a`C z2+Vwcm#}R*Zu*UFr#E+2H@iuhf9j5phJbMeW_aJI5L!FDKl8G;wk|&sJXokZA!?DA^y;0rs*#_b0 zo>iTjPHzyBcjr|NY{r|J;KAz8Oit0{Q2E9ah#pm4EjKcmJOAQFTbJU~WHz%U#dQ3X zT@BNCi`6p9OgR<{#sAMximyQ1^za5xA!+Kt(2<719NGlMB;%EXLf>tqu}u$egh`k^ z@njM1U=d592`7mJ#Uwr4u>^(NH^Xe3jPN~lphOS)9=Y33-6jf6!nFZ+eOh#d1I97WqpXCwF4&~zFq$i-gfy$Ywt^Tny@(?Nv{*jte`13X_5c6? diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index 7f429c24..0b075581 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -46,8 +46,9 @@ F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636 Q9UH36 Q9UH36 SRRD 3296.49 3428.89 3428.89 3296.49 3428.89 3428.89 3428.89 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 3.99074e-05 1.96448e-05 0.000159821 0.000159821 0.000146135 0.000161212 0 1 3296.49 3428.89 3296.49 0.852479 19.9208 19.8731 19.9685 123.9 19.8266 128.292 0 0.960106 5308.05 1.96902 0.683134 0.362287 0.999997 1.23691 3.43242e-05 1212.01;2178.03;1390.01;1020.01;714.008;778.008; 1212.01;1351.73;887.591;432.92;216.728;732.751; 0.956668;0.757581;0.670497;0.592489;0.47072;0.855203; 30053 1.19708 1.19328 1.19453 1.19469 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642 Q9UH36 Q9UH36 SRRD 2365 2334.05 2334.05 2365 2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000184434 1.96448e-05 0.000596659 0.000596659 0.000146135 0.000604961 0 1 2365 2334.05 2365 0.922581 19.905 19.8573 19.9527 123.9 19.782 128.535 0 0.940191 4594.04 1.31068 0.758988 0 0.995505 0.28633 2.12584e-06 1209.02;1210.02;1414.02;1051.01;236.003;130.002; 1209.02;1109.89;732.154;735.384;0;46.0967; 0.919244;0.937624;0.436748;0.639369;0.296736;0.647924; 30029 1.195 1.19328 1.19381 1.19339 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648 Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 1.96448e-05 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 - F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 """ +# F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 +# # last line is made up to not pass extra FDR check TEST_DATA_SPECTRONAUT = """ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index 8126029b..c2db8ccc 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -1,8 +1,11 @@ +from unittest import skip + import pandas as pd from alphabase.psm_reader.dia_psm_reader import DiannReader +@skip def test_filter_fdr_columns_above_threshold(): """Test that PSMs are filtered based on additional FDR columns.""" reader = DiannReader() @@ -26,6 +29,7 @@ def test_filter_fdr_columns_above_threshold(): ) +@skip def test_filter_fdr_columns_above_threshold_missing_columns(): """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" reader = DiannReader() From b4c67dc08e0f1c7e234c91942a5fbb09a25d76d2 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:43:45 +0100 Subject: [PATCH 12/16] add new filtering logic --- alphabase/psm_reader/dia_psm_reader.py | 16 ++++++++++++++++ tests/unit/psm_reader/test_dia_psm_reader.py | 4 ---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index a505f6f5..a0b69858 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -97,6 +97,22 @@ def _filter_fdr(self) -> None: """Filter PSMs based on additional FDR columns and drop the temporary columns.""" super()._filter_fdr() + extra_fdr_columns = [] + + if self._filter_first_search_fdr: + extra_fdr_columns += [PsmDfCols.FDR2, PsmDfCols.FDR3] + + if self._filter_second_search_fdr: + extra_fdr_columns += [PsmDfCols.FDR4, PsmDfCols.FDR5] + + mask = np.ones(len(self._psm_df), dtype=bool) + for col in extra_fdr_columns: + if col in self._psm_df.columns: + mask &= self._psm_df[col] <= self._fdr_threshold + + if not all(mask): + self._psm_df = self._psm_df[mask] + self._psm_df = self._psm_df.drop( columns=[PsmDfCols.FDR2, PsmDfCols.FDR3, PsmDfCols.FDR4, PsmDfCols.FDR5], errors="ignore", diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index c2db8ccc..8126029b 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -1,11 +1,8 @@ -from unittest import skip - import pandas as pd from alphabase.psm_reader.dia_psm_reader import DiannReader -@skip def test_filter_fdr_columns_above_threshold(): """Test that PSMs are filtered based on additional FDR columns.""" reader = DiannReader() @@ -29,7 +26,6 @@ def test_filter_fdr_columns_above_threshold(): ) -@skip def test_filter_fdr_columns_above_threshold_missing_columns(): """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" reader = DiannReader() From b7441e4096715c712d5e040719c8094e61e93dfe Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:58:51 +0100 Subject: [PATCH 13/16] add method to steer reader behaviour --- alphabase/anndata/anndata_factory.py | 21 +++++++++++-- tests/unit/anndata/test_anndata_factory.py | 35 ++++++++++++++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/alphabase/anndata/anndata_factory.py b/alphabase/anndata/anndata_factory.py index efc95337..533a63c5 100644 --- a/alphabase/anndata/anndata_factory.py +++ b/alphabase/anndata/anndata_factory.py @@ -1,7 +1,8 @@ """Factory class to convert PSM DataFrames to AnnData format.""" import warnings -from typing import List, Optional, Union +from collections import defaultdict +from typing import Any, Dict, List, Optional, Union import anndata as ad import numpy as np @@ -104,7 +105,11 @@ def from_files( """ from alphabase.psm_reader.psm_reader import psm_reader_provider - reader: PSMReaderBase = psm_reader_provider.get_reader(reader_type, **kwargs) + reader_config = cls._get_reader_configuration(reader_type) + + reader: PSMReaderBase = psm_reader_provider.get_reader( + reader_type, **reader_config, **kwargs + ) custom_column_mapping = { k: v @@ -121,3 +126,15 @@ def from_files( psm_df = reader.load(file_paths) return cls(psm_df) + + @classmethod + def _get_reader_configuration(cls, reader_type: str) -> Dict[str, Dict[str, Any]]: + """Get reader-specific configuration for mapping PSMs to anndata.""" + reader_kwargs = defaultdict(dict) + + reader_kwargs["diann"] = { + "filter_first_search_fdr": False, + "filter_second_search_fdr": False, + } + + return reader_kwargs[reader_type] diff --git a/tests/unit/anndata/test_anndata_factory.py b/tests/unit/anndata/test_anndata_factory.py index 1d94d7ab..1f64305c 100644 --- a/tests/unit/anndata/test_anndata_factory.py +++ b/tests/unit/anndata/test_anndata_factory.py @@ -105,10 +105,14 @@ def test_create_anndata_with_empty_dataframe(): @patch("alphabase.psm_reader.psm_reader.psm_reader_provider.get_reader") -def test_from_files(mock_reader): +@patch("alphabase.anndata.anndata_factory.AnnDataFactory._get_reader_configuration") +def test_from_files(mock_get_reader_configuration, mock_reader): mock_reader.return_value.load.return_value = _get_test_psm_df() - factory = AnnDataFactory.from_files(["file1", "file2"], reader_type="maxquant") + mock_get_reader_configuration.return_value = {"extra_key": "extra_value"} + factory = AnnDataFactory.from_files( + ["file1", "file2"], reader_type="some_reader_type" + ) # when adata = factory.create_anndata() @@ -120,6 +124,8 @@ def test_from_files(mock_reader): adata.X, np.array([[100, 200], [300, np.nan]]), equal_nan=True ) + mock_reader.assert_called_once_with("some_reader_type", extra_key="extra_value") + @patch("alphabase.psm_reader.psm_reader.psm_reader_provider.get_reader") def test_from_files_nan(mock_reader): @@ -137,7 +143,9 @@ def test_from_files_nan(mock_reader): ) mock_reader.return_value.load.return_value = df - factory = AnnDataFactory.from_files(["file1", "file2"], reader_type="diann") + factory = AnnDataFactory.from_files( + ["file1", "file2"], reader_type="some_reader_type" + ) # when adata = factory.create_anndata() @@ -148,3 +156,24 @@ def test_from_files_nan(mock_reader): assert np.array_equal( adata.X, np.array([[100, 200], [300, np.nan]]), equal_nan=True ) + + mock_reader.assert_called_once_with("some_reader_type") + + +def test_get_reader_configuration_with_valid_reader_type(): + """Test that the correct configuration is returned for a valid reader type.""" + config = AnnDataFactory._get_reader_configuration( + "diann" + ) # diann is taken as an example here + + assert config == { + "filter_first_search_fdr": False, + "filter_second_search_fdr": False, + } + + +def test_get_reader_configuration_with_unknown_reader_type(): + """Test that a reader type without special config is handled correctly.""" + + config = AnnDataFactory._get_reader_configuration("invalid_reader_type") + assert config == {} From 2bef455806418b4a3f2f9fdd0c3dc91ae6f41f2b Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:00:12 +0100 Subject: [PATCH 14/16] switch on filters and adapt test data --- alphabase/anndata/anndata_factory.py | 4 ++-- .../reference_ad_diann_181.parquet | Bin 13185 -> 11848 bytes .../reference_ad_diann_190.parquet | Bin 5764 -> 4486 bytes tests/unit/anndata/test_anndata_factory.py | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/alphabase/anndata/anndata_factory.py b/alphabase/anndata/anndata_factory.py index 533a63c5..ed9c556c 100644 --- a/alphabase/anndata/anndata_factory.py +++ b/alphabase/anndata/anndata_factory.py @@ -133,8 +133,8 @@ def _get_reader_configuration(cls, reader_type: str) -> Dict[str, Dict[str, Any] reader_kwargs = defaultdict(dict) reader_kwargs["diann"] = { - "filter_first_search_fdr": False, - "filter_second_search_fdr": False, + "filter_first_search_fdr": True, + "filter_second_search_fdr": True, } return reader_kwargs[reader_type] diff --git a/tests/integration/reference_data/reference_ad_diann_181.parquet b/tests/integration/reference_data/reference_ad_diann_181.parquet index ba33f97af97ba091e54c2e1e45f61144e629df59..5ff764c967df547bb0a0b9d4e3126612d9cbd3a0 100644 GIT binary patch delta 1367 zcmb7?O>Em_7{?Q*w%auHBf+WNx+zIIM@y>a!^x%~vFcsYw61EqNs}e1kZ6;1X}UB? zDOo;jYu4N=n0Jpp-D^}CWJTuM-Xs8z%(K3v^h`}3EqSC z|G$3z&m;dHU&-Hp{!XR~B>WP#Z-NIni+^b3_6FfPJ^>@dJJ`!ZaF<~5Unp*uf;s98 z43Jl`hr@7#$_T%c+t`~H_=#fi$1Hb)g+8;I!QqZp4?lD_ zvG1&~+{5Azp5Y#j!IhrRuuo3FT{DZnH_m-v2ZMeB{xTceiQD=e?D9!)_p4r zzbY@6hYnss@3#Hk1WW0efQA@gXd)UswQ$-)VP=E*bva<5IQUt~~N7=|GH6xH(gVHoyJ5ZL+C70&U|^c zHD=d$5dtqZ({*q=hC5Pryx5iZ$YIcO3>g*I+uF));!SBi!NZty5f6p3Xv0~MAWDBVaWKQmL28p zm>ge>sDPGPiI|}(x6rlo|Mku0sR93x94T(x2 z3Z+%KqCUc^R|^p;F7R%*vSbuZDAh>qrku8HD`gNGL9=sfId8C>qwt&CPh9XN!r2MD zQ)C1QZvgj8;d}$$@&<_Yta3ly`rNyy#oJ@S!gJbwi1H3t=7}E7HH${GfMVXi0YO)j AlmGw# delta 2195 zcmb_aZERCj7{0w-Z@ad-kM?%ATiMpG+?BO#{oGa}tY>XE+U$1g)~)M8m|Z)zmbUAh zbF6|3#7{y5eF>UK$Rdgc&`A1k3O|e?{19V^3C8$`#TbH#2_`BKC7#=Mh-^PF;U@Ro z=Y7v}p7Z8D&y`&Vr#_n~TZ;G5a+;_n2=f;=a>`G8Us-lZub<9dtop>Y6E|A_Sa!{B zmtKz<^R5^1_KnDC7tecbJny0jT1D5I@cBxbqA3j{S=%WhUQ0z7Hs}%rAv2J8m4^gE z;p>-4_8WzUmZ}{DRjFpE0kgZ;Zl+0Brjta69dt)-~=SlD(p4e_%kY*a3R#Rq|{-oToJ6 z6Pws&jfR1P3I)8R*a9-~0rF=p92RRt*BaSh8=y~Yg6rb<$d%0?S87CO^z7Lt*s9zC z=L8k7D#?*=Zh_t98qvLG_C5=Zph@OX#Y*psbrQ)+@Gx*9H)SHN-EJbBXwGPy?dot0g+z*U(EM&+5xT)e|M>9ULOhwpvNTdbs+hb>;)VI*s3RAmOq`YtwCs^={h z){(-W2=-J*aei;X;h-}R33e3z?=6AqHvxO3ojvcs4zCy5P#!y6lTXV@lDy@D*{AD8 z$DUzVJM#liRS3XPQ(Z-ussE~W5DYaJo1RQZX&qdy@QDhs0&(SnOuCt5sNxW?zfvQp zdMov8{%++P0`5r%QK@GiFNIx?nZT}%)7P>-d4DPtk0!5ced0b>DjrQ-*Z)YHVxyE* z2OjwSr0iA-*^R7LsP9`;a#o@`V^9)=+Qd-VXeJ(=V@_7(Lp35{$@2?!iZb4PxWI(+ zqqTR1n~LV^OpUrdiOE!aJQGb%>b5oN3Vm=fAqdv5Y_jMM5R4oR4)%w)6{E!5X>8*k z3*x}guw57k`e3?2_q2P?jS%X_FY+(=^6xfC%cyS8U@o`-=4Q3nQ^EtyGm;>ejO7AJ zFLdeaL`5{N&x#lOJwZE{fZYa0g4T+RHX|AKaMnO5W-khrFczYMTrRec0|8qnDk3dA zD?t_xbNk)Aj<{4nT00{d4vj@eJl;htQ%jZLKyy|cFfUjJJrO(&BN<#P#`QB2uOpB` zHjY=SE#o1iv{@{Z>JRtC90i#Y9B7`Uh8<(Wp;%F+zf_FtGqQ|-I&N{!N5YmAKElXi z=^opt)iFI09)@wfS{e^KvJtCu@7NGr(lKIZBAOma#^HjtZfSIAVak^dERKbH%o8?W zqCe5y;Y;t!MzCfqvlrJW5!;!<*#AO&(KqLF`>kW6{#-QdocASXrhG}Sr7wx*xrE=@ z=f>RLp82ufp4_NyEEUTHvJ+N&L4!-!7yFXl#GW8;AL`{7%v~ywF&^X_y@*37Foj)q z&!8!UCXo*zFJGewU$-MKLVZ&>0d60{LSepk^R>s$pI_t`KPIB?86NiY3r|F-vu#gX z8~%287jEL!_$amTpTdK0!V~@b;|fizjBnrty652Q7FLDL81dS087=9#kw57KiBYe> zhD1hkdlB+YINRDf@fjZ+ZP`K<3`TIN#ZJY!z;MtCcUm0ERCbsP@pnFFERoR=+%~e} zVzdxsjCK)c<3b2NGIo?jEP))X8XctsPeHxOA+m9~$UYb~IVf*nXTXEXt#w!<%5 ZSwR(OTkFBxT7h4xocLQy5Cdq?@(+>^e-Qux diff --git a/tests/integration/reference_data/reference_ad_diann_190.parquet b/tests/integration/reference_data/reference_ad_diann_190.parquet index 7b828bd95ef6d70d62ce0abc43f53e80b59aa018..fce6d5882fe9f513138b7f8badaee5af61586b6b 100644 GIT binary patch delta 946 zcma)5&ubGw6y9vIo3z;`8k@LjqoSoNg=(`&gH6HK*)*veyPGC8%?}EaA9b6}&l=L0 zfEJ;Mcqr&V!HeKUJb0+|U+6(l&|G_vUOd@@7xAbwo2s>V@Uc9;`R2WEzBj{eTzWWU zmw#9ii~4Hh9vNYW z9EG<`Ht0624!vSr_FkutpiekiI_jVI_$KV6gK_NX9Q!(M%gnv)h2Kq6kfip==O^JO z>MZrr${qAU9bNA$`3RP-=T?@>8GA3>q}}ixea^7N{pD)%PuXblRv>m`K4Yx$O@=t(ET?M%Yo(A3l*Kc|9*R0#X$ z0;qO2)JV^U+DUJ^l&i?KtY=))kP73iIaMm$j^c{~vpAtzuU3q32ooNh!U{@46~hQl z0CxxyzIhCU82n+kpBM4YkIjyV|-E{iP;!6@yRD2_T77@Eo=~t#+%N$=bm%V zx!-rrN&3#v-iwVz5Owe!yp4Zh0c}JtL2$1=7Mjifw3U9dLp;#IEIqUT>s!<@3qPT)osT=kV}us|LSKMC(WCG& z*`N*#P|sNDy>9WI9*!|H4aAz`%?muGk!P*%ef@Og$`&>QOIL5sSkf@n;Uc%gn%v z?nmm2i|{*Ufc}aX_ssAQX7e)N(Ras12uvxxu~ta(CW6-wQ?0@~;nNO#2L=PJ^hoIT zIlcKmtugDwGmV)&zbjYIc4Rj6&jtIB^+IEUu$-ya?Acmv3q2Nt$XebvnF*qATwpen z#Z0m!e9pqxJHH>$M%6huZW~ApygEQae! zHC&f-3m%YLEvRbBA6-us=66wG+GCL_gFO2^y9}^)#qAP=jAZe8~f>| zcGb8|%b~oMw!ftv8E*--#7d?O`dMV~q3II*#%LZb#>&ZfxU%Y3w5L8k9wvq9>Yna+vEm%s%gXp24OQTwxb0NK!noMgMGlf_! zJ)u@oIb1bqH@xJmF65=$`cgG373Vfmh3H1w;htDs3D%QwcUjAkm&&1>b5|33c3w|gar<4Ef3xU z!HtZg*{h~m#*?7M2N&_IAgdh0LGU5hXI8wDBZ3aTO3Rm>lcAftVvZ;=fbovu0Yke7 z1nO{yd&%J6Muu0AMFLy!kh^A3PeQ#43LS2ELlfcUv=d?XoCG5b^BUp=Vh=EHjH!)% z4(wtJ(758^Z7o0Em+NUq$eYGs&`KaA0NJ$)p>wkc08CF4f^9Roy;X*CkM;T6R)YBJ MkLczI0{>V23HFHZ2><{9 diff --git a/tests/unit/anndata/test_anndata_factory.py b/tests/unit/anndata/test_anndata_factory.py index 1f64305c..e955cd75 100644 --- a/tests/unit/anndata/test_anndata_factory.py +++ b/tests/unit/anndata/test_anndata_factory.py @@ -167,8 +167,8 @@ def test_get_reader_configuration_with_valid_reader_type(): ) # diann is taken as an example here assert config == { - "filter_first_search_fdr": False, - "filter_second_search_fdr": False, + "filter_first_search_fdr": True, + "filter_second_search_fdr": True, } From 4d9ce70dabc347c61da24177369783dfae4f8c1f Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:04:27 +0100 Subject: [PATCH 15/16] small improvements --- alphabase/psm_reader/dia_psm_reader.py | 6 +++--- alphabase/psm_reader/keys.py | 3 +-- alphabase/psm_reader/maxquant_reader.py | 2 +- tests/integration/test_psm_readers.py | 3 --- tests/unit/anndata/test_anndata_factory.py | 3 ++- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index a0b69858..56e44e86 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -58,17 +58,17 @@ def __init__( # noqa: PLR0913, D417 # too many arguments in function definition filter_second_search_fdr: bool = False, **kwargs, ): - """Reader for MaxQuant-like data (in terms of modification loading and decoy translation). + """Reader for DIANN data. See documentation of `PSMReaderBase` for more information. Parameters ---------- filter_first_search_fdr : bool, optional - If true, the FDR filtering will be also to the first search (columns _fdr2 and _fdr3) + If True, the FDR filtering will be done also to the first search columns (_fdr2 and _fdr3) filter_second_search_fdr : bool, optional - If true, the FDR filtering will be also to the second search (columns _fdr4 and _fdr5) + If True, the FDR filtering will be done also to the second columns (_fdr4 and _fdr5) See documentation of `PSMReaderBase` for the rest of parameters. diff --git a/alphabase/psm_reader/keys.py b/alphabase/psm_reader/keys.py index 3c87bc1e..64cbca72 100644 --- a/alphabase/psm_reader/keys.py +++ b/alphabase/psm_reader/keys.py @@ -48,6 +48,7 @@ class PsmDfCols(metaclass=ConstantsClass): RAW_NAME = "raw_name" CHARGE = "charge" PROTEINS = "proteins" + INTENSITY = "intensity" SCAN_NUM = "scan_num" PRECURSOR_MZ = "precursor_mz" @@ -58,8 +59,6 @@ class PsmDfCols(metaclass=ConstantsClass): GENES = "genes" QUERY_ID = "query_id" - INTENSITY = "intensity" - # part of psm_reader.yaml, but not part of output # extra FDR columns for DIANN FDR2 = "_fdr2" # first search diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index 6cf8c176..b006326b 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -206,7 +206,7 @@ def __init__( # noqa: PLR0913, D417 # too many arguments in function definition fixed_C57: Optional[bool] = None, # noqa: N803 TODO: make this *,fixed_c57 (breaking) **kwargs, ): - """Reader for MaxQuant-like data (in terms of modification loading and decoy translation). + """Reader for MaxQuant data. See documentation of `PSMReaderBase` for more information. diff --git a/tests/integration/test_psm_readers.py b/tests/integration/test_psm_readers.py index 0b075581..dabd88a6 100644 --- a/tests/integration/test_psm_readers.py +++ b/tests/integration/test_psm_readers.py @@ -47,9 +47,6 @@ F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642 Q9UH36 Q9UH36 SRRD 2365 2334.05 2334.05 2365 2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000184434 1.96448e-05 0.000596659 0.000596659 0.000146135 0.000604961 0 1 2365 2334.05 2365 0.922581 19.905 19.8573 19.9527 123.9 19.782 128.535 0 0.940191 4594.04 1.31068 0.758988 0 0.995505 0.28633 2.12584e-06 1209.02;1210.02;1414.02;1051.01;236.003;130.002; 1209.02;1109.89;732.154;735.384;0;46.0967; 0.919244;0.937624;0.436748;0.639369;0.296736;0.647924; 30029 1.195 1.19328 1.19381 1.19339 F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648 Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 1.96448e-05 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 """ -# F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648x.d 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B_1_22648x Q9UH36 Q9UH36 SRRD 1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR AAAAAAALESWQAAAPR (UniMod:1)AAAAAAALESWQAAAPR2 2 0.000185123 9.999999999 0.000307409 0.000307409 0.000146135 0.000311332 0 1 1664.51 1635.46 1664.51 0.811147 19.8893 19.8416 19.937 123.9 19.7567 128.896 0 0.458773 6614.06 1.7503 0.491071 0.00111683 0.997286 1.92753 2.80543e-05 744.01;1708.02;1630.02;1475.02;0;533.006; 322.907;808.594;577.15;536.033;0;533.006; 0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005 1.19409 1.19328 1.19323 1.19308 -# -# last line is made up to not pass extra FDR check TEST_DATA_SPECTRONAUT = """ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName 202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843 2 _ALVAT[+80]PGK_ True _ALVAT[Phospho (STY)]PGK_ ALVATPGK -5.032703 0.758 -5.032703 P19338 False _ALVAT[+80]PGK_ _ALVAT[Phospho (STY)]PGK_ 418.717511324722 0 10352 noloss 3 y 1 301.187031733932 53.1991 False sp P19338 P19338 NUCL_HUMAN Nucleolin Homo sapiens NCL 1 3 MCT_human_UP000005640_9606 diff --git a/tests/unit/anndata/test_anndata_factory.py b/tests/unit/anndata/test_anndata_factory.py index e955cd75..a52d534a 100644 --- a/tests/unit/anndata/test_anndata_factory.py +++ b/tests/unit/anndata/test_anndata_factory.py @@ -162,6 +162,7 @@ def test_from_files_nan(mock_reader): def test_get_reader_configuration_with_valid_reader_type(): """Test that the correct configuration is returned for a valid reader type.""" + # when config = AnnDataFactory._get_reader_configuration( "diann" ) # diann is taken as an example here @@ -174,6 +175,6 @@ def test_get_reader_configuration_with_valid_reader_type(): def test_get_reader_configuration_with_unknown_reader_type(): """Test that a reader type without special config is handled correctly.""" - + # when config = AnnDataFactory._get_reader_configuration("invalid_reader_type") assert config == {} From f44cca3c7dc727cc1d3286dd13228ae83fe960a1 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:13:53 +0100 Subject: [PATCH 16/16] fix tests --- tests/unit/psm_reader/test_dia_psm_reader.py | 53 ++++++++++++-------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/tests/unit/psm_reader/test_dia_psm_reader.py b/tests/unit/psm_reader/test_dia_psm_reader.py index 8126029b..04c9ff06 100644 --- a/tests/unit/psm_reader/test_dia_psm_reader.py +++ b/tests/unit/psm_reader/test_dia_psm_reader.py @@ -1,12 +1,12 @@ import pandas as pd +import pytest from alphabase.psm_reader.dia_psm_reader import DiannReader -def test_filter_fdr_columns_above_threshold(): - """Test that PSMs are filtered based on additional FDR columns.""" - reader = DiannReader() - reader._psm_df = pd.DataFrame( +@pytest.fixture +def psm_df(): + return pd.DataFrame( { "name": ["p1", "p2", "p3", "p4", "p5"], "_fdr2": [0.01, 0.06, 0.01, 0.01, 0.01], @@ -16,6 +16,13 @@ def test_filter_fdr_columns_above_threshold(): "intensity": [1, 2, 3, 4, 5], } ) + + +def test_filter_fdr_columns_above_threshold(psm_df): + """Test that PSMs are filtered based on additional FDR columns.""" + reader = DiannReader(filter_first_search_fdr=True, filter_second_search_fdr=True) + reader._psm_df = psm_df + reader._keep_fdr = 0.05 # when @@ -26,27 +33,33 @@ def test_filter_fdr_columns_above_threshold(): ) -def test_filter_fdr_columns_above_threshold_missing_columns(): - """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" - reader = DiannReader() - reader._psm_df = pd.DataFrame( - { - "name": ["p1", "p2", "p3"], - "_fdr2": [0.01, 0.06, 0.01], - "_fdr3": [0.01, 0.01, 0.06], - # '_fdr4', '_fdr5' missing - "intensity": [ - 1, - 2, - 3, - ], - } +def test_filter_fdr_columns_not(psm_df): + """Test that PSMs are filtered based on additional FDR columns.""" + reader = DiannReader(filter_first_search_fdr=False, filter_second_search_fdr=False) + reader._psm_df = psm_df + reader._keep_fdr = 0.05 + + # when + reader._filter_fdr() + + pd.testing.assert_frame_equal( + reader._psm_df, psm_df.drop(columns=["_fdr2", "_fdr3", "_fdr4", "_fdr5"]) ) + + +def test_filter_fdr_columns_above_threshold_missing_columns(psm_df): + """Test that PSMs are filtered based on additional FDR columns, tolerates missing columns.""" + reader = DiannReader(filter_first_search_fdr=True, filter_second_search_fdr=True) + reader._psm_df = psm_df.drop(columns=["_fdr4", "_fdr5"]) + reader._keep_fdr = 0.05 # when reader._filter_fdr() pd.testing.assert_frame_equal( - reader._psm_df, pd.DataFrame({"name": ["p1"], "intensity": [1]}) + reader._psm_df.reset_index(drop=True), + pd.DataFrame({"name": ["p1", "p4", "p5"], "intensity": [1, 4, 5]}).reset_index( + drop=True + ), )