diff --git a/alphabase/protein/fasta.py b/alphabase/protein/fasta.py index 71a1f85f..dfd2031a 100644 --- a/alphabase/protein/fasta.py +++ b/alphabase/protein/fasta.py @@ -488,10 +488,16 @@ def parse_labels(labels:list): cterm_label_mod = label return label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod -def create_labeling_peptide_df(peptide_df:pd.DataFrame, labels:list): +def create_labeling_peptide_df( + peptide_df:pd.DataFrame, labels:list, + inplace:bool=False + ): if len(peptide_df) == 0: return peptide_df - df = peptide_df.copy() + if inplace: + df = peptide_df + else: + df = peptide_df.copy() ( label_aas, label_mod_dict, @@ -789,12 +795,12 @@ def __init__(self, self._parse_fix_and_var_mods() def _parse_fix_and_var_mods(self): - self.fix_mod_aas = '' - self.fix_mod_prot_nterm_dict = {} - self.fix_mod_prot_cterm_dict = {} - self.fix_mod_pep_nterm_dict = {} - self.fix_mod_pep_cterm_dict = {} - self.fix_mod_dict = {} + # self.fix_mod_aas = '' + # self.fix_mod_prot_nterm_dict = {} + # self.fix_mod_prot_cterm_dict = {} + # self.fix_mod_pep_nterm_dict = {} + # self.fix_mod_pep_cterm_dict = {} + # self.fix_mod_dict = {} def _set_term_mod(term_mod, prot_nterm, prot_cterm, pep_nterm, pep_cterm, @@ -828,19 +834,19 @@ def _set_dict(term_dict,site,mod, allow_conflicts ) - for mod in self.fix_mods: - if mod.find('@')+2 == len(mod): - self.fix_mod_aas += mod[-1] - self.fix_mod_dict[mod[-1]] = mod - else: - _set_term_mod( - mod, - self.fix_mod_prot_nterm_dict, - self.fix_mod_prot_cterm_dict, - self.fix_mod_pep_nterm_dict, - self.fix_mod_pep_cterm_dict, - allow_conflicts=False - ) + # for mod in self.fix_mods: + # if mod.find('@')+2 == len(mod): + # self.fix_mod_aas += mod[-1] + # self.fix_mod_dict[mod[-1]] = mod + # else: + # _set_term_mod( + # mod, + # self.fix_mod_prot_nterm_dict, + # self.fix_mod_prot_cterm_dict, + # self.fix_mod_pep_nterm_dict, + # self.fix_mod_pep_cterm_dict, + # allow_conflicts=False + # ) self.var_mod_aas = '' self.var_mod_prot_nterm_dict = {} @@ -863,7 +869,7 @@ def _set_dict(term_dict,site,mod, else: for mod in self.var_mods: if mod.find('@')+2 == len(mod): - if mod[-1] in self.fix_mod_dict: continue + # if mod[-1] in self.fix_mod_dict: continue self.var_mod_aas += mod[-1] self.var_mod_dict[mod[-1]] = mod get_var_mods_per_sites = get_var_mods_per_sites_single_mod_on_aa @@ -1122,18 +1128,18 @@ def add_mods_for_one_seq(self, sequence:str, list[str]: list of modification names list[str]: list of modification sites """ - fix_mods, fix_mod_sites = get_fix_mods( - sequence, self.fix_mod_aas, self.fix_mod_dict - ) - #TODO add prot and pep C-term fix mods - #TODO add prot and pep N-term fix mods - - if len(fix_mods) == 0: - fix_mods = [''] - fix_mod_sites = [''] - else: - fix_mods = [fix_mods] - fix_mod_sites = [fix_mod_sites] + # fix_mods, fix_mod_sites = get_fix_mods( + # sequence, self.fix_mod_aas, self.fix_mod_dict + # ) + # #TODO add prot and pep C-term fix mods + # #TODO add prot and pep N-term fix mods + + # if len(fix_mods) == 0: + # fix_mods = [''] + # fix_mod_sites = [''] + # else: + # fix_mods = [fix_mods] + # fix_mod_sites = [fix_mod_sites] var_mods_list, var_mod_sites_list = get_var_mods( sequence, self.var_mod_aas, self.var_mod_dict, @@ -1160,12 +1166,12 @@ def add_mods_for_one_seq(self, sequence:str, return ( list( ';'.join([i for i in items if i]) for items in itertools.product( - fix_mods, nterm_var_mods, var_mods_list + nterm_var_mods, var_mods_list ) ), list( ';'.join([i for i in items if i]) for items in itertools.product( - fix_mod_sites, nterm_var_mod_sites, var_mod_sites_list + nterm_var_mod_sites, var_mod_sites_list ) ), ) @@ -1195,6 +1201,11 @@ def add_modifications(self): self._precursor_df, ['mods','mod_sites'] ) + self._precursor_df.dropna(subset=['mods'], inplace=True) + self._precursor_df = create_labeling_peptide_df( + self._precursor_df, self.fix_mods, + inplace=True + ) self._precursor_df.reset_index(drop=True, inplace=True) def add_special_modifications(self): diff --git a/nbdev_nbs/constants/aa.ipynb b/nbdev_nbs/constants/aa.ipynb index 6dbf3502..9c352d60 100644 --- a/nbdev_nbs/constants/aa.ipynb +++ b/nbdev_nbs/constants/aa.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -258,7 +258,7 @@ "90 Z C(1000000) 1.200000e+07" ] }, - "execution_count": 3, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -293,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -314,7 +314,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -339,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -359,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -382,7 +382,7 @@ " 453.26996726, 396.24850354, 259.18959168, 146.1055277 ]])}" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -404,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -424,7 +424,7 @@ " 1.28094963e+02]])" ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -436,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/nbdev_nbs/protein/fasta.ipynb b/nbdev_nbs/protein/fasta.ipynb index 1511baf8..c5a791a3 100644 --- a/nbdev_nbs/protein/fasta.ipynb +++ b/nbdev_nbs/protein/fasta.ipynb @@ -1330,8 +1330,8 @@ " 0\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 3;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;3\n", " 8\n", " xx\n", " \n", @@ -1343,8 +1343,8 @@ " 0\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M\n", - " 4;1\n", + " Oxidation@M;Carbamidomethyl@C\n", + " 1;4\n", " 9\n", " xx\n", " \n", @@ -1369,8 +1369,8 @@ " 0\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 4;0;1\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", + " 0;1;4\n", " 9\n", " xx\n", " \n", @@ -1382,8 +1382,8 @@ " 0\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 4;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;4\n", " 9\n", " xx\n", " \n", @@ -1486,8 +1486,8 @@ " 1\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 3;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;3\n", " 15\n", " xx\n", " \n", @@ -1499,8 +1499,8 @@ " 1\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M\n", - " 4;1\n", + " Oxidation@M;Carbamidomethyl@C\n", + " 1;4\n", " 16\n", " xx\n", " \n", @@ -1525,8 +1525,8 @@ " 1\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 4;0;1\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", + " 0;1;4\n", " 16\n", " xx\n", " \n", @@ -1538,8 +1538,8 @@ " 1\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 4;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;4\n", " 16\n", " xx\n", " \n", @@ -1577,8 +1577,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M\n", - " 3;17\n", + " Oxidation@M;Carbamidomethyl@C\n", + " 17;3\n", " 22\n", " xx\n", " \n", @@ -1603,8 +1603,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 3;0;17\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", + " 0;17;3\n", " 22\n", " xx\n", " \n", @@ -1616,8 +1616,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 3;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;3\n", " 22\n", " xx\n", " \n", @@ -1629,8 +1629,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M\n", - " 4;1\n", + " Oxidation@M;Carbamidomethyl@C\n", + " 1;4\n", " 23\n", " xx\n", " \n", @@ -1642,8 +1642,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M\n", - " 4;18\n", + " Oxidation@M;Carbamidomethyl@C\n", + " 18;4\n", " 23\n", " xx\n", " \n", @@ -1655,8 +1655,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Oxidation@M;Oxidation@M\n", - " 4;1;18\n", + " Oxidation@M;Oxidation@M;Carbamidomethyl@C\n", + " 1;18;4\n", " 23\n", " xx\n", " \n", @@ -1681,8 +1681,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 4;0;1\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", + " 0;1;4\n", " 23\n", " xx\n", " \n", @@ -1694,8 +1694,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 4;0;18\n", + " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet...\n", + " 0;18;4\n", " 23\n", " xx\n", " \n", @@ -1707,8 +1707,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...\n", - " 4;0;1;18\n", + " Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;...\n", + " 0;1;18;4\n", " 23\n", " xx\n", " \n", @@ -1720,8 +1720,8 @@ " 2\n", " True\n", " False\n", - " Carbamidomethyl@C;Acetyl@Protein_N-term\n", - " 4;0\n", + " Acetyl@Protein_N-term;Carbamidomethyl@C\n", + " 0;4\n", " 23\n", " xx\n", " \n", @@ -1775,11 +1775,11 @@ "2 True Oxidation@M \n", "3 True \n", "4 False Carbamidomethyl@C \n", - "5 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", - "6 False Carbamidomethyl@C;Oxidation@M \n", + "5 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", + "6 False Oxidation@M;Carbamidomethyl@C \n", "7 False Carbamidomethyl@C \n", - "8 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "9 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "8 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "9 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "10 True Oxidation@M \n", "11 True \n", "12 True Acetyl@Protein_N-term;Oxidation@M \n", @@ -1787,25 +1787,25 @@ "14 True Oxidation@M \n", "15 True \n", "16 False Carbamidomethyl@C \n", - "17 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", - "18 False Carbamidomethyl@C;Oxidation@M \n", + "17 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", + "18 False Oxidation@M;Carbamidomethyl@C \n", "19 False Carbamidomethyl@C \n", - "20 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "21 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "20 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "21 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "22 True Oxidation@M \n", "23 True \n", - "24 False Carbamidomethyl@C;Oxidation@M \n", + "24 False Oxidation@M;Carbamidomethyl@C \n", "25 False Carbamidomethyl@C \n", - "26 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "27 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", - "28 False Carbamidomethyl@C;Oxidation@M \n", - "29 False Carbamidomethyl@C;Oxidation@M \n", - "30 False Carbamidomethyl@C;Oxidation@M;Oxidation@M \n", + "26 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "27 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", + "28 False Oxidation@M;Carbamidomethyl@C \n", + "29 False Oxidation@M;Carbamidomethyl@C \n", + "30 False Oxidation@M;Oxidation@M;Carbamidomethyl@C \n", "31 False Carbamidomethyl@C \n", - "32 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "33 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "34 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n", - "35 False Carbamidomethyl@C;Acetyl@Protein_N-term \n", + "32 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "33 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n", + "34 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n", + "35 False Acetyl@Protein_N-term;Carbamidomethyl@C \n", "\n", " mod_sites nAA proteins genes \n", "0 7 xx;yy gene \n", @@ -1813,11 +1813,11 @@ "2 2 7 xx;yy gene \n", "3 7 xx;yy gene \n", "4 3 8 xx \n", - "5 3;0 8 xx \n", - "6 4;1 9 xx \n", + "5 0;3 8 xx \n", + "6 1;4 9 xx \n", "7 4 9 xx \n", - "8 4;0;1 9 xx \n", - "9 4;0 9 xx \n", + "8 0;1;4 9 xx \n", + "9 0;4 9 xx \n", "10 9 14 xx;yy gene \n", "11 14 xx;yy gene \n", "12 0;9 14 xx;yy gene \n", @@ -1825,25 +1825,25 @@ "14 2 14 xx \n", "15 14 xx \n", "16 3 15 xx \n", - "17 3;0 15 xx \n", - "18 4;1 16 xx \n", + "17 0;3 15 xx \n", + "18 1;4 16 xx \n", "19 4 16 xx \n", - "20 4;0;1 16 xx \n", - "21 4;0 16 xx \n", + "20 0;1;4 16 xx \n", + "21 0;4 16 xx \n", "22 9 21 xx \n", "23 21 xx \n", - "24 3;17 22 xx \n", + "24 17;3 22 xx \n", "25 3 22 xx \n", - "26 3;0;17 22 xx \n", - "27 3;0 22 xx \n", - "28 4;1 23 xx \n", - "29 4;18 23 xx \n", - "30 4;1;18 23 xx \n", + "26 0;17;3 22 xx \n", + "27 0;3 22 xx \n", + "28 1;4 23 xx \n", + "29 18;4 23 xx \n", + "30 1;18;4 23 xx \n", "31 4 23 xx \n", - "32 4;0;1 23 xx \n", - "33 4;0;18 23 xx \n", - "34 4;0;1;18 23 xx \n", - "35 4;0 23 xx " + "32 0;1;4 23 xx \n", + "33 0;18;4 23 xx \n", + "34 0;1;18;4 23 xx \n", + "35 0;4 23 xx " ] }, "execution_count": null, @@ -1969,6 +1969,57 @@ "assert ~_lib.precursor_df.sequence.str.contains('B').any()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test fix mods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fasta_lib = SpecLibFasta(\n", + " var_mods = [\"Oxidation@M\"], \n", + " fix_mods = [\"Dimethyl@K\", \"Dimethyl@Any_N-term\"],\n", + " special_mods=[],\n", + " # labeling_channels={0:['Phospho@S']}\n", + ")\n", + "protein_dict = get_protein_dict()\n", + "fasta_lib.import_and_process_protein_dict(protein_dict)\n", + "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@Any_N-term').all()\n", + "assert fasta_lib.precursor_df.mods.str.contains('Oxidation@M').any()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test min_var_mod_num" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fasta_lib = SpecLibFasta(\n", + " var_mods = [\"Oxidation@M\"], \n", + " fix_mods = [\"Carbamidomethyl@C\"],\n", + " special_mods=[],\n", + " min_var_mod_num=1,\n", + " max_var_mod_num=2,\n", + ")\n", + "protein_dict = get_protein_dict()\n", + "fasta_lib.import_and_process_protein_dict(protein_dict)\n", + "fasta_lib.calc_precursor_mz()\n", + "assert fasta_lib.precursor_df.mods.str.contains(\"Oxidation@M\").all()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1992,6 +2043,25 @@ "assert fasta_lib.precursor_df.mods.str.contains('Phospho@S').any()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fasta_lib = SpecLibFasta(\n", + " var_mods = [\"Oxidation@M\"], \n", + " fix_mods = [\"Dimethyl@K\", \"Dimethyl@Any_N-term\"],\n", + " special_mods=[\"Phospho@S\"],\n", + " # labeling_channels={0:['Phospho@S']}\n", + ")\n", + "protein_dict = get_protein_dict()\n", + "fasta_lib.import_and_process_protein_dict(protein_dict)\n", + "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@Any_N-term').all()\n", + "assert fasta_lib.precursor_df.mods.str.contains('Oxidation@M').any()\n", + "assert fasta_lib.precursor_df.mods.str.contains('Phospho@S').any()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -2015,241 +2085,18 @@ ").all()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test labelling" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sequenceprotein_idxesmiss_cleavageis_prot_ntermis_prot_ctermmodsmod_sitesnAAlabeling_channelcharge
0AFGHIJK0;10TrueTrueDimethyl@Any N-term;Dimethyl@K0;7702
1AFGHIJK0;10TrueTrueDimethyl@Any N-term;Dimethyl@K0;7703
2AFGHIJK0;10TrueTrueDimethyl@Any N-term;Dimethyl@K0;7704
3LMNOPQR0;10FalseTrueDimethyl@Any N-term0702
4LMNOPQR0;10FalseTrueDimethyl@Any N-term0703
.................................
61ABCDESTKAFGHIJKLMNOPQR02TrueFalseDimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K0;8;152243
62ABCDESTKAFGHIJKLMNOPQR02TrueFalseDimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K0;8;152244
63MABCDESTKAFGHIJKLMNOPQR02TrueFalseDimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K0;9;162342
64MABCDESTKAFGHIJKLMNOPQR02TrueFalseDimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K0;9;162343
65MABCDESTKAFGHIJKLMNOPQR02TrueFalseDimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K0;9;162344
\n", - "

66 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " sequence protein_idxes miss_cleavage is_prot_nterm \\\n", - "0 AFGHIJK 0;1 0 True \n", - "1 AFGHIJK 0;1 0 True \n", - "2 AFGHIJK 0;1 0 True \n", - "3 LMNOPQR 0;1 0 False \n", - "4 LMNOPQR 0;1 0 False \n", - ".. ... ... ... ... \n", - "61 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n", - "62 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n", - "63 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n", - "64 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n", - "65 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n", - "\n", - " is_prot_cterm mods mod_sites \\\n", - "0 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n", - "1 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n", - "2 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n", - "3 True Dimethyl@Any N-term 0 \n", - "4 True Dimethyl@Any N-term 0 \n", - ".. ... ... ... \n", - "61 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;8;15 \n", - "62 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;8;15 \n", - "63 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n", - "64 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n", - "65 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n", - "\n", - " nAA labeling_channel charge \n", - "0 7 0 2 \n", - "1 7 0 3 \n", - "2 7 0 4 \n", - "3 7 0 2 \n", - "4 7 0 3 \n", - ".. ... ... ... \n", - "61 22 4 3 \n", - "62 22 4 4 \n", - "63 23 4 2 \n", - "64 23 4 3 \n", - "65 23 4 4 \n", - "\n", - "[66 rows x 10 columns]" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fasta_lib = SpecLibFasta(\n", " var_mods = [], fix_mods = [],\n", @@ -2264,8 +2111,7 @@ "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl4').any()\n", "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@').sum()==fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()\n", "assert (fasta_lib.precursor_df.labeling_channel==0).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl@').sum()\n", - "assert (fasta_lib.precursor_df.labeling_channel==4).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()\n", - "fasta_lib.precursor_df" + "assert (fasta_lib.precursor_df.labeling_channel==4).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()" ] }, { @@ -2281,6 +2127,10 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" } }, "nbformat": 4,