diff --git a/alphabase/protein/fasta.py b/alphabase/protein/fasta.py
index 71a1f85f..dfd2031a 100644
--- a/alphabase/protein/fasta.py
+++ b/alphabase/protein/fasta.py
@@ -488,10 +488,16 @@ def parse_labels(labels:list):
cterm_label_mod = label
return label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod
-def create_labeling_peptide_df(peptide_df:pd.DataFrame, labels:list):
+def create_labeling_peptide_df(
+ peptide_df:pd.DataFrame, labels:list,
+ inplace:bool=False
+ ):
if len(peptide_df) == 0: return peptide_df
- df = peptide_df.copy()
+ if inplace:
+ df = peptide_df
+ else:
+ df = peptide_df.copy()
(
label_aas, label_mod_dict,
@@ -789,12 +795,12 @@ def __init__(self,
self._parse_fix_and_var_mods()
def _parse_fix_and_var_mods(self):
- self.fix_mod_aas = ''
- self.fix_mod_prot_nterm_dict = {}
- self.fix_mod_prot_cterm_dict = {}
- self.fix_mod_pep_nterm_dict = {}
- self.fix_mod_pep_cterm_dict = {}
- self.fix_mod_dict = {}
+ # self.fix_mod_aas = ''
+ # self.fix_mod_prot_nterm_dict = {}
+ # self.fix_mod_prot_cterm_dict = {}
+ # self.fix_mod_pep_nterm_dict = {}
+ # self.fix_mod_pep_cterm_dict = {}
+ # self.fix_mod_dict = {}
def _set_term_mod(term_mod,
prot_nterm, prot_cterm, pep_nterm, pep_cterm,
@@ -828,19 +834,19 @@ def _set_dict(term_dict,site,mod,
allow_conflicts
)
- for mod in self.fix_mods:
- if mod.find('@')+2 == len(mod):
- self.fix_mod_aas += mod[-1]
- self.fix_mod_dict[mod[-1]] = mod
- else:
- _set_term_mod(
- mod,
- self.fix_mod_prot_nterm_dict,
- self.fix_mod_prot_cterm_dict,
- self.fix_mod_pep_nterm_dict,
- self.fix_mod_pep_cterm_dict,
- allow_conflicts=False
- )
+ # for mod in self.fix_mods:
+ # if mod.find('@')+2 == len(mod):
+ # self.fix_mod_aas += mod[-1]
+ # self.fix_mod_dict[mod[-1]] = mod
+ # else:
+ # _set_term_mod(
+ # mod,
+ # self.fix_mod_prot_nterm_dict,
+ # self.fix_mod_prot_cterm_dict,
+ # self.fix_mod_pep_nterm_dict,
+ # self.fix_mod_pep_cterm_dict,
+ # allow_conflicts=False
+ # )
self.var_mod_aas = ''
self.var_mod_prot_nterm_dict = {}
@@ -863,7 +869,7 @@ def _set_dict(term_dict,site,mod,
else:
for mod in self.var_mods:
if mod.find('@')+2 == len(mod):
- if mod[-1] in self.fix_mod_dict: continue
+ # if mod[-1] in self.fix_mod_dict: continue
self.var_mod_aas += mod[-1]
self.var_mod_dict[mod[-1]] = mod
get_var_mods_per_sites = get_var_mods_per_sites_single_mod_on_aa
@@ -1122,18 +1128,18 @@ def add_mods_for_one_seq(self, sequence:str,
list[str]: list of modification names
list[str]: list of modification sites
"""
- fix_mods, fix_mod_sites = get_fix_mods(
- sequence, self.fix_mod_aas, self.fix_mod_dict
- )
- #TODO add prot and pep C-term fix mods
- #TODO add prot and pep N-term fix mods
-
- if len(fix_mods) == 0:
- fix_mods = ['']
- fix_mod_sites = ['']
- else:
- fix_mods = [fix_mods]
- fix_mod_sites = [fix_mod_sites]
+ # fix_mods, fix_mod_sites = get_fix_mods(
+ # sequence, self.fix_mod_aas, self.fix_mod_dict
+ # )
+ # #TODO add prot and pep C-term fix mods
+ # #TODO add prot and pep N-term fix mods
+
+ # if len(fix_mods) == 0:
+ # fix_mods = ['']
+ # fix_mod_sites = ['']
+ # else:
+ # fix_mods = [fix_mods]
+ # fix_mod_sites = [fix_mod_sites]
var_mods_list, var_mod_sites_list = get_var_mods(
sequence, self.var_mod_aas, self.var_mod_dict,
@@ -1160,12 +1166,12 @@ def add_mods_for_one_seq(self, sequence:str,
return (
list(
';'.join([i for i in items if i]) for items in itertools.product(
- fix_mods, nterm_var_mods, var_mods_list
+ nterm_var_mods, var_mods_list
)
),
list(
';'.join([i for i in items if i]) for items in itertools.product(
- fix_mod_sites, nterm_var_mod_sites, var_mod_sites_list
+ nterm_var_mod_sites, var_mod_sites_list
)
),
)
@@ -1195,6 +1201,11 @@ def add_modifications(self):
self._precursor_df,
['mods','mod_sites']
)
+ self._precursor_df.dropna(subset=['mods'], inplace=True)
+ self._precursor_df = create_labeling_peptide_df(
+ self._precursor_df, self.fix_mods,
+ inplace=True
+ )
self._precursor_df.reset_index(drop=True, inplace=True)
def add_special_modifications(self):
diff --git a/nbdev_nbs/constants/aa.ipynb b/nbdev_nbs/constants/aa.ipynb
index 6dbf3502..9c352d60 100644
--- a/nbdev_nbs/constants/aa.ipynb
+++ b/nbdev_nbs/constants/aa.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -258,7 +258,7 @@
"90 Z C(1000000) 1.200000e+07"
]
},
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -293,7 +293,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -314,7 +314,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -339,7 +339,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -359,7 +359,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -382,7 +382,7 @@
" 453.26996726, 396.24850354, 259.18959168, 146.1055277 ]])}"
]
},
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -404,7 +404,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -424,7 +424,7 @@
" 1.28094963e+02]])"
]
},
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -436,7 +436,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -447,7 +447,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
diff --git a/nbdev_nbs/protein/fasta.ipynb b/nbdev_nbs/protein/fasta.ipynb
index 1511baf8..c5a791a3 100644
--- a/nbdev_nbs/protein/fasta.ipynb
+++ b/nbdev_nbs/protein/fasta.ipynb
@@ -1330,8 +1330,8 @@
"
0 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 3;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;3 | \n",
" 8 | \n",
" xx | \n",
" | \n",
@@ -1343,8 +1343,8 @@
" 0 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M | \n",
- " 4;1 | \n",
+ " Oxidation@M;Carbamidomethyl@C | \n",
+ " 1;4 | \n",
" 9 | \n",
" xx | \n",
" | \n",
@@ -1369,8 +1369,8 @@
" 0 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 4;0;1 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
+ " 0;1;4 | \n",
" 9 | \n",
" xx | \n",
" | \n",
@@ -1382,8 +1382,8 @@
" 0 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 4;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;4 | \n",
" 9 | \n",
" xx | \n",
" | \n",
@@ -1486,8 +1486,8 @@
" 1 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 3;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;3 | \n",
" 15 | \n",
" xx | \n",
" | \n",
@@ -1499,8 +1499,8 @@
" 1 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M | \n",
- " 4;1 | \n",
+ " Oxidation@M;Carbamidomethyl@C | \n",
+ " 1;4 | \n",
" 16 | \n",
" xx | \n",
" | \n",
@@ -1525,8 +1525,8 @@
" 1 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 4;0;1 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
+ " 0;1;4 | \n",
" 16 | \n",
" xx | \n",
" | \n",
@@ -1538,8 +1538,8 @@
" 1 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 4;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;4 | \n",
" 16 | \n",
" xx | \n",
" | \n",
@@ -1577,8 +1577,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M | \n",
- " 3;17 | \n",
+ " Oxidation@M;Carbamidomethyl@C | \n",
+ " 17;3 | \n",
" 22 | \n",
" xx | \n",
" | \n",
@@ -1603,8 +1603,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 3;0;17 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
+ " 0;17;3 | \n",
" 22 | \n",
" xx | \n",
" | \n",
@@ -1616,8 +1616,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 3;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;3 | \n",
" 22 | \n",
" xx | \n",
" | \n",
@@ -1629,8 +1629,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M | \n",
- " 4;1 | \n",
+ " Oxidation@M;Carbamidomethyl@C | \n",
+ " 1;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1642,8 +1642,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M | \n",
- " 4;18 | \n",
+ " Oxidation@M;Carbamidomethyl@C | \n",
+ " 18;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1655,8 +1655,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Oxidation@M;Oxidation@M | \n",
- " 4;1;18 | \n",
+ " Oxidation@M;Oxidation@M;Carbamidomethyl@C | \n",
+ " 1;18;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1681,8 +1681,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 4;0;1 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
+ " 0;1;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1694,8 +1694,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 4;0;18 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
+ " 0;18;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1707,8 +1707,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... | \n",
- " 4;0;1;18 | \n",
+ " Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
+ " 0;1;18;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1720,8 +1720,8 @@
" 2 | \n",
" True | \n",
" False | \n",
- " Carbamidomethyl@C;Acetyl@Protein_N-term | \n",
- " 4;0 | \n",
+ " Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
+ " 0;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
@@ -1775,11 +1775,11 @@
"2 True Oxidation@M \n",
"3 True \n",
"4 False Carbamidomethyl@C \n",
- "5 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
- "6 False Carbamidomethyl@C;Oxidation@M \n",
+ "5 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
+ "6 False Oxidation@M;Carbamidomethyl@C \n",
"7 False Carbamidomethyl@C \n",
- "8 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "9 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
+ "8 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n",
+ "9 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
"10 True Oxidation@M \n",
"11 True \n",
"12 True Acetyl@Protein_N-term;Oxidation@M \n",
@@ -1787,25 +1787,25 @@
"14 True Oxidation@M \n",
"15 True \n",
"16 False Carbamidomethyl@C \n",
- "17 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
- "18 False Carbamidomethyl@C;Oxidation@M \n",
+ "17 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
+ "18 False Oxidation@M;Carbamidomethyl@C \n",
"19 False Carbamidomethyl@C \n",
- "20 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "21 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
+ "20 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n",
+ "21 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
"22 True Oxidation@M \n",
"23 True \n",
- "24 False Carbamidomethyl@C;Oxidation@M \n",
+ "24 False Oxidation@M;Carbamidomethyl@C \n",
"25 False Carbamidomethyl@C \n",
- "26 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "27 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
- "28 False Carbamidomethyl@C;Oxidation@M \n",
- "29 False Carbamidomethyl@C;Oxidation@M \n",
- "30 False Carbamidomethyl@C;Oxidation@M;Oxidation@M \n",
+ "26 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n",
+ "27 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
+ "28 False Oxidation@M;Carbamidomethyl@C \n",
+ "29 False Oxidation@M;Carbamidomethyl@C \n",
+ "30 False Oxidation@M;Oxidation@M;Carbamidomethyl@C \n",
"31 False Carbamidomethyl@C \n",
- "32 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "33 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "34 False Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat... \n",
- "35 False Carbamidomethyl@C;Acetyl@Protein_N-term \n",
+ "32 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n",
+ "33 False Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... \n",
+ "34 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n",
+ "35 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
"\n",
" mod_sites nAA proteins genes \n",
"0 7 xx;yy gene \n",
@@ -1813,11 +1813,11 @@
"2 2 7 xx;yy gene \n",
"3 7 xx;yy gene \n",
"4 3 8 xx \n",
- "5 3;0 8 xx \n",
- "6 4;1 9 xx \n",
+ "5 0;3 8 xx \n",
+ "6 1;4 9 xx \n",
"7 4 9 xx \n",
- "8 4;0;1 9 xx \n",
- "9 4;0 9 xx \n",
+ "8 0;1;4 9 xx \n",
+ "9 0;4 9 xx \n",
"10 9 14 xx;yy gene \n",
"11 14 xx;yy gene \n",
"12 0;9 14 xx;yy gene \n",
@@ -1825,25 +1825,25 @@
"14 2 14 xx \n",
"15 14 xx \n",
"16 3 15 xx \n",
- "17 3;0 15 xx \n",
- "18 4;1 16 xx \n",
+ "17 0;3 15 xx \n",
+ "18 1;4 16 xx \n",
"19 4 16 xx \n",
- "20 4;0;1 16 xx \n",
- "21 4;0 16 xx \n",
+ "20 0;1;4 16 xx \n",
+ "21 0;4 16 xx \n",
"22 9 21 xx \n",
"23 21 xx \n",
- "24 3;17 22 xx \n",
+ "24 17;3 22 xx \n",
"25 3 22 xx \n",
- "26 3;0;17 22 xx \n",
- "27 3;0 22 xx \n",
- "28 4;1 23 xx \n",
- "29 4;18 23 xx \n",
- "30 4;1;18 23 xx \n",
+ "26 0;17;3 22 xx \n",
+ "27 0;3 22 xx \n",
+ "28 1;4 23 xx \n",
+ "29 18;4 23 xx \n",
+ "30 1;18;4 23 xx \n",
"31 4 23 xx \n",
- "32 4;0;1 23 xx \n",
- "33 4;0;18 23 xx \n",
- "34 4;0;1;18 23 xx \n",
- "35 4;0 23 xx "
+ "32 0;1;4 23 xx \n",
+ "33 0;18;4 23 xx \n",
+ "34 0;1;18;4 23 xx \n",
+ "35 0;4 23 xx "
]
},
"execution_count": null,
@@ -1969,6 +1969,57 @@
"assert ~_lib.precursor_df.sequence.str.contains('B').any()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Test fix mods"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fasta_lib = SpecLibFasta(\n",
+ " var_mods = [\"Oxidation@M\"], \n",
+ " fix_mods = [\"Dimethyl@K\", \"Dimethyl@Any_N-term\"],\n",
+ " special_mods=[],\n",
+ " # labeling_channels={0:['Phospho@S']}\n",
+ ")\n",
+ "protein_dict = get_protein_dict()\n",
+ "fasta_lib.import_and_process_protein_dict(protein_dict)\n",
+ "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@Any_N-term').all()\n",
+ "assert fasta_lib.precursor_df.mods.str.contains('Oxidation@M').any()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Test min_var_mod_num"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fasta_lib = SpecLibFasta(\n",
+ " var_mods = [\"Oxidation@M\"], \n",
+ " fix_mods = [\"Carbamidomethyl@C\"],\n",
+ " special_mods=[],\n",
+ " min_var_mod_num=1,\n",
+ " max_var_mod_num=2,\n",
+ ")\n",
+ "protein_dict = get_protein_dict()\n",
+ "fasta_lib.import_and_process_protein_dict(protein_dict)\n",
+ "fasta_lib.calc_precursor_mz()\n",
+ "assert fasta_lib.precursor_df.mods.str.contains(\"Oxidation@M\").all()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -1992,6 +2043,25 @@
"assert fasta_lib.precursor_df.mods.str.contains('Phospho@S').any()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fasta_lib = SpecLibFasta(\n",
+ " var_mods = [\"Oxidation@M\"], \n",
+ " fix_mods = [\"Dimethyl@K\", \"Dimethyl@Any_N-term\"],\n",
+ " special_mods=[\"Phospho@S\"],\n",
+ " # labeling_channels={0:['Phospho@S']}\n",
+ ")\n",
+ "protein_dict = get_protein_dict()\n",
+ "fasta_lib.import_and_process_protein_dict(protein_dict)\n",
+ "assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@Any_N-term').all()\n",
+ "assert fasta_lib.precursor_df.mods.str.contains('Oxidation@M').any()\n",
+ "assert fasta_lib.precursor_df.mods.str.contains('Phospho@S').any()"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -2015,241 +2085,18 @@
").all()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Test labelling"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " sequence | \n",
- " protein_idxes | \n",
- " miss_cleavage | \n",
- " is_prot_nterm | \n",
- " is_prot_cterm | \n",
- " mods | \n",
- " mod_sites | \n",
- " nAA | \n",
- " labeling_channel | \n",
- " charge | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " AFGHIJK | \n",
- " 0;1 | \n",
- " 0 | \n",
- " True | \n",
- " True | \n",
- " Dimethyl@Any N-term;Dimethyl@K | \n",
- " 0;7 | \n",
- " 7 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " AFGHIJK | \n",
- " 0;1 | \n",
- " 0 | \n",
- " True | \n",
- " True | \n",
- " Dimethyl@Any N-term;Dimethyl@K | \n",
- " 0;7 | \n",
- " 7 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " AFGHIJK | \n",
- " 0;1 | \n",
- " 0 | \n",
- " True | \n",
- " True | \n",
- " Dimethyl@Any N-term;Dimethyl@K | \n",
- " 0;7 | \n",
- " 7 | \n",
- " 0 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " LMNOPQR | \n",
- " 0;1 | \n",
- " 0 | \n",
- " False | \n",
- " True | \n",
- " Dimethyl@Any N-term | \n",
- " 0 | \n",
- " 7 | \n",
- " 0 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " LMNOPQR | \n",
- " 0;1 | \n",
- " 0 | \n",
- " False | \n",
- " True | \n",
- " Dimethyl@Any N-term | \n",
- " 0 | \n",
- " 7 | \n",
- " 0 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 61 | \n",
- " ABCDESTKAFGHIJKLMNOPQR | \n",
- " 0 | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K | \n",
- " 0;8;15 | \n",
- " 22 | \n",
- " 4 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 62 | \n",
- " ABCDESTKAFGHIJKLMNOPQR | \n",
- " 0 | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K | \n",
- " 0;8;15 | \n",
- " 22 | \n",
- " 4 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 63 | \n",
- " MABCDESTKAFGHIJKLMNOPQR | \n",
- " 0 | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K | \n",
- " 0;9;16 | \n",
- " 23 | \n",
- " 4 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 64 | \n",
- " MABCDESTKAFGHIJKLMNOPQR | \n",
- " 0 | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K | \n",
- " 0;9;16 | \n",
- " 23 | \n",
- " 4 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 65 | \n",
- " MABCDESTKAFGHIJKLMNOPQR | \n",
- " 0 | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K | \n",
- " 0;9;16 | \n",
- " 23 | \n",
- " 4 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- "
\n",
- "
66 rows × 10 columns
\n",
- "
"
- ],
- "text/plain": [
- " sequence protein_idxes miss_cleavage is_prot_nterm \\\n",
- "0 AFGHIJK 0;1 0 True \n",
- "1 AFGHIJK 0;1 0 True \n",
- "2 AFGHIJK 0;1 0 True \n",
- "3 LMNOPQR 0;1 0 False \n",
- "4 LMNOPQR 0;1 0 False \n",
- ".. ... ... ... ... \n",
- "61 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
- "62 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
- "63 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
- "64 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
- "65 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
- "\n",
- " is_prot_cterm mods mod_sites \\\n",
- "0 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n",
- "1 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n",
- "2 True Dimethyl@Any N-term;Dimethyl@K 0;7 \n",
- "3 True Dimethyl@Any N-term 0 \n",
- "4 True Dimethyl@Any N-term 0 \n",
- ".. ... ... ... \n",
- "61 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;8;15 \n",
- "62 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;8;15 \n",
- "63 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n",
- "64 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n",
- "65 False Dimethyl4@Any N-term;Dimethyl4@K;Dimethyl4@K 0;9;16 \n",
- "\n",
- " nAA labeling_channel charge \n",
- "0 7 0 2 \n",
- "1 7 0 3 \n",
- "2 7 0 4 \n",
- "3 7 0 2 \n",
- "4 7 0 3 \n",
- ".. ... ... ... \n",
- "61 22 4 3 \n",
- "62 22 4 4 \n",
- "63 23 4 2 \n",
- "64 23 4 3 \n",
- "65 23 4 4 \n",
- "\n",
- "[66 rows x 10 columns]"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"fasta_lib = SpecLibFasta(\n",
" var_mods = [], fix_mods = [],\n",
@@ -2264,8 +2111,7 @@
"assert fasta_lib.precursor_df.mods.str.contains('Dimethyl4').any()\n",
"assert fasta_lib.precursor_df.mods.str.contains('Dimethyl@').sum()==fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()\n",
"assert (fasta_lib.precursor_df.labeling_channel==0).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl@').sum()\n",
- "assert (fasta_lib.precursor_df.labeling_channel==4).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()\n",
- "fasta_lib.precursor_df"
+ "assert (fasta_lib.precursor_df.labeling_channel==4).sum() == fasta_lib.precursor_df.mods.str.contains('Dimethyl4').sum()"
]
},
{
@@ -2281,6 +2127,10 @@
"display_name": "python3",
"language": "python",
"name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.9.12"
}
},
"nbformat": 4,