Skip to content

Commit

Permalink
Support for building BiblioSpec libraries from MeroX proxl.xml files. (
Browse files Browse the repository at this point in the history
…#2388)

Added support for building spectral libraries from MeroX proxl.xml files (requested by Juan)

Also added support for cleavable crosslinks. If a crosslink modification has neutral losses, each neutral loss should have the chemical formula which, when subtracted from the crosslink modification formula, results in the formula of the cleaved crosslinker.
For more information, see the crosslinking tip:
https://skyline.ms/wiki/home/software/Skyline/page.view?name=Crosslinking
  • Loading branch information
nickshulman authored Nov 27, 2022
1 parent a954393 commit 4902f6a
Show file tree
Hide file tree
Showing 11 changed files with 503 additions and 10 deletions.
16 changes: 11 additions & 5 deletions pwiz_tools/BiblioSpec/src/ProxlXmlReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ void ProxlXmlReader::startElement(const XML_Char* name, const XML_Char** attr) {
analysisType_ = BYONIC_ANALYSIS;
else if (program == "plink")
analysisType_ = PLINK_ANALYIS;
else if (program == "merox")
analysisType_ = MEROX_ANALYSIS;
}
break;
case REPORTED_PEPTIDES_STATE:
Expand All @@ -127,7 +129,7 @@ void ProxlXmlReader::startElement(const XML_Char* name, const XML_Char** attr) {
break;
case REPORTED_PEPTIDE_STATE:
if (analysisType_ == UNKNOWN_ANALYSIS)
throw runtime_error("only Byonic, Percolator, and pLink ProxlXML files are supported; "
throw runtime_error("only Byonic, Percolator, pLink, and MeroX ProxlXML files are supported; "
"cannot handle search program: " + bal::join(searchPrograms_, ", "));
if (isScoreLookup_)
throw SAXHandler::EndEarlyException();
Expand Down Expand Up @@ -194,7 +196,8 @@ void ProxlXmlReader::startElement(const XML_Char* name, const XML_Char** attr) {
string score = bal::to_lower_copy(string(getRequiredAttrValue("annotation_name", attr)));
if (analysisType_ == PERCOLATOR_ANALYSIS && score == "q-value" ||
analysisType_ == BYONIC_ANALYSIS && score == "peptide abslogprob2d" ||
analysisType_ == PLINK_ANALYIS && score == "score") {
analysisType_ == PLINK_ANALYIS && score == "score" ||
analysisType_ == MEROX_ANALYSIS && score == "qvalue") {
curProxlPsm_->score = getDoubleRequiredAttrValue("value", attr);
}

Expand Down Expand Up @@ -306,9 +309,12 @@ double ProxlXmlReader::getScoreThreshold()
{
switch (analysisType_)
{
case BYONIC_ANALYSIS: return blibMaker_.getScoreThreshold(BYONIC);
case PERCOLATOR_ANALYSIS: return blibMaker_.getScoreThreshold(GENERIC_QVALUE_INPUT);
case PLINK_ANALYIS: return blibMaker_.getScoreThreshold(GENERIC_QVALUE_INPUT);
case BYONIC_ANALYSIS:
return blibMaker_.getScoreThreshold(BYONIC);
case PERCOLATOR_ANALYSIS:
case PLINK_ANALYIS:
case MEROX_ANALYSIS:
return blibMaker_.getScoreThreshold(GENERIC_QVALUE_INPUT);

default:
throw runtime_error("no case for analysisType_: " + lexical_cast<string>((int) analysisType_));
Expand Down
3 changes: 2 additions & 1 deletion pwiz_tools/BiblioSpec/src/ProxlXmlReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ class ProxlXmlReader : public BuildParser {
UNKNOWN_ANALYSIS,
PERCOLATOR_ANALYSIS,
BYONIC_ANALYSIS,
PLINK_ANALYIS
PLINK_ANALYIS,
MEROX_ANALYSIS
};

static double aaMasses_[128];
Expand Down
1 change: 1 addition & 0 deletions pwiz_tools/BiblioSpec/tests/Jamfile.jam
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ blib-test-build pride-mill : -o : output/pride-mill.blib : pride-mill.check zbui
blib-test-build tiny-proxl : --unicode -o : output/tiny-proxl.blib : tiny-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.proxl.xml ;
blib-test-build tinyByonic-proxl : -o : output/tinyByonic-proxl.blib : tinyByonic-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyByonic.proxl.xml ;
blib-test-build tinyPlink-proxl : -o : output/tinyPlink-proxl.blib : tinyPlink-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyPlink.proxl.xml ;
blib-test-build tinyMerox-proxl : -o : output/tinyMerox-proxl.blib : tinyMerox-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyMerox.proxl.xml ;
blib-test-build tiny-msf : --unicode -o : output/tiny-msf.blib : tiny-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.msf ;
blib-test-build tiny-v2-msf : -o : output/tiny-v2-msf.blib : tiny-v2-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2.msf ;
blib-test-build tiny-v2-filtered-pdResult : -o : output/tiny-v2-filtered-pdResult.blib : tiny-v2-filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2-filtered.pdResult ;
Expand Down
93 changes: 93 additions & 0 deletions pwiz_tools/BiblioSpec/tests/inputs/tinyMerox.proxl.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Minimal ProxlXML test input for the BiblioSpec ProxlXML reader.
  It declares MeroX as its only search program, defines one DSSO linker,
  and contains one crosslinked reported peptide with a single PSM.
-->
<proxl_input fasta_filename="Ribosome_for_XL_all_contained_Proteins_from_shotgun_comb.fasta">
<!-- Search program declaration and the PSM annotation types it reports. -->
<search_program_info>
<search_programs>
<search_program name="MeroX" display_name="MeroX" version="2.0.1.7">
<psm_annotation_types>
<!-- Filterable annotations: score (higher is better), qvalue and rank (lower is better). -->
<filterable_psm_annotation_types>
<filterable_psm_annotation_type name="score" description="MeroX Score" filter_direction="above" default_filter="false" default_filter_value="50"/>
<filterable_psm_annotation_type name="qvalue" description="The q-value reported by MeroX" filter_direction="below" default_filter="true" default_filter_value="0.01"/>
<filterable_psm_annotation_type name="rank" description="Rank of PSM for scan" filter_direction="below" default_filter="true" default_filter_value="1"/>
</filterable_psm_annotation_types>
<descriptive_psm_annotation_types>
<descriptive_psm_annotation_type name="m/z" description="m/z"/>
<descriptive_psm_annotation_type name="obs. mass" description="obs. mass"/>
<descriptive_psm_annotation_type name="cand. mass" description="cand. mass"/>
<descriptive_psm_annotation_type name="deviation" description="deviation"/>
</descriptive_psm_annotation_types>
</psm_annotation_types>
</search_program>
</search_programs>
<!--
  NOTE(review): "scan num." is listed as a visible annotation below, but no
  annotation type with that name is declared above. Confirm whether this is
  intentional or a leftover from the converter that produced this file.
-->
<default_visible_annotations>
<visible_psm_annotations>
<search_annotation search_program="MeroX" annotation_name="scan num."/>
<search_annotation search_program="MeroX" annotation_name="rank"/>
<search_annotation search_program="MeroX" annotation_name="score"/>
<search_annotation search_program="MeroX" annotation_name="qvalue"/>
<search_annotation search_program="MeroX" annotation_name="m/z"/>
<search_annotation search_program="MeroX" annotation_name="obs. mass"/>
<search_annotation search_program="MeroX" annotation_name="cand. mass"/>
<search_annotation search_program="MeroX" annotation_name="deviation"/>
</visible_psm_annotations>
</default_visible_annotations>
</search_program_info>
<!--
  DSSO linker definition. The cleaved masses for C3OH2 (54.010565) and
  C3O2SH4 (103.993201) add up to the intact crosslink mass C6O3SH6
  (158.003765) to within rounding; C3OSH2 (85.982636) is C3O2SH4 minus H2O.
  Both linked ends are restricted to residue K.
-->
<linkers>
<linker name="DSSO" spacer_arm_length="23.5">
<crosslink_masses>
<crosslink_mass mass="158.003765" chemical_formula="C6O3SH6"/>
<cleaved_crosslink_mass mass="54.010565" chemical_formula="C3OH2"/>
<cleaved_crosslink_mass mass="103.993201" chemical_formula="C3O2SH4"/>
<cleaved_crosslink_mass mass="85.982636" chemical_formula="C3OSH2"/>
</crosslink_masses>
<linked_ends>
<linked_end>
<residues>
<residue>K</residue>
</residues>
</linked_end>
<linked_end>
<residues>
<residue>K</residue>
</residues>
</linked_end>
</linked_ends>
</linker>
</linkers>
<!-- One crosslink: LWDKETLEK linked at position 4 to MAKTIK linked at position 3. -->
<reported_peptides>
<reported_peptide reported_peptide_string="LWDKETLEK(4)-MAKTIK(3)" type="crosslink">
<peptides>
<peptide sequence="LWDKETLEK">
<linked_positions>
<linked_position position="4"/>
</linked_positions>
</peptide>
<peptide sequence="MAKTIK">
<linked_positions>
<linked_position position="3"/>
</linked_positions>
</peptide>
</peptides>
<psms>
<!--
  Single PSM for the crosslink; linker_mass matches the DSSO crosslink_mass above.
  NOTE(review): precursor_m_z (2010.025586) equals the "obs. mass" annotation
  (2010.0256), not the "m/z" annotation (503.2619), although precursor_charge
  is 4. Confirm this is what the MeroX to ProxlXML conversion emits and what
  the reader expects.
-->
<psm scan_file_name="tinyMeroxProxl.mzML" scan_number="22237" precursor_charge="4" precursor_retention_time="3159.7627" precursor_m_z="2010.025586" linker_mass="158.003765">
<filterable_psm_annotations>
<filterable_psm_annotation search_program="MeroX" annotation_name="score" value="78"/>
<filterable_psm_annotation search_program="MeroX" annotation_name="rank" value="1"/>
<filterable_psm_annotation search_program="MeroX" annotation_name="qvalue" value="0.000000"/>
</filterable_psm_annotations>
<descriptive_psm_annotations>
<descriptive_psm_annotation search_program="MeroX" annotation_name="m/z" value="503.2619"/>
<descriptive_psm_annotation search_program="MeroX" annotation_name="obs. mass" value="2010.0256"/>
<descriptive_psm_annotation search_program="MeroX" annotation_name="cand. mass" value="2010.0286"/>
<descriptive_psm_annotation search_program="MeroX" annotation_name="deviation" value="-1.5057"/>
</descriptive_psm_annotations>
</psm>
</psms>
</reported_peptide>
</reported_peptides>
<!-- Static modification declared for residue C with a mass change of 57.021464. -->
<static_modifications>
<static_modification amino_acid="C" mass_change="57.021464"/>
</static_modifications>
<!-- Decoy entries are identified by the DEC_ prefix. -->
<decoy_labels>
<decoy_label prefix="DEC_"/>
</decoy_labels>
</proxl_input>
Loading

0 comments on commit 4902f6a

Please sign in to comment.