Skip to content

Commit

Permalink
Remove code for ignoring ambiguous spectra from MSFReader.cpp (#3111)
Browse files Browse the repository at this point in the history
Fixed "Keep ambiguous matches" not working for .pdResult files (reported by Wes)
  • Loading branch information
nickshulman authored Jan 17, 2025
1 parent 9b90309 commit abe9cec
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 96 deletions.
83 changes: 1 addition & 82 deletions pwiz_tools/BiblioSpec/src/MSFReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,37 +310,6 @@ namespace BiblioSpec

void MSFReader::collectPsms() {
sqlite3_stmt* statement;
map<int, double> alts; // peptide id --> alt score, for breaking ties when q-values are identical
vector<string> altScoreNames;
altScoreNames.push_back("XCorr");
altScoreNames.push_back("IonScore");

if (tableExists(msfFile_, "TargetPsms")) {
for (vector<string>::const_iterator i = altScoreNames.begin(); i != altScoreNames.end(); ++i) {
if (!columnExists(msfFile_, "TargetPsms", *i)) {
continue;
}
statement = getStmt("SELECT PeptideID, " + *i + " FROM TargetPsms");
while (hasNext(&statement)) {
alts[sqlite3_column_int(statement, 0)] = sqlite3_column_double(statement, 1);
}
break;
}
} else if (tableExists(msfFile_, "PeptideScores") && tableExists(msfFile_, "ProcessingNodeScores")) {
for (vector<string>::const_iterator i = altScoreNames.begin(); i != altScoreNames.end(); ++i) {
statement = getStmt(
"SELECT PeptideID, ScoreValue "
"FROM PeptideScores JOIN ProcessingNodeScores ON PeptideScores.ScoreID = ProcessingNodeScores.ScoreID "
"WHERE ScoreName = '" + *i + "'");
while (hasNext(&statement)) {
alts[sqlite3_column_int(statement, 0)] = sqlite3_column_double(statement, 1);
}
if (!alts.empty()) {
break;
}
}
}

int resultCount, pepConfidence, protConfidence;
PSM_SCORE_TYPE scoreType;
getScoreInfo(&statement, &resultCount, &scoreType, &pepConfidence, &protConfidence);
Expand All @@ -349,7 +318,6 @@ namespace BiblioSpec
ProgressIndicator progress(resultCount);

initFileNameMap();
map<string, ProcessedMsfSpectrum> processedSpectra;
ModSet modSet = ModSet(msfFile_, !versionLess(2, 2) || filtered_);
map<int, int> fileIdMap = getFileIds();

Expand Down Expand Up @@ -390,56 +358,7 @@ namespace BiblioSpec
continue;
}

auto altIter = alts.find(peptideId);
double altScore = (altIter != alts.end()) ? altIter->second : -std::numeric_limits<double>::max();

// check if we already processed a peptide that references this spectrum
auto processedSpectraSearch = processedSpectra.find(specId);
if (processedSpectraSearch != processedSpectra.end()) {
ProcessedMsfSpectrum& processed = processedSpectraSearch->second;
// not an ambiguous spectrum (yet)
if (!processed.ambiguous) {
if (qvalue > processed.qvalue || (qvalue == processed.qvalue && altScore < processed.altScore)) { // worse than other score, skip this
Verbosity::debug("Peptide %d (%s) had a worse score than another peptide (%s) "
"referencing spectrum %d (ignoring this peptide).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());
continue;
} else if (qvalue == processed.qvalue && altScore == processed.altScore) { // equal, discard other and skip this
Verbosity::debug("Peptide %d (%s) had the same score as another peptide (%s) "
"referencing spectrum %d (ignoring both peptides).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());

removeFromFileMap(processed.psm);
delete processed.psm;

processed.psm = NULL;
processed.ambiguous = true;
continue;
} else { // better than other score, discard other
Verbosity::debug("Peptide %d (%s) had a better score than another peptide (%s) "
"referencing spectrum %d (ignoring other peptide).",
peptideId, sequence.c_str(), processed.psm->unmodSeq.c_str(), specId.c_str());
removeFromFileMap(processed.psm);
curPSM_ = processed.psm;
curPSM_->mods.clear();
processed.qvalue = qvalue;
processed.altScore = altScore;
}
} else { // ambiguous spectrum, check if score is better
Verbosity::debug("Peptide %d (%s) with score %f references same spectrum as other peptides "
"that had score %f.", peptideId, sequence.c_str(), qvalue, processed.qvalue);
if (qvalue < processed.qvalue || (qvalue == processed.qvalue && altScore > processed.altScore)) {
curPSM_ = new PSM();
processedSpectraSearch->second = ProcessedMsfSpectrum(curPSM_, qvalue, altScore);
} else {
continue;
}
}
} else {
// unseen spectrum
curPSM_ = new PSM();
processedSpectra[specId] = ProcessedMsfSpectrum(curPSM_, qvalue, altScore);
}
curPSM_ = new PSM();

if (findItr->second->charge > 0)
curPSM_->charge = findItr->second->charge;
Expand Down
1 change: 1 addition & 0 deletions pwiz_tools/BiblioSpec/tests/Jamfile.jam
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ blib-test-build tinyPlink-proxl : -o : output/tinyPlink-proxl.blib : tinyPlink-p
blib-test-build tinyPeptideProphet-proxl : -o : output/tinyPeptideProphet-proxl.blib : tinyPeptideProphet-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyPeptideProphet.proxl.xml ;
blib-test-build tinyMerox-proxl : -o : output/tinyMerox-proxl.blib : tinyMerox-proxl.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tinyMerox.proxl.xml ;
blib-test-build tiny-msf : --unicode -o : output/tiny-msf.blib : tiny-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.msf ;
blib-test-build tiny-msf-keep : --unicode -o -K : output/tiny-msf-keep.blib : tiny-msf-keep.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny.msf ;
blib-test-build tiny-v2-msf : -o : output/tiny-v2-msf.blib : tiny-v2-msf.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2.msf ;
blib-test-build tiny-v2-filtered-pdResult : -o : output/tiny-v2-filtered-pdResult.blib : tiny-v2-filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/tiny-v2-filtered.pdResult ;
blib-test-build md_special_filtered-pdResult : --unicode -o : output/md_special_filtered-pdResult.blib : md_special_filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/md_special_filtered.pdResult ;
Expand Down
50 changes: 50 additions & 0 deletions pwiz_tools/BiblioSpec/tests/reference/tiny-msf-keep.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
libLSID numSpecs majorVersion minorVersion
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf-keep.blib 6 1 10
id RefSpectraID position mass
1 1 1 4.008491
2 1 2 4.008491
3 1 7 4.008491
4 2 3 4.008491
5 2 4 4.008491
6 2 7 4.008491
7 3 1 4.008491
8 3 2 4.008491
9 3 7 4.008491
10 4 1 4.008491
11 4 3 4.008491
12 4 7 4.008491
13 5 1 15.994915
14 5 8 15.994915
15 5 10 4.008491
id peptideSeq precursorMZ precursorCharge peptideModSeq prevAA nextAA copies numPeaks ionMobility collisionalCrossSectionSqA ionMobilityHighEnergyOffset ionMobilityType retentionTime startTime endTime totalIonCurrent moleculeName chemicalFormula precursorAdduct inchiKey otherKeys fileID SpecIDinFile score scoreType
1 KKLVPLK 419.81906737 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 90 0.0 0.0 0.0 0 0.32088 N/A N/A 3448.02592301 N/A N/A N/A N/A N/A 1 2 0.002 1
2 VPKKILK 419.81915892 2 VPK[+4.0]K[+4.0]ILK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.006 1
3 KKLVPLK 419.81915892 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.009 1
4 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
5 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
6 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
id fileName idFileName cutoffScore
1 D:/Elite_MAM/MITO_03.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
2 X:/Test.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
id scoreType probabilityType
0 UNKNOWN NOT_A_PROBABILITY_VALUE
1 PERCOLATOR QVALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
2 PEPTIDE PROPHET SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
3 SPECTRUM MILL NOT_A_PROBABILITY_VALUE
4 IDPICKER FDR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
5 MASCOT IONS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
6 TANDEM EXPECTATION VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
7 PROTEIN PILOT CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
8 SCAFFOLD SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
9 WATERS MSE PEPTIDE SCORE NOT_A_PROBABILITY_VALUE
10 OMSSA EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
11 PROTEIN PROSPECTOR EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
12 SEQUEST XCORR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
13 MAXQUANT SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
14 MORPHEUS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
15 MSGF+ SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
16 PEAKS CONFIDENCE SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
17 BYONIC SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
18 PEPTIDE SHAKER CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
19 GENERIC Q-VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
20 HARDKLOR IDOTP PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
23 changes: 10 additions & 13 deletions pwiz_tools/BiblioSpec/tests/reference/tiny-msf.check
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
libLSID numSpecs majorVersion minorVersion
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf.blib 5 1 10
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:tiny-msf.blib 4 1 10
id RefSpectraID position mass
1 1 1 4.008491
2 1 2 4.008491
3 1 7 4.008491
4 2 3 4.008491
5 2 4 4.008491
4 2 1 4.008491
5 2 3 4.008491
6 2 7 4.008491
7 3 1 4.008491
8 3 3 4.008491
9 3 7 4.008491
10 4 1 15.994915
11 4 8 15.994915
12 4 10 4.008491
7 3 1 15.994915
8 3 8 15.994915
9 3 10 4.008491
id peptideSeq precursorMZ precursorCharge peptideModSeq prevAA nextAA copies numPeaks ionMobility collisionalCrossSectionSqA ionMobilityHighEnergyOffset ionMobilityType retentionTime startTime endTime totalIonCurrent moleculeName chemicalFormula precursorAdduct inchiKey otherKeys fileID SpecIDinFile score scoreType
1 KKLVPLK 419.81906737 2 K[+4.0]K[+4.0]LVPLK[+4.0] - - 1 90 0.0 0.0 0.0 0 0.32088 N/A N/A 3448.02592301 N/A N/A N/A N/A N/A 1 2 0.002 1
2 VPKKILK 419.81915892 2 VPK[+4.0]K[+4.0]ILK[+4.0] - - 1 95 0.0 0.0 0.0 0 1.12651166 N/A N/A 4332.04377079 N/A N/A N/A N/A N/A 1 4 0.006 1
3 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
4 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
5 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
2 KLKVIPK 419.81921996 2 K[+4.0]LK[+4.0]VIPK[+4.0] - - 1 100 0.0 0.0 0.0 0 2.24853 N/A N/A 4803.68319678 N/A N/A N/A N/A N/A 1 6 0.002 1
3 MASASGAMAK 481.22015990 2 M[+16.0]ASASGAM[+16.0]AK[+4.0] - - 1 120 0.0 0.0 0.0 0 2.85028666 N/A N/A 7404.39071369 N/A N/A N/A N/A N/A 1 9 0.01 1
4 RPVTPKK 413.76981200 2 RPVTPKK - - 1 175 0.0 0.0 0.0 0 2.87913166 N/A N/A 16087.57 N/A N/A N/A N/A N/A 2 10 0.002 1
id fileName idFileName cutoffScore
1 D:/Elite_MAM/MITO_03.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
2 X:/Test.raw /BiblioSpec/tests/inputs/试验_tiny.msf -1.0
Expand All @@ -43,3 +39,4 @@ id scoreType probabilityType
17 BYONIC SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
18 PEPTIDE SHAKER CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
19 GENERIC Q-VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
20 HARDKLOR IDOTP PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
3 changes: 2 additions & 1 deletion pwiz_tools/Skyline/TestPerf/PerfThermoFAIMSTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ private void TestPopulateDocumentFromLibrary()
// Add all peptides
var filterMatchedPeptidesDlg = ShowDialog<FilterMatchedPeptidesDlg>(viewLibUI.AddAllPeptides);
var docBefore = WaitForProteinMetadataBackgroundLoaderCompletedUI();
using (new CheckDocumentState(1, 8433, 10882, 43484))
using (new CheckDocumentState(1, 8591, 11057, 44174))
{
RunDlg<MultiButtonMsgDlg>(filterMatchedPeptidesDlg.OkDialog, addLibraryPepsDlg =>
{
Expand All @@ -146,6 +146,7 @@ private void TestWizardBuildDocumentLibraryAndFinish(string documentFile)
RunUI(() =>
{
Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.spectra_page);
importPeptideSearchDlg.BuildPepSearchLibControl.IncludeAmbiguousMatches = true;
importPeptideSearchDlg.BuildPepSearchLibControl.AddSearchFiles(SearchFiles);
});
WaitForConditionUI(() => importPeptideSearchDlg.IsEarlyFinishButtonEnabled);
Expand Down

0 comments on commit abe9cec

Please sign in to comment.