Skip to content

Commit

Permalink
- added support for Proteome Discoverer 3.1 in BiblioSpec: separate pdResultDetails file, reading charge from PSM table instead of MSnSpectrumInfo, RAW file name in WorkflowInputFiles
Browse files Browse the repository at this point in the history
  • Loading branch information
chambm committed Nov 6, 2023
1 parent 9f74468 commit d2ae15b
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 5 deletions.
43 changes: 38 additions & 5 deletions pwiz_tools/BiblioSpec/src/MSFReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,17 @@ namespace BiblioSpec
sqlite3_stmt* statement = NULL; // id, rt, mass, charge, peaks
bool hasCompensationVoltage = false;

if (filtered_ || !versionLess(2, 2)) { // < 2.2 and filtered, or 2.2+
if (!versionLess(3, 1) && bfs::exists(bfs::change_extension(bfs::path(msfName_), ".pdResultDetails")))
{
bfs::path pdResultDetails = bfs::change_extension(bfs::path(msfName_), ".pdResultDetails");
sqlite3_exec(msfFile_, ("ATTACH DATABASE '" + pdResultDetails.generic_string() + "' AS details").c_str(), nullptr, nullptr, nullptr);
specCount = getRowCount("MSnSpectrumInfo WHERE SpectrumID IN (SELECT DISTINCT MSnSpectrumInfoSpectrumID FROM TargetPsmsMSnSpectrumInfo)");
statement = getStmt(
std::string("SELECT SpectrumID, MSnSpectrumInfo.RetentionTime, Mass, Charge, Spectrum, MSnSpectrumInfo.WorkflowID") +
(hasCompensationVoltage ? ", CompVoltageV " : " ") +
"FROM MSnSpectrumInfo "
"JOIN details.MassSpectrumItems ON MSnSpectrumInfo.SpectrumID = details.MassSpectrumItems.ID AND MSnSpectrumInfo.WorkflowID = details.MassSpectrumItems.WorkflowID");
} else if (filtered_ || !versionLess(2, 2)) { // < 2.2 and filtered, or 2.2+
specCount = getRowCount("MSnSpectrumInfo WHERE SpectrumID IN (SELECT DISTINCT MSnSpectrumInfoSpectrumID FROM TargetPsmsMSnSpectrumInfo)");
hasCompensationVoltage = columnExists(msfFile_, "MSnSpectrumInfo", "CompVoltageV");
statement = getStmt(
Expand Down Expand Up @@ -173,7 +183,10 @@ namespace BiblioSpec
specData->id = sqlite3_column_int(statement, 0);
specData->retentionTime = sqlite3_column_double(statement, 1);
specData->charge = sqlite3_column_int(statement, 3);
specData->mz = (mass + (PROTON_MASS * specData->charge)) / specData->charge;
if (specData->charge > 0)
specData->mz = (mass + (PROTON_MASS * specData->charge)) / specData->charge;
else
specData->mz = mass;
if (hasCompensationVoltage)
{
specData->ionMobilityType = IONMOBILITY_COMPENSATION_V;
Expand Down Expand Up @@ -369,6 +382,7 @@ namespace BiblioSpec
string specId = uniqueSpecId(sqlite3_column_int(statement, 1), sqlite3_column_int(statement, 4));
string sequence = lexical_cast<string>(sqlite3_column_text(statement, 2));
double qvalue = pepConfidence <= 0 ? sqlite3_column_double(statement, 3) : 0;
int charge = sqlite3_column_int(statement, 7);

auto findItr = spectra_.find(specId);
if (findItr == spectra_.end()) {
Expand Down Expand Up @@ -427,7 +441,14 @@ namespace BiblioSpec
processedSpectra[specId] = ProcessedMsfSpectrum(curPSM_, qvalue, altScore);
}

curPSM_->charge = findItr->second->charge;
if (findItr->second->charge > 0)
curPSM_->charge = findItr->second->charge;
else
{
// if the spectrum's charge was 0, its mz field currently holds the mass rather than m/z, so convert it to m/z using the charge from the PSM
curPSM_->charge = charge;
findItr->second->mz = (findItr->second->mz + (PROTON_MASS * charge)) / charge;
}
curPSM_->unmodSeq = sequence;
curPSM_->mods = versionLess(2, 2) && !filtered_
? modSet.getMods(peptideId)
Expand All @@ -445,7 +466,7 @@ namespace BiblioSpec
psmFileName = fileIdToName(fileIdMapAccess->second);
fileIdMap.erase(fileIdMapAccess);
} else {
psmFileName = lexical_cast<string>(sqlite3_column_text(statement, 5));
psmFileName = bfs::path(lexical_cast<string>(sqlite3_column_text(statement, 5))).filename().string();
}

// filename
Expand Down Expand Up @@ -531,6 +552,17 @@ namespace BiblioSpec
}
}

string filenameCol;
string filenameJoin;
if (columnExists(msfFile_, "TargetPsms", "SpectrumFileId"))
{
filenameCol = "wf.FileName";
filenameJoin = " JOIN WorkflowInputfiles wf ON psms.SpectrumFileId = wf.FileId";
} else
{
filenameCol = "psms.SpectrumFileName";
}

string qValueCol;
string qValueWhere;
if (!hasQValues()) {
Expand Down Expand Up @@ -559,8 +591,9 @@ namespace BiblioSpec
}
stmtStr =
"SELECT psms.PeptideID, psm_spec.MSnSpectrumInfoSpectrumID, psms.Sequence, " +
qValueCol + ", psms.WorkflowID, psms.SpectrumFileName" + (*outProtConfidence > 0 ? ", prots.ProteinFDRConfidence" : "") +
qValueCol + ", psms.WorkflowID, " + filenameCol + (*outProtConfidence > 0 ? ", prots.ProteinFDRConfidence" : ", 0") + ", psms.Charge" +
" FROM TargetPsms psms"
+ filenameJoin +
" JOIN TargetPsmsMSnSpectrumInfo psm_spec ON psms.PeptideID = psm_spec.TargetPsmsPeptideID"
" AND psm_spec.TargetPsmsWorkflowID = psms.WorkflowID";
countStr =
Expand Down
2 changes: 2 additions & 0 deletions pwiz_tools/BiblioSpec/tests/Jamfile.jam
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ blib-test-build tiny-v2-filtered-pdResult : -o : output/tiny-v2-filtered-pdResul
blib-test-build md_special_filtered-pdResult : --unicode -o : output/md_special_filtered-pdResult.blib : md_special_filtered-pdResult.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/md_special_filtered.pdResult ;
blib-test-build example-pdResult-confidence3 : -o [email protected] : output/example-pdResult-confidence3.blib : example-pdResult-confidence3.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/example.pdResult ;
blib-test-build example-pdResult-numeric : -o [email protected] : output/example-pdResult-numeric.blib : example-pdResult-numeric.check zbuild.skip-lines : $(TEST_INPUTS_PATH)/example.pdResult ;
blib-test-build-basic pd-3_1 : -o [email protected] : $(TEST_INPUTS_PATH)/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult ;

blib-test-build pilot : --unicode -o : output/pilot.blib : pilot.check : $(TEST_INPUTS_PATH)/MB1_98_03.group.xml ;
blib-test-build pilot-mzid : --unicode -o : output/pilot-mzid.blib : pilot-mzid.check : $(TEST_INPUTS_PATH)/ProtPilotTest.mzid ;

Expand Down
Binary file not shown.
Binary file not shown.
56 changes: 56 additions & 0 deletions pwiz_tools/BiblioSpec/tests/reference/pd-3_1.check
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
libLSID numSpecs majorVersion minorVersion
urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:pd-3_1.blib 10 1 10
id RefSpectraID position mass
1 1 1 304.207146
2 2 1 304.207146
3 3 1 361.22861
4 3 7 57.021464
5 4 1 304.207146
6 4 10 304.207146
7 5 1 304.207146
8 6 1 304.207146
9 7 1 304.207146
10 8 1 304.207146
11 9 1 304.207146
12 9 8 304.207146
13 10 1 304.207146
14 10 10 15.994915
15 10 12 304.207146
id peptideSeq precursorMZ precursorCharge peptideModSeq prevAA nextAA copies numPeaks ionMobility collisionalCrossSectionSqA ionMobilityHighEnergyOffset ionMobilityType retentionTime startTime endTime totalIonCurrent moleculeName chemicalFormula precursorAdduct inchiKey otherKeys fileID SpecIDinFile score scoreType
1 ASLTAAR 249.99881409 2 A[+304.2]SLTAAR - - 1 279 0.0 0.0 0.0 0 10.00001593 N/A N/A 448944.42 N/A N/A N/A N/A N/A 1 174.67597 0.10103673 1
2 SLSGTAR 250.49901245 2 S[+304.2]LSGTAR - - 1 287 0.0 0.0 0.0 0 10.00029557 N/A N/A 524141.05 N/A N/A N/A N/A N/A 1 174.67598 0.12901614 1
3 CHTIMNCTR 168.00210266 3 C[+361.2]HTIMNC[+57.0]TR - - 1 258 0.0 0.0 0.0 0 10.00099629 N/A N/A 1117976.72 N/A N/A N/A N/A N/A 1 174.67600 0.12582744 1
4 DNDNDDDDDK 200.68361104 3 D[+304.2]NDNDDDDDK[+304.2] - - 1 283 0.0 0.0 0.0 0 10.00065932 N/A N/A 1243290.36 N/A N/A N/A N/A N/A 2 174.305486 0.45760199 1
5 GQEQLNDQR 349.54405640 2 G[+304.2]QEQLNDQR - - 1 255 0.0 0.0 0.0 0 10.00002682 N/A N/A 42187.50 N/A N/A N/A N/A N/A 3 174.547180 0.45065452 1
6 EQEQQTEQA 350.54451416 2 E[+304.2]QEQQTEQA - - 1 282 0.0 0.0 0.0 0 10.00067368 N/A N/A 112990.05 N/A N/A N/A N/A N/A 3 174.547182 0.00018705 1
7 EQEQQTEQA 351.04472779 2 E[+304.2]QEQQTEQA - - 1 285 0.0 0.0 0.0 0 10.00099345 N/A N/A 97466.69 N/A N/A N/A N/A N/A 3 174.547183 0.00926426 1
8 QNSDRNDSR 351.54497193 2 Q[+304.2]NSDRNDSR - - 1 286 0.0 0.0 0.0 0 10.00131399 N/A N/A 77916.64 N/A N/A N/A N/A N/A 3 174.547184 0.49736129 1
9 EQVHPLHK 400.56721924 2 E[+304.2]QVHPLHK[+304.2] - - 1 131 0.0 0.0 0.0 0 10.00060151 N/A N/A 14828.10 N/A N/A N/A N/A N/A 4 174.789774 0.49219457 1
10 SGESSNSDAMGK 450.08977173 2 S[+304.2]GESSNSDAM[+16.0]GK[+304.2] - - 1 139 0.0 0.0 0.0 0 10.00024502 N/A N/A 13120.38 N/A N/A N/A N/A N/A 5 174.1027885 0.24223713 1
id fileName idFileName cutoffScore
1 230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey1.raw /BiblioSpec/tests/inputs/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult 0.5
2 230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey2.raw /BiblioSpec/tests/inputs/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult 0.5
3 230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey3.raw /BiblioSpec/tests/inputs/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult 0.5
4 230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey4.raw /BiblioSpec/tests/inputs/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult 0.5
5 230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey5.raw /BiblioSpec/tests/inputs/230807_P1_Neo_ES904_TMTProPrecision_1ug_DIA1Th_HCD30_Survey.pdResult 0.5
id scoreType probabilityType
0 UNKNOWN NOT_A_PROBABILITY_VALUE
1 PERCOLATOR QVALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
2 PEPTIDE PROPHET SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
3 SPECTRUM MILL NOT_A_PROBABILITY_VALUE
4 IDPICKER FDR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
5 MASCOT IONS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
6 TANDEM EXPECTATION VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
7 PROTEIN PILOT CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
8 SCAFFOLD SOMETHING PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
9 WATERS MSE PEPTIDE SCORE NOT_A_PROBABILITY_VALUE
10 OMSSA EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
11 PROTEIN PROSPECTOR EXPECTATION SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
12 SEQUEST XCORR PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
13 MAXQUANT SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
14 MORPHEUS SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
15 MSGF+ SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
16 PEAKS CONFIDENCE SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
17 BYONIC SCORE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT
18 PEPTIDE SHAKER CONFIDENCE PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT
19 GENERIC Q-VALUE PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT

0 comments on commit d2ae15b

Please sign in to comment.