Skip to content

Commit

Permalink
Add support for reading chemical formulas with numeric subscripts, e.g. C₁₀H₁₄N₅O₈P
Browse files Browse the repository at this point in the history

reported anonymously via exception web
  • Loading branch information
bspratt authored Nov 14, 2023
1 parent 6096a59 commit 99e8470
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
12 changes: 11 additions & 1 deletion pwiz_tools/Shared/CommonUtil/Chemistry/Formula.cs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,17 @@ void CloseOutCurrentElement()
// Allow apostrophe for heavy isotopes (e.g. C' for 13C)
else if (!Char.IsWhiteSpace(ch))
{
currentElement = currentElement + ch;
// Watch out for unicode subscripts
var n = ch - '\u2080'; // Subscript zero '₀'
if (n >= 0 && n <= 9)
{
currentQuantity = (currentQuantity ?? 0) * 10 + n;
}
else
{
// Presumably part of an element description
currentElement = currentElement + ch;
}
}
}
CloseOutCurrentElement(); // Finish up the last element
Expand Down
3 changes: 2 additions & 1 deletion pwiz_tools/Skyline/Test/MassCalcTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,10 @@ public void TestGetIonFormula()
sequenceMassCalc.AddStaticModifications(new[] { labelLaK });
Assert.AreEqual(294.033, sequenceMassCalc.GetPrecursorMass("K"), .1);
Assert.AreEqual("C'6H14LaN'2O2", sequenceMassCalc.GetMolecularFormula("K").ToString());

// Check our ability to handle strangely constructed chemical formulas, and preserve nonstandard order
Assert.AreEqual("C12H9S2", ParsedMolecule.Create("C12H9S2P0").ToString()); // P0 is weird, drop it
Assert.AreEqual("C12H9S2", ParsedMolecule.Create("C\u2081\u2082H\u2089S\u2082P\u2080").ToString()); // Same thing, unicode subscripts
Assert.AreEqual("C12H9S2P1", ParsedMolecule.Create("C12H9S2P1").ToString()); // P1 is weird, but preserve it
Assert.AreEqual("H9C12P", ParsedMolecule.Create("H9C12S0P").ToString()); // S0 is weird, and not at end
}
Expand Down
6 changes: 6 additions & 0 deletions pwiz_tools/Skyline/TestFunctional/PasteMoleculesTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ private static string ToLocalText(string text)
const string caffeineSMILES = "Cn1cnc2n(C)c(=O)n(C)c(=O)c12";
const string caffeineKEGG = "C07481";
const string caffeineFormula = "C8H10N4O2";
const string caffeineFormulaUnicode = "C\u2088H\u2081\u2080N\u2084O\u2082"; // Unicode subscripts
const string caffeineFragment = "C6H5N2O"; // Not really a known fragment of caffeine

const double precursorMzAtZNeg2 = 96.0329118;
Expand Down Expand Up @@ -645,6 +646,11 @@ private void TestLegitimatePaste(string line2start, string[] fullColumnOrder)
{
docEmpty = NewDocument();
line1 = BuildTestLine(imTypeIsDrift);
if (imTypeIsDrift)
{
// Nothing to do with imType, just want to alternate styles here
line1 = line1.Replace(caffeineFormula+"\t", caffeineFormulaUnicode + "\t"); // Test with unicode subscript numbers
}
var expectedIM = imTypeIsDrift ? precursorDT : compensationVoltage;
double? expectedCV = imTypeIsDrift ? (double?)null : compensationVoltage;
var expectedTypeIM = imTypeIsDrift ? eIonMobilityUnits.drift_time_msec : eIonMobilityUnits.compensation_V;
Expand Down

0 comments on commit 99e8470

Please sign in to comment.