Skip to content

Commit

Permalink
Feature/mzMLb (#2719)
Browse files Browse the repository at this point in the history
- added mzMLb as a supported format for read/write (thanks to Andy and Ranjeet from the University of Bristol)
* added mzMLb serialization test in VendorReaderTestHarness
* fixed SHA-1 hashes being compared as floating point numbers
* disabled semaphore for vendor reader tests since we haven't done multithreaded tests for a long time
---------

Co-authored-by: Ranjeet Bhamber <[email protected]>
  • Loading branch information
chambm and Ranjeet Bhamber authored Nov 3, 2023
1 parent 2250ca7 commit 6a39d96
Show file tree
Hide file tree
Showing 32 changed files with 2,342 additions and 204 deletions.
45 changes: 43 additions & 2 deletions Jamroot.jam
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ ECHO
# tarballs - useful after your initial build
#
# --without-mz5 Build without mz5 support
# --without-mzmlb Build without mzMLb support
# --without-agilent Build without Agilent support
# --without-bruker Build without Bruker support
# --without-sciex Build without Sciex support
Expand Down Expand Up @@ -152,9 +153,16 @@ if ( "--without-binary-msdata" in [ modules.peek : ARGV ] ) # support only text
{
echo "NOTICE: building without support for binary msdata formats as requested" ;
}
else if ( "--without-mz5" in [ modules.peek : ARGV ] )
else
{
echo "NOTICE: building without mz5 support as requested" ;
if ( "--without-mz5" in [ modules.peek : ARGV ] )
{
echo "NOTICE: building without mz5 support as requested" ;
}
if ( "--without-mzmlb" in [ modules.peek : ARGV ] )
{
echo "NOTICE: building without mzMLb support as requested" ;
}
}

# do we want to skip mz5 support?
Expand All @@ -170,6 +178,29 @@ rule without-mz5 ( properties * )
}
}

# do we want to skip mzMLb support?
# Property rule: when --without-mzmlb is present on the command line (ARGV), it returns
# the <location-prefix>without-mzmlb and <define>WITHOUT_MZMLB properties. Otherwise it
# returns whatever without-binary-msdata produces for the same properties.
rule without-mzmlb ( properties * )
{
if --without-mzmlb in [ modules.peek : ARGV ]
{
return <location-prefix>without-mzmlb <define>WITHOUT_MZMLB ;
}
else
{
# defer to the broader binary-format switch
return [ without-binary-msdata $(properties) ] ;
}
}

# do we want to skip all binary file formats (vendors, mz5 and mzMLb)?
# When --without-binary-msdata is present on the command line (ARGV), returns properties
# that define both WITHOUT_MZ5 and WITHOUT_MZMLB and switch <vendor-api-support> off.
# There is no else branch, so no properties are returned explicitly when the flag is absent.
rule without-binary-msdata ( properties * )
{
if --without-binary-msdata in [ modules.peek : ARGV ]
{
return <location-prefix>without-binary-msdata <define>WITHOUT_MZ5 <define>WITHOUT_MZMLB <vendor-api-support>off ;
}
}


# do we want to skip all binary file formats (vendors and mz5)?
rule without-binary-msdata ( properties * )
{
Expand Down Expand Up @@ -259,6 +290,16 @@ rule mz5-build ( itemname )
}


# return the itemname if mzmlb build is on (the default)
# The check calls without-mzmlb with no properties; an empty result from that rule is
# treated as "mzMLb enabled". Nothing is returned explicitly when mzMLb is disabled.
rule mzmlb-build ( itemname )
{
if ( ! [ without-mzmlb ] )
{
return $(itemname) ; # do build mzmlb
}
}


if ! [ modules.peek : NT ]
{
# make msbuild targets a no-op
Expand Down
23 changes: 15 additions & 8 deletions pwiz.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,17 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<OutDir>$(SolutionDir)build-nt-x86\msvc-release</OutDir>
<PlatformToolset>v141</PlatformToolset>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
<CLRSupport>true</CLRSupport>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'" Label="Configuration">
<PlatformToolset>v143</PlatformToolset>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<CLRSupport>true</CLRSupport>
</PropertyGroup>
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<PlatformToolset>v143</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
Expand Down Expand Up @@ -66,6 +68,9 @@
<IncludePath Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|Win32'">$(SolutionDir);$(SolutionDir)libraries\boost_aux;$(SolutionDir)libraries\boost_1_76_0;$(SolutionDir)libraries\SQLite;$(SolutionDir)libraries\CSpline;$(SolutionDir)libraries\Eigen;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">$(SolutionDir);$(SolutionDir)libraries\boost_aux;$(SolutionDir)libraries\boost_1_76_0;$(SolutionDir)libraries\SQLite;$(SolutionDir)libraries\CSpline;$(SolutionDir)libraries\Eigen;$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">
<ManagedAssembly>true</ManagedAssembly>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|Win32'">
<BuildLog />
<ClCompile>
Expand All @@ -76,8 +81,8 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">
<BuildLog />
<ClCompile>
<PreprocessorDefinitions>PWIZ_READER_THERMO;PWIZ_READER_AGILENT;PWIZ_READER_BRUKER;PWIZ_READER_WATERS;PWIZ_READER_ABI;PWIZ_READER_ABI_T2D;PWIZ_READER_SHIMADZU;PWIZ_READER_UNIFI;WIN32;USE_RAW_PTR;PWIZ_READER_BRUKER_WITH_COMPASSXTRACT;PWIZ_READER_UNIFI;PWIZ_READER_UIMF;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAsManaged>true</CompileAsManaged>
<PreprocessorDefinitions>PWIZ_READER_THERMO;PWIZ_READER_AGILENT;PWIZ_READER_BRUKER;PWIZ_READER_WATERS;PWIZ_READER_ABI;PWIZ_READER_ABI_T2D;PWIZ_READER_SHIMADZU;PWIZ_READER_UNIFI;WIN32;PWIZ_READER_BRUKER_WITH_COMPASSXTRACT;PWIZ_READER_UNIFI;PWIZ_READER_UIMF;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
Expand Down Expand Up @@ -120,7 +125,6 @@
<ClCompile Include="pwiz\analysis\demux\DemuxSolver.cpp" />
<ClCompile Include="pwiz\analysis\demux\DemuxSolverTest.cpp" />
<ClCompile Include="pwiz\analysis\demux\DemuxTestData.cpp" />
<ClCompile Include="pwiz\analysis\demux\FSDemux.cpp" />
<ClCompile Include="pwiz\analysis\demux\MatrixIO.cpp" />
<ClCompile Include="pwiz\analysis\demux\MatrixIOTest.cpp" />
<ClCompile Include="pwiz\analysis\demux\MSXDemultiplexer.cpp" />
Expand Down Expand Up @@ -286,9 +290,7 @@
<ClCompile Include="pwiz\data\identdata\MascotReader.cpp" />
<ClCompile Include="pwiz\data\identdata\MascotReader_dummy.cpp" />
<ClCompile Include="pwiz\data\identdata\Pep2MzIdent.cpp" />
<ClCompile Include="pwiz\data\identdata\Pep2MzIdentTest.cpp" />
<ClCompile Include="pwiz\data\identdata\Reader.cpp" />
<ClCompile Include="pwiz\data\identdata\ReaderTest.cpp" />
<ClCompile Include="pwiz\data\identdata\References.cpp" />
<ClCompile Include="pwiz\data\identdata\ReferencesTest.cpp" />
<ClCompile Include="pwiz\data\identdata\Serializer_protXML.cpp" />
Expand Down Expand Up @@ -342,6 +344,7 @@
<ClCompile Include="pwiz\data\msdata\mz5\ReferenceRead_mz5.cpp" />
<ClCompile Include="pwiz\data\msdata\mz5\ReferenceWrite_mz5.cpp" />
<ClCompile Include="pwiz\data\msdata\mz5\Translator_mz5.cpp" />
<ClCompile Include="pwiz\data\msdata\mzmlb\Connection_mzMLb.cpp" />
<ClCompile Include="pwiz\data\msdata\RAMPAdapter.cpp" />
<ClCompile Include="pwiz\data\msdata\RAMPAdapterTest.cpp" />
<ClCompile Include="pwiz\data\msdata\Reader.cpp" />
Expand Down Expand Up @@ -852,14 +855,17 @@
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\thermo\RawFile.cpp">
<ForcedUsingFiles Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">ThermoFisher.CommonCore.RawFileReader.dll;ThermoFisher.CommonCore.Data.dll</ForcedUsingFiles>
<AdditionalUsingDirectories Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">pwiz_aux\msrc\utility\vendor_api\thermo</AdditionalUsingDirectories>
<CompileAsManaged Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">true</CompileAsManaged>
</ClCompile>
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\thermo\RawFileTest.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\thermo\RawFileValues.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\thermo\ScanFilter.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\thermo\ScanFilterTest.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\UIMF\UIMFReader.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\UIMF\UIMFReaderTest.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\UNIFI\UnifiData.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\UNIFI\UnifiData.cpp">
<ExternalWarningLevel Condition="'$(Configuration)|$(Platform)'=='BrowseOnly|x64'">Level1</ExternalWarningLevel>
</ClCompile>
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\UNIFI\UnifiDataTest.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\Waters\RawData.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\Waters\RawDataTest.cpp" />
Expand Down Expand Up @@ -960,6 +966,7 @@
<ClInclude Include="pwiz\data\msdata\mz5\ReferenceRead_mz5.hpp" />
<ClInclude Include="pwiz\data\msdata\mz5\ReferenceWrite_mz5.hpp" />
<ClInclude Include="pwiz\data\msdata\mz5\Translator_mz5.hpp" />
<ClInclude Include="pwiz\data\msdata\mzmlb\Connection_mzMLb.hpp" />
<ClInclude Include="pwiz\data\msdata\Serializer_MSn.hpp" />
<ClInclude Include="pwiz\data\msdata\Serializer_mz5.hpp" />
<ClInclude Include="pwiz\data\msdata\SpectrumList_MSn.hpp" />
Expand Down
5 changes: 2 additions & 3 deletions pwiz.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@
<ClCompile Include="pwiz\data\identdata\MascotReader.cpp" />
<ClCompile Include="pwiz\data\identdata\MascotReader_dummy.cpp" />
<ClCompile Include="pwiz\data\identdata\Pep2MzIdent.cpp" />
<ClCompile Include="pwiz\data\identdata\Pep2MzIdentTest.cpp" />
<ClCompile Include="pwiz\data\identdata\Reader.cpp" />
<ClCompile Include="pwiz\data\identdata\ReaderTest.cpp" />
<ClCompile Include="pwiz\data\identdata\References.cpp" />
<ClCompile Include="pwiz\data\identdata\ReferencesTest.cpp" />
<ClCompile Include="pwiz\data\identdata\Serializer_protXML.cpp" />
Expand Down Expand Up @@ -526,7 +524,6 @@
<ClCompile Include="pwiz\analysis\demux\DemuxSolver.cpp" />
<ClCompile Include="pwiz\analysis\demux\DemuxSolverTest.cpp" />
<ClCompile Include="pwiz\analysis\demux\DemuxTestData.cpp" />
<ClCompile Include="pwiz\analysis\demux\FSDemux.cpp" />
<ClCompile Include="pwiz\analysis\demux\MatrixIO.cpp" />
<ClCompile Include="pwiz\analysis\demux\MatrixIOTest.cpp" />
<ClCompile Include="pwiz\analysis\demux\MSXDemultiplexer.cpp" />
Expand Down Expand Up @@ -593,6 +590,7 @@
<ClCompile Include="pwiz_tools\commandline\idcat.cpp" />
<ClCompile Include="pwiz_aux\msrc\utility\vendor_api\Bruker\TsfData.cpp" />
<ClCompile Include="libraries\SQLite\sqlite3pp.cpp" />
<ClCompile Include="pwiz\data\msdata\mzmlb\Connection_mzMLb.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="libraries\boost_aux\boost\nowide\args.hpp" />
Expand Down Expand Up @@ -1066,6 +1064,7 @@
<ClInclude Include="pwiz\data\msdata\mz5\Configuration_mz5.hpp" />
<ClInclude Include="pwiz_aux\msrc\utility\vendor_api\Bruker\TsfData.hpp" />
<ClInclude Include="libraries\SQLite\sqlite3pp.h" />
<ClInclude Include="pwiz\data\msdata\mzmlb\Connection_mzMLb.hpp" />
</ItemGroup>
<ItemGroup>
<None Include="Jamroot.jam" />
Expand Down
12 changes: 8 additions & 4 deletions pwiz/data/common/diff_std.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ void diff(const CVParam& a,
// lexical_cast<int> is happy to read "1.1" as "1" - and "1.9" the same way
if ((std::string::npos == a.value.find_first_of(".eE")) &&
(std::string::npos == b.value.find_first_of(".eE"))) // any float-like chars?
{
{
bool successA, successB;
// compare as ints if possible
int ia = lexical_cast<int>(a.value, successA);
Expand All @@ -165,7 +165,7 @@ void diff(const CVParam& a,
asString = true;
else
{
if (ia != ib)
if (ia != ib)
{
a_b.value = lexical_cast<string>(ia);
b_a.value = lexical_cast<string>(ib);
Expand All @@ -174,7 +174,7 @@ void diff(const CVParam& a,
{
if ((std::string::npos == a.value.find_first_not_of("0123456789")) &&
(std::string::npos == b.value.find_first_not_of("0123456789")))
{
{
a_b.value.clear();
b_a.value.clear();
}
Expand All @@ -185,7 +185,8 @@ void diff(const CVParam& a,
}
}
}
else
else if (std::string::npos == a.value.find_first_not_of("01234567890.e-") &&
std::string::npos == b.value.find_first_not_of("01234567890.e-"))
{
// use precision to compare floating point values
bool successA, successB;
Expand All @@ -209,6 +210,9 @@ void diff(const CVParam& a,
}
}
}
else
asString = true;

if (asString)
{
diff_string(a.value, b.value, a_b.value, b_a.value);
Expand Down
94 changes: 67 additions & 27 deletions pwiz/data/msdata/BinaryDataEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,10 @@ void BinaryDataEncoder::Impl::encode(const double* data, size_t dataSize, std::s
}
}
if (n>=0)
{
config_.numpress = Numpress_None; // excessive error, don't numpress
if (config_.format == Format_MzMLb) return;
}
else
byteBuffer = reinterpret_cast<const void*>(&numpressed[0]);
} catch (int e) {
Expand Down Expand Up @@ -311,27 +314,52 @@ void BinaryDataEncoder::Impl::encode(const double* data, size_t dataSize, std::s
}
}

// Base64 encoding

result.resize(Base64::binaryToTextSize(byteCount));

// std::string storage is not guaranteed contiguous in older C++ standards,
// and on long strings this has caused problems in the wild. So test for
// actual contiguousness, and fall back to std::vector if needed
// thx Johan Teleman
size_t textSize;
char *first = &result[0];
char *last = &result[result.size()-1];
if ((int)result.size() == 1+(last-first)) // pointer math agrees with [] operator
textSize = Base64::binaryToText(byteBuffer, byteCount, &result[0]);
else
if (config_.format == Format_MzMLb)
{
std::vector<char> contig; // work in this contiguous memory then copy to string
contig.resize(result.size());
textSize = Base64::binaryToText(byteBuffer, byteCount, &contig[0]);
copy(contig.begin(), contig.end(), result.begin());
// no base64 encoding as storing as binary in HDF5

result.resize(byteCount);

// std::string storage is not guaranteed contiguous in older C++ standards,
// and on long strings this has caused problems in the wild. So test for
// actual contiguousness, and fall back to std::vector if needed
// thx Johan Teleman
char *first = &result[0];
char *last = &result[result.size() - 1];
if ((int)result.size() == 1 + (last - first)) // pointer math agrees with [] operator
memcpy(&result[0], byteBuffer, byteCount);
else
{
std::vector<char> contig; // work in this contiguous memory then copy to string
contig.resize(result.size());
memcpy(&contig[0], byteBuffer, byteCount);
copy(contig.begin(), contig.end(), result.begin());
}
}
else
{
// Base64 encoding

result.resize(Base64::binaryToTextSize(byteCount));

// std::string storage is not guaranteed contiguous in older C++ standards,
// and on long strings this has caused problems in the wild. So test for
// actual contiguousness, and fall back to std::vector if needed
// thx Johan Teleman
size_t textSize;
char *first = &result[0];
char *last = &result[result.size() - 1];
if ((int)result.size() == 1 + (last - first)) // pointer math agrees with [] operator
textSize = Base64::binaryToText(byteBuffer, byteCount, &result[0]);
else
{
std::vector<char> contig; // work in this contiguous memory then copy to string
contig.resize(result.size());
textSize = Base64::binaryToText(byteBuffer, byteCount, &contig[0]);
copy(contig.begin(), contig.end(), result.begin());
}
result.resize(textSize);
}
result.resize(textSize);

if (binaryByteCount != NULL)
*binaryByteCount = byteCount; // size before base64 encoding
Expand Down Expand Up @@ -467,18 +495,30 @@ void BinaryDataEncoder::Impl::decode(const char *encodedData, size_t length, pwi

if (!encodedData || !length) return;

// Base64 decoding

vector<unsigned char> binary(Base64::textToBinarySize(length));
size_t binarySize = Base64::textToBinary(encodedData, length, &binary[0]);
binary.resize(binarySize);

// buffer abstractions

void* byteBuffer = &binary[0];
size_t byteCount = binarySize;
vector<unsigned char> binary;
void* byteBuffer;
size_t byteCount;
size_t initialSize;

if (config_.format == Format_MzMLb)
{
byteBuffer = (void*) encodedData;
byteCount = length;
}
else
{
// Base64 decoding

binary.resize(Base64::textToBinarySize(length));
size_t binarySize = Base64::textToBinary(encodedData, length, &binary[0]);
binary.resize(binarySize);

byteBuffer = &binary[0];
byteCount = binarySize;
}

// decompression

vector<unsigned char> decompressed;
Expand Down
Loading

0 comments on commit 6a39d96

Please sign in to comment.