diff --git a/scopehal/Oscilloscope.cpp b/scopehal/Oscilloscope.cpp index 877825a0..0268df0b 100644 --- a/scopehal/Oscilloscope.cpp +++ b/scopehal/Oscilloscope.cpp @@ -1277,3 +1277,282 @@ void Oscilloscope::Convert16BitSamplesAVX512F(float* pout, int16_t* pin, float g } #endif /* __x86_64__ */ +/** + @brief Converts Unsigned 16-bit ADC samples to floating point + */ +void Oscilloscope::ConvertUnsigned16BitSamples(float* pout, uint16_t* pin, float gain, float offset, size_t count) +{ + //Divide large waveforms (>1M points) into blocks and multithread them + //TODO: tune split + if(count > 1000000) + { + //Round blocks to multiples of 64 samples for clean vectorization + size_t numblocks = omp_get_max_threads(); + size_t lastblock = numblocks - 1; + size_t blocksize = count / numblocks; + blocksize = blocksize - (blocksize % 64); + + #pragma omp parallel for + for(size_t i=0; i(pin + k)); + __m256i raw_samples2 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 16)); + + //Extract the low and high halves (8 samples each) from the input blocks + __m128i block0_u16 = _mm256_extracti128_si256(raw_samples1, 0); + __m128i block1_u16 = _mm256_extracti128_si256(raw_samples1, 1); + __m128i block2_u16 = _mm256_extracti128_si256(raw_samples2, 0); + __m128i block3_u16 = _mm256_extracti128_si256(raw_samples2, 1); + + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors + __m256i block0_i32 = _mm256_cvtepu16_epi32(block0_u16); + __m256i block1_i32 = _mm256_cvtepu16_epi32(block1_u16); + __m256i block2_i32 = _mm256_cvtepu16_epi32(block2_u16); + __m256i block3_i32 = _mm256_cvtepu16_epi32(block3_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m256 block0_float = _mm256_cvtepi32_ps(block0_i32); + __m256 block1_float = _mm256_cvtepi32_ps(block1_i32); + __m256 block2_float = _mm256_cvtepi32_ps(block2_i32); + __m256 block3_float = _mm256_cvtepi32_ps(block3_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm256_mul_ps(block0_float, gains); + block1_float = _mm256_mul_ps(block1_float, gains); + block2_float = _mm256_mul_ps(block2_float, gains); + block3_float = _mm256_mul_ps(block3_float, gains); + + block0_float = _mm256_sub_ps(block0_float, offsets); + block1_float = _mm256_sub_ps(block1_float, offsets); + block2_float = _mm256_sub_ps(block2_float, offsets); + block3_float = _mm256_sub_ps(block3_float, offsets); + + //All done, store back to the output buffer + _mm256_store_ps(pout + k, block0_float); + _mm256_store_ps(pout + k + 8, block1_float); + _mm256_store_ps(pout + k + 16, block2_float); + _mm256_store_ps(pout + k + 24, block3_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; k(pin + k)); + __m256i raw_samples2 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 16)); + __m256i raw_samples3 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 32)); + __m256i raw_samples4 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 48)); + + //Extract the low and high halves (8 samples each) from the input blocks + __m128i block0_u16 = _mm256_extracti128_si256(raw_samples1, 0); + __m128i block1_u16 = _mm256_extracti128_si256(raw_samples1, 1); + __m128i block2_u16 = _mm256_extracti128_si256(raw_samples2, 0); + __m128i block3_u16 = _mm256_extracti128_si256(raw_samples2, 1); + __m128i block4_u16 = _mm256_extracti128_si256(raw_samples3, 0); + __m128i block5_u16 = _mm256_extracti128_si256(raw_samples3, 1); + __m128i block6_u16 = _mm256_extracti128_si256(raw_samples4, 0); + __m128i block7_u16 = _mm256_extracti128_si256(raw_samples4, 1); + + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors + __m256i block0_i32 = _mm256_cvtepu16_epi32(block0_u16); + __m256i block1_i32 = _mm256_cvtepu16_epi32(block1_u16); + __m256i block2_i32 = _mm256_cvtepu16_epi32(block2_u16); + __m256i block3_i32 = _mm256_cvtepu16_epi32(block3_u16); + __m256i block4_i32 = _mm256_cvtepu16_epi32(block4_u16); + __m256i block5_i32 = _mm256_cvtepu16_epi32(block5_u16); + __m256i block6_i32 = _mm256_cvtepu16_epi32(block6_u16); + __m256i block7_i32 = _mm256_cvtepu16_epi32(block7_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m256 block0_float = _mm256_cvtepi32_ps(block0_i32); + __m256 block1_float = _mm256_cvtepi32_ps(block1_i32); + __m256 block2_float = _mm256_cvtepi32_ps(block2_i32); + __m256 block3_float = _mm256_cvtepi32_ps(block3_i32); + __m256 block4_float = _mm256_cvtepi32_ps(block4_i32); + __m256 block5_float = _mm256_cvtepi32_ps(block5_i32); + __m256 block6_float = _mm256_cvtepi32_ps(block6_i32); + __m256 block7_float = _mm256_cvtepi32_ps(block7_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm256_fmsub_ps(block0_float, gains, offsets); + block1_float = _mm256_fmsub_ps(block1_float, gains, offsets); + block2_float = _mm256_fmsub_ps(block2_float, gains, offsets); + block3_float = _mm256_fmsub_ps(block3_float, gains, offsets); + block4_float = _mm256_fmsub_ps(block4_float, gains, offsets); + block5_float = _mm256_fmsub_ps(block5_float, gains, offsets); + block6_float = _mm256_fmsub_ps(block6_float, gains, offsets); + block7_float = _mm256_fmsub_ps(block7_float, gains, offsets); + + //All done, store back to the output buffer + _mm256_store_ps(pout + k, block0_float); + _mm256_store_ps(pout + k + 8, block1_float); + _mm256_store_ps(pout + k + 16, block2_float); + _mm256_store_ps(pout + k + 24, block3_float); + + _mm256_store_ps(pout + k + 32, block4_float); + _mm256_store_ps(pout + k + 40, block5_float); + _mm256_store_ps(pout + k + 48, block6_float); + _mm256_store_ps(pout + k + 56, block7_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; k(pin + k)); + __m512i raw_samples2 = _mm512_loadu_si512(reinterpret_cast<__m512i*>(pin + k + 32)); + + //Extract the high and low halves (16 samples each) from the input blocks + __m256i block0_u16 = _mm512_extracti64x4_epi64(raw_samples1, 0); + __m256i block1_u16 = _mm512_extracti64x4_epi64(raw_samples1, 1); + __m256i block2_u16 = _mm512_extracti64x4_epi64(raw_samples2, 0); + __m256i block3_u16 = _mm512_extracti64x4_epi64(raw_samples2, 1); + + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 16x int32 vectors + __m512i block0_i32 = _mm512_cvtepu16_epi32(block0_u16); + __m512i block1_i32 = _mm512_cvtepu16_epi32(block1_u16); + __m512i block2_i32 = _mm512_cvtepu16_epi32(block2_u16); + __m512i block3_i32 = _mm512_cvtepu16_epi32(block3_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m512 block0_float = _mm512_cvtepi32_ps(block0_i32); + __m512 block1_float = _mm512_cvtepi32_ps(block1_i32); + __m512 block2_float = _mm512_cvtepi32_ps(block2_i32); + __m512 block3_float = _mm512_cvtepi32_ps(block3_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm512_fmsub_ps(block0_float, gains, offsets); + block1_float = _mm512_fmsub_ps(block1_float, gains, offsets); + block2_float = _mm512_fmsub_ps(block2_float, gains, offsets); + block3_float = _mm512_fmsub_ps(block3_float, gains, offsets); + + //All done, store back to the output buffer + _mm512_store_ps(pout + k, block0_float); + _mm512_store_ps(pout + k + 16, block1_float); + _mm512_store_ps(pout + k + 32, block2_float); + _mm512_store_ps(pout + k + 48, block3_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; km_timescale = round(FS_PER_SECOND / wh.s_rate); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + wfm->Resize(wh.wave_length); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + double v_gain = wh.ch_v_gain[i].value * wh.ch_probe[i] / wh.ch_codes_per_div[i]; + LogDebug("\tv_gain: %f\n", v_gain); + LogDebug("\tcenter: %d\n", center_code); + + if(data_width == 2) + { + Oscilloscope::ConvertUnsigned16BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint16_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.ch_v_offset[i].value, + wh.wave_length); + fpos += 2 * wh.wave_length; + } + else + { + Oscilloscope::ConvertUnsigned8BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint8_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.ch_v_offset[i].value, + wh.wave_length); + fpos += wh.wave_length; + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + + //Process math data + for(int i = 0; i < 4; i++) + { + if(wh.math_en[i] == 1) + { + string name = string("F") + to_string(i+1); + AddStream(Unit(Unit::UNIT_VOLTS), name, Stream::STREAM_TYPE_ANALOG); + auto wfm = new UniformAnalogWaveform; + wfm->m_timescale = round(wh.math_s_interval[i] * FS_PER_SECOND); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + wfm->Resize(wh.math_wave_length[i]); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + double v_gain = wh.math_v_gain[i].value / wh.math_codes_per_div; + LogDebug("\tv_gain: %f\n", v_gain); + LogDebug("\tcenter: %d\n", center_code); + + if(data_width == 2) + { + Oscilloscope::ConvertUnsigned16BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint16_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.math_v_offset[i].value, + wh.math_wave_length[i]); + fpos += 2 * wh.math_wave_length[i]; + } + else + { + Oscilloscope::ConvertUnsigned8BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint8_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.math_v_offset[i].value, + wh.math_wave_length[i]); + fpos += wh.math_wave_length[i]; + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + + //Process digital data + if(wh.digital_en) + { + for(int i = 0; i < 16; i++) + { + if(wh.d_ch_en[i] == 1) + { + string name = string("D") + to_string(i); + AddStream(Unit(Unit::UNIT_VOLTS), name, Stream::STREAM_TYPE_DIGITAL); + auto wfm = new UniformDigitalWaveform; + wfm->m_timescale = round(FS_PER_SECOND / wh.d_s_rate); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + for(size_t j = 0; j < (wh.d_wave_length / 8); j++) + { + uint8_t samples = *reinterpret_cast(f.c_str() + fpos); + for(int k = 0; k < 8; k++) + { + bool value = samples & 0x1; + samples >>= 1; + wfm->m_samples.push_back(value); + } + fpos += 1; + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + } + + m_outputsChangedSignal.emit(); +} diff --git a/scopeprotocols/SiglentBINImportFilter.h b/scopeprotocols/SiglentBINImportFilter.h new file mode 100644 index 00000000..03a8a9bf --- /dev/null +++ b/scopeprotocols/SiglentBINImportFilter.h @@ -0,0 +1,103 @@ +/*********************************************************************************************************************** +* * +* libscopeprotocols * +* * +* Copyright (c) 2012-2023 Andrew D. Zonenberg and contributors * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the * +* following conditions are met: * +* * +* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the * +* following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the * +* following disclaimer in the documentation and/or other materials provided with the distribution. * +* * +* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products * +* derived from this software without specific prior written permission. * +* * +* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * +* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * +* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * +* POSSIBILITY OF SUCH DAMAGE. * +* * +***********************************************************************************************************************/ + +/** + @file + @author Andrew D. Zonenberg + @brief Declaration of SiglentBINImportFilter + */ +#ifndef SiglentBINImportFilter_h +#define SiglentBINImportFilter_h + +class SiglentBINImportFilter : public ImportFilter +{ +public: + SiglentBINImportFilter(const std::string& color); + + static std::string GetProtocolName(); + + PROTOCOL_DECODER_INITPROC(SiglentBINImportFilter) + + //Siglent binary capture structs + #pragma pack(push, 1) + struct FileHeader + { + uint32_t version; //File format version + }; + + // V2/V4 wave header + struct WaveHeader + { + int32_t ch_en[4]; //C1-C4 channel enable + struct { //C1-C4 vertical gain + double value; + char reserved[32]; + } ch_v_gain[4]; + struct { //C1-C4 vertical offset + double value; + char reserved[32]; + } ch_v_offset[4]; + int32_t digital_en; //Digital enable + int32_t d_ch_en[16]; //D0-D15 channel enable + double time_div; //Time base + char reserved9[32]; + double time_delay; //Trigger delay + char reserved10[32]; + uint32_t wave_length; //Number of samples in each analog waveform + double s_rate; //C1-C4 sampling rate + char reserved11[32]; + uint32_t d_wave_length; //Number of samples in each digital waveform + double d_s_rate; //D0-D15 sampling rate + char reserved12[32]; + double ch_probe[4]; //C1-C4 probe factor + int8_t data_width; //0:1 Byte, 1:2 Bytes + int8_t byte_order; //0:LSB, 1:MSB + char reserved13[6]; + int32_t num_hori_div; //Number of horizontal divisions + int32_t ch_codes_per_div[4];//C1-C4 codes per division + int32_t math_en[4]; //F1-F4 channel enable + struct { //F1-F4 vertical gain + double value; + char reserved[32]; + } math_v_gain[4]; + struct { //F1-F2 vertical offset + double value; + char reserved[32]; + } math_v_offset[4]; + uint32_t math_wave_length[4];//F1-F4 number of samples + double math_s_interval[4]; //F1-F4 sampling interval + int32_t math_codes_per_div; //F1-F4 codes per division + }; + #pragma pack(pop) + +protected: + void OnFileNameChanged(); +}; + +#endif diff --git a/scopeprotocols/scopeprotocols.cpp b/scopeprotocols/scopeprotocols.cpp index daed916b..642d94d5 100644 --- a/scopeprotocols/scopeprotocols.cpp +++ b/scopeprotocols/scopeprotocols.cpp @@ -165,6 +165,7 @@ void ScopeProtocolStaticInit() AddDecoderClass(ScaleFilter); AddDecoderClass(SDCmdDecoder); AddDecoderClass(SDDataDecoder); + AddDecoderClass(SiglentBINImportFilter); AddDecoderClass(SNRFilter); AddDecoderClass(SParameterCascadeFilter); AddDecoderClass(SParameterDeEmbedFilter); diff --git a/scopeprotocols/scopeprotocols.h b/scopeprotocols/scopeprotocols.h index 228bacb5..76efccc2 100644 --- a/scopeprotocols/scopeprotocols.h +++ b/scopeprotocols/scopeprotocols.h @@ -165,6 +165,7 @@ #include "ScaleFilter.h" #include "SDCmdDecoder.h" #include "SDDataDecoder.h" +#include "SiglentBINImportFilter.h" #include "SNRFilter.h" #include "SParameterCascadeFilter.h" #include "SParameterDeEmbedFilter.h"