Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

There are issues with SIMD, let's fix them! #140

Draft
wants to merge 5 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions hvcc/generators/ir2c/static/HvSignalPhasor.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,30 @@ static inline void __hv_phasor_f(SignalPhasor *o, hv_bInf_t bIn, hv_bOutf_t bOut

static inline void __hv_phasor_k_f(SignalPhasor *o, hv_bOutf_t bOut) {
#if HV_SIMD_AVX
*bOut = _mm256_sub_ps(o->phase, _mm256_set1_ps(1.0f));
o->phase = _mm256_or_ps(_mm256_andnot_ps(
_mm256_set1_ps(-INFINITY),
_mm256_add_ps(o->phase, o->inc)),
_mm256_set1_ps(1.0f));
// *bOut = _mm256_sub_ps(o->phase, _mm256_set1_ps(1.0f));
// o->phase = _mm256_or_ps(_mm256_andnot_ps(
// _mm256_set1_ps(-INFINITY),
// _mm256_add_ps(o->phase, o->inc)),
// _mm256_set1_ps(1.0f));

__m128 phase_lo = _mm256_extractf128_ps(o->phase, 0);
__m128 phase_hi = _mm256_extractf128_ps(o->phase, 1);
__m128 inc_lo = _mm256_extractf128_ps(o->inc, 0);
__m128 inc_hi = _mm256_extractf128_ps(o->inc, 1);

for (int i = 0; i < 4; i++) {
phase_lo = _mm_add_ps(phase_lo, inc_lo);
phase_lo = _mm_sub_ps(phase_lo, _mm_and_ps(_mm_cmpge_ps(phase_lo, _mm_set1_ps(2.0f)), _mm_set1_ps(2.0f)));
bOut[i] = _mm256_castps128_ps256(phase_lo);

phase_hi = _mm_add_ps(phase_hi, inc_hi);
phase_hi = _mm_sub_ps(phase_hi, _mm_and_ps(_mm_cmpge_ps(phase_hi, _mm_set1_ps(2.0f)), _mm_set1_ps(2.0f)));
bOut[i+4] = _mm256_insertf128_ps(_mm256_castps128_ps256(phase_hi), phase_hi, 1);
}

o->phase = _mm256_set_m128(phase_hi, phase_lo);


#elif HV_SIMD_SSE
*bOut = _mm_sub_ps(_mm_castsi128_ps(
_mm_or_si128(_mm_srli_epi32(o->phase, 9),
Expand Down
2 changes: 1 addition & 1 deletion tests/framework/base_signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def compile_and_run(
str(block_size or 480),
str(num_iterations or 100)])

return wav_path
return exe_path, wav_path

def _compare_wave_output(self, out_dir, c_sources, golden_path, flag=None):
# http://stackoverflow.com/questions/10580676/comparing-two-numpy-arrays-for-equality-element-wise
Expand Down
3 changes: 3 additions & 0 deletions tests/framework/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def _compile_and_run(
source_files=source_files,
out_path=exe_path))

# run the clean command
subprocess.check_output(["make", "-C", os.path.dirname(makefile_path), "clean"])

# run the compile command
subprocess.check_output(["make", "-C", os.path.dirname(makefile_path), "-j"])

Expand Down
2 changes: 1 addition & 1 deletion tests/framework/template/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CC=clang
CXX=clang++
COMMONFLAGS=-Werror -Wno-unused-function -Wno-\#warnings {{" ".join(simd_flags)}}
COMMONFLAGS=-Werror -Wno-unused-function -gdwarf-4 -Wno-\#warnings {{" ".join(simd_flags)}}
CFLAGS=-std=c11 $(COMMONFLAGS)
CXXFLAGS=-std=c++11 -fno-exceptions -fno-rtti $(COMMONFLAGS)

Expand Down
2 changes: 1 addition & 1 deletion tests/src/test_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ int main(int argc, const char *argv[]) {
HeavyContextInterface *context = hv_heavy_new(48000.0);
hv_setPrintHook(context, &printHook);

float *outBuffers = (float *) malloc(numOutputChannels * BLOCK_SIZE * sizeof(float));
float *outBuffers = (float *) hv_malloc(numOutputChannels * BLOCK_SIZE * sizeof(float));

for (int i = 0; i < numIterations; ++i) {
hv_processInline(context, NULL, outBuffers, BLOCK_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion tests/src/test_midi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ int main(int argc, const char *argv[]) {

MidiEvent* mev;

float *outBuffers = (float *) malloc(numOutputChannels * BLOCK_SIZE * sizeof(float));
float *outBuffers = (float *) hv_malloc(numOutputChannels * BLOCK_SIZE * sizeof(float));

for (int i = 0; i < numIterations; ++i) {
for (int event=0; event < midifile[0].size(); event++) {
Expand Down
11 changes: 10 additions & 1 deletion tests/src/test_signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@
#include "tinywav.h"

int main(int argc, const char *argv[]) {
#if HV_SIMD_AVX
printf("AVX!\n");
#elif HV_SIMD_SSE
printf("SSE!\n");
#elif HV_SIMD_NEON
printf("NEON!\n");
#else // HV_SIMD_NONE
printf("NONE!\n");
#endif
if (argc < 5) return -1;
const char *outputPath = argv[1];
const double sampleRate = atof(argv[2]);
Expand All @@ -40,7 +49,7 @@ int main(int argc, const char *argv[]) {
(int32_t) hv_getSampleRate(context),
TW_FLOAT32, TW_INLINE, outputPath);

float *outBuffers = (float *) malloc(
float *outBuffers = (float *) hv_malloc(
hv_getNumOutputChannels(context) * blockSize * sizeof(float));

for (int i = 0; i < numIterations; ++i) {
Expand Down
3 changes: 3 additions & 0 deletions tests/test_signal.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def test_line(self):
def test_phasor_control(self):
self._test_signal_patch("test-phasor-control.pd")

def test_phasor_signal(self):
self._test_signal_patch("test-phasor-signal.pd")


def main():
parser = argparse.ArgumentParser(
Expand Down
Loading