-
-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: Improve AVX, make detection a little nicer (#865)
- Loading branch information
Showing
11 changed files
with
583 additions
and
406 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
/************************************************************************************ | ||
* | ||
* D++, A Lightweight C++ library for Discord | ||
* | ||
* Copyright 2021 Craig Edwards and D++ contributors | ||
* (https://github.com/brainboxdotcc/DPP/graphs/contributors) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
************************************************************************************/ | ||
#pragma once | ||
|
||
#if defined _MSC_VER || defined __GNUC__ || defined __clang__ | ||
|
||
#include <immintrin.h>

#include <cstdint>
#include <limits>
|
||
/*
 * std::numeric_limits<...>::min() / ::max() are called further down in this header.
 * Drop any function-like min/max macros first so those calls are not macro-expanded
 * (presumably these come from a platform header such as <windows.h> — confirm).
 */
#ifdef max | ||
#undef max | ||
#endif | ||
#ifdef min | ||
#undef min | ||
#endif | ||
|
||
namespace dpp { | ||
|
||
/** 128-bit register type; used below to hold four packed single-precision floats. */
using avx_float = __m128; | ||
/** 128-bit register type; used below to hold four packed 32-bit integers. */
using avx_int = __m128i; | ||
|
||
/**
 * @brief Reads one 32-bit lane out of a 128-bit integer register.
 * @param value Register holding four packed 32-bit integers.
 * @param index Lane to read (0-3). Any other value falls back to lane 0.
 * @return The signed 32-bit integer stored in the selected lane.
 */
inline int32_t extract_int32_from_avx(const avx_int& value, int64_t index) {
    /* Spill the register once, then select the lane with ordinary indexing. */
    alignas(16) int32_t lanes[4];
    _mm_store_si128(reinterpret_cast<avx_int*>(lanes), value);

    const bool lane_is_valid = index >= 0 && index <= 3;
    return lanes[lane_is_valid ? index : 0];
}
|
||
/**
 * @brief Audio mixing helpers that process one 128-bit register (four samples) per call.
 */
class audio_mixer {
public:
    /**
     * @brief The number of 32-bit values per CPU register.
     */
    inline static constexpr int32_t byte_blocks_per_register{ 4 };

    /**
     * @brief Writes the four 32-bit lanes of a register to a storage location.
     * @tparam value_type The target value type; each lane is converted with static_cast.
     * @param values_to_store The 128-bit register containing four packed 32-bit integers.
     * @param storage_location Pointer to at least byte_blocks_per_register writable elements.
     */
    template<typename value_type> inline static void store_values(const avx_int& values_to_store, value_type* storage_location) {
        /* Spill the whole register once instead of extracting lane by lane. */
        alignas(16) int32_t lanes[byte_blocks_per_register];
        _mm_store_si128(reinterpret_cast<avx_int*>(lanes), values_to_store);
        for (int32_t lane = 0; lane < byte_blocks_per_register; ++lane) {
            storage_location[lane] = static_cast<value_type>(lanes[lane]);
        }
    }

    /**
     * @brief Converts byte_blocks_per_register consecutive values to float and loads them into a register.
     * @tparam value_type The type of the values being gathered.
     * @param values Pointer to at least byte_blocks_per_register readable elements.
     * @return A register whose lane i holds static_cast<float>(values[i]).
     */
    template<typename value_type> inline static avx_float gather_values(value_type* values) {
        alignas(16) float lanes[byte_blocks_per_register]{};
        for (int32_t lane = 0; lane < byte_blocks_per_register; ++lane) {
            lanes[lane] = static_cast<float>(values[lane]);
        }
        return _mm_load_ps(lanes);
    }

    /**
     * @brief Collect a single register worth of data from data_in, apply a ramped gain, clamp to the
     * int16_t range and store the result in data_out.
     *
     * Sample i (0-3) is multiplied by current_gain + i * increment.
     *
     * @param data_in Pointer to byte_blocks_per_register input int32_t values.
     * @param data_out Pointer to byte_blocks_per_register output int16_t values.
     * @param current_gain The gain applied to the first sample.
     * @param increment The amount the gain grows by for each subsequent sample.
     */
    inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) {
        /*
         * _mm_setr_ps fills lanes in memory order (lane 0 first). _mm_set_ps takes its
         * arguments highest lane first, which would run the ramp backwards.
         */
        const avx_float lane_steps = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
        const avx_float gains = _mm_add_ps(_mm_set1_ps(current_gain), _mm_mul_ps(_mm_set1_ps(increment), lane_steps));
        const avx_float scaled = _mm_mul_ps(gather_values(data_in), gains);

        /* Clamp to [INT16_MIN, INT16_MAX] before narrowing. */
        const avx_float lower_bound = _mm_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::min()));
        const avx_float upper_bound = _mm_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::max()));
        const avx_float clamped = _mm_min_ps(_mm_max_ps(scaled, lower_bound), upper_bound);

        /* Float -> int32 conversion uses the current SSE rounding mode. */
        store_values(_mm_cvtps_epi32(clamped), data_out);
    }

    /**
     * @brief Add a register worth of elements from decoded_data onto up_sampled_vector in place.
     *
     * The addition is done on integers, so the result is exact for the whole int32_t range
     * (a float round trip would lose low bits above 2^24).
     *
     * @param up_sampled_vector Pointer to byte_blocks_per_register int32_t accumulators (read and written).
     * @param decoded_data Pointer to byte_blocks_per_register int16_t samples to add.
     */
    inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) {
        const avx_int accumulated = _mm_loadu_si128(reinterpret_cast<const avx_int*>(up_sampled_vector));
        /* Load four int16 samples (64 bits) into the low half of a register. */
        const avx_int narrow = _mm_loadl_epi64(reinterpret_cast<const avx_int*>(decoded_data));
        /* Sign-extend int16 -> int32: duplicate each word, then arithmetic-shift the copy down. */
        const avx_int widened = _mm_srai_epi32(_mm_unpacklo_epi16(narrow, narrow), 16);
        _mm_storeu_si128(reinterpret_cast<avx_int*>(up_sampled_vector), _mm_add_epi32(accumulated, widened));
    }
};
|
||
} // namespace dpp | ||
|
||
#endif |
Oops, something went wrong.