Skip to content

Commit

Permalink
Remove BitonicSort for arm device
Browse files Browse the repository at this point in the history
  • Loading branch information
xia-mc committed Nov 13, 2024
1 parent b63cb90 commit 21bbf8a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .idea/PyFastUtil.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 29 additions & 1 deletion pyfastutil/src/utils/simd/BitonicSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@

#include <vector>
#include <algorithm>

#ifndef __arm64__

#include <immintrin.h>

#endif

#include <utils/PythonPCH.h>
#include "SIMD.h"
#include "utils/TimSort.h"
Expand All @@ -14,6 +20,7 @@
#include "utils/memory/PreFetch.h"

namespace simd {
#ifndef __arm64__
struct alignas(32) AVX2_MARKS {
alignas(32) const __m256i M1 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
alignas(32) const __m256i M2 = _mm256_set_epi32(6, 7, 4, 5, 2, 3, 0, 1);
Expand All @@ -29,19 +36,23 @@ namespace simd {

// File-local pointers to the mask tables used by the AVX2 / AVX-512 code paths.
// Both start out null; init() assigns them when the matching IS_*_SUPPORTED
// check passes, and the sorting kernels dereference them (e.g. avx2Marks->M1).
static AVX2_MARKS *avx2Marks = nullptr;
static AVX512_MARKS *avx512Marks = nullptr;
#endif

/**
 * Allocates the mask tables (AVX2_MARKS / AVX512_MARKS) that the SIMD sorting
 * kernels read through avx2Marks / avx512Marks.
 *
 * The pointers are only declared when __arm64__ is not defined, so the body is
 * compiled only when neither __APPLE__ nor __arm64__ is defined; guarding on
 * __APPLE__ alone would reference undeclared names on a non-Apple target that
 * defines __arm64__. On excluded targets this function is a no-op.
 *
 * Calling it more than once is harmless: a table that already exists is kept
 * instead of being re-allocated (which would leak the previous instance).
 */
void init() {
#if !defined(__APPLE__) && !defined(__arm64__)
    if (IS_AVX2_SUPPORTED && avx2Marks == nullptr) {
        avx2Marks = new AVX2_MARKS();
    }
    if (IS_AVX512_SUPPORTED && avx512Marks == nullptr) {
        avx512Marks = new AVX512_MARKS();
    }
#endif
}

#pragma clang diagnostic push
#pragma ide diagnostic ignored "portability-simd-intrinsics"
#pragma ide diagnostic ignored "NullDereference"
#ifndef __APPLE__

__forceinline void sort8Epi32AVX2(__m256i &vec) {
__m256i swapped = _mm256_permutevar8x32_epi32(vec, avx2Marks->M1);
Expand Down Expand Up @@ -293,6 +304,7 @@ namespace simd {
}
}

#endif
#pragma clang diagnostic pop

/**
Expand All @@ -316,6 +328,21 @@ namespace simd {
return;
}

#ifdef __arm64__
if (size > 5000) {
if (reverse) {
gfx::timsort(begin, end, std::greater<>());
} else {
gfx::timsort(begin, end);
}
} else {
if (reverse) {
std::sort(begin, end, std::greater<>());
} else {
std::sort(begin, end);
}
}
#else
if (!IS_AVX2_SUPPORTED && !IS_AVX512_SUPPORTED) {
// fallback
if (size > 5000) {
Expand Down Expand Up @@ -393,5 +420,6 @@ namespace simd {
mergeSortedBlocksReversed(vector, minBlockSize);
else
mergeSortedBlocks(vector, minBlockSize);
#endif
}
}
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"-O3", "-funroll-loops", "-flto", "-fPIC",
"-std=c++2b", "-Wall", "-fvisibility=hidden",
"-Wno-error=unknown-pragmas",
"-Wno-error=unguarded-availability-new" # already handle in Compat.h
# "-Wno-error=unguarded-availability-new" # already handle in Compat.h, and this option doesn't exist on gcc
]
EXTRA_LINK_ARG = ["-shared"]

Expand Down

0 comments on commit 21bbf8a

Please sign in to comment.