Skip to content

Commit

Permalink
Merge pull request #26 from simdutf/avx2_new_algo_and_tests
Browse files Browse the repository at this point in the history
Avx2 new algo and tests
  • Loading branch information
Nick-Nuon authored May 27, 2024
2 parents e687f04 + 0595946 commit 6f34ead
Show file tree
Hide file tree
Showing 10 changed files with 1,771 additions and 288 deletions.
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ We recommend you install .NET 8: https://dotnet.microsoft.com/en-us/download/dot
dotnet test
```

To see which tests are running, we recommend raising the verbosity level to detailed (`-v d`):

```
dotnet test -v d
```

To get a list of available tests, enter the command:

```
Expand All @@ -36,10 +42,19 @@ dotnet test --list-tests

To run only specific tests, pass a `--filter` expression that matches the test names:


```
dotnet test -c Release --filter Ascii
dotnet test --filter TooShortErrorAVX
```

To run only the tests that belong to a specific category, filter on `Category` instead:

```
dotnet test --filter "Category=scalar"
```



## Running Benchmarks

To run the benchmarks, run the following command:
Expand Down
40 changes: 29 additions & 11 deletions benchmark/Benchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,37 @@ public unsafe void SIMDUtf8ValidationRealData()
{
if (allLinesUtf8 != null)
{
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByte);
}
}

[Benchmark]
// [BenchmarkCategory("scalar")]
// public unsafe void Utf8ValidationRealDataScalar()
// {
// if (allLinesUtf8 != null)
// {
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
// }
// }

[BenchmarkCategory("scalar")]
public unsafe void Utf8ValidationRealDataScalar()
{
if (allLinesUtf8 != null)
{
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar);
// Assuming allLinesUtf8 is a byte* and its length is provided by another variable, for example, allLinesUtf8Length
RunUtf8ValidationBenchmark(allLinesUtf8, (byte* pInputBuffer, int inputLength) =>
{
int dummyUtf16CodeUnitCountAdjustment, dummyScalarCountAdjustment;
// Call the method with additional out parameters within the lambda.
// You must handle these additional out parameters inside the lambda, as they cannot be passed back through the delegate.
return SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength, out dummyUtf16CodeUnitCountAdjustment, out dummyScalarCountAdjustment);
});
}
}


[Benchmark]
[BenchmarkCategory("arm64")]
public unsafe void SIMDUtf8ValidationRealDataArm64()
Expand All @@ -205,15 +223,15 @@ public unsafe void SIMDUtf8ValidationRealDataArm64()
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64);
}
}
[Benchmark]
[BenchmarkCategory("avx")]
public unsafe void SIMDUtf8ValidationRealDataAvx2()
{
if (allLinesUtf8 != null)
{
RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
}
}
// [Benchmark]
// [BenchmarkCategory("avx")]
// public unsafe void SIMDUtf8ValidationRealDataAvx2()
// {
// if (allLinesUtf8 != null)
// {
// RunUtf8ValidationBenchmark(allLinesUtf8, SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2);
// }
// }
[Benchmark]
[BenchmarkCategory("sse")]
public unsafe void SIMDUtf8ValidationRealDataSse()
Expand Down
3 changes: 2 additions & 1 deletion benchmark/UTF8_runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
namespace DotnetRuntime
{

internal static unsafe partial class Utf8Utility
public static unsafe partial class Utf8Utility
{
/// <summary>
/// Returns <see langword="true"/> iff the low byte of <paramref name="value"/>
Expand Down Expand Up @@ -500,6 +500,7 @@ private static bool UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(uint valu
// the value isn't overlong using a single comparison. On big-endian platforms, we'll need
// to validate the mask and validate that the sequence isn't overlong as two separate comparisons.

// Temp16 - 2
if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord))
|| (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord))))
{
Expand Down
1 change: 0 additions & 1 deletion benchmark/benchmark.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

<ItemGroup>
<ProjectReference Include="..\src\SimdUnicode.csproj" />
<ProjectReference Include="..\test\tests.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/Ascii.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint buff

}

return GetIndexOfFirstNonAsciiByteScalar(pBuffer, bufferLength);
return GetIndexOfFirstNonAsciiByteScalar(pBuffer, bufferLength);
}


Expand Down
Loading

0 comments on commit 6f34ead

Please sign in to comment.