From 4b55c193da159b8718f146e73bba7f55f376ed48 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 8 Jun 2018 09:13:05 -0500 Subject: [PATCH 001/325] Remove package target fallback --- deltaq/deltaq.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/deltaq/deltaq.csproj b/deltaq/deltaq.csproj index b097df6..758d086 100644 --- a/deltaq/deltaq.csproj +++ b/deltaq/deltaq.csproj @@ -14,7 +14,6 @@ Supports creating and applying patches in BSDIFF format https://github.com/jzebedee/deltaq https://github.com/jzebedee/deltaq/blob/master/LICENSE.md 1.6.0 - $(PackageTargetFallback);dnxcore50 false false false From 36cddc27949084a319d2eb9f88a15e80671182b7 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 8 Jun 2018 09:13:59 -0500 Subject: [PATCH 002/325] Update test project - Remove package target fallback and runtime framework version - Update prerelease versions of test packages --- deltaq-tests/deltaq-tests.csproj | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deltaq-tests/deltaq-tests.csproj b/deltaq-tests/deltaq-tests.csproj index 018fef2..4e78147 100644 --- a/deltaq-tests/deltaq-tests.csproj +++ b/deltaq-tests/deltaq-tests.csproj @@ -5,8 +5,6 @@ deltaq-tests deltaq-tests true - 1.0.3 - $(PackageTargetFallback);dnxcore50;portable-net46 false false false @@ -17,9 +15,12 @@ - - - + + + all + runtime; build; native; contentfiles; analyzers + + From 1c926e7c4878694cb562f7b62c903e6b9b317e08 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 10:02:58 -0500 Subject: [PATCH 003/325] Update folder structure --- deltaq.sln | 4 ++-- {deltaq => src/deltaq}/BsDiff/BsDiff.cs | 0 {deltaq => src/deltaq}/BsDiff/BsPatch.cs | 0 {deltaq => src/deltaq}/Bzip2/BZip2Constants.cs | 0 {deltaq => src/deltaq}/Bzip2/BZip2Exception.cs | 0 {deltaq => src/deltaq}/Bzip2/BZip2InputStream.cs | 0 {deltaq => src/deltaq}/Bzip2/BZip2OutputStream.cs | 0 {deltaq => src/deltaq}/Bzip2/Checksums/IChecksum.cs | 0 {deltaq => src/deltaq}/Bzip2/Checksums/StrangeCrc.cs | 0 {deltaq => src/deltaq}/Extensions.cs | 0 {deltaq => src/deltaq}/Properties/AssemblyInfo.cs | 0 {deltaq => src/deltaq}/SuffixSort/ISuffixSort.cs | 0 {deltaq => src/deltaq}/SuffixSort/SAIS.cs | 0 {deltaq => src/deltaq}/deltaq.csproj | 0 {deltaq-tests => test/deltaq-tests}/BsDiffTests.cs | 0 .../deltaq-tests}/Properties/AssemblyInfo.cs | 0 {deltaq-tests => test/deltaq-tests}/deltaq-tests.csproj | 0 17 files changed, 2 insertions(+), 2 deletions(-) rename {deltaq => src/deltaq}/BsDiff/BsDiff.cs (100%) rename {deltaq => src/deltaq}/BsDiff/BsPatch.cs (100%) rename {deltaq => src/deltaq}/Bzip2/BZip2Constants.cs (100%) rename {deltaq => src/deltaq}/Bzip2/BZip2Exception.cs (100%) rename {deltaq => src/deltaq}/Bzip2/BZip2InputStream.cs (100%) rename {deltaq => src/deltaq}/Bzip2/BZip2OutputStream.cs (100%) rename {deltaq => src/deltaq}/Bzip2/Checksums/IChecksum.cs (100%) rename {deltaq => src/deltaq}/Bzip2/Checksums/StrangeCrc.cs (100%) rename {deltaq => src/deltaq}/Extensions.cs (100%) rename {deltaq => src/deltaq}/Properties/AssemblyInfo.cs (100%) rename {deltaq => src/deltaq}/SuffixSort/ISuffixSort.cs (100%) rename {deltaq => src/deltaq}/SuffixSort/SAIS.cs (100%) rename {deltaq => src/deltaq}/deltaq.csproj (100%) rename {deltaq-tests => test/deltaq-tests}/BsDiffTests.cs (100%) rename {deltaq-tests => test/deltaq-tests}/Properties/AssemblyInfo.cs (100%) rename {deltaq-tests => test/deltaq-tests}/deltaq-tests.csproj (100%) diff --git a/deltaq.sln 
b/deltaq.sln index 0cfc5f1..3967bad 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.26621.2 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq", "deltaq\deltaq.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq", "src\deltaq\deltaq.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq-tests", "deltaq-tests\deltaq-tests.csproj", "{0FCB4753-5989-452D-A341-2A807BF4320C}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq-tests", "test\deltaq-tests\deltaq-tests.csproj", "{0FCB4753-5989-452D-A341-2A807BF4320C}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/deltaq/BsDiff/BsDiff.cs b/src/deltaq/BsDiff/BsDiff.cs similarity index 100% rename from deltaq/BsDiff/BsDiff.cs rename to src/deltaq/BsDiff/BsDiff.cs diff --git a/deltaq/BsDiff/BsPatch.cs b/src/deltaq/BsDiff/BsPatch.cs similarity index 100% rename from deltaq/BsDiff/BsPatch.cs rename to src/deltaq/BsDiff/BsPatch.cs diff --git a/deltaq/Bzip2/BZip2Constants.cs b/src/deltaq/Bzip2/BZip2Constants.cs similarity index 100% rename from deltaq/Bzip2/BZip2Constants.cs rename to src/deltaq/Bzip2/BZip2Constants.cs diff --git a/deltaq/Bzip2/BZip2Exception.cs b/src/deltaq/Bzip2/BZip2Exception.cs similarity index 100% rename from deltaq/Bzip2/BZip2Exception.cs rename to src/deltaq/Bzip2/BZip2Exception.cs diff --git a/deltaq/Bzip2/BZip2InputStream.cs b/src/deltaq/Bzip2/BZip2InputStream.cs similarity index 100% rename from deltaq/Bzip2/BZip2InputStream.cs rename to src/deltaq/Bzip2/BZip2InputStream.cs diff --git a/deltaq/Bzip2/BZip2OutputStream.cs b/src/deltaq/Bzip2/BZip2OutputStream.cs similarity index 100% rename from deltaq/Bzip2/BZip2OutputStream.cs rename to 
src/deltaq/Bzip2/BZip2OutputStream.cs diff --git a/deltaq/Bzip2/Checksums/IChecksum.cs b/src/deltaq/Bzip2/Checksums/IChecksum.cs similarity index 100% rename from deltaq/Bzip2/Checksums/IChecksum.cs rename to src/deltaq/Bzip2/Checksums/IChecksum.cs diff --git a/deltaq/Bzip2/Checksums/StrangeCrc.cs b/src/deltaq/Bzip2/Checksums/StrangeCrc.cs similarity index 100% rename from deltaq/Bzip2/Checksums/StrangeCrc.cs rename to src/deltaq/Bzip2/Checksums/StrangeCrc.cs diff --git a/deltaq/Extensions.cs b/src/deltaq/Extensions.cs similarity index 100% rename from deltaq/Extensions.cs rename to src/deltaq/Extensions.cs diff --git a/deltaq/Properties/AssemblyInfo.cs b/src/deltaq/Properties/AssemblyInfo.cs similarity index 100% rename from deltaq/Properties/AssemblyInfo.cs rename to src/deltaq/Properties/AssemblyInfo.cs diff --git a/deltaq/SuffixSort/ISuffixSort.cs b/src/deltaq/SuffixSort/ISuffixSort.cs similarity index 100% rename from deltaq/SuffixSort/ISuffixSort.cs rename to src/deltaq/SuffixSort/ISuffixSort.cs diff --git a/deltaq/SuffixSort/SAIS.cs b/src/deltaq/SuffixSort/SAIS.cs similarity index 100% rename from deltaq/SuffixSort/SAIS.cs rename to src/deltaq/SuffixSort/SAIS.cs diff --git a/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj similarity index 100% rename from deltaq/deltaq.csproj rename to src/deltaq/deltaq.csproj diff --git a/deltaq-tests/BsDiffTests.cs b/test/deltaq-tests/BsDiffTests.cs similarity index 100% rename from deltaq-tests/BsDiffTests.cs rename to test/deltaq-tests/BsDiffTests.cs diff --git a/deltaq-tests/Properties/AssemblyInfo.cs b/test/deltaq-tests/Properties/AssemblyInfo.cs similarity index 100% rename from deltaq-tests/Properties/AssemblyInfo.cs rename to test/deltaq-tests/Properties/AssemblyInfo.cs diff --git a/deltaq-tests/deltaq-tests.csproj b/test/deltaq-tests/deltaq-tests.csproj similarity index 100% rename from deltaq-tests/deltaq-tests.csproj rename to test/deltaq-tests/deltaq-tests.csproj From 18da72edbbb17cd3b2fb1fe51e6079456aa4e1b7 Mon 
Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 3 Feb 2021 12:17:52 -0500 Subject: [PATCH 004/325] Update tests --- deltaq.sln | 16 ++++++------ .../BsDiffTests.cs | 12 ++++----- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 26 +++++++++++++++++++ test/deltaq-tests/Properties/AssemblyInfo.cs | 19 -------------- test/deltaq-tests/deltaq-tests.csproj | 26 ------------------- 5 files changed, 40 insertions(+), 59 deletions(-) rename test/{deltaq-tests => DeltaQ.Tests}/BsDiffTests.cs (93%) create mode 100644 test/DeltaQ.Tests/DeltaQ.Tests.csproj delete mode 100644 test/deltaq-tests/Properties/AssemblyInfo.cs delete mode 100644 test/deltaq-tests/deltaq-tests.csproj diff --git a/deltaq.sln b/deltaq.sln index 3967bad..2b303ef 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -1,11 +1,11 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.26621.2 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30914.41 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq", "src\deltaq\deltaq.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "deltaq-tests", "test\deltaq-tests\deltaq-tests.csproj", "{0FCB4753-5989-452D-A341-2A807BF4320C}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -17,10 +17,10 @@ Global {CE1513B6-2F66-4E62-BDD1-0C41D4433A51}.Debug|Any CPU.Build.0 = Debug|Any CPU {CE1513B6-2F66-4E62-BDD1-0C41D4433A51}.Release|Any CPU.ActiveCfg = Release|Any CPU {CE1513B6-2F66-4E62-BDD1-0C41D4433A51}.Release|Any CPU.Build.0 = Release|Any CPU - {0FCB4753-5989-452D-A341-2A807BF4320C}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU - {0FCB4753-5989-452D-A341-2A807BF4320C}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0FCB4753-5989-452D-A341-2A807BF4320C}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0FCB4753-5989-452D-A341-2A807BF4320C}.Release|Any CPU.Build.0 = Release|Any CPU + {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Debug|Any CPU.Build.0 = Debug|Any CPU + {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Release|Any CPU.ActiveCfg = Release|Any CPU + {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/test/deltaq-tests/BsDiffTests.cs b/test/DeltaQ.Tests/BsDiffTests.cs similarity index 93% rename from test/deltaq-tests/BsDiffTests.cs rename to test/DeltaQ.Tests/BsDiffTests.cs index 05fc0cc..ebc9fcc 100644 --- a/test/deltaq-tests/BsDiffTests.cs +++ b/test/DeltaQ.Tests/BsDiffTests.cs @@ -22,6 +22,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ +using DeltaQ.BsDiff; using System; using System.Collections; using System.Collections.Generic; @@ -29,10 +30,9 @@ using System.IO.Compression; using System.IO.MemoryMappedFiles; using System.Linq; -using deltaq.BsDiff; using Xunit; -namespace deltaq_tests +namespace DeltaQ.Tests { public class BsDiffTests { @@ -95,7 +95,7 @@ public void BsDiffCreateFromStreams() { using (var mmfStream = mmf.CreateViewStream()) { - BsDiff.Create(oldBuffer, newBuffer, mmfStream); + BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream); } using (var msA = new MemoryStream(oldBuffer)) @@ -114,7 +114,7 @@ public void BsDiffCreateFromStreams() [MemberData(nameof(BsDiffCreateNullArguments_TestData))] public void BsDiffCreateNullArguments(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.Create(oldData, newData, outStream)); + Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream)); } public static IEnumerable BsDiffCreateNullArguments_TestData() @@ -130,7 +130,7 @@ public static IEnumerable BsDiffCreateNullArguments_TestData() [MemberData(nameof(BsDiffCreateBadStreams_TestData))] public void BsDiffCreateBadStreams(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.Create(oldData, newData, outStream)); + Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream)); } public static IEnumerable BsDiffCreateBadStreams_TestData() @@ -144,7 +144,7 @@ private static byte[] BsDiffCreate(byte[] oldBuf, byte[] newBuf) { using (var outputStream = new MemoryStream()) { - BsDiff.Create(oldBuf, newBuf, outputStream); + BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream); return outputStream.ToArray(); } } diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj new file mode 100644 index 0000000..7890f75 --- /dev/null +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -0,0 +1,26 @@ + + + + net5.0 + + false + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + 
all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + + diff --git a/test/deltaq-tests/Properties/AssemblyInfo.cs b/test/deltaq-tests/Properties/AssemblyInfo.cs deleted file mode 100644 index aaacc0b..0000000 --- a/test/deltaq-tests/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,19 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("deltaq_tests")] -[assembly: AssemblyTrademark("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("0fcb4753-5989-452d-a341-2a807bf4320c")] diff --git a/test/deltaq-tests/deltaq-tests.csproj b/test/deltaq-tests/deltaq-tests.csproj deleted file mode 100644 index 4e78147..0000000 --- a/test/deltaq-tests/deltaq-tests.csproj +++ /dev/null @@ -1,26 +0,0 @@ - - - - netcoreapp2.0 - deltaq-tests - deltaq-tests - true - false - false - false - - - - - - - - - - all - runtime; build; native; contentfiles; analyzers - - - - - From 9b13651983bfc01f86423fe816ce51dcec64d2a8 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 12:18:15 -0500 Subject: [PATCH 005/325] Update project --- src/deltaq/deltaq.csproj | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 758d086..2991887 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -1,24 +1,8 @@ - + - deltaq is a .NET Core class library for fast delta encoding in .NET - -Supports creating and applying patches in BSDIFF format - deltaq - 1.1.0 - J. Zebedee - netstandard2.0 - deltaq - deltaq - diff difference patch compare delta deltaq sync bsdiff vcdiff - https://github.com/jzebedee/deltaq - https://github.com/jzebedee/deltaq/blob/master/LICENSE.md - 1.6.0 - false - false - false - 1.2.0 - false + net5.0 + enable From 73c3506d8f8b529f425f84c08d6e623ef9d9917a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 3 Feb 2021 12:18:45 -0500 Subject: [PATCH 006/325] Remove old AssemblyInfo --- src/deltaq/Properties/AssemblyInfo.cs | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 src/deltaq/Properties/AssemblyInfo.cs diff --git a/src/deltaq/Properties/AssemblyInfo.cs b/src/deltaq/Properties/AssemblyInfo.cs deleted file mode 100644 index 79a2f91..0000000 --- a/src/deltaq/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,19 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("deltaq")] -[assembly: AssemblyTrademark("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("ce1513b6-2f66-4e62-bdd1-0c41d4433a51")] From 79bc359bd7ae8f1f483be15e5a74149cea6b64fc Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 3 Feb 2021 12:19:57 -0500 Subject: [PATCH 007/325] Apply naming change --- src/deltaq/BsDiff/BsDiff.cs | 6 +++--- src/deltaq/BsDiff/BsPatch.cs | 4 ++-- src/deltaq/Extensions.cs | 4 ++-- src/deltaq/SuffixSort/ISuffixSort.cs | 2 +- src/deltaq/SuffixSort/SAIS.cs | 4 ++-- test/DeltaQ.Tests/BsDiffTests.cs | 2 +- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/deltaq/BsDiff/BsDiff.cs b/src/deltaq/BsDiff/BsDiff.cs index b6a8fe6..068a048 100644 --- a/src/deltaq/BsDiff/BsDiff.cs +++ b/src/deltaq/BsDiff/BsDiff.cs @@ -1,5 +1,5 @@ /* - * BsDiff.cs for deltaq + * BsDiff.cs for DeltaQ * Copyright (c) 2014 J. Zebedee * * BsDiff.net is Copyright 2010 Logos Bible Software @@ -29,9 +29,9 @@ using System.Collections.Generic; using System.IO; using bz2core; -using deltaq.SuffixSort; +using DeltaQ.SuffixSort; -namespace deltaq.BsDiff +namespace DeltaQ.BsDiff { public static class BsDiff { diff --git a/src/deltaq/BsDiff/BsPatch.cs b/src/deltaq/BsDiff/BsPatch.cs index 228edf6..8ead12c 100644 --- a/src/deltaq/BsDiff/BsPatch.cs +++ b/src/deltaq/BsDiff/BsPatch.cs @@ -1,5 +1,5 @@ /* - * BsPatch.cs for deltaq + * BsPatch.cs for DeltaQ * Copyright (c) 2014 J. Zebedee * * BsDiff.net is Copyright 2010 Logos Bible Software @@ -28,7 +28,7 @@ using System; using System.IO; -namespace deltaq.BsDiff +namespace DeltaQ.BsDiff { public static class BsPatch { diff --git a/src/deltaq/Extensions.cs b/src/deltaq/Extensions.cs index dc0b122..67a33f0 100644 --- a/src/deltaq/Extensions.cs +++ b/src/deltaq/Extensions.cs @@ -1,5 +1,5 @@ /* - * Extensions.cs for deltaq + * Extensions.cs for DeltaQ * Copyright (c) 2014 J. 
Zebedee * * Permission is hereby granted, free of charge, to any person @@ -27,7 +27,7 @@ using System.IO; using System.Runtime.CompilerServices; -namespace deltaq +namespace DeltaQ { internal static class Extensions { diff --git a/src/deltaq/SuffixSort/ISuffixSort.cs b/src/deltaq/SuffixSort/ISuffixSort.cs index 3485c40..d5ea6ca 100644 --- a/src/deltaq/SuffixSort/ISuffixSort.cs +++ b/src/deltaq/SuffixSort/ISuffixSort.cs @@ -1,4 +1,4 @@ -namespace deltaq.SuffixSort +namespace DeltaQ.SuffixSort { public interface ISuffixSort { diff --git a/src/deltaq/SuffixSort/SAIS.cs b/src/deltaq/SuffixSort/SAIS.cs index b974d7e..47d86a0 100644 --- a/src/deltaq/SuffixSort/SAIS.cs +++ b/src/deltaq/SuffixSort/SAIS.cs @@ -1,5 +1,5 @@ /* - * SAIS.cs for deltaq + * SAIS.cs for DeltaQ * Copyright (c) 2014 J. Zebedee * * Permission is hereby granted, free of charge, to any person @@ -53,7 +53,7 @@ using System.Collections.Generic; using System.Runtime.CompilerServices; -namespace deltaq.SuffixSort +namespace DeltaQ.SuffixSort { /// /// An implementation of the induced sorting based suffix array construction algorithm. diff --git a/test/DeltaQ.Tests/BsDiffTests.cs b/test/DeltaQ.Tests/BsDiffTests.cs index ebc9fcc..66adc83 100644 --- a/test/DeltaQ.Tests/BsDiffTests.cs +++ b/test/DeltaQ.Tests/BsDiffTests.cs @@ -1,5 +1,5 @@ /* - * BsDiffTests.cs for deltaq + * BsDiffTests.cs for DeltaQ * Copyright (c) 2014 J. Zebedee * * Permission is hereby granted, free of charge, to any person diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 7890f75..66f77a4 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -1,4 +1,4 @@ - + net5.0 @@ -20,7 +20,7 @@ - + From 4894db96daed070c54f2ab657c375cecfb84054e Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 14:12:32 -0500 Subject: [PATCH 008/325] Add CLI stub --- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 12 ++++++++++++ src/DeltaQ.CLI/Program.cs | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 src/DeltaQ.CLI/DeltaQ.CLI.csproj create mode 100644 src/DeltaQ.CLI/Program.cs diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj new file mode 100644 index 0000000..c7322aa --- /dev/null +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -0,0 +1,12 @@ + + + + Exe + net5.0 + + + + + + + diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CLI/Program.cs new file mode 100644 index 0000000..b9e0bf6 --- /dev/null +++ b/src/DeltaQ.CLI/Program.cs @@ -0,0 +1,24 @@ +using System; +using System.IO; + +namespace DeltaQ.CLI +{ + class Program + { + static void Main(string[] args) + { + var f1 = args[1]; + var f2 = args[2]; + var o = args[3]; + switch(args[0]) + { + case "diff": + BsDiff.BsDiff.Create(File.ReadAllBytes(f1), File.ReadAllBytes(f2), File.OpenWrite(o)); + break; + case "patch": + BsDiff.BsPatch.Apply(File.ReadAllBytes(f1), File.ReadAllBytes(f2), File.OpenWrite(o)); + break; + } + } + } +} From 0fbac04de2ff34b8232796d3bb53aa1795ab3be7 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 14:12:47 -0500 Subject: [PATCH 009/325] Update patching for Spans --- src/deltaq/BsDiff/BsPatch.cs | 50 +++++++++++++++++++++--------------- src/deltaq/Extensions.cs | 22 +--------------- 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/src/deltaq/BsDiff/BsPatch.cs b/src/deltaq/BsDiff/BsPatch.cs index 8ead12c..afa8c86 100644 --- a/src/deltaq/BsDiff/BsPatch.cs +++ b/src/deltaq/BsDiff/BsPatch.cs @@ -48,18 +48,20 @@ public static class BsPatch /// Writable stream where the updated data will be written public static void Apply(byte[] input, byte[] diff, Stream output) { - OpenPatchStream openPatchStream = (uOffset, uLength) => + Stream openPatchStream(long uOffset, long uLength) { - var offset = (int)uOffset; - var length = (int)uLength; - return new MemoryStream(diff, offset, - uLength > 0 - ? length - : diff.Length - offset); - }; + checked + { + var offset = (int)uOffset; + var length = (int)uLength; + return new MemoryStream(diff, offset, + uLength > 0 + ? 
length + : diff.Length - offset); + } + } - Stream controlStream, diffStream, extraStream; - var newSize = CreatePatchStreams(openPatchStream, out controlStream, out diffStream, out extraStream); + var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); // prepare to read three parts of the patch in parallel ApplyInternal(newSize, new MemoryStream(input), controlStream, diffStream, extraStream, output); @@ -73,8 +75,7 @@ public static void Apply(byte[] input, byte[] diff, Stream output) /// Writable stream where the updated data will be written public static void Apply(Stream input, OpenPatchStream openPatchStream, Stream output) { - Stream controlStream, diffStream, extraStream; - var newSize = CreatePatchStreams(openPatchStream, out controlStream, out diffStream, out extraStream); + var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); // prepare to read three parts of the patch in parallel ApplyInternal(newSize, input, controlStream, diffStream, extraStream, output); @@ -136,7 +137,10 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea using (ctrl) using (diff) using (extra) - using (var inputReader = new BinaryReader(input)) + using (output) + { + Span readBuffer = stackalloc byte[0x1000]; + while (output.Position < newSize) { //read control data: @@ -154,15 +158,18 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea throw new InvalidOperationException("Corrupt patch"); // read diff string in chunks - foreach (var newData in diff.BufferedRead(addSize)) + + while (addSize > 0) { - var inputData = inputReader.ReadBytes(newData.Length); + var bytesRead = diff.Read(readBuffer.SliceUpTo((int)addSize)); + var inputData = inputReader.ReadBytes(bytesRead); // add old data to diff string - for (var i = 0; i < newData.Length; i++) - newData[i] += inputData[i]; + for (var i 
= 0; i < bytesRead; i++) + readBuffer[i] += inputData[i]; - output.Write(newData, 0, newData.Length); + output.Write(readBuffer[..bytesRead]); + addSize -= bytesRead; } // sanity-check @@ -170,14 +177,17 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea throw new InvalidOperationException("Corrupt patch"); // read extra string in chunks - foreach (var extraData in extra.BufferedRead(copySize)) + while (copySize > 0) { - output.Write(extraData, 0, extraData.Length); + var bytesRead = extra.Read(readBuffer.SliceUpTo((int)copySize)); + output.Write(readBuffer[..bytesRead]); + copySize -= bytesRead; } // adjust position input.Seek(seekAmount, SeekOrigin.Current); } + } } } } diff --git a/src/deltaq/Extensions.cs b/src/deltaq/Extensions.cs index 67a33f0..3ab5681 100644 --- a/src/deltaq/Extensions.cs +++ b/src/deltaq/Extensions.cs @@ -37,11 +37,6 @@ public static ArraySegment Slice(this T[] buf, int offset, int count = -1) //substitute everything remaining after the offset, if count is subzero return new ArraySegment(buf, offset, count < 0 ? buf.Length - offset : count); } - - public static ArraySegment Slice(this ArraySegment segment, int offset, int count = -1) - { - return segment.Array.Slice(offset, count); - } #endregion #region Long Read/Write @@ -109,21 +104,6 @@ public static long ReadLong(this IList b) } #endregion - #region Stream reading - - public static IEnumerable BufferedRead(this Stream stream, long count, int bufferSize = 0x1000) - { - var readLength = (int) count; - if (readLength <= 0) yield break; - - using (var reader = new BinaryReader(stream)) - { - for (; readLength > 0; readLength -= bufferSize) - { - yield return reader.ReadBytes(Math.Min(readLength, bufferSize)); - } - } - } - #endregion + public static Span SliceUpTo(this Span span, int max) => span.Slice(0, Math.Min(span.Length, max)); } } From 60c5956cdac497eaf1ce220e3843ddddc906c945 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 14:13:08 -0500 Subject: [PATCH 010/325] Update solution --- deltaq.sln | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/deltaq.sln b/deltaq.sln index 2b303ef..d15e2db 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -5,7 +5,9 @@ VisualStudioVersion = 16.0.30914.41 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.CLI", "src\DeltaQ.CLI\DeltaQ.CLI.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -21,6 +23,10 @@ Global {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Debug|Any CPU.Build.0 = Debug|Any CPU {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Release|Any CPU.ActiveCfg = Release|Any CPU {784B81AE-E39B-497B-90AE-AA7EC4B98E50}.Release|Any CPU.Build.0 = Release|Any CPU + {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From d3eef5e106ee79f15f6516aee2ac01f1b0bb8121 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 14:38:19 -0500 Subject: [PATCH 011/325] Fixup changes from rebase --- src/deltaq/BsDiff/BsPatch.cs | 2 +- .../DeltaQ.Tests}/BsPatchTests.cs | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) rename {deltaq-tests => test/DeltaQ.Tests}/BsPatchTests.cs (78%) diff --git a/src/deltaq/BsDiff/BsPatch.cs b/src/deltaq/BsDiff/BsPatch.cs index afa8c86..884e52b 100644 --- a/src/deltaq/BsDiff/BsPatch.cs +++ b/src/deltaq/BsDiff/BsPatch.cs @@ -137,7 +137,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea using (ctrl) using (diff) using (extra) - using (output) + using (var inputReader = new BinaryReader(input)) { Span readBuffer = stackalloc byte[0x1000]; diff --git a/deltaq-tests/BsPatchTests.cs b/test/DeltaQ.Tests/BsPatchTests.cs similarity index 78% rename from deltaq-tests/BsPatchTests.cs rename to test/DeltaQ.Tests/BsPatchTests.cs index 43bd0bc..1162b47 100644 --- a/deltaq-tests/BsPatchTests.cs +++ b/test/DeltaQ.Tests/BsPatchTests.cs @@ -1,12 +1,8 @@ -using deltaq.BsDiff; -using System; -using System.Collections.Generic; -using System.IO; +using System.IO; using System.Security.Cryptography; -using System.Text; using Xunit; -namespace deltaq_tests +namespace DeltaQ.Tests { public class BsPatchTests { @@ -23,17 +19,17 @@ public void BsPatchFlushesOutput() { var oldBuffer = GetRandomFilledBuffer(0x123); var newBuffer = GetRandomFilledBuffer(0x4567); - + //can't use MemoryStream directly as Flush has no effect var patchMs = new MemoryStream(); var wrappedPatchMs = new BufferedStream(patchMs); - BsDiff.Create(oldBuffer, newBuffer, wrappedPatchMs); + BsDiff.BsDiff.Create(oldBuffer, newBuffer, wrappedPatchMs); var patchBuffer = patchMs.ToArray(); var reconstructMs = new MemoryStream(); var wrappedReconstructMs = new BufferedStream(reconstructMs); - BsPatch.Apply(oldBuffer, patchBuffer, wrappedReconstructMs); + BsDiff.BsPatch.Apply(oldBuffer, patchBuffer, wrappedReconstructMs); var 
reconstructedBuffer = reconstructMs.ToArray(); From 156703798624ca6a03c88af0b259e565f32fd721 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 3 Feb 2021 15:48:58 -0500 Subject: [PATCH 012/325] Split out subprojects and packages --- deltaq.sln | 20 ++++++++++++++++++- .../BsDiff => DeltaQ.BsDiff}/BsDiff.cs | 9 +-------- .../BsDiff => DeltaQ.BsDiff}/BsPatch.cs | 0 .../Bzip2/BZip2Constants.cs | 0 .../Bzip2/BZip2Exception.cs | 0 .../Bzip2/BZip2InputStream.cs | 0 .../Bzip2/BZip2OutputStream.cs | 0 .../Bzip2/Checksums/IChecksum.cs | 0 .../Bzip2/Checksums/StrangeCrc.cs | 0 src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 14 +++++++++++++ src/{deltaq => DeltaQ.BsDiff}/Extensions.cs | 0 .../DeltaQ.SuffixSorting.Abstractions.csproj | 11 ++++++++++ .../ISuffixSort.cs | 0 .../DeltaQ.SuffixSorting.SAIS.csproj | 11 ++++++++++ .../SAIS.cs | 0 15 files changed, 56 insertions(+), 9 deletions(-) rename src/{deltaq/BsDiff => DeltaQ.BsDiff}/BsDiff.cs (96%) rename src/{deltaq/BsDiff => DeltaQ.BsDiff}/BsPatch.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/BZip2Constants.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/BZip2Exception.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/BZip2InputStream.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/BZip2OutputStream.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/Checksums/IChecksum.cs (100%) rename src/{deltaq => DeltaQ.BsDiff}/Bzip2/Checksums/StrangeCrc.cs (100%) create mode 100644 src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj rename src/{deltaq => DeltaQ.BsDiff}/Extensions.cs (100%) create mode 100644 src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj rename src/{deltaq/SuffixSort => DeltaQ.SuffixSorting.Abstractions}/ISuffixSort.cs (100%) create mode 100644 src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj rename src/{deltaq/SuffixSort => DeltaQ.SuffixSorting.SAIS}/SAIS.cs (100%) diff --git a/deltaq.sln b/deltaq.sln index d15e2db..1dd8583 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ 
-7,7 +7,13 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.CLI", "src\DeltaQ.CLI\DeltaQ.CLI.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.CLI", "src\DeltaQ.CLI\DeltaQ.CLI.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.BsDiff", "src\DeltaQ.BsDiff\DeltaQ.BsDiff.csproj", "{C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.Abstractions", "src\DeltaQ.SuffixSorting.Abstractions\DeltaQ.SuffixSorting.Abstractions.csproj", "{D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.SAIS", "src\DeltaQ.SuffixSorting.SAIS\DeltaQ.SuffixSorting.SAIS.csproj", "{0C1531C0-427B-42BE-B781-E83D7B377537}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -27,6 +33,18 @@ Global {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Debug|Any CPU.Build.0 = Debug|Any CPU {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Release|Any CPU.ActiveCfg = Release|Any CPU {2E9A6A2A-438E-45DD-BDBC-8156A70B284F}.Release|Any CPU.Build.0 = Release|Any CPU + {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}.Release|Any CPU.Build.0 = Release|Any CPU + {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}.Release|Any CPU.Build.0 = Release|Any CPU + {0C1531C0-427B-42BE-B781-E83D7B377537}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0C1531C0-427B-42BE-B781-E83D7B377537}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/deltaq/BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs similarity index 96% rename from src/deltaq/BsDiff/BsDiff.cs rename to src/DeltaQ.BsDiff/BsDiff.cs index 068a048..e41a8de 100644 --- a/src/deltaq/BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -38,8 +38,6 @@ public static class BsDiff internal const int HeaderSize = 32; internal const long Signature = 0x3034464649445342; //"BSDIFF40" - private static readonly Lazy DefaultSuffixSort = new Lazy(() => new SAIS()); - internal static Stream GetEncodingStream(Stream stream, bool output) { if (output) @@ -54,12 +52,7 @@ internal static Stream GetEncodingStream(Stream stream, bool output) /// Byte array of the changed (newer) data /// Seekable, writable stream where the patch will be written /// Suffix sort implementation to use for comparison, or null to use a default sorter - public static void Create(byte[] oldData, byte[] newData, Stream output, ISuffixSort suffixSort = null) - { - CreateInternal(oldData, newData, output, suffixSort ?? 
DefaultSuffixSort.Value); - } - - private static void CreateInternal(byte[] oldData, byte[] newData, Stream output, ISuffixSort suffixSort) + public static void Create(byte[] oldData, byte[] newData, Stream output, ISuffixSort suffixSort) { // check arguments if (oldData == null) diff --git a/src/deltaq/BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/BsPatch.cs similarity index 100% rename from src/deltaq/BsDiff/BsPatch.cs rename to src/DeltaQ.BsDiff/BsPatch.cs diff --git a/src/deltaq/Bzip2/BZip2Constants.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs similarity index 100% rename from src/deltaq/Bzip2/BZip2Constants.cs rename to src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs diff --git a/src/deltaq/Bzip2/BZip2Exception.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs similarity index 100% rename from src/deltaq/Bzip2/BZip2Exception.cs rename to src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs diff --git a/src/deltaq/Bzip2/BZip2InputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs similarity index 100% rename from src/deltaq/Bzip2/BZip2InputStream.cs rename to src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs diff --git a/src/deltaq/Bzip2/BZip2OutputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs similarity index 100% rename from src/deltaq/Bzip2/BZip2OutputStream.cs rename to src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs diff --git a/src/deltaq/Bzip2/Checksums/IChecksum.cs b/src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs similarity index 100% rename from src/deltaq/Bzip2/Checksums/IChecksum.cs rename to src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs diff --git a/src/deltaq/Bzip2/Checksums/StrangeCrc.cs b/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs similarity index 100% rename from src/deltaq/Bzip2/Checksums/StrangeCrc.cs rename to src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj new file mode 100644 index 0000000..b9157fc --- /dev/null +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ 
-0,0 +1,14 @@ + + + + net5.0 + DeltaQ + jzebedee + true + + + + + + + diff --git a/src/deltaq/Extensions.cs b/src/DeltaQ.BsDiff/Extensions.cs similarity index 100% rename from src/deltaq/Extensions.cs rename to src/DeltaQ.BsDiff/Extensions.cs diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj new file mode 100644 index 0000000..dda6e09 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -0,0 +1,11 @@ + + + + net5.0 + DeltaQ + + jzebedee + true + + + diff --git a/src/deltaq/SuffixSort/ISuffixSort.cs b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs similarity index 100% rename from src/deltaq/SuffixSort/ISuffixSort.cs rename to src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj new file mode 100644 index 0000000..1306910 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -0,0 +1,11 @@ + + + + net5.0 + + + + + + + diff --git a/src/deltaq/SuffixSort/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs similarity index 100% rename from src/deltaq/SuffixSort/SAIS.cs rename to src/DeltaQ.SuffixSorting.SAIS/SAIS.cs From 65dbe4a821d2eee8ac10f9dbf5fe4aaf98d3df5a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 3 Feb 2021 19:21:32 -0500 Subject: [PATCH 013/325] Update projects to new layout --- src/DeltaQ.BsDiff/ArraySegmentExtensions.cs | 13 +++++ src/DeltaQ.BsDiff/BsDiff.cs | 6 +-- src/DeltaQ.BsDiff/Extensions.cs | 12 +---- .../ArraySegmentExtensions.cs | 13 +++++ src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 53 ++++++++++--------- src/deltaq/deltaq.csproj | 4 ++ test/DeltaQ.Tests/BsPatchTests.cs | 2 +- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 1 + 8 files changed, 62 insertions(+), 42 deletions(-) create mode 100644 src/DeltaQ.BsDiff/ArraySegmentExtensions.cs create mode 100644 src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs diff --git a/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs b/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs new file mode 100644 index 0000000..e2306f8 --- /dev/null +++ b/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs @@ -0,0 +1,13 @@ +using System; + +namespace DeltaQ.BsDiff +{ + internal static class ArraySegmentExtensions + { + public static ArraySegment Slice(this T[] buf, int offset, int count = -1) + { + //substitute everything remaining after the offset, if count is subzero + return new ArraySegment(buf, offset, count < 0 ? buf.Length - offset : count); + } + } +} diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index e41a8de..718a163 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -39,11 +39,7 @@ public static class BsDiff internal const long Signature = 0x3034464649445342; //"BSDIFF40" internal static Stream GetEncodingStream(Stream stream, bool output) - { - if (output) - return new BZip2OutputStream(stream) { IsStreamOwner = false }; - return new BZip2InputStream(stream); - } + => output ? 
new BZip2OutputStream(stream) { IsStreamOwner = false } : new BZip2InputStream(stream); /// /// Creates a BSDIFF-format patch from two byte arrays diff --git a/src/DeltaQ.BsDiff/Extensions.cs b/src/DeltaQ.BsDiff/Extensions.cs index 3ab5681..a3c45a3 100644 --- a/src/DeltaQ.BsDiff/Extensions.cs +++ b/src/DeltaQ.BsDiff/Extensions.cs @@ -27,18 +27,10 @@ using System.IO; using System.Runtime.CompilerServices; -namespace DeltaQ +namespace DeltaQ.BsDiff { internal static class Extensions { - #region ArraySegment Slice - public static ArraySegment Slice(this T[] buf, int offset, int count = -1) - { - //substitute everything remaining after the offset, if count is subzero - return new ArraySegment(buf, offset, count < 0 ? buf.Length - offset : count); - } - #endregion - #region Long Read/Write public static void WriteLongAt(this byte[] pb, int offset, long y) { @@ -59,7 +51,7 @@ public static void WriteLong(this IList b, long y) b[4] = (byte)(y >>= 8); b[5] = (byte)(y >>= 8); b[6] = (byte)(y >>= 8); - b[7] = (byte)((y >> 8) | 0x80); + b[7] = (byte)(y >> 8 | 0x80); } else { diff --git a/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs b/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs new file mode 100644 index 0000000..f30518f --- /dev/null +++ b/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs @@ -0,0 +1,13 @@ +using System; + +namespace DeltaQ.SuffixSorting.SAIS +{ + internal static class ArraySegmentExtensions + { + public static ArraySegment Slice(this T[] buf, int offset, int count = -1) + { + //substitute everything remaining after the offset, if count is subzero + return new ArraySegment(buf, offset, count < 0 ? buf.Length - offset : count); + } + } +} diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 47d86a0..97e1f84 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -48,12 +48,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +using DeltaQ.SuffixSort; using System; using System.Collections; using System.Collections.Generic; using System.Runtime.CompilerServices; -namespace DeltaQ.SuffixSort +namespace DeltaQ.SuffixSorting.SAIS { /// /// An implementation of the induced sorting based suffix array construction algorithm. @@ -99,7 +100,7 @@ private void LMS_sort(IList T, IList sa, IList c, IList b, i j = n - 1; bb = b[c1 = T[j]]; --j; - sa[bb++] = (T[j] < c1) ? ~j : j; + sa[bb++] = T[j] < c1 ? ~j : j; for (i = 0; i < n; ++i) { if (0 < (j = sa[i])) @@ -110,7 +111,7 @@ private void LMS_sort(IList T, IList sa, IList c, IList b, i bb = b[c1 = c0]; } --j; - sa[bb++] = (T[j] < c1) ? ~j : j; + sa[bb++] = T[j] < c1 ? ~j : j; sa[i] = 0; } else if (j < 0) @@ -134,7 +135,7 @@ private void LMS_sort(IList T, IList sa, IList c, IList b, i bb = b[c1 = c0]; } --j; - sa[--bb] = (T[j] > c1) ? ~(j + 1) : j; + sa[--bb] = T[j] > c1 ? ~(j + 1) : j; sa[i] = 0; } } @@ -176,21 +177,21 @@ private int LMS_post_proc(IList T, IList sa, int n, int m) do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) >= c1)); - for (; 0 <= i; ) + } while (0 <= --i && (c0 = T[i]) >= c1); + for (; 0 <= i;) { do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) <= c1)); + } while (0 <= --i && (c0 = T[i]) <= c1); if (0 <= i) { - sa[m + ((i + 1) >> 1)] = j - i; + sa[m + (i + 1 >> 1)] = j - i; j = i + 1; do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + } while (0 <= --i && (c0 = T[i]) >= c1); } } @@ -200,10 +201,10 @@ private int LMS_post_proc(IList T, IList sa, int n, int m) p = sa[i]; int plen = sa[m + (p >> 1)]; bool diff = true; - if ((plen == qlen) && ((q + plen) < n)) + if (plen == qlen && q + plen < n) { for (j = 0; - (j < plen) && (T[p + j] == T[q + j]); + j < plen && T[p + j] == T[q + j]; ++j) { } @@ -236,7 +237,7 @@ private void InduceSA(IList T, int[] sa, IList c, IList b, int n, j = n - 1; bb = b[c1 = T[j]]; - sa[bb++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + sa[bb++] = 0 < j && T[j - 1] < c1 ? 
~j : j; for (i = 0; i < n; ++i) { j = sa[i]; @@ -248,7 +249,7 @@ private void InduceSA(IList T, int[] sa, IList c, IList b, int n, b[c1] = bb; bb = b[c1 = c0]; } - sa[bb++] = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + sa[bb++] = 0 < j && T[j - 1] < c1 ? ~j : j; } } @@ -266,7 +267,7 @@ private void InduceSA(IList T, int[] sa, IList c, IList b, int n, b[c1] = bb; bb = b[c1 = c0]; } - sa[--bb] = ((j == 0) || (T[j - 1] > c1)) ? ~j : j; + sa[--bb] = j == 0 || T[j - 1] > c1 ? ~j : j; } else { @@ -303,12 +304,12 @@ private void sais_main(IList T, int[] sa, int fs, int n, int k) else if (k <= fs) { c = sa.Slice(n + fs - k, sa.Length - (n + fs - k)); - if (k <= (fs - k)) + if (k <= fs - k) { b = sa.Slice(n + fs - k * 2, sa.Length - (n + fs - k * 2)); flags = 0; } - else if (k <= (MinBucketSize * 4)) + else if (k <= MinBucketSize * 4) { b = new int[k]; flags = 2; @@ -342,14 +343,14 @@ sort all the LMS-substrings */ do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + } while (0 <= --i && (c0 = T[i]) >= c1); - for (; 0 <= i; ) + for (; 0 <= i;) { do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) <= c1)); + } while (0 <= --i && (c0 = T[i]) <= c1); if (0 <= i) { if (0 <= bb) @@ -362,7 +363,7 @@ sort all the LMS-substrings */ do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + } while (0 <= --i && (c0 = T[i]) >= c1); } } if (1 < m) @@ -393,10 +394,10 @@ sort all the LMS-substrings */ { b = null; } - int newfs = (n + fs) - (m * 2); + int newfs = n + fs - m * 2; if ((flags & (1 | 4 | 8)) == 0) { - if ((k + name) <= newfs) + if (k + name <= newfs) { newfs -= k; } @@ -423,14 +424,14 @@ sort all the LMS-substrings */ do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + } while (0 <= --i && (c0 = T[i]) >= c1); - for (; 0 <= i; ) + for (; 0 <= i;) { do { c1 = c0; - } while ((0 <= --i) && ((c0 = T[i]) <= c1)); + } while (0 <= --i && (c0 = T[i]) <= c1); if (0 <= i) { @@ -438,7 +439,7 @@ sort all the LMS-substrings */ do { c1 = c0; - } while ((0 <= --i) && 
((c0 = T[i]) >= c1)); + } while (0 <= --i && (c0 = T[i]) >= c1); } } diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 2991887..7596813 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -5,4 +5,8 @@ enable + + + + diff --git a/test/DeltaQ.Tests/BsPatchTests.cs b/test/DeltaQ.Tests/BsPatchTests.cs index 1162b47..e5e6a16 100644 --- a/test/DeltaQ.Tests/BsPatchTests.cs +++ b/test/DeltaQ.Tests/BsPatchTests.cs @@ -23,7 +23,7 @@ public void BsPatchFlushesOutput() //can't use MemoryStream directly as Flush has no effect var patchMs = new MemoryStream(); var wrappedPatchMs = new BufferedStream(patchMs); - BsDiff.BsDiff.Create(oldBuffer, newBuffer, wrappedPatchMs); + BsDiff.BsDiff.Create(oldBuffer, newBuffer, wrappedPatchMs, new SuffixSorting.SAIS.SAIS()); var patchBuffer = patchMs.ToArray(); diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 66f77a4..8a7f2e8 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -20,6 +20,7 @@ + From 65c400d787d6527efbefdeca292afd4734a76523 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 14 Feb 2021 22:06:02 -0600 Subject: [PATCH 014/325] Split out projects into uniform NuGet packages targeting net5.0 and netstandard2.0 Test against net5.0, net461, and netcoreapp2.1 runtimes --- deltaq.sln | 25 ++++++++++++++++--- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 4 ++- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 10 ++++++-- .../DeltaQ.SuffixSorting.Abstractions.csproj | 3 +-- .../DeltaQ.SuffixSorting.SAIS.csproj | 5 +++- src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj | 10 ++++++++ src/deltaq/deltaq.csproj | 8 +++--- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 5 ++-- 8 files changed, 55 insertions(+), 15 deletions(-) create mode 100644 src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj diff --git a/deltaq.sln b/deltaq.sln index 1dd8583..b75d9bc 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -9,11 +9,17 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Tests", "test\DeltaQ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.CLI", "src\DeltaQ.CLI\DeltaQ.CLI.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.BsDiff", "src\DeltaQ.BsDiff\DeltaQ.BsDiff.csproj", "{C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.BsDiff", "src\DeltaQ.BsDiff\DeltaQ.BsDiff.csproj", "{C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.Abstractions", "src\DeltaQ.SuffixSorting.Abstractions\DeltaQ.SuffixSorting.Abstractions.csproj", "{D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.Abstractions", "src\DeltaQ.SuffixSorting.Abstractions\DeltaQ.SuffixSorting.Abstractions.csproj", "{D81A3696-DBC3-46EA-8CB4-C7C3FA96564B}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.SAIS", "src\DeltaQ.SuffixSorting.SAIS\DeltaQ.SuffixSorting.SAIS.csproj", "{0C1531C0-427B-42BE-B781-E83D7B377537}" 
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS", "src\DeltaQ.SuffixSorting.SAIS\DeltaQ.SuffixSorting.SAIS.csproj", "{0C1531C0-427B-42BE-B781-E83D7B377537}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.VCDiff", "src\DeltaQ.VCDiff\DeltaQ.VCDiff.csproj", "{C889CB97-5D73-4D53-8249-DD7BFD402475}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{8B14206D-43D5-4740-96BF-3772DC4C3A6B}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{03F00ECA-08B1-47A4-8ACE-4624E31741BA}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -45,10 +51,23 @@ Global {0C1531C0-427B-42BE-B781-E83D7B377537}.Debug|Any CPU.Build.0 = Debug|Any CPU {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.ActiveCfg = Release|Any CPU {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.Build.0 = Release|Any CPU + {C889CB97-5D73-4D53-8249-DD7BFD402475}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C889CB97-5D73-4D53-8249-DD7BFD402475}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {CE1513B6-2F66-4E62-BDD1-0C41D4433A51} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {784B81AE-E39B-497B-90AE-AA7EC4B98E50} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} + {2E9A6A2A-438E-45DD-BDBC-8156A70B284F} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {0C1531C0-427B-42BE-B781-E83D7B377537} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {C889CB97-5D73-4D53-8249-DD7BFD402475} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + 
EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} EndGlobalSection diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index b9157fc..4609d09 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -1,7 +1,7 @@  - net5.0 + net5.0;netstandard2.0 DeltaQ jzebedee true @@ -9,6 +9,8 @@ + + diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index c7322aa..f7b6426 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -1,10 +1,16 @@ - Exe - net5.0 + net5.0;netstandard2.0 + DeltaQ + jzebedee + true + + + + diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index dda6e09..ffd38c4 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -1,9 +1,8 @@ - net5.0 + net5.0;netstandard2.0 DeltaQ - jzebedee true diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 1306910..0de069d 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -1,7 +1,10 @@ - net5.0 + net5.0;netstandard2.0 + DeltaQ + jzebedee + true diff --git a/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj b/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj new file mode 100644 index 0000000..ffd38c4 --- /dev/null +++ b/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj @@ -0,0 +1,10 @@ + + + + net5.0;netstandard2.0 + DeltaQ + jzebedee + true + + + diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 7596813..9bec497 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -1,8 
+1,10 @@ - + - net5.0 - enable + net5.0;netstandard2.0 + DeltaQ + jzebedee + true diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 8a7f2e8..7f23cef 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -1,13 +1,12 @@  - net5.0 - + net5.0;net461;netcoreapp2.1 false - + runtime; build; native; contentfiles; analyzers; buildtransitive From 83c3e96ef404223708cf646ac674a1257fce154e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 14 Feb 2021 22:07:03 -0600 Subject: [PATCH 015/325] Support C# 7.3 / netstandard2.0 in BsDiff --- src/DeltaQ.BsDiff/BsDiff.cs | 15 ++++++++--- src/DeltaQ.BsDiff/BsPatch.cs | 4 +-- src/DeltaQ.BsDiff/StreamExtensions.cs | 38 +++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 5 deletions(-) create mode 100644 src/DeltaQ.BsDiff/StreamExtensions.cs diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 718a163..72f4429 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -29,7 +29,7 @@ using System.Collections.Generic; using System.IO; using bz2core; -using DeltaQ.SuffixSort; +using DeltaQ.SuffixSorting; namespace DeltaQ.BsDiff { @@ -39,7 +39,16 @@ public static class BsDiff internal const long Signature = 0x3034464649445342; //"BSDIFF40" internal static Stream GetEncodingStream(Stream stream, bool output) - => output ? 
new BZip2OutputStream(stream) { IsStreamOwner = false } : new BZip2InputStream(stream); + { + if (output) + { + return new BZip2OutputStream(stream) { IsStreamOwner = false }; + } + else + { + return new BZip2InputStream(stream); + } + } /// /// Creates a BSDIFF-format patch from two byte arrays @@ -125,7 +134,7 @@ 0 32 Header var s = 0; var sf = 0; var lenf = 0; - for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length); ) + for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length);) { if (oldData[lastpos + i] == newData[lastscan + i]) s++; diff --git a/src/DeltaQ.BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/BsPatch.cs index 884e52b..f820871 100644 --- a/src/DeltaQ.BsDiff/BsPatch.cs +++ b/src/DeltaQ.BsDiff/BsPatch.cs @@ -168,7 +168,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea for (var i = 0; i < bytesRead; i++) readBuffer[i] += inputData[i]; - output.Write(readBuffer[..bytesRead]); + output.Write(readBuffer.Slice(0, bytesRead)); addSize -= bytesRead; } @@ -180,7 +180,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea while (copySize > 0) { var bytesRead = extra.Read(readBuffer.SliceUpTo((int)copySize)); - output.Write(readBuffer[..bytesRead]); + output.Write(readBuffer.Slice(0, bytesRead)); copySize -= bytesRead; } diff --git a/src/DeltaQ.BsDiff/StreamExtensions.cs b/src/DeltaQ.BsDiff/StreamExtensions.cs new file mode 100644 index 0000000..01ddc55 --- /dev/null +++ b/src/DeltaQ.BsDiff/StreamExtensions.cs @@ -0,0 +1,38 @@ +#if NETSTANDARD2_0 +using System; +using System.Buffers; +using System.IO; + +namespace DeltaQ.BsDiff +{ + internal static class StreamExtensions + { + public static int Read(this Stream stream, Span buffer) + { + byte[] sharedBuffer = ArrayPool.Shared.Rent(buffer.Length); + try + { + int numRead = stream.Read(sharedBuffer, 0, buffer.Length); + if ((uint)numRead > (uint)buffer.Length) + { + throw new IOException(); + } + new Span(sharedBuffer, 0, 
numRead).CopyTo(buffer); + return numRead; + } + finally { ArrayPool.Shared.Return(sharedBuffer); } + } + + public static void Write(this Stream stream, ReadOnlySpan buffer) + { + byte[] sharedBuffer = ArrayPool.Shared.Rent(buffer.Length); + try + { + buffer.CopyTo(sharedBuffer); + stream.Write(sharedBuffer, 0, buffer.Length); + } + finally { ArrayPool.Shared.Return(sharedBuffer); } + } + } +} +#endif \ No newline at end of file From 11ff5ca351517367be876dac64796e35f1e08e93 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 14 Feb 2021 22:07:38 -0600 Subject: [PATCH 016/325] Use DeltaQ.SuffixSorting namespace in DeltaQ.SuffixSorting.Abstractions project --- src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs index d5ea6ca..eb10967 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs +++ b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs @@ -1,4 +1,4 @@ -namespace DeltaQ.SuffixSort +namespace DeltaQ.SuffixSorting { public interface ISuffixSort { From 1f5ab2ff039ef91c930cd5ca44cf419a543e174e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 14 Feb 2021 22:11:25 -0600 Subject: [PATCH 017/325] Use compact declaration for IntAccessor members --- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 70 ++++++--------------------- 1 file changed, 15 insertions(+), 55 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 97e1f84..f03e7a0 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -48,7 +48,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -using DeltaQ.SuffixSort; using System; using System.Collections; using System.Collections.Generic; @@ -533,76 +532,37 @@ private class IntAccessor : IList { private readonly byte[] _buffer; - public IntAccessor(byte[] buf) - { - _buffer = buf; - } + public IntAccessor(byte[] buf) => _buffer = buf; - public int IndexOf(int item) - { - throw new NotImplementedException(); - } + public int IndexOf(int item) => throw new NotImplementedException(); - public void Insert(int index, int item) - { - throw new NotImplementedException(); - } + public void Insert(int index, int item) => throw new NotImplementedException(); - public void RemoveAt(int index) - { - throw new NotImplementedException(); - } + public void RemoveAt(int index) => throw new NotImplementedException(); public int this[int index] { - get { return _buffer[index]; } - set { _buffer[index] = (byte)value; } + get => _buffer[index]; + set => _buffer[index] = (byte)value; } - public void Add(int item) - { - throw new NotImplementedException(); - } + public void Add(int item) => throw new NotImplementedException(); - public void Clear() - { - throw new NotImplementedException(); - } + public void Clear() => throw new NotImplementedException(); - public bool Contains(int item) - { - throw new NotImplementedException(); - } + public bool Contains(int item) => throw new NotImplementedException(); - public void CopyTo(int[] array, int arrayIndex) - { - throw new NotImplementedException(); - } + public void CopyTo(int[] array, int arrayIndex) => throw new NotImplementedException(); - public int Count - { - get { return _buffer.Length; } - } + public int Count => _buffer.Length; - public bool IsReadOnly - { - get { return false; } - } + public bool IsReadOnly => false; - public bool Remove(int item) - { - throw new NotImplementedException(); - } + public bool Remove(int item) => throw new NotImplementedException(); - public IEnumerator GetEnumerator() - { - throw new NotImplementedException(); - } + public 
IEnumerator GetEnumerator() => throw new NotImplementedException(); - IEnumerator IEnumerable.GetEnumerator() - { - throw new NotImplementedException(); - } + IEnumerator IEnumerable.GetEnumerator() => throw new NotImplementedException(); } } } \ No newline at end of file From d38c7493405e4e08f5afedf46f9eee529886b6fd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 14 Feb 2021 22:30:24 -0600 Subject: [PATCH 018/325] Add tests for SAIS --- test/DeltaQ.Tests/BsDiffTests.cs | 8 +-- test/DeltaQ.Tests/SAISChecker.cs | 106 +++++++++++++++++++++++++++++++ test/DeltaQ.Tests/SAISTests.cs | 39 ++++++++++++ 3 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 test/DeltaQ.Tests/SAISChecker.cs create mode 100644 test/DeltaQ.Tests/SAISTests.cs diff --git a/test/DeltaQ.Tests/BsDiffTests.cs b/test/DeltaQ.Tests/BsDiffTests.cs index 66adc83..bc221a6 100644 --- a/test/DeltaQ.Tests/BsDiffTests.cs +++ b/test/DeltaQ.Tests/BsDiffTests.cs @@ -95,7 +95,7 @@ public void BsDiffCreateFromStreams() { using (var mmfStream = mmf.CreateViewStream()) { - BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream); + BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); } using (var msA = new MemoryStream(oldBuffer)) @@ -114,7 +114,7 @@ public void BsDiffCreateFromStreams() [MemberData(nameof(BsDiffCreateNullArguments_TestData))] public void BsDiffCreateNullArguments(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream)); + Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); } public static IEnumerable BsDiffCreateNullArguments_TestData() @@ -130,7 +130,7 @@ public static IEnumerable BsDiffCreateNullArguments_TestData() [MemberData(nameof(BsDiffCreateBadStreams_TestData))] public void BsDiffCreateBadStreams(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream)); 
+ Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); } public static IEnumerable BsDiffCreateBadStreams_TestData() @@ -144,7 +144,7 @@ private static byte[] BsDiffCreate(byte[] oldBuf, byte[] newBuf) { using (var outputStream = new MemoryStream()) { - BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream); + BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream, new SuffixSorting.SAIS.SAIS()); return outputStream.ToArray(); } } diff --git a/test/DeltaQ.Tests/SAISChecker.cs b/test/DeltaQ.Tests/SAISChecker.cs new file mode 100644 index 0000000..72fd68d --- /dev/null +++ b/test/DeltaQ.Tests/SAISChecker.cs @@ -0,0 +1,106 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.Tests +{ + internal class SAISChecker + { + internal static int Check(byte[] T, int[] SA, int n, bool verbose = false) + { + int[] C = new int[256]; + int i, p, q, t; + int c; + + if (verbose) { Console.Write(@"sufcheck: "); } + if (n == 0) + { + if (verbose) { Console.WriteLine("Done."); } + return 0; + } + + /* Check arguments. */ + if ((T == null) || (SA == null) || (n < 0)) + { + if (verbose) { Console.WriteLine("Invalid arguments."); } + return -1; + } + + /* check range: [0..n-1] */ + for (i = 0; i < n; ++i) + { + if ((SA[i] < 0) || (n <= SA[i])) + { + if (verbose) + { + Console.WriteLine("Out of the range [0," + (n - 1) + "]."); + Console.WriteLine(" SA[" + i + "]=" + SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for (i = 1; i < n; ++i) + { + if (T[SA[i - 1]] > T[SA[i]]) + { + if (verbose) + { + Console.WriteLine("Suffixes in wrong order."); + Console.Write(" T[SA[" + (i - 1) + "]=" + SA[i - 1] + "]=" + T[SA[i - 1]]); + Console.WriteLine(" > T[SA[" + i + "]=" + SA[i] + "]=" + T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. 
*/ + for (i = 0; i < 256; ++i) { C[i] = 0; } + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (i = 0, p = 0; i < 256; ++i) + { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for (i = 0; i < n; ++i) + { + p = SA[i]; + if (0 < p) + { + c = T[--p]; + t = C[c]; + } + else + { + c = T[p = n - 1]; + t = q; + } + if ((t < 0) || (p != SA[t])) + { + if (verbose) + { + Console.WriteLine("Suffixes in wrong position."); + Console.WriteLine(" SA[" + t + "]=" + ((0 <= t) ? SA[t] : -1) + " or"); + Console.WriteLine(" SA[" + i + "]=" + SA[i]); + } + return -4; + } + if (t != q) + { + ++C[c]; + if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if (verbose) { Console.WriteLine("Done."); } + return 0; + } + } +} diff --git a/test/DeltaQ.Tests/SAISTests.cs b/test/DeltaQ.Tests/SAISTests.cs new file mode 100644 index 0000000..58d7dc5 --- /dev/null +++ b/test/DeltaQ.Tests/SAISTests.cs @@ -0,0 +1,39 @@ +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.SAIS; +using System.Diagnostics; +using Xunit; + +namespace DeltaQ.Tests +{ + using static SAISChecker; + public class SAISTests + { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(4)] + [InlineData(8)] + [InlineData(16)] + [InlineData(32)] + [InlineData(51)] + [InlineData(0x8000)] + public void CheckRandomBuffer(int size) + { + byte[] T = new byte[size]; + + var provider = new System.Security.Cryptography.RNGCryptoServiceProvider(); + provider.GetBytes(T); + + ISuffixSort sort = new SAIS(); + var sw = Stopwatch.StartNew(); + int[] SA = sort.Sort(T); + sw.Stop(); + + Debug.WriteLine(sw.Elapsed); + + var result = Check(T, SA, T.Length, false); + Assert.Equal(0, result); + } + } +} From 2cfb06e45ea9b05b8718914cc8ca4036566a448b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 14 Feb 2021 23:17:07 -0600 Subject: [PATCH 019/325] Use ref struct IntAccessor --- .../DeltaQ.SuffixSorting.SAIS.csproj | 3 +- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 102 ++++++++++-------- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 0de069d..cb76c19 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -1,4 +1,4 @@ - + net5.0;netstandard2.0 @@ -9,6 +9,7 @@ + diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index f03e7a0..4a704e5 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -49,8 +49,6 @@ */ using System; -using System.Collections; -using System.Collections.Generic; using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.SAIS @@ -63,7 +61,7 @@ public class SAIS : ISuffixSort private const int MinBucketSize = 256; [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void GetCounts(IList T, IList c, int n, int k) + private static void GetCounts(IntAccessor T, Span c, int n, int k) { int i; for (i = 0; i < k; ++i) @@ -74,7 +72,7 @@ private void GetCounts(IList T, IList c, int n, int k) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void GetBuckets(IList c, IList b, int k, bool end) + private static void GetBuckets(Span c, Span b, int k, bool end) { int i, sum = 0; for (i = 0; i < k; ++i) @@ -86,13 +84,13 @@ private void GetBuckets(IList c, IList b, int k, bool end) /* sort all type LMS suffixes */ - private void LMS_sort(IList T, IList sa, IList c, IList b, int n, int k) + private static void LMS_sort(IntAccessor T, Span sa, Span c, Span b, int n, int k) { int bb, i, j; int c0, c1; /* compute SAl */ - if (Equals(c, b)) + if (c == b) GetCounts(T, c, n, k); GetBuckets(c, b, k, 
false); /* find starts of buckets */ @@ -120,7 +118,7 @@ private void LMS_sort(IList T, IList sa, IList c, IList b, i } /* compute SAs */ - if (Equals(c, b)) + if (c == b) GetCounts(T, c, n, k); GetBuckets(c, b, k, true); /* find ends of buckets */ @@ -140,7 +138,7 @@ private void LMS_sort(IList T, IList sa, IList c, IList b, i } } - private int LMS_post_proc(IList T, IList sa, int n, int m) + private static int LMS_post_proc(IntAccessor T, Span sa, int n, int m) { int i, j, p, q; int qlen, name; @@ -224,13 +222,13 @@ private int LMS_post_proc(IList T, IList sa, int n, int m) return name; } - private void InduceSA(IList T, int[] sa, IList c, IList b, int n, int k) + private static void InduceSA(IntAccessor T, int[] sa, Span c, Span b, int n, int k) { int bb, i, j; int c0, c1; /* compute SAl */ - if (Equals(c, b)) + if (c == b) GetCounts(T, c, n, k); GetBuckets(c, b, k, false); /* find starts of buckets */ @@ -253,7 +251,7 @@ private void InduceSA(IList T, int[] sa, IList c, IList b, int n, } /* compute SAs */ - if (Equals(c, b)) + if (c == b) GetCounts(T, c, n, k); GetBuckets(c, b, k, true); /* find ends of buckets */ @@ -278,9 +276,9 @@ private void InduceSA(IList T, int[] sa, IList c, IList b, int n, /* find the suffix array SA of T[0..n-1] in {0..k-1}^n use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */ - private void sais_main(IList T, int[] sa, int fs, int n, int k) + private void sais_main(IntAccessor T, int[] sa, int fs, int n, int k) { - IList c, b; + Span c, b; int i, j, bb, m; int name; int c0, c1; @@ -415,7 +413,7 @@ sort all the LMS-substrings */ } } - sais_main(sa.Slice(m + newfs, sa.Length - (m + newfs)), sa, newfs, m, name); + sais_main(new IntAccessor(sa.Slice(m + newfs, sa.Length - (m + newfs))), sa, newfs, m, name); i = n - 1; j = m * 2 - 1; @@ -513,6 +511,11 @@ public int[] Sort(byte[] T) if (T == null) throw new ArgumentNullException(nameof(T)); + Span span = T; + return Sort(span); + } + public int[] 
Sort(ReadOnlySpan T) + { var sa = new int[T.Length + 1]; if (T.Length <= 1) @@ -522,47 +525,56 @@ public int[] Sort(byte[] T) sa[0] = 0; } } - else - sais_main(new IntAccessor(T), sa, 0, T.Length, 256); + else sais_main(new IntAccessor(T), sa, 0, T.Length, 256); return sa; } - private class IntAccessor : IList + private ref struct IntAccessor { - private readonly byte[] _buffer; - - public IntAccessor(byte[] buf) => _buffer = buf; - - public int IndexOf(int item) => throw new NotImplementedException(); + private readonly Span intSpan; + private readonly ReadOnlySpan byteSpan; + private readonly bool packedIndex; - public void Insert(int index, int item) => throw new NotImplementedException(); - - public void RemoveAt(int index) => throw new NotImplementedException(); - - public int this[int index] + public IntAccessor(ReadOnlySpan buffer) { - get => _buffer[index]; - set => _buffer[index] = (byte)value; + this.byteSpan = buffer; + this.intSpan = default; + this.packedIndex = true; + } + public IntAccessor(Span buffer) + { + this.byteSpan = default; + this.intSpan = buffer; + this.packedIndex = false; } - public void Add(int item) => throw new NotImplementedException(); - - public void Clear() => throw new NotImplementedException(); - - public bool Contains(int item) => throw new NotImplementedException(); - - public void CopyTo(int[] array, int arrayIndex) => throw new NotImplementedException(); - - public int Count => _buffer.Length; - - public bool IsReadOnly => false; - - public bool Remove(int item) => throw new NotImplementedException(); - - public IEnumerator GetEnumerator() => throw new NotImplementedException(); + public int this[int index] + { + get + { + if (packedIndex) + { + return byteSpan[index]; + } + else + { + return intSpan[index]; + } + } - IEnumerator IEnumerable.GetEnumerator() => throw new NotImplementedException(); + set + { + if (packedIndex) + { + throw new InvalidOperationException("Can't use setter while accessing read only span"); + } + 
else + { + intSpan[index] = (byte)value; + } + } + } } } } \ No newline at end of file From e00407d5eae18001cbcd4d80320821e2e5aff51c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 14 Feb 2021 23:51:57 -0600 Subject: [PATCH 020/325] Completely Span-ify SAIS --- .../ArraySegmentExtensions.cs | 13 ------ src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 45 ++++++++++++++----- 2 files changed, 34 insertions(+), 24 deletions(-) delete mode 100644 src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs diff --git a/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs b/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs deleted file mode 100644 index f30518f..0000000 --- a/src/DeltaQ.SuffixSorting.SAIS/ArraySegmentExtensions.cs +++ /dev/null @@ -1,13 +0,0 @@ -using System; - -namespace DeltaQ.SuffixSorting.SAIS -{ - internal static class ArraySegmentExtensions - { - public static ArraySegment Slice(this T[] buf, int offset, int count = -1) - { - //substitute everything remaining after the offset, if count is subzero - return new ArraySegment(buf, offset, count < 0 ? 
buf.Length - offset : count); - } - } -} diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 4a704e5..8c70812 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -49,6 +49,8 @@ */ using System; +using System.Buffers; +using System.Diagnostics; using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.SAIS @@ -63,11 +65,11 @@ public class SAIS : ISuffixSort [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void GetCounts(IntAccessor T, Span c, int n, int k) { - int i; - for (i = 0; i < k; ++i) - c[i] = 0; + c.Slice(0, k).Clear(); + //for (i = 0; i < k; ++i) + // c[i] = 0; - for (i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) c[T[i]]++; } @@ -222,7 +224,7 @@ private static int LMS_post_proc(IntAccessor T, Span sa, int n, int m) return name; } - private static void InduceSA(IntAccessor T, int[] sa, Span c, Span b, int n, int k) + private static void InduceSA(IntAccessor T, Span sa, Span c, Span b, int n, int k) { int bb, i, j; int c0, c1; @@ -276,7 +278,7 @@ private static void InduceSA(IntAccessor T, int[] sa, Span c, Span b, /* find the suffix array SA of T[0..n-1] in {0..k-1}^n use a working space (excluding T and SA) of at most 2n+O(1) for a constant alphabet */ - private void sais_main(IntAccessor T, int[] sa, int fs, int n, int k) + private void sais_main(IntAccessor T, Span sa, int fs, int n, int k) { Span c, b; int i, j, bb, m; @@ -286,7 +288,7 @@ private void sais_main(IntAccessor T, int[] sa, int fs, int n, int k) if (k <= MinBucketSize) { - c = new int[k]; + c = new int[k];// ArrayPool.Shared.Rent(k); if (k <= fs) { b = sa.Slice(n + fs - k, sa.Length - (n + fs - k)); @@ -327,10 +329,12 @@ private void sais_main(IntAccessor T, int[] sa, int fs, int n, int k) sort all the LMS-substrings */ GetCounts(T, c, n, k); GetBuckets(c, b, k, true); /* find ends of buckets */ - for (i = 0; i < n; ++i) - { - sa[i] = 0; - } + + sa.Slice(0, n).Clear(); + 
//for (i = 0; i < n; ++i) + //{ + // sa[i] = 0; + //} bb = -1; i = n - 1; @@ -530,6 +534,25 @@ public int[] Sort(ReadOnlySpan T) return sa; } + //private readonly ref struct PooledArray + //{ + // private readonly T[] _array; + + // public readonly Span Span; + + // public PooledArray(int k) + // { + // _array = ArrayPool.Shared.Rent(k); + + // Span = new Span(_array, 0, k); + // } + + // public void Dispose() + // { + // ArrayPool.Shared.Return(_array); + // } + //} + private ref struct IntAccessor { private readonly Span intSpan; From 80df3a8c1a2682504813d4ba73b855095616b005 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 15 Feb 2021 13:55:05 -0600 Subject: [PATCH 021/325] Use Span instead of IList for BsDiff --- src/DeltaQ.BsDiff/BsDiff.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 72f4429..6acca62 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -241,10 +241,10 @@ 0 32 Header output.Position = endPosition; } - private static int CompareBytes(IList left, IList right) + private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) { var diff = 0; - for (var i = 0; i < left.Count && i < right.Count; i++) + for (var i = 0; i < left.Length && i < right.Length; i++) { diff = left[i] - right[i]; if (diff != 0) @@ -253,10 +253,10 @@ private static int CompareBytes(IList left, IList right) return diff; } - private static int MatchLength(IList oldData, IList newData) + private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan newData) { int i; - for (i = 0; i < oldData.Count && i < newData.Count; i++) + for (i = 0; i < oldData.Length && i < newData.Length; i++) { if (oldData[i] != newData[i]) break; @@ -265,7 +265,7 @@ private static int MatchLength(IList oldData, IList newData) return i; } - private static int Search(IList I, byte[] oldData, IList newData, int start, int end, out int pos) + private static int 
Search(ReadOnlySpan I, byte[] oldData, ReadOnlySpan newData, int start, int end, out int pos) { while (true) { From 63bc8389aee28fd52cc7054c70711830466a6521 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 16 Feb 2021 22:00:01 -0600 Subject: [PATCH 022/325] Use Span implementation instead of IList for packed longs --- src/DeltaQ.BsDiff/BsDiff.cs | 33 +++++++------- src/DeltaQ.BsDiff/BsPatch.cs | 20 +++++---- src/DeltaQ.BsDiff/Extensions.cs | 77 ++++++++++++--------------------- 3 files changed, 55 insertions(+), 75 deletions(-) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 6acca62..f31035f 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -26,6 +26,7 @@ */ using System; +using System.Buffers; using System.Collections.Generic; using System.IO; using bz2core; @@ -83,15 +84,18 @@ 0 32 Header 32 ?? Bzip2ed ctrl block ?? ?? Bzip2ed diff block ?? ?? Bzip2ed extra block */ - var header = new byte[HeaderSize]; - header.WriteLong(Signature); - header.WriteLongAt(24, newData.Length); + Span header = stackalloc byte[HeaderSize]; + header.WritePackedLong(Signature); + header.Slice(24).WritePackedLong(newData.Length); var startPosition = output.Position; - output.Write(header, 0, header.Length); + output.Write(header); var I = suffixSort.Sort(oldData); + //backing for ctrl writes + Span buf = stackalloc byte[sizeof(long)]; + using (var msControl = new MemoryStream()) using (var msDiff = new MemoryStream()) using (var msExtra = new MemoryStream()) @@ -195,18 +199,15 @@ 0 32 Header if (extraLength > 0) extraStream.Write(newData, lastscan + lenf, extraLength); - //backing for ctrl writes - var buf = new byte[8]; - //write ctrl block - buf.WriteLong(lenf); - ctrlStream.Write(buf, 0, 8); + buf.WritePackedLong(lenf); + ctrlStream.Write(buf); - buf.WriteLong(extraLength); - ctrlStream.Write(buf, 0, 8); + buf.WritePackedLong(extraLength); + ctrlStream.Write(buf); - buf.WriteLong((pos - lenb) - (lastpos + lenf)); - 
ctrlStream.Write(buf, 0, 8); + buf.WritePackedLong((pos - lenb) - (lastpos + lenf)); + ctrlStream.Write(buf); lastscan = scan - lenb; lastpos = pos - lenb; @@ -220,14 +221,14 @@ 0 32 Header msControl.CopyTo(output); // compute size of compressed ctrl data - header.WriteLongAt(8, msControl.Length); + header.Slice(8).WritePackedLong(msControl.Length); // write compressed diff data msDiff.Seek(0, SeekOrigin.Begin); msDiff.CopyTo(output); // compute size of compressed diff data - header.WriteLongAt(16, msDiff.Length); + header.Slice(16).WritePackedLong(msDiff.Length); // write compressed extra data msExtra.Seek(0, SeekOrigin.Begin); @@ -237,7 +238,7 @@ 0 32 Header // seek to the beginning, write the header, then seek back to end var endPosition = output.Position; output.Position = startPosition; - output.Write(header, 0, header.Length); + output.Write(header); output.Position = endPosition; } diff --git a/src/DeltaQ.BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/BsPatch.cs index f820871..1cf835e 100644 --- a/src/DeltaQ.BsDiff/BsPatch.cs +++ b/src/DeltaQ.BsDiff/BsPatch.cs @@ -93,18 +93,18 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre if (!patchStream.CanSeek) throw new ArgumentException("Patch stream must be seekable", nameof(openPatchStream)); - var header = new byte[BsDiff.HeaderSize]; - patchStream.Read(header, 0, BsDiff.HeaderSize); + Span header = stackalloc byte[BsDiff.HeaderSize]; + patchStream.Read(header); // check for appropriate magic - var signature = header.ReadLong(); + var signature = header.ReadPackedLong(); if (signature != BsDiff.Signature) throw new InvalidOperationException("Corrupt patch"); // read lengths from header - controlLength = header.ReadLongAt(8); - diffLength = header.ReadLongAt(16); - newSize = header.ReadLongAt(24); + controlLength = header.Slice(8).ReadPackedLong(); + diffLength = header.Slice(16).ReadPackedLong(); + newSize = header.Slice(24).ReadPackedLong(); if (controlLength < 0 || diffLength < 0 || 
newSize < 0) throw new InvalidOperationException("Corrupt patch"); @@ -140,18 +140,20 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea using (var inputReader = new BinaryReader(input)) { Span readBuffer = stackalloc byte[0x1000]; + Span ctrlBuffer = stackalloc byte[24]; while (output.Position < newSize) { //read control data: // set of triples (x,y,z) meaning + ctrl.Read(ctrlBuffer); // add x bytes from oldfile to x bytes from the diff block; - var addSize = ctrl.ReadLong(); + var addSize = ctrlBuffer.ReadPackedLong(); // copy y bytes from the extra block; - var copySize = ctrl.ReadLong(); + var copySize = ctrlBuffer.Slice(8).ReadPackedLong(); // seek forwards in oldfile by z bytes; - var seekAmount = ctrl.ReadLong(); + var seekAmount = ctrlBuffer.Slice(16).ReadPackedLong(); // sanity-check if (output.Position + addSize > newSize) diff --git a/src/DeltaQ.BsDiff/Extensions.cs b/src/DeltaQ.BsDiff/Extensions.cs index a3c45a3..7e057f6 100644 --- a/src/DeltaQ.BsDiff/Extensions.cs +++ b/src/DeltaQ.BsDiff/Extensions.cs @@ -31,70 +31,47 @@ namespace DeltaQ.BsDiff { internal static class Extensions { - #region Long Read/Write - public static void WriteLongAt(this byte[] pb, int offset, long y) - { - pb.Slice(offset, sizeof(long)).WriteLong(y); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void WriteLong(this IList b, long y) + public static void WritePackedLong(this Span span, long y) { if (y < 0) { y = -y; - b[0] = (byte)y; - b[1] = (byte)(y >>= 8); - b[2] = (byte)(y >>= 8); - b[3] = (byte)(y >>= 8); - b[4] = (byte)(y >>= 8); - b[5] = (byte)(y >>= 8); - b[6] = (byte)(y >>= 8); - b[7] = (byte)(y >> 8 | 0x80); + span[0] = (byte)y; + span[1] = (byte)(y >>= 8); + span[2] = (byte)(y >>= 8); + span[3] = (byte)(y >>= 8); + span[4] = (byte)(y >>= 8); + span[5] = (byte)(y >>= 8); + span[6] = (byte)(y >>= 8); + span[7] = (byte)((y >> 8) | 0x80); } else { - b[0] = (byte)y; - b[1] = (byte)(y >>= 8); - b[2] = (byte)(y >>= 
8); - b[3] = (byte)(y >>= 8); - b[4] = (byte)(y >>= 8); - b[5] = (byte)(y >>= 8); - b[6] = (byte)(y >>= 8); - b[7] = (byte)(y >> 8); + span[0] = (byte)y; + span[1] = (byte)(y >>= 8); + span[2] = (byte)(y >>= 8); + span[3] = (byte)(y >>= 8); + span[4] = (byte)(y >>= 8); + span[5] = (byte)(y >>= 8); + span[6] = (byte)(y >>= 8); + span[7] = (byte)(y >> 8); } } - public static long ReadLong(this Stream stream) - { - var buf = new byte[sizeof(long)]; - if (stream.Read(buf, 0, sizeof(long)) != sizeof(long)) - throw new InvalidOperationException("Could not read long from stream"); - - return buf.ReadLong(); - } - - public static long ReadLongAt(this byte[] buf, int offset) - { - return buf.Slice(offset, sizeof(long)).ReadLong(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static long ReadLong(this IList b) + public static long ReadPackedLong(this Span span) { - long y = b[7] & 0x7F; - y <<= 8; y += b[6]; - y <<= 8; y += b[5]; - y <<= 8; y += b[4]; - y <<= 8; y += b[3]; - y <<= 8; y += b[2]; - y <<= 8; y += b[1]; - y <<= 8; y += b[0]; + long y = span[7] & 0x7F; + y <<= 8; y += span[6]; + y <<= 8; y += span[5]; + y <<= 8; y += span[4]; + y <<= 8; y += span[3]; + y <<= 8; y += span[2]; + y <<= 8; y += span[1]; + y <<= 8; y += span[0]; - return (b[7] & 0x80) != 0 ? -y : y; + return (span[7] & 0x80) != 0 ? -y : y; } - #endregion public static Span SliceUpTo(this Span span, int max) => span.Slice(0, Math.Min(span.Length, max)); } From 48e76ae9065be9ead985c328baf4cc62637cdce2 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:08:52 -0600 Subject: [PATCH 023/325] Update ISuffixSort signature Drop SuffixSorting.Abstractions package version to 0.1.0 --- .../DeltaQ.SuffixSorting.Abstractions.csproj | 5 +++++ src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index ffd38c4..5e71565 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -5,6 +5,11 @@ DeltaQ jzebedee true + 0.1.0 + + + + diff --git a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs index eb10967..d5ac9c2 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs +++ b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs @@ -1,7 +1,12 @@ -namespace DeltaQ.SuffixSorting +using System; +using System.Buffers; + +namespace DeltaQ.SuffixSorting { public interface ISuffixSort { - int[] Sort(byte[] buffer); + ReadOnlyMemory Sort(ReadOnlySpan textBuffer); + IMemoryOwner SortOwned(ReadOnlySpan textBuffer); + int Sort(ReadOnlySpan textBuffer, Span suffixBuffer); } } \ No newline at end of file From c1054172ad2b7699aecff8965044c6c2a1b45d25 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:10:12 -0600 Subject: [PATCH 024/325] Update SAIS to match new ISuffixSort Reference Toolkit.HighPerformance in SuffixSorting.SAIS Drop package version in SuffixSorting.SAIS down to 0.1.0 --- .../DeltaQ.SuffixSorting.SAIS.csproj | 4 +- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 60 ++++++++----------- 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index cb76c19..812c21e 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -5,10 +5,12 @@ DeltaQ jzebedee true + 0.1.0 - + + diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 8c70812..57b4834 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -48,6 +48,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Buffers; using System.Diagnostics; @@ -502,57 +503,44 @@ sort all the LMS-substrings */ InduceSA(T, sa, c, b, n, k); } - /*- Suffixsorting -*/ - /* byte */ - /// /// Constructs the suffix array of a given string (as byte array) in linear time. 
/// - /// input bytes + /// input bytes /// 0 if no error occurred, -1 or -2 otherwise - public int[] Sort(byte[] T) + public ReadOnlyMemory Sort(ReadOnlySpan textBuffer) { - if (T == null) - throw new ArgumentNullException(nameof(T)); + var suffixBuffer = new int[textBuffer.Length]; + Sort(textBuffer, suffixBuffer); + return suffixBuffer; + } - Span span = T; - return Sort(span); + public MemoryOwner SortOwned(ReadOnlySpan textBuffer) + { + var owner = MemoryOwner.Allocate(textBuffer.Length); + Sort(textBuffer, owner.Span); + return owner; } - public int[] Sort(ReadOnlySpan T) + + IMemoryOwner ISuffixSort.SortOwned(ReadOnlySpan textBuffer) => SortOwned(textBuffer); + + public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { - var sa = new int[T.Length + 1]; + if (suffixBuffer.Length < textBuffer.Length) + throw new ArgumentOutOfRangeException(nameof(suffixBuffer), $"Span must have a minimum size of ({nameof(textBuffer)}.Length+1)"); - if (T.Length <= 1) + if (textBuffer.Length <= 1) { - if (T.Length == 1) + if (textBuffer.Length == 1) { - sa[0] = 0; + suffixBuffer[0] = 0; } } - else sais_main(new IntAccessor(T), sa, 0, T.Length, 256); - - return sa; + else sais_main(new IntAccessor(textBuffer), suffixBuffer, 0, textBuffer.Length, 256); + + return textBuffer.Length; } - //private readonly ref struct PooledArray - //{ - // private readonly T[] _array; - - // public readonly Span Span; - - // public PooledArray(int k) - // { - // _array = ArrayPool.Shared.Rent(k); - - // Span = new Span(_array, 0, k); - // } - - // public void Dispose() - // { - // ArrayPool.Shared.Return(_array); - // } - //} - private ref struct IntAccessor { private readonly Span intSpan; From c9e4bc4fe67c885d4425f97e6d99276f79b8a301 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:10:58 -0600 Subject: [PATCH 025/325] Use Spans in SAISChecker --- test/DeltaQ.Tests/SAISChecker.cs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/DeltaQ.Tests/SAISChecker.cs b/test/DeltaQ.Tests/SAISChecker.cs index 72fd68d..abc86fb 100644 --- a/test/DeltaQ.Tests/SAISChecker.cs +++ b/test/DeltaQ.Tests/SAISChecker.cs @@ -1,14 +1,10 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace DeltaQ.Tests { internal class SAISChecker { - internal static int Check(byte[] T, int[] SA, int n, bool verbose = false) + internal static int Check(ReadOnlySpan T, ReadOnlySpan SA, int n, bool verbose = false) { int[] C = new int[256]; int i, p, q, t; From a3e2f8b6132b2cea9be8917bab133e696ea09de2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 23 Feb 2021 06:11:23 -0600 Subject: [PATCH 026/325] Update package references in DeltaQ.BsDiff --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 4609d09..ce41340 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -8,9 +8,8 @@ - - - + + From 22d272678e15a726ba1a199279bd23fa40baf015 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:11:36 -0600 Subject: [PATCH 027/325] Remove ArraySegmentExtensions in DeltaQ.BsDiff --- src/DeltaQ.BsDiff/ArraySegmentExtensions.cs | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 src/DeltaQ.BsDiff/ArraySegmentExtensions.cs diff --git a/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs b/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs deleted file mode 100644 index e2306f8..0000000 --- a/src/DeltaQ.BsDiff/ArraySegmentExtensions.cs +++ /dev/null @@ -1,13 +0,0 @@ -using System; - -namespace DeltaQ.BsDiff -{ - internal static class ArraySegmentExtensions - { - public static ArraySegment Slice(this T[] buf, int offset, int count = -1) - { - //substitute everything remaining after the offset, if count is subzero - return new ArraySegment(buf, offset, count < 0 ? buf.Length - offset : count); - } - } -} From 895acd77d82b951e92d0fdbf17356eddf32cbf22 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 23 Feb 2021 06:12:22 -0600 Subject: [PATCH 028/325] Remove StreamExtensions in DeltaQ.BsDiff Redundant with the reference to Toolkit.HighPerformance --- src/DeltaQ.BsDiff/StreamExtensions.cs | 38 --------------------------- 1 file changed, 38 deletions(-) delete mode 100644 src/DeltaQ.BsDiff/StreamExtensions.cs diff --git a/src/DeltaQ.BsDiff/StreamExtensions.cs b/src/DeltaQ.BsDiff/StreamExtensions.cs deleted file mode 100644 index 01ddc55..0000000 --- a/src/DeltaQ.BsDiff/StreamExtensions.cs +++ /dev/null @@ -1,38 +0,0 @@ -#if NETSTANDARD2_0 -using System; -using System.Buffers; -using System.IO; - -namespace DeltaQ.BsDiff -{ - internal static class StreamExtensions - { - public static int Read(this Stream stream, Span buffer) - { - byte[] sharedBuffer = ArrayPool.Shared.Rent(buffer.Length); - try - { - int numRead = stream.Read(sharedBuffer, 0, buffer.Length); - if ((uint)numRead > (uint)buffer.Length) - { - throw new IOException(); - } - new Span(sharedBuffer, 0, numRead).CopyTo(buffer); - return numRead; - } - finally { 
ArrayPool.Shared.Return(sharedBuffer); } - } - - public static void Write(this Stream stream, ReadOnlySpan buffer) - { - byte[] sharedBuffer = ArrayPool.Shared.Rent(buffer.Length); - try - { - buffer.CopyTo(sharedBuffer); - stream.Write(sharedBuffer, 0, buffer.Length); - } - finally { ArrayPool.Shared.Return(sharedBuffer); } - } - } -} -#endif \ No newline at end of file From 7265e72a70f754743e8d3fdfbb17ed01897f3549 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 23 Feb 2021 06:13:18 -0600 Subject: [PATCH 029/325] Update BsPatch Use ROMs instead of array types Flush instead of disposing output stream --- src/DeltaQ.BsDiff/BsPatch.cs | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/DeltaQ.BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/BsPatch.cs index 1cf835e..b656e31 100644 --- a/src/DeltaQ.BsDiff/BsPatch.cs +++ b/src/DeltaQ.BsDiff/BsPatch.cs @@ -25,6 +25,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.IO; @@ -46,25 +47,16 @@ public static class BsPatch /// Byte array of the original (older) data /// Byte array of the BSDIFF-format patch data /// Writable stream where the updated data will be written - public static void Apply(byte[] input, byte[] diff, Stream output) + public static void Apply(ReadOnlyMemory input, ReadOnlyMemory diff, Stream output) { - Stream openPatchStream(long uOffset, long uLength) - { - checked - { - var offset = (int)uOffset; - var length = (int)uLength; - return new MemoryStream(diff, offset, - uLength > 0 - ? 
length - : diff.Length - offset); - } - } - var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); // prepare to read three parts of the patch in parallel - ApplyInternal(newSize, new MemoryStream(input), controlStream, diffStream, extraStream, output); + ApplyInternal(newSize, input.AsStream(), controlStream, diffStream, extraStream, output); + return; + + Stream openPatchStream(long offset, long length) + => diff.Slice((int)offset, length > 0 ? (int)length : diff.Length - (int)offset).AsStream(); } /// @@ -133,7 +125,6 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea if (!output.CanWrite) throw new ArgumentException("Output stream must be writable", nameof(output)); - using (output) using (ctrl) using (diff) using (extra) @@ -190,6 +181,8 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea input.Seek(seekAmount, SeekOrigin.Current); } } + + output.Flush(); } } } From 5a72f919f2eaacb58b598a99e92b282e358c8ec8 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 23 Feb 2021 06:41:47 -0600 Subject: [PATCH 030/325] Update BsDiff Use Spans for parameters Fix off-by-one bug with suffix array Break out header buffer into sub-spans, making the writes clearer Revert to using recursive Search (for testing, will replace) Use fast SequenceCompareTo for CompareBytes --- src/DeltaQ.BsDiff/BsDiff.cs | 93 ++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index f31035f..305a58e 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -25,12 +25,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +using bz2core; +using DeltaQ.SuffixSorting; +using Microsoft.Toolkit.HighPerformance.Buffers; +using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.Buffers; -using System.Collections.Generic; using System.IO; -using bz2core; -using DeltaQ.SuffixSorting; namespace DeltaQ.BsDiff { @@ -58,7 +59,7 @@ internal static Stream GetEncodingStream(Stream stream, bool output) /// Byte array of the changed (newer) data /// Seekable, writable stream where the patch will be written /// Suffix sort implementation to use for comparison, or null to use a default sorter - public static void Create(byte[] oldData, byte[] newData, Stream output, ISuffixSort suffixSort) + public static void Create(ReadOnlySpan oldData, ReadOnlySpan newData, Stream output, ISuffixSort suffixSort) { // check arguments if (oldData == null) @@ -85,17 +86,23 @@ 0 32 Header ?? ?? Bzip2ed diff block ?? ?? Bzip2ed extra block */ Span header = stackalloc byte[HeaderSize]; - header.WritePackedLong(Signature); - header.Slice(24).WritePackedLong(newData.Length); + Span header_signature = header.Slice(0, sizeof(long)); + Span header_compressed_ctrl = header.Slice(sizeof(long), sizeof(long)); + Span header_compressed_diff = header.Slice(sizeof(long) * 2, sizeof(long)); + Span header_newdata_len = header.Slice(sizeof(long) * 3, sizeof(long)); + header_signature.WritePackedLong(Signature); + header_newdata_len.WritePackedLong(newData.Length); var startPosition = output.Position; output.Write(header); - var I = suffixSort.Sort(oldData); - //backing for ctrl writes Span buf = stackalloc byte[sizeof(long)]; + //the memory allocated for the suffix array MUST be at least (n+1) + //this is only required for bsdiff, so we allocate it ourselves + //instead of using the ISuffixSort overloads that might allocate only (n) + using (MemoryOwner saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear)) using (var msControl = new MemoryStream()) using (var msDiff = new MemoryStream()) 
using (var msExtra = new MemoryStream()) @@ -104,6 +111,9 @@ 0 32 Header using (var diffStream = GetEncodingStream(msDiff, true)) using (var extraStream = GetEncodingStream(msExtra, true)) { + Span I = saOwner.Span; + suffixSort.Sort(oldData, I); + var scan = 0; var pos = 0; var len = 0; @@ -197,7 +207,7 @@ 0 32 Header //write extra string var extraLength = (scan - lenb) - (lastscan + lenf); if (extraLength > 0) - extraStream.Write(newData, lastscan + lenf, extraLength); + extraStream.Write(newData.Slice(lastscan + lenf, extraLength)); //write ctrl block buf.WritePackedLong(lenf); @@ -221,14 +231,14 @@ 0 32 Header msControl.CopyTo(output); // compute size of compressed ctrl data - header.Slice(8).WritePackedLong(msControl.Length); + header_compressed_ctrl.WritePackedLong(msControl.Length); // write compressed diff data msDiff.Seek(0, SeekOrigin.Begin); msDiff.CopyTo(output); // compute size of compressed diff data - header.Slice(16).WritePackedLong(msDiff.Length); + header_compressed_diff.WritePackedLong(msDiff.Length); // write compressed extra data msExtra.Seek(0, SeekOrigin.Begin); @@ -242,17 +252,7 @@ 0 32 Header output.Position = endPosition; } - private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) - { - var diff = 0; - for (var i = 0; i < left.Length && i < right.Length; i++) - { - diff = left[i] - right[i]; - if (diff != 0) - break; - } - return diff; - } + private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) => left.SequenceCompareTo(right); private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan newData) { @@ -266,34 +266,43 @@ private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan ne return i; } - private static int Search(ReadOnlySpan I, byte[] oldData, ReadOnlySpan newData, int start, int end, out int pos) + private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadOnlySpan newData, int start, int end, out int pos) { - while (true) + int x, y; + if (end - start < 2) { - if (end - start < 
2) - { - var startLength = MatchLength(oldData.Slice(I[start]), newData); - var endLength = MatchLength(oldData.Slice(I[end]), newData); + //x = MatchLength(oldData[I[start]..], newData); + //y = MatchLength(oldData[I[end]..], newData); + x = MatchLength(oldData.Slice(I[start]), newData); + y = MatchLength(oldData.Slice(I[end]), newData); - if (startLength > endLength) - { - pos = I[start]; - return startLength; - } - - pos = I[end]; - return endLength; + if (x > y) + { + pos = I[start]; + return x; } - - var midPoint = start + (end - start) / 2; - if (CompareBytes(oldData.Slice(I[midPoint]), newData) < 0) + else { - start = midPoint; - continue; + pos = I[end]; + return y; } + //throw new ApplicationException($"start:{start} end:{end} I:{I.Length} I[start]:{I[start]} I[end]:{I[end]} oldData:{oldData.Length} newData:{newData.Length}", e); + } - end = midPoint; + x = start + (end - start) / 2; + //var midPoint = start + (end - start) / 2; + if (CompareBytes(oldData.Slice(I[x]), newData) < 0) + { + return Search(I, oldData, newData, x, end, out pos); + //start = midPoint; + //continue; + } + else + { + return Search(I, oldData, newData, start, x, out pos); } + + //end = midPoint; } } } From eb51a2e949e654cd8a949bf72ffe7f08d3a03197 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:42:25 -0600 Subject: [PATCH 031/325] Update BsDiff tests Use theories Use ROS/ROM overloads --- test/DeltaQ.Tests/BsDiffTests.cs | 109 ++++++++++++++----------------- 1 file changed, 49 insertions(+), 60 deletions(-) diff --git a/test/DeltaQ.Tests/BsDiffTests.cs b/test/DeltaQ.Tests/BsDiffTests.cs index bc221a6..d3b5318 100644 --- a/test/DeltaQ.Tests/BsDiffTests.cs +++ b/test/DeltaQ.Tests/BsDiffTests.cs @@ -40,7 +40,7 @@ public class BsDiffTests private static byte[] GetBuffer(int size) { - var rand = new Random(63*13*63*13); + var rand = new Random(63 * 13 * 63 * 13); var buf = new byte[size]; rand.NextBytes(buf); @@ -48,66 +48,59 @@ private static byte[] GetBuffer(int size) return buf; } - private static IEnumerable GetBuffers(IEnumerable sizes) - { - return sizes.Select(GetBuffer); - } + public static IEnumerable TestDoubleBuffers(IEnumerable sizes) + => sizes.Select(size => new object[] { GetBuffer(size), GetBuffer(size) }); - [Fact] - public void BsDiffCreateFromBuffers() + [Theory] + [MemberData(nameof(TestDoubleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] + public void BsDiffCreateFromBuffers(byte[] oldBuffer, byte[] newBuffer) { - foreach (var oldBuffer in GetBuffers(Sizes)) - foreach (var newBuffer in GetBuffers(Sizes)) - { - var patchBuf = BsDiffCreate(oldBuffer, newBuffer); - var finishedBuf = BsDiffApply(oldBuffer, patchBuf); + var patchBuf = BsDiffCreate(oldBuffer, newBuffer); + var finishedBuf = BsDiffApply(oldBuffer, patchBuf); - Assert.Equal(newBuffer, finishedBuf); - } + Assert.True(newBuffer.AsSpan().SequenceEqual(finishedBuf.Span)); } - [Fact] - public void BsDiffCreateFromBuffers_Identical() + public static IEnumerable TestSingleBuffers(IEnumerable sizes) + => sizes.Select(size => new object[] { GetBuffer(size) }); + + [Theory] + [MemberData(nameof(TestSingleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] + public void BsDiffCreateFromBuffers_Identical(byte[] oldBuffer) { - foreach (var oldBuffer in 
GetBuffers(Sizes)) - { - var newBuffer = new byte[oldBuffer.Length]; - Buffer.BlockCopy(oldBuffer, 0, newBuffer, 0, oldBuffer.Length); - - var patchBuf = BsDiffCreate(oldBuffer, newBuffer); - var finishedBuf = BsDiffApply(oldBuffer, patchBuf); - - Assert.Equal(oldBuffer, finishedBuf); - Assert.Equal(newBuffer, finishedBuf); - } + var newBuffer = new byte[oldBuffer.Length]; + Buffer.BlockCopy(oldBuffer, 0, newBuffer, 0, oldBuffer.Length); + + var patchBuf = BsDiffCreate(oldBuffer, newBuffer); + var finishedBuf = BsDiffApply(oldBuffer, patchBuf); + + Assert.True(oldBuffer.AsSpan().SequenceEqual(finishedBuf.Span)); + Assert.True(newBuffer.AsSpan().SequenceEqual(finishedBuf.Span)); } - [Fact] - public void BsDiffCreateFromStreams() + [Theory] + [MemberData(nameof(TestDoubleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] + public void BsDiffCreateFromStreams(byte[] oldBuffer, byte[] newBuffer) { const int outputSize = 0x2A000; - foreach (var oldBuffer in GetBuffers(Sizes)) - foreach (var newBuffer in GetBuffers(Sizes)) + byte[] bytesOut; + using (var mmf = MemoryMappedFile.CreateNew(null, outputSize, MemoryMappedFileAccess.ReadWrite)) + { + using (var mmfStream = mmf.CreateViewStream()) + { + BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); + } + + using (var msA = new MemoryStream(oldBuffer)) + using (var msOutput = new MemoryStream()) { - byte[] bytesOut; - using (var mmf = MemoryMappedFile.CreateNew(null, outputSize, MemoryMappedFileAccess.ReadWrite)) - { - using (var mmfStream = mmf.CreateViewStream()) - { - BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); - } - - using (var msA = new MemoryStream(oldBuffer)) - using (var msOutput = new MemoryStream()) - { - BsPatch.Apply(msA, mmf.CreateViewStream, msOutput); - bytesOut = msOutput.ToArray(); - } - } - - Assert.Equal(newBuffer, bytesOut); + BsPatch.Apply(msA, mmf.CreateViewStream, msOutput); + bytesOut = msOutput.ToArray(); } + } + + 
Assert.Equal(newBuffer, bytesOut); } [Theory] @@ -140,22 +133,18 @@ public static IEnumerable BsDiffCreateBadStreams_TestData() yield return new object[] { emptybuf, emptybuf, new DeflateStream(new MemoryStream(), CompressionMode.Compress) }; } - private static byte[] BsDiffCreate(byte[] oldBuf, byte[] newBuf) + private static ReadOnlyMemory BsDiffCreate(ReadOnlySpan oldBuf, ReadOnlySpan newBuf) { - using (var outputStream = new MemoryStream()) - { - BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream, new SuffixSorting.SAIS.SAIS()); - return outputStream.ToArray(); - } + var outputStream = new MemoryStream(); + BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream, new SuffixSorting.SAIS.SAIS()); + return outputStream.GetBuffer().AsMemory(0, (int)outputStream.Length); } - private static byte[] BsDiffApply(byte[] oldBuffer, byte[] patchBuffer) + private static ReadOnlyMemory BsDiffApply(ReadOnlyMemory oldBuffer, ReadOnlyMemory patchBuffer) { - using (var outputStream = new MemoryStream()) - { - BsPatch.Apply(oldBuffer, patchBuffer, outputStream); - return outputStream.ToArray(); - } + var outputStream = new MemoryStream(); + BsPatch.Apply(oldBuffer, patchBuffer, outputStream); + return outputStream.GetBuffer().AsMemory(0, (int)outputStream.Length); } } } From 99c504cf579a71078d2516a03d9fe424e4530adf Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 23 Feb 2021 06:58:42 -0600 Subject: [PATCH 032/325] Update SAISTests --- test/DeltaQ.Tests/SAISTests.cs | 89 ++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/test/DeltaQ.Tests/SAISTests.cs b/test/DeltaQ.Tests/SAISTests.cs index 58d7dc5..2f23cd8 100644 --- a/test/DeltaQ.Tests/SAISTests.cs +++ b/test/DeltaQ.Tests/SAISTests.cs @@ -1,6 +1,7 @@ -using DeltaQ.SuffixSorting; -using DeltaQ.SuffixSorting.SAIS; -using System.Diagnostics; +using DeltaQ.SuffixSorting.SAIS; +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Buffers; using Xunit; namespace DeltaQ.Tests @@ -8,6 +9,24 @@ namespace DeltaQ.Tests using static SAISChecker; public class SAISTests { +#if NET461 + private static void RandomFillBuffer(byte[] buffer) + { + var rand = new Random(63 * 13 * 63 * 13); + rand.NextBytes(buffer); + } +#else + private static MemoryOwner GetOwnedRandomBuffer(int size) + { + var rand = new Random(63 * 13 * 63 * 13); + + var owner = MemoryOwner.Allocate(size); + rand.NextBytes(owner.Span); + + return owner; + } +#endif + [Theory] [InlineData(0)] [InlineData(1)] @@ -18,22 +37,60 @@ public class SAISTests [InlineData(32)] [InlineData(51)] [InlineData(0x8000)] + [InlineData(0x80000)] + [InlineData(0x800000)] public void CheckRandomBuffer(int size) { - byte[] T = new byte[size]; - - var provider = new System.Security.Cryptography.RNGCryptoServiceProvider(); - provider.GetBytes(T); - - ISuffixSort sort = new SAIS(); - var sw = Stopwatch.StartNew(); - int[] SA = sort.Sort(T); - sw.Stop(); +#if NET461 + var ownedT = ArrayPool.Shared.Rent(size); + try +#else + using (var ownedT = GetOwnedRandomBuffer(size)) +#endif + { +#if NET461 + RandomFillBuffer(ownedT); + Span T = ownedT; +#else + Span T = ownedT.Span; +#endif + using (var ownedSA = new SAIS().SortOwned(T)) + { + Span SA = ownedSA.Span; + var result = Check(T, SA, T.Length, false); + Assert.Equal(0, result); + } + } +#if NET461 + finally + { 
+ ArrayPool.Shared.Return(ownedT); + } +#endif + } - Debug.WriteLine(sw.Elapsed); + //[Theory] + //[InlineData(0)] + //[InlineData(1)] + //[InlineData(2)] + //[InlineData(4)] + //[InlineData(8)] + //[InlineData(16)] + //[InlineData(32)] + //[InlineData(51)] + //[InlineData(0x8000)] + //public void CheckRandomBufferContinuous(int size) + //{ + // const int repetitions = 100_000; + // for (int i = 0; i < repetitions; i++) + // { + // CheckRandomBuffer(size); - var result = Check(T, SA, T.Length, false); - Assert.Equal(0, result); - } + // if (i % 100 == 0) + // { + // Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); + // } + // } + //} } } From fb6fcb825ec3a82fcd92803df1fa30fe97569d0a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 23 Feb 2021 07:05:55 -0600 Subject: [PATCH 033/325] Update SAISTests Expose CheckRandomBufferContinuous --- test/DeltaQ.Tests/SAISTests.cs | 48 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/test/DeltaQ.Tests/SAISTests.cs b/test/DeltaQ.Tests/SAISTests.cs index 2f23cd8..c01002f 100644 --- a/test/DeltaQ.Tests/SAISTests.cs +++ b/test/DeltaQ.Tests/SAISTests.cs @@ -9,6 +9,8 @@ namespace DeltaQ.Tests using static SAISChecker; public class SAISTests { + private readonly SAIS _sais = new SAIS(); + #if NET461 private static void RandomFillBuffer(byte[] buffer) { @@ -54,7 +56,7 @@ public void CheckRandomBuffer(int size) #else Span T = ownedT.Span; #endif - using (var ownedSA = new SAIS().SortOwned(T)) + using (var ownedSA = _sais.SortOwned(T)) { Span SA = ownedSA.Span; var result = Check(T, SA, T.Length, false); @@ -69,28 +71,28 @@ public void CheckRandomBuffer(int size) #endif } - //[Theory] - //[InlineData(0)] - //[InlineData(1)] - //[InlineData(2)] - //[InlineData(4)] - //[InlineData(8)] - //[InlineData(16)] - //[InlineData(32)] - //[InlineData(51)] - //[InlineData(0x8000)] - //public void CheckRandomBufferContinuous(int 
size) - //{ - // const int repetitions = 100_000; - // for (int i = 0; i < repetitions; i++) - // { - // CheckRandomBuffer(size); + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(4)] + [InlineData(8)] + [InlineData(16)] + [InlineData(32)] + [InlineData(51)] + [InlineData(0x1000)] + public void CheckRandomBufferContinuous(int size) + { + const int repetitions = 2_000; + for (int i = 0; i < repetitions; i++) + { + CheckRandomBuffer(size); - // if (i % 100 == 0) - // { - // Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); - // } - // } - //} + if (i % 100 == 0) + { + System.Diagnostics.Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); + } + } + } } } From a688e22bdcd2a3f48ac089b42819932528f50e50 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 24 Feb 2021 00:49:26 -0600 Subject: [PATCH 034/325] Replace recursive Search (again) --- src/DeltaQ.BsDiff/BsDiff.cs | 48 ++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 305a58e..7ac6ec4 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -268,41 +268,35 @@ private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan ne private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadOnlySpan newData, int start, int end, out int pos) { - int x, y; - if (end - start < 2) + while (true) { - //x = MatchLength(oldData[I[start]..], newData); - //y = MatchLength(oldData[I[end]..], newData); - x = MatchLength(oldData.Slice(I[start]), newData); - y = MatchLength(oldData.Slice(I[end]), newData); + if (end - start < 2) + { + var x = MatchLength(oldData.Slice(I[start]), newData); + var y = MatchLength(oldData.Slice(I[end]), newData); + + if (x > y) + { + pos = I[start]; + return x; + } + else + { + pos = I[end]; + return y; + 
} + } - if (x > y) + var midPoint = start + (end - start) / 2; + if (CompareBytes(oldData.Slice(I[midPoint]), newData) < 0) { - pos = I[start]; - return x; + start = midPoint; } else { - pos = I[end]; - return y; + end = midPoint; } - //throw new ApplicationException($"start:{start} end:{end} I:{I.Length} I[start]:{I[start]} I[end]:{I[end]} oldData:{oldData.Length} newData:{newData.Length}", e); } - - x = start + (end - start) / 2; - //var midPoint = start + (end - start) / 2; - if (CompareBytes(oldData.Slice(I[x]), newData) < 0) - { - return Search(I, oldData, newData, x, end, out pos); - //start = midPoint; - //continue; - } - else - { - return Search(I, oldData, newData, start, x, out pos); - } - - //end = midPoint; } } } From a29d8ea46c7c169cb35bbb1309510fcabece635a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 24 Feb 2021 00:55:57 -0600 Subject: [PATCH 035/325] Break out test projects --- .../BsDiffTests.cs | 0 .../BsPatchTests.cs | 0 .../DeltaQ.BsDiff.Tests.csproj | 26 +++++++++++++++++++ .../DeltaQ.SuffixSorting.SAIS.Tests.csproj | 25 ++++++++++++++++++ .../SAISChecker.cs | 0 .../SAISTests.cs | 0 test/DeltaQ.Tests/DeltaQ.Tests.csproj | 7 ++--- 7 files changed, 55 insertions(+), 3 deletions(-) rename test/{DeltaQ.Tests => DeltaQ.BsDiff.Tests}/BsDiffTests.cs (100%) rename test/{DeltaQ.Tests => DeltaQ.BsDiff.Tests}/BsPatchTests.cs (100%) create mode 100644 test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj create mode 100644 test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj rename test/{DeltaQ.Tests => DeltaQ.SuffixSorting.SAIS.Tests}/SAISChecker.cs (100%) rename test/{DeltaQ.Tests => DeltaQ.SuffixSorting.SAIS.Tests}/SAISTests.cs (100%) diff --git a/test/DeltaQ.Tests/BsDiffTests.cs b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs similarity index 100% rename from test/DeltaQ.Tests/BsDiffTests.cs rename to test/DeltaQ.BsDiff.Tests/BsDiffTests.cs diff --git a/test/DeltaQ.Tests/BsPatchTests.cs b/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs 
similarity index 100% rename from test/DeltaQ.Tests/BsPatchTests.cs rename to test/DeltaQ.BsDiff.Tests/BsPatchTests.cs diff --git a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj new file mode 100644 index 0000000..7ec2f77 --- /dev/null +++ b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj @@ -0,0 +1,26 @@ + + + + net5.0;net461;netcoreapp2.1 + false + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + + + diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj new file mode 100644 index 0000000..d0d7708 --- /dev/null +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj @@ -0,0 +1,25 @@ + + + + net5.0;net461;netcoreapp2.1 + false + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + + diff --git a/test/DeltaQ.Tests/SAISChecker.cs b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISChecker.cs similarity index 100% rename from test/DeltaQ.Tests/SAISChecker.cs rename to test/DeltaQ.SuffixSorting.SAIS.Tests/SAISChecker.cs diff --git a/test/DeltaQ.Tests/SAISTests.cs b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs similarity index 100% rename from test/DeltaQ.Tests/SAISTests.cs rename to test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 7f23cef..cdb2389 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -3,23 +3,24 @@ net5.0;net461;netcoreapp2.1 false + true + true - + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; 
buildtransitive all - From 6452746afff1f17eb0ebaef2646c1a4723244e10 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 24 Feb 2021 00:56:07 -0600 Subject: [PATCH 036/325] Update solution --- deltaq.sln | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/deltaq.sln b/deltaq.sln index b75d9bc..d3f5837 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -21,6 +21,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{8B14206D-43D EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{03F00ECA-08B1-47A4-8ACE-4624E31741BA}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.BsDiff.Tests", "test\DeltaQ.BsDiff.Tests\DeltaQ.BsDiff.Tests.csproj", "{279B6F7C-7FB0-42AA-8804-8FF64A990A9A}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS.Tests", "test\DeltaQ.SuffixSorting.SAIS.Tests\DeltaQ.SuffixSorting.SAIS.Tests.csproj", "{2D37444E-3C89-4E1E-A0E6-C009F205EA84}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -55,6 +59,14 @@ Global {C889CB97-5D73-4D53-8249-DD7BFD402475}.Debug|Any CPU.Build.0 = Debug|Any CPU {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.ActiveCfg = Release|Any CPU {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.Build.0 = Release|Any CPU + {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Release|Any CPU.Build.0 = Release|Any CPU + {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Release|Any CPU.Build.0 = 
Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -67,6 +79,8 @@ Global {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {0C1531C0-427B-42BE-B781-E83D7B377537} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {C889CB97-5D73-4D53-8249-DD7BFD402475} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {279B6F7C-7FB0-42AA-8804-8FF64A990A9A} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} + {2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} From 60e706f3b5fbe6cfbb1d2788ec7c0b0aef8cd0a1 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 02:40:41 -0600 Subject: [PATCH 037/325] Add DeltaQ.Utility.Memory Helper project to include memory extensions and references from the WinCT HighPerformance Toolkit --- src/DeltaQ.BsDiff/BsDiff.cs | 2 +- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 3 ++- .../DeltaQ.SuffixSorting.Abstractions.csproj | 2 +- .../ISuffixSort.cs | 3 +-- .../DeltaQ.SuffixSorting.SAIS.csproj | 6 +++--- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 20 ++++--------------- .../DeltaQ.Utility.Memory.csproj | 17 ++++++++++++++++ .../SAISTests.cs | 2 +- 8 files changed, 30 insertions(+), 25 deletions(-) create mode 100644 src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 7ac6ec4..1c117d8 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -101,7 +101,7 @@ 0 32 Header //the memory allocated for the suffix array MUST be at least (n+1) //this is only required for bsdiff, so we allocate it ourselves - //instead of using the ISuffixSort overloads that might allocate only (n) + //instead of using the ISuffixSort overloads that only require allocations of (n) using (MemoryOwner saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear)) 
using (var msControl = new MemoryStream()) using (var msDiff = new MemoryStream()) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index ce41340..aade477 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -4,12 +4,13 @@ net5.0;netstandard2.0 DeltaQ jzebedee + 0.1.1 true - + diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 5e71565..96341f1 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -5,7 +5,7 @@ DeltaQ jzebedee true - 0.1.0 + 0.2.0 diff --git a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs index d5ac9c2..387b9cd 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs +++ b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs @@ -5,8 +5,7 @@ namespace DeltaQ.SuffixSorting { public interface ISuffixSort { - ReadOnlyMemory Sort(ReadOnlySpan textBuffer); - IMemoryOwner SortOwned(ReadOnlySpan textBuffer); + IMemoryOwner Sort(ReadOnlySpan textBuffer); int Sort(ReadOnlySpan textBuffer, Span suffixBuffer); } } \ No newline at end of file diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 812c21e..f914286 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -5,12 +5,12 @@ DeltaQ jzebedee true - 0.1.0 + 0.2.0 - - + + diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 57b4834..7d7b4f0 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -51,7 +51,6 @@ using 
Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Buffers; -using System.Diagnostics; using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.SAIS @@ -503,31 +502,20 @@ sort all the LMS-substrings */ InduceSA(T, sa, c, b, n, k); } - /// - /// Constructs the suffix array of a given string (as byte array) in linear time. - /// - /// input bytes - /// 0 if no error occurred, -1 or -2 otherwise - public ReadOnlyMemory Sort(ReadOnlySpan textBuffer) - { - var suffixBuffer = new int[textBuffer.Length]; - Sort(textBuffer, suffixBuffer); - return suffixBuffer; - } - - public MemoryOwner SortOwned(ReadOnlySpan textBuffer) + public MemoryOwner Sort(ReadOnlySpan textBuffer) { var owner = MemoryOwner.Allocate(textBuffer.Length); Sort(textBuffer, owner.Span); return owner; } - IMemoryOwner ISuffixSort.SortOwned(ReadOnlySpan textBuffer) => SortOwned(textBuffer); + IMemoryOwner ISuffixSort.Sort(ReadOnlySpan textBuffer) + => Sort(textBuffer); public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { if (suffixBuffer.Length < textBuffer.Length) - throw new ArgumentOutOfRangeException(nameof(suffixBuffer), $"Span must have a minimum size of ({nameof(textBuffer)}.Length+1)"); + throw new ArgumentException("Output span must have length greater than or equal to input span", nameof(suffixBuffer)); if (textBuffer.Length <= 1) { diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj new file mode 100644 index 0000000..2677192 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -0,0 +1,17 @@ + + + + net5.0;netstandard2.0 + DeltaQ + jzebedee + true + 0.2.0 + latest + enable + + + + + + + diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs index c01002f..d3f27c0 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs @@ -56,7 +56,7 @@ 
public void CheckRandomBuffer(int size) #else Span T = ownedT.Span; #endif - using (var ownedSA = _sais.SortOwned(T)) + using (var ownedSA = _sais.Sort(T)) { Span SA = ownedSA.Span; var result = Check(T, SA, T.Length, false); From e4fdd9bba52e79e932dc127086ee1271d3e55e72 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 02:40:59 -0600 Subject: [PATCH 038/325] Update solution --- deltaq.sln | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/deltaq.sln b/deltaq.sln index d3f5837..15ae7f2 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.BsDiff.Tests", "test EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS.Tests", "test\DeltaQ.SuffixSorting.SAIS.Tests\DeltaQ.SuffixSorting.SAIS.Tests.csproj", "{2D37444E-3C89-4E1E-A0E6-C009F205EA84}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Utility.Memory", "src\DeltaQ.Utility.Memory\DeltaQ.Utility.Memory.csproj", "{96F1E46E-53CB-4463-82E2-0F81BEB87080}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -67,6 +69,10 @@ Global {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Debug|Any CPU.Build.0 = Debug|Any CPU {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Release|Any CPU.ActiveCfg = Release|Any CPU {2D37444E-3C89-4E1E-A0E6-C009F205EA84}.Release|Any CPU.Build.0 = Release|Any CPU + {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Debug|Any CPU.Build.0 = Debug|Any CPU + {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.ActiveCfg = Release|Any CPU + {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -81,6 +87,7 @@ Global {C889CB97-5D73-4D53-8249-DD7BFD402475} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {279B6F7C-7FB0-42AA-8804-8FF64A990A9A} = 
{03F00ECA-08B1-47A4-8ACE-4624E31741BA} {2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} + {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} From 50d4d255df48478d9b9adf0b384a9f785757adbf Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 02:44:56 -0600 Subject: [PATCH 039/325] Consolidate package versions --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index aade477..bcfc3a6 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -9,7 +9,7 @@ - + From cdef6368a8a8c1b6f00af4c3ea058b6ea66bdeef Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 10:11:19 -0600 Subject: [PATCH 040/325] Update BsDiff --- src/DeltaQ.BsDiff/BsDiff.cs | 16 +++++--- src/DeltaQ.BsDiff/BsPatch.cs | 41 +++++++++++-------- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 1 + .../{Extensions.cs => SpanExtensions.cs} | 2 +- 4 files changed, 36 insertions(+), 24 deletions(-) rename src/DeltaQ.BsDiff/{Extensions.cs => SpanExtensions.cs} (98%) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/BsDiff.cs index 1c117d8..ed0d4ca 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/BsDiff.cs @@ -53,12 +53,12 @@ internal static Stream GetEncodingStream(Stream stream, bool output) } /// - /// Creates a BSDIFF-format patch from two byte arrays + /// Creates a BSDIFF-format patch from two byte buffers /// - /// Byte array of the original (older) data - /// Byte array of the changed (newer) data + /// Byte buffer of the original (older) data + /// Byte buffer of the changed (newer) data /// Seekable, writable stream where the patch will be written - /// Suffix sort implementation to use for comparison, or 
null to use a default sorter + /// Suffix sort implementation to use for comparison public static void Create(ReadOnlySpan oldData, ReadOnlySpan newData, Stream output, ISuffixSort suffixSort) { // check arguments @@ -86,11 +86,14 @@ 0 32 Header ?? ?? Bzip2ed diff block ?? ?? Bzip2ed extra block */ Span header = stackalloc byte[HeaderSize]; + Span header_signature = header.Slice(0, sizeof(long)); + header_signature.WritePackedLong(Signature); + Span header_compressed_ctrl = header.Slice(sizeof(long), sizeof(long)); Span header_compressed_diff = header.Slice(sizeof(long) * 2, sizeof(long)); + Span header_newdata_len = header.Slice(sizeof(long) * 3, sizeof(long)); - header_signature.WritePackedLong(Signature); header_newdata_len.WritePackedLong(newData.Length); var startPosition = output.Position; @@ -252,7 +255,8 @@ 0 32 Header output.Position = endPosition; } - private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) => left.SequenceCompareTo(right); + private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) + => left.SequenceCompareTo(right); private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan newData) { diff --git a/src/DeltaQ.BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/BsPatch.cs index b656e31..caac93a 100644 --- a/src/DeltaQ.BsDiff/BsPatch.cs +++ b/src/DeltaQ.BsDiff/BsPatch.cs @@ -25,6 +25,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.IO; @@ -94,9 +95,9 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre throw new InvalidOperationException("Corrupt patch"); // read lengths from header - controlLength = header.Slice(8).ReadPackedLong(); - diffLength = header.Slice(16).ReadPackedLong(); - newSize = header.Slice(24).ReadPackedLong(); + controlLength = header.Slice(sizeof(long)).ReadPackedLong(); + diffLength = header.Slice(sizeof(long) * 2).ReadPackedLong(); + newSize = header.Slice(sizeof(long) * 3).ReadPackedLong(); if (controlLength < 0 || diffLength < 0 || newSize < 0) throw new InvalidOperationException("Corrupt patch"); @@ -116,7 +117,7 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre return newSize; } - private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Stream diff, Stream extra, Stream output) + private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Stream diff, Stream extra, Stream output, int bufferSize = 0x1000) { if (!input.CanRead) throw new ArgumentException("Input stream must be readable", nameof(input)); @@ -128,11 +129,14 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea using (ctrl) using (diff) using (extra) - using (var inputReader = new BinaryReader(input)) { - Span readBuffer = stackalloc byte[0x1000]; - Span ctrlBuffer = stackalloc byte[24]; + using var diffBufferOwner = SpanOwner.Allocate(bufferSize); + using var inputBufferOwner = SpanOwner.Allocate(bufferSize); + Span ctrlBuffer = stackalloc byte[sizeof(long) * 3]; + + var diffBuffer = diffBufferOwner.Span; + var inputBuffer = inputBufferOwner.Span; while (output.Position < newSize) { //read control data: @@ -142,9 +146,9 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea // add x bytes from oldfile to x bytes from the 
diff block; var addSize = ctrlBuffer.ReadPackedLong(); // copy y bytes from the extra block; - var copySize = ctrlBuffer.Slice(8).ReadPackedLong(); + var copySize = ctrlBuffer.Slice(sizeof(long)).ReadPackedLong(); // seek forwards in oldfile by z bytes; - var seekAmount = ctrlBuffer.Slice(16).ReadPackedLong(); + var seekAmount = ctrlBuffer.Slice(sizeof(long) * 2).ReadPackedLong(); // sanity-check if (output.Position + addSize > newSize) @@ -154,15 +158,18 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea while (addSize > 0) { - var bytesRead = diff.Read(readBuffer.SliceUpTo((int)addSize)); - var inputData = inputReader.ReadBytes(bytesRead); + var diffBytesRead = diff.Read(diffBuffer.SliceUpTo((int)addSize)); + var inputBytesRead = input.Read(inputBuffer); + + if (inputBytesRead != diffBytesRead) + throw new InvalidOperationException("Corrupt patch"); // add old data to diff string - for (var i = 0; i < bytesRead; i++) - readBuffer[i] += inputData[i]; + for (var i = 0; i < diffBytesRead; i++) + diffBuffer[i] += inputBuffer[i]; - output.Write(readBuffer.Slice(0, bytesRead)); - addSize -= bytesRead; + output.Write(diffBuffer.Slice(0, diffBytesRead)); + addSize -= diffBytesRead; } // sanity-check @@ -172,8 +179,8 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea // read extra string in chunks while (copySize > 0) { - var bytesRead = extra.Read(readBuffer.SliceUpTo((int)copySize)); - output.Write(readBuffer.Slice(0, bytesRead)); + var bytesRead = extra.Read(diffBuffer.SliceUpTo((int)copySize)); + output.Write(diffBuffer.Slice(0, bytesRead)); copySize -= bytesRead; } diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index bcfc3a6..87991c8 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -6,6 +6,7 @@ jzebedee 0.1.1 true + preview diff --git a/src/DeltaQ.BsDiff/Extensions.cs b/src/DeltaQ.BsDiff/SpanExtensions.cs 
similarity index 98% rename from src/DeltaQ.BsDiff/Extensions.cs rename to src/DeltaQ.BsDiff/SpanExtensions.cs index 7e057f6..f064496 100644 --- a/src/DeltaQ.BsDiff/Extensions.cs +++ b/src/DeltaQ.BsDiff/SpanExtensions.cs @@ -29,7 +29,7 @@ namespace DeltaQ.BsDiff { - internal static class Extensions + internal static class SpanExtensions { public static void WritePackedLong(this Span span, long y) { From b1ef4dfd69639a1f10d2629cb112e1de24e49887 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 10:15:43 -0600 Subject: [PATCH 041/325] Rename BsDiff and BsPatch to Diff and Patch --- src/DeltaQ.BsDiff/{BsDiff.cs => Diff.cs} | 2 +- src/DeltaQ.BsDiff/{BsPatch.cs => Patch.cs} | 20 ++++++++++---------- test/DeltaQ.BsDiff.Tests/BsDiffTests.cs | 13 ++++++------- test/DeltaQ.BsDiff.Tests/BsPatchTests.cs | 7 ++++--- 4 files changed, 21 insertions(+), 21 deletions(-) rename src/DeltaQ.BsDiff/{BsDiff.cs => Diff.cs} (99%) rename src/DeltaQ.BsDiff/{BsPatch.cs => Patch.cs} (91%) diff --git a/src/DeltaQ.BsDiff/BsDiff.cs b/src/DeltaQ.BsDiff/Diff.cs similarity index 99% rename from src/DeltaQ.BsDiff/BsDiff.cs rename to src/DeltaQ.BsDiff/Diff.cs index ed0d4ca..2c74f33 100644 --- a/src/DeltaQ.BsDiff/BsDiff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -35,7 +35,7 @@ namespace DeltaQ.BsDiff { - public static class BsDiff + public static class Diff { internal const int HeaderSize = 32; internal const long Signature = 0x3034464649445342; //"BSDIFF40" diff --git a/src/DeltaQ.BsDiff/BsPatch.cs b/src/DeltaQ.BsDiff/Patch.cs similarity index 91% rename from src/DeltaQ.BsDiff/BsPatch.cs rename to src/DeltaQ.BsDiff/Patch.cs index caac93a..8974402 100644 --- a/src/DeltaQ.BsDiff/BsPatch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -32,7 +32,7 @@ namespace DeltaQ.BsDiff { - public static class BsPatch + public static class Patch { /// /// Opens a BSDIFF-format patch at a specific position @@ -78,7 +78,7 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre { // read 
header long controlLength, diffLength, newSize; - using (var patchStream = openPatchStream(0, BsDiff.HeaderSize)) + using (var patchStream = openPatchStream(0, Diff.HeaderSize)) { // check patch stream capabilities if (!patchStream.CanRead) @@ -86,12 +86,12 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre if (!patchStream.CanSeek) throw new ArgumentException("Patch stream must be seekable", nameof(openPatchStream)); - Span header = stackalloc byte[BsDiff.HeaderSize]; + Span header = stackalloc byte[Diff.HeaderSize]; patchStream.Read(header); // check for appropriate magic var signature = header.ReadPackedLong(); - if (signature != BsDiff.Signature) + if (signature != Diff.Signature) throw new InvalidOperationException("Corrupt patch"); // read lengths from header @@ -105,14 +105,14 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre // prepare to read three parts of the patch in parallel Stream - compressedControlStream = openPatchStream(BsDiff.HeaderSize, controlLength), - compressedDiffStream = openPatchStream(BsDiff.HeaderSize + controlLength, diffLength), - compressedExtraStream = openPatchStream(BsDiff.HeaderSize + controlLength + diffLength, 0); + compressedControlStream = openPatchStream(Diff.HeaderSize, controlLength), + compressedDiffStream = openPatchStream(Diff.HeaderSize + controlLength, diffLength), + compressedExtraStream = openPatchStream(Diff.HeaderSize + controlLength + diffLength, 0); // decompress each part (to read it) - ctrl = BsDiff.GetEncodingStream(compressedControlStream, false); - diff = BsDiff.GetEncodingStream(compressedDiffStream, false); - extra = BsDiff.GetEncodingStream(compressedExtraStream, false); + ctrl = Diff.GetEncodingStream(compressedControlStream, false); + diff = Diff.GetEncodingStream(compressedDiffStream, false); + extra = Diff.GetEncodingStream(compressedExtraStream, false); return newSize; } diff --git a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs 
b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs index d3b5318..56ab4a6 100644 --- a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs @@ -24,7 +24,6 @@ */ using DeltaQ.BsDiff; using System; -using System.Collections; using System.Collections.Generic; using System.IO; using System.IO.Compression; @@ -89,13 +88,13 @@ public void BsDiffCreateFromStreams(byte[] oldBuffer, byte[] newBuffer) { using (var mmfStream = mmf.CreateViewStream()) { - BsDiff.BsDiff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); + Diff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); } using (var msA = new MemoryStream(oldBuffer)) using (var msOutput = new MemoryStream()) { - BsPatch.Apply(msA, mmf.CreateViewStream, msOutput); + Patch.Apply(msA, mmf.CreateViewStream, msOutput); bytesOut = msOutput.ToArray(); } } @@ -107,7 +106,7 @@ public void BsDiffCreateFromStreams(byte[] oldBuffer, byte[] newBuffer) [MemberData(nameof(BsDiffCreateNullArguments_TestData))] public void BsDiffCreateNullArguments(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); + Assert.Throws(() => Diff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); } public static IEnumerable BsDiffCreateNullArguments_TestData() @@ -123,7 +122,7 @@ public static IEnumerable BsDiffCreateNullArguments_TestData() [MemberData(nameof(BsDiffCreateBadStreams_TestData))] public void BsDiffCreateBadStreams(byte[] oldData, byte[] newData, Stream outStream) { - Assert.Throws(() => BsDiff.BsDiff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); + Assert.Throws(() => Diff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); } public static IEnumerable BsDiffCreateBadStreams_TestData() @@ -136,14 +135,14 @@ public static IEnumerable BsDiffCreateBadStreams_TestData() private static ReadOnlyMemory BsDiffCreate(ReadOnlySpan 
oldBuf, ReadOnlySpan newBuf) { var outputStream = new MemoryStream(); - BsDiff.BsDiff.Create(oldBuf, newBuf, outputStream, new SuffixSorting.SAIS.SAIS()); + Diff.Create(oldBuf, newBuf, outputStream, new SuffixSorting.SAIS.SAIS()); return outputStream.GetBuffer().AsMemory(0, (int)outputStream.Length); } private static ReadOnlyMemory BsDiffApply(ReadOnlyMemory oldBuffer, ReadOnlyMemory patchBuffer) { var outputStream = new MemoryStream(); - BsPatch.Apply(oldBuffer, patchBuffer, outputStream); + Patch.Apply(oldBuffer, patchBuffer, outputStream); return outputStream.GetBuffer().AsMemory(0, (int)outputStream.Length); } } diff --git a/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs b/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs index e5e6a16..b375844 100644 --- a/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs @@ -1,4 +1,5 @@ -using System.IO; +using DeltaQ.BsDiff; +using System.IO; using System.Security.Cryptography; using Xunit; @@ -23,13 +24,13 @@ public void BsPatchFlushesOutput() //can't use MemoryStream directly as Flush has no effect var patchMs = new MemoryStream(); var wrappedPatchMs = new BufferedStream(patchMs); - BsDiff.BsDiff.Create(oldBuffer, newBuffer, wrappedPatchMs, new SuffixSorting.SAIS.SAIS()); + Diff.Create(oldBuffer, newBuffer, wrappedPatchMs, new SuffixSorting.SAIS.SAIS()); var patchBuffer = patchMs.ToArray(); var reconstructMs = new MemoryStream(); var wrappedReconstructMs = new BufferedStream(reconstructMs); - BsDiff.BsPatch.Apply(oldBuffer, patchBuffer, wrappedReconstructMs); + Patch.Apply(oldBuffer, patchBuffer, wrappedReconstructMs); var reconstructedBuffer = reconstructMs.ToArray(); From 419a11992a2dc349f5c52a7286cbfcb99cbb3088 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 6 Mar 2021 10:45:30 -0600 Subject: [PATCH 042/325] Update tests Remove netcoreapp targeting Use latest LangVersion --- test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj | 3 ++- .../DeltaQ.SuffixSorting.SAIS.Tests.csproj | 3 ++- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 5 ++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj index 7ec2f77..d168730 100644 --- a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj +++ b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj @@ -1,8 +1,9 @@  - net5.0;net461;netcoreapp2.1 + net5.0;net461 false + latest diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj index d0d7708..3ec556b 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj @@ -1,8 +1,9 @@  - net5.0;net461;netcoreapp2.1 + net5.0;net461 false + latest diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index cdb2389..1414104 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -1,10 +1,9 @@  - net5.0;net461;netcoreapp2.1 + net5.0;net461 false - true - true + latest From d70cd652df30341aca7160d6650f25ff4ce3fc8c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 6 Mar 2021 10:45:55 -0600 Subject: [PATCH 043/325] Replace MMF implementation from BsDiffTests --- test/DeltaQ.BsDiff.Tests/BsDiffTests.cs | 41 ++++++++++++------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs index 56ab4a6..1328408 100644 --- a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs @@ -23,11 +23,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ using DeltaQ.BsDiff; +using Microsoft.Toolkit.HighPerformance.Buffers; +using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.Collections.Generic; using System.IO; using System.IO.Compression; -using System.IO.MemoryMappedFiles; using System.Linq; using Xunit; @@ -79,27 +80,25 @@ public void BsDiffCreateFromBuffers_Identical(byte[] oldBuffer) [Theory] [MemberData(nameof(TestDoubleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] - public void BsDiffCreateFromStreams(byte[] oldBuffer, byte[] newBuffer) + public void BsDiffCreateFromStreams(byte[] oldData, byte[] newData) { - const int outputSize = 0x2A000; - - byte[] bytesOut; - using (var mmf = MemoryMappedFile.CreateNew(null, outputSize, MemoryMappedFileAccess.ReadWrite)) - { - using (var mmfStream = mmf.CreateViewStream()) - { - Diff.Create(oldBuffer, newBuffer, mmfStream, new SuffixSorting.SAIS.SAIS()); - } - - using (var msA = new MemoryStream(oldBuffer)) - using (var msOutput = new MemoryStream()) - { - Patch.Apply(msA, mmf.CreateViewStream, msOutput); - bytesOut = msOutput.ToArray(); - } - } - - Assert.Equal(newBuffer, bytesOut); + using var outputOwner = MemoryOwner.Allocate(0x2000); + + Diff.Create(oldData, newData, outputOwner.Memory.AsStream(), new SuffixSorting.SAIS.SAIS()); + + using var msOld = new MemoryStream(oldData); + using var msPatchOutput = new MemoryStream(); + Patch.Apply(msOld, OpenPatchStream, msPatchOutput); + + Span newSpan, reconstructedSpan; + newSpan = 
newData; + reconstructedSpan = msPatchOutput.GetBuffer().AsSpan(0, (int)msPatchOutput.Length); + + Assert.True(newSpan.SequenceEqual(reconstructedSpan)); + return; + + Stream OpenPatchStream(long start, long len) + => (len > 0 ? outputOwner.Memory.Slice((int)start, (int)len) : outputOwner.Memory.Slice((int)start)).AsStream(); } [Theory] From 79638efceae2a04ba2473282259cb7d4e27e0df0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 10:53:57 -0600 Subject: [PATCH 044/325] Rename for clarity --- src/DeltaQ.BsDiff/Patch.cs | 11 ++++++----- test/DeltaQ.BsDiff.Tests/BsDiffTests.cs | 12 ++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 8974402..9ef7716 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -53,7 +53,8 @@ public static void Apply(ReadOnlyMemory input, ReadOnlyMemory diff, var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); // prepare to read three parts of the patch in parallel - ApplyInternal(newSize, input.AsStream(), controlStream, diffStream, extraStream, output); + using var inputStream = input.AsStream(); + ApplyInternal(newSize, inputStream, controlStream, diffStream, extraStream, output); return; Stream openPatchStream(long offset, long length) @@ -78,16 +79,16 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre { // read header long controlLength, diffLength, newSize; - using (var patchStream = openPatchStream(0, Diff.HeaderSize)) + using (var headerStream = openPatchStream(0, Diff.HeaderSize)) { // check patch stream capabilities - if (!patchStream.CanRead) + if (!headerStream.CanRead) throw new ArgumentException("Patch stream must be readable", nameof(openPatchStream)); - if (!patchStream.CanSeek) + if (!headerStream.CanSeek) throw new ArgumentException("Patch stream must be seekable", nameof(openPatchStream)); Span 
header = stackalloc byte[Diff.HeaderSize]; - patchStream.Read(header); + headerStream.Read(header); // check for appropriate magic var signature = header.ReadPackedLong(); diff --git a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs index 1328408..7d04cc0 100644 --- a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs @@ -53,7 +53,7 @@ public static IEnumerable TestDoubleBuffers(IEnumerable sizes) [Theory] [MemberData(nameof(TestDoubleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] - public void BsDiffCreateFromBuffers(byte[] oldBuffer, byte[] newBuffer) + public void BsDiffRoundtripFromBuffers(byte[] oldBuffer, byte[] newBuffer) { var patchBuf = BsDiffCreate(oldBuffer, newBuffer); var finishedBuf = BsDiffApply(oldBuffer, patchBuf); @@ -66,7 +66,7 @@ public static IEnumerable TestSingleBuffers(IEnumerable sizes) [Theory] [MemberData(nameof(TestSingleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] - public void BsDiffCreateFromBuffers_Identical(byte[] oldBuffer) + public void BsDiffRoundtripFromBuffers_Identical(byte[] oldBuffer) { var newBuffer = new byte[oldBuffer.Length]; Buffer.BlockCopy(oldBuffer, 0, newBuffer, 0, oldBuffer.Length); @@ -80,7 +80,7 @@ public void BsDiffCreateFromBuffers_Identical(byte[] oldBuffer) [Theory] [MemberData(nameof(TestDoubleBuffers), new int[] { 0, 1, 512, 999, 1024, 4096 })] - public void BsDiffCreateFromStreams(byte[] oldData, byte[] newData) + public void BsDiffRoundtripFromStreams(byte[] oldData, byte[] newData) { using var outputOwner = MemoryOwner.Allocate(0x2000); @@ -103,14 +103,14 @@ Stream OpenPatchStream(long start, long len) [Theory] [MemberData(nameof(BsDiffCreateNullArguments_TestData))] - public void BsDiffCreateNullArguments(byte[] oldData, byte[] newData, Stream outStream) + public void BsDiffCreateNullArgumentsThrows(byte[] oldData, byte[] newData, Stream outStream) { Assert.Throws(() => Diff.Create(oldData, newData, outStream, new 
SuffixSorting.SAIS.SAIS())); } public static IEnumerable BsDiffCreateNullArguments_TestData() { - var emptybuf = new byte[0]; + var emptybuf = Array.Empty(); var ms = new MemoryStream(); yield return new object[] { null, emptybuf, ms }; yield return new object[] { emptybuf, null, ms }; @@ -119,7 +119,7 @@ public static IEnumerable BsDiffCreateNullArguments_TestData() [Theory] [MemberData(nameof(BsDiffCreateBadStreams_TestData))] - public void BsDiffCreateBadStreams(byte[] oldData, byte[] newData, Stream outStream) + public void BsDiffCreateBadStreamsThrows(byte[] oldData, byte[] newData, Stream outStream) { Assert.Throws(() => Diff.Create(oldData, newData, outStream, new SuffixSorting.SAIS.SAIS())); } From ae5ff7b5ab8e5e7d1bb35e3394f4351ffcd2b042 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 11:23:01 -0600 Subject: [PATCH 045/325] Remove old comments --- src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs | 2 -- src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs | 3 --- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 12 +----------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs index c3d4a8a..cf5a45d 100644 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs +++ b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs @@ -188,7 +188,6 @@ public override void SetLength(long value) /// The offset to start obtaining data from. /// The number of bytes of data to write. /// Any access - // ReSharper disable once ParameterHidesMember public override void Write(byte[] buffer, int offset, int count) { throw new NotSupportedException("BZip2InputStream Write not supported"); @@ -215,7 +214,6 @@ public override void WriteByte(byte value) /// than the number of bytes requested if that number of bytes are not /// currently available or zero if the end of the stream is reached. 
/// - // ReSharper disable once ParameterHidesMember public override int Read(byte[] buffer, int offset, int count) { if (buffer == null) diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs index 205863b..8e208a1 100644 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs +++ b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs @@ -802,7 +802,6 @@ Recompute the tables based on the accumulated frequencies. char ll_i = selector[i]; var j = 0; char tmp = pos[j]; - // ReSharper disable once LoopVariableIsNeverChangedInsideLoop while (ll_i != tmp) { j++; @@ -1614,7 +1613,6 @@ void GenerateMTFValues() int j = 0; char tmp = yy[j]; - // ReSharper disable once LoopVariableIsNeverChangedInsideLoop while (ll_i != tmp) { j++; @@ -1956,7 +1954,6 @@ The current block size is 100000 * this number. int workDone; int workLimit; bool firstAttempt; - // ReSharper disable once NotAccessedField.Local int nBlocksRandomised; int currentChar = -1; diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 7d7b4f0..8acc2ae 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -66,8 +66,6 @@ public class SAIS : ISuffixSort private static void GetCounts(IntAccessor T, Span c, int n, int k) { c.Slice(0, k).Clear(); - //for (i = 0; i < k; ++i) - // c[i] = 0; for (int i = 0; i < n; ++i) c[T[i]]++; @@ -76,8 +74,7 @@ private static void GetCounts(IntAccessor T, Span c, int n, int k) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void GetBuckets(Span c, Span b, int k, bool end) { - int i, sum = 0; - for (i = 0; i < k; ++i) + for (int i = 0, sum = 0; i < k; ++i) { sum += c[i]; b[i] = end ? 
sum : sum - c[i]; @@ -331,10 +328,6 @@ sort all the LMS-substrings */ GetBuckets(c, b, k, true); /* find ends of buckets */ sa.Slice(0, n).Clear(); - //for (i = 0; i < n; ++i) - //{ - // sa[i] = 0; - //} bb = -1; i = n - 1; @@ -408,7 +401,6 @@ sort all the LMS-substrings */ } } - // ReSharper disable once LoopVariableIsNeverChangedInsideLoop for (i = m + (n >> 1) - 1, j = m * 2 + newfs - 1; m <= i; --i) { if (sa[i] != 0) @@ -473,10 +465,8 @@ sort all the LMS-substrings */ c1 = T[p]; do { - // ReSharper disable once PossibleNullReferenceException int q = b[c0 = c1]; - // ReSharper disable once LoopVariableIsNeverChangedInsideLoop while (q < j) { sa[--j] = 0; From b630d3ed3a923d67271ce9465089f57d06385328 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 6 Mar 2021 11:23:40 -0600 Subject: [PATCH 046/325] Use nameof() --- src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs | 2 +- src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs | 6 +++--- src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs index cf5a45d..24537cb 100644 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs +++ b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs @@ -218,7 +218,7 @@ public override int Read(byte[] buffer, int offset, int count) { if (buffer == null) { - throw new ArgumentNullException("buffer"); + throw new ArgumentNullException(nameof(buffer)); } for (var i = 0; i < count; ++i) diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs index 8e208a1..7d2dfbe 100644 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs +++ b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs @@ -251,17 +251,17 @@ public override void Write(byte[] buffer, int offset, int count) { if (buffer == null) { - throw new ArgumentNullException("buffer"); + throw new ArgumentNullException(nameof(buffer)); } if (offset < 0) { - throw 
new ArgumentOutOfRangeException("offset"); + throw new ArgumentOutOfRangeException(nameof(offset)); } if (count < 0) { - throw new ArgumentOutOfRangeException("count"); + throw new ArgumentOutOfRangeException(nameof(count)); } if (buffer.Length - offset < count) diff --git a/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs b/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs index c56f32c..f785788 100644 --- a/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs +++ b/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs @@ -161,7 +161,7 @@ public void Update(int value) public void Update(byte[] buffer) { if (buffer == null) - throw new ArgumentNullException("buffer"); + throw new ArgumentNullException(nameof(buffer)); Update(buffer, 0, buffer.Length); } @@ -175,13 +175,13 @@ public void Update(byte[] buffer) public void Update(byte[] buffer, int offset, int count) { if (buffer == null) - throw new ArgumentNullException("buffer"); + throw new ArgumentNullException(nameof(buffer)); if (offset < 0) - throw new ArgumentOutOfRangeException("offset", "cannot be less than zero"); + throw new ArgumentOutOfRangeException(nameof(offset), "cannot be less than zero"); if (count < 0) - throw new ArgumentOutOfRangeException("count", "cannot be less than zero"); + throw new ArgumentOutOfRangeException(nameof(count), "cannot be less than zero"); if (offset + count > buffer.Length) - throw new ArgumentOutOfRangeException("count"); + throw new ArgumentOutOfRangeException(nameof(count)); for (var i = 0; i < count; i++) { From 18a81aabb8188b97e071c5dc44b3744acffaab18 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 08:58:23 -0500 Subject: [PATCH 047/325] Bump package versions --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 4 ++-- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 13 ++++++------- .../DeltaQ.Utility.Memory.csproj | 2 +- src/deltaq/deltaq.csproj | 5 ++++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 87991c8..705a609 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -4,9 +4,9 @@ net5.0;netstandard2.0 DeltaQ jzebedee - 0.1.1 + 0.2.0 true - preview + latest diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index f7b6426..780edd6 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -1,18 +1,17 @@ - + + Exe net5.0;netstandard2.0 DeltaQ jzebedee true + dq - + + - - - - - + diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 2677192..08ccea7 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -11,7 +11,7 @@ - + diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 9bec497..de82084 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -4,11 +4,14 @@ net5.0;netstandard2.0 DeltaQ jzebedee + 0.2.0 true + latest - + + From bd51dc167c19d32650d51aede93a03ae79c91193 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:00:24 -0500 Subject: [PATCH 048/325] Bump package versions and update solution --- deltaq.sln | 7 ++++++- test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj | 2 +- .../DeltaQ.SuffixSorting.SAIS.Tests.csproj | 2 +- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/deltaq.sln b/deltaq.sln index 15ae7f2..8eb1039 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -25,7 +25,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.BsDiff.Tests", "test EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS.Tests", "test\DeltaQ.SuffixSorting.SAIS.Tests\DeltaQ.SuffixSorting.SAIS.Tests.csproj", "{2D37444E-3C89-4E1E-A0E6-C009F205EA84}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Utility.Memory", "src\DeltaQ.Utility.Memory\DeltaQ.Utility.Memory.csproj", "{96F1E46E-53CB-4463-82E2-0F81BEB87080}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Utility.Memory", "src\DeltaQ.Utility.Memory\DeltaQ.Utility.Memory.csproj", "{96F1E46E-53CB-4463-82E2-0F81BEB87080}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -73,6 +73,10 @@ Global {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Debug|Any CPU.Build.0 = Debug|Any CPU {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.ActiveCfg = Release|Any CPU {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.Build.0 = Release|Any CPU + {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -88,6 +92,7 @@ Global {279B6F7C-7FB0-42AA-8804-8FF64A990A9A} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} 
{2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {31200DA9-2136-4D22-89D6-F99219C9DDAA} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} diff --git a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj index d168730..e7bd74c 100644 --- a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj +++ b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj @@ -7,7 +7,7 @@ - + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj index 3ec556b..6ca859f 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj @@ -7,7 +7,7 @@ - + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 1414104..e836fcc 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -7,7 +7,7 @@ - + runtime; build; native; contentfiles; analyzers; buildtransitive From 6beab12b61e85ea0d072aef9cbc06979b137dfbd Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:02:22 -0500 Subject: [PATCH 049/325] Add LibDivSufSort project --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj new file mode 100644 index 0000000..2def991 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -0,0 +1,17 @@ + + + + net5.0;netstandard2.0 + DeltaQ + jzebedee + true + 0.2.0 + + + + + + + + + From 3378cdc8109de303c75665fe3408f42716e148ef Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 09:05:19 -0500 Subject: [PATCH 050/325] Add LibDivSufSort project to solution --- deltaq.sln | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/deltaq.sln b/deltaq.sln index 8eb1039..69b5405 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -27,6 +27,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS.T EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Utility.Memory", "src\DeltaQ.Utility.Memory\DeltaQ.Utility.Memory.csproj", "{96F1E46E-53CB-4463-82E2-0F81BEB87080}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LibDivSufSort", "src\DeltaQ.SuffixSorting.LibDivSufSort\DeltaQ.SuffixSorting.LibDivSufSort.csproj", "{E89B007E-0BDE-4642-B40F-CCB7569F88B8}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -73,10 +75,10 @@ Global {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Debug|Any CPU.Build.0 = Debug|Any CPU {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.ActiveCfg = Release|Any CPU {96F1E46E-53CB-4463-82E2-0F81BEB87080}.Release|Any CPU.Build.0 = Release|Any CPU - 
{31200DA9-2136-4D22-89D6-F99219C9DDAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Debug|Any CPU.Build.0 = Debug|Any CPU - {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Release|Any CPU.ActiveCfg = Release|Any CPU - {31200DA9-2136-4D22-89D6-F99219C9DDAA}.Release|Any CPU.Build.0 = Release|Any CPU + {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -92,7 +94,7 @@ Global {279B6F7C-7FB0-42AA-8804-8FF64A990A9A} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} - {31200DA9-2136-4D22-89D6-F99219C9DDAA} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {E89B007E-0BDE-4642-B40F-CCB7569F88B8} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} From dd8f53d8138d37353ab92da4245a375c2ee89c12 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:11:45 -0500 Subject: [PATCH 051/325] Add initial C files for conversion --- .../divsufsort.c | 398 +++++++++ .../sssort.c | 815 ++++++++++++++++++ .../trsort.c | 586 +++++++++++++ .../utils.c | 381 ++++++++ 4 files changed, 2180 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c new file mode 100644 index 0000000..9f64b4f --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c @@ -0,0 +1,398 @@ +/* + * divsufsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" +#ifdef _OPENMP +# include +#endif + + +/*- Private Functions -*/ + +/* Sorts suffixes of type B*. */ +static +saidx_t +sort_typeBstar(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n) { + saidx_t *PAb, *ISAb, *buf; +#ifdef _OPENMP + saidx_t *curbuf; + saidx_t l; +#endif + saidx_t i, j, k, t, m, bufsize; + saint_t c0, c1; +#ifdef _OPENMP + saint_t d0, d1; + int tmp; +#endif + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. 
*/ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. */ +#ifdef _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) + { + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. 
*/ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +saidx_t +construct_BWT(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k, *orig; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +saint_t +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { + saidx_t *bucket_A, *bucket_B; + saidx_t m; + saint_t err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Suffixsort. 
*/ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +saidx_t +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { + saidx_t *B; + saidx_t *bucket_A, *bucket_B; + saidx_t m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} + +const char * +divsufsort_version(void) { + return PROJECT_VERSION_FULL; +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c new file mode 100644 index 0000000..4a18fd2 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c @@ -0,0 +1,815 @@ +/* + * sssort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +saint_t +ss_ilg(saidx_t n) { +#if SS_BLOCKSIZE == 0 +# if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? 
+ 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? + 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +# else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +# endif +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const saint_t sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 
238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +saidx_t +ss_isqrt(saidx_t x) { + saidx_t y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +saint_t +ss_compare(const sauchar_t *T, + const saidx_t *p1, const saidx_t *p2, + saidx_t depth) { + const sauchar_t *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, saidx_t depth) { + saidx_t *i, *j; + saidx_t t; + saint_t r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const sauchar_t *Td, const saidx_t *PA, + saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saint_t c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { + saidx_t i, m; + saidx_t t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. 
*/ +static INLINE +saidx_t * +ss_median3(const sauchar_t *Td, const saidx_t *PA, + saidx_t *v1, saidx_t *v2, saidx_t *v3) { + saidx_t *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +saidx_t * +ss_median5(const sauchar_t *Td, const saidx_t *PA, + saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { + saidx_t *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +saidx_t * +ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { + saidx_t *middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +saidx_t * +ss_partition(const saidx_t *PA, + saidx_t *first, saidx_t *last, saidx_t depth) { + saidx_t *a, *b; + saidx_t t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; + const sauchar_t *Td; + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t s, t; + saint_t ssize; + saint_t limit; + saint_t v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) 
== v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 
0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { + saidx_t t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { + saidx_t *a, *b, t; + saidx_t l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t depth) { + const saidx_t *p; + saidx_t *a, *b; + saidx_t len, half; + saint_t q, r; + saint_t x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? 
*b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. */ +static +void +ss_mergeforward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. 
*/ +static +void +ss_mergebackward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + const saidx_t *p1, *p2; + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + saint_t x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t bufsize, saidx_t depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? 
(a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; + saidx_t *l, *r, *lm, *rm; + saidx_t m, len, half; + saint_t ssize; + saint_t check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + 
STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Substring sort */ +void +sssort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t *buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix) { + saidx_t *a; +#if SS_BLOCKSIZE != 0 + saidx_t *b, *middle, *curbuf; + saidx_t j, k, curbufsize, limit; +#endif + saidx_t i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, 
depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c new file mode 100644 index 0000000..6fe3e67 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c @@ -0,0 +1,586 @@ +/* + * trsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +static INLINE +saint_t +tr_ilg(saidx_t n) { +#if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? + 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? + 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +#else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#endif +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. 
*/ +static +void +tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { + saidx_t *a, *b; + saidx_t t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saidx_t c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { + saidx_t i, m; + saidx_t t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +saidx_t * +tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { + saidx_t *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. 
*/ +static INLINE +saidx_t * +tr_median5(const saidx_t *ISAd, + saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { + saidx_t *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +saidx_t * +tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { + saidx_t *middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + saidx_t chance; + saidx_t remain; + saidx_t incval; + saidx_t count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +saint_t +trbudget_check(trbudget_t *budget, saidx_t size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const saidx_t *ISAd, + saidx_t *first, saidx_t *middle, saidx_t *last, + 
saidx_t **pa, saidx_t **pb, saidx_t v) { + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t t, s; + saidx_t x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + saidx_t *c, *d, *e; + saidx_t s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + saidx_t *c, *d, *e; + saidx_t s, v; + saidx_t rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(saidx_t *ISA, const saidx_t *ISAd, + saidx_t *SA, saidx_t *first, saidx_t *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; + saidx_t *a, *b, *c; + saidx_t t; + saidx_t v, x = 0; + saidx_t incr = ISAd - ISA; + saint_t limit, next; + saint_t ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; 
} + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Tandem repeat sort */ +void +trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { + saidx_t *ISAd; + saidx_t *first, *last; + trbudget_t budget; + saidx_t t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c new file mode 100644 index 0000000..90fb23e --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c @@ -0,0 +1,381 @@ +/* + * utils.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" + + +/*- Private Function -*/ + +/* Binary search for inverse bwt. */ +static +saidx_t +binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { + saidx_t half, i; + for(i = 0, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + if(A[i + half] < value) { + i += half + 1; + half -= (size & 1) ^ 1; + } + } + return i; +} + + +/*- Functions -*/ + +/* Burrows-Wheeler transform. */ +saint_t +bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, + saidx_t n, saidx_t *idx) { + saidx_t *A, i, j, p, t; + saint_t c; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if(n <= 1) { + if(n == 1) { U[0] = T[0]; } + *idx = n; + return 0; + } + + if((A = SA) == NULL) { + i = divbwt(T, U, NULL, n); + if(0 <= i) { *idx = i; i = 0; } + return (saint_t)i; + } + + /* BW transform. 
*/ + if(T == U) { + t = n; + for(i = 0, j = 0; i < n; ++i) { + p = t - 1; + t = A[i]; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + j++; + } else { + *idx = i; + } + } + p = t - 1; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + } else { + *idx = i; + } + } else { + U[0] = T[n - 1]; + for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } + *idx = i + 1; + for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } + } + + if(SA == NULL) { + /* Deallocate memory. */ + free(A); + } + + return 0; +} + +/* Inverse Burrows-Wheeler transform. */ +saint_t +inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, + saidx_t n, saidx_t idx) { + saidx_t C[ALPHABET_SIZE]; + sauchar_t D[ALPHABET_SIZE]; + saidx_t *B; + saidx_t i, p; + saint_t c, d; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + (n < idx) || ((0 < n) && (idx == 0))) { + return -1; + } + if(n <= 1) { return 0; } + + if((B = A) == NULL) { + /* Allocate n*sizeof(saidx_t) bytes of memory. */ + if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + } + + /* Inverse BW transform. */ + for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { + p = C[c]; + if(0 < p) { + C[c] = i; + D[d++] = (sauchar_t)c; + i += p; + } + } + for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } + for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } + for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } + for(i = 0, p = idx; i < n; ++i) { + U[i] = D[binarysearch_lower(C, d, p)]; + p = B[p - 1]; + } + + if(A == NULL) { + /* Deallocate memory. */ + free(B); + } + + return 0; +} + +/* Checks the suffix array SA of the string T. 
*/ +saint_t +sufcheck(const sauchar_t *T, const saidx_t *SA, + saidx_t n, saint_t verbose) { + saidx_t C[ALPHABET_SIZE]; + saidx_t i, p, q, t; + saint_t c; + + if(verbose) { fprintf(stderr, "sufcheck: "); } + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { + if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } + return -1; + } + if(n == 0) { + if(verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } + + /* check range: [0..n-1] */ + for(i = 0; i < n; ++i) { + if((SA[i] < 0) || (n <= SA[i])) { + if(verbose) { + fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + n - 1, i, SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for(i = 1; i < n; ++i) { + if(T[SA[i - 1]] > T[SA[i]]) { + if(verbose) { + fprintf(stderr, "Suffixes in wrong order.\n" + " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", + i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. */ + for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for(i = 0; i < n; ++i) { + p = SA[i]; + if(0 < p) { + c = T[--p]; + t = C[c]; + } else { + c = T[p = n - 1]; + t = q; + } + if((t < 0) || (p != SA[t])) { + if(verbose) { + fprintf(stderr, "Suffix in wrong position.\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + t, (0 <= t) ? 
SA[t] : -1, i, SA[i]); + } + return -4; + } + if(t != q) { + ++C[c]; + if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if(1 <= verbose) { fprintf(stderr, "Done.\n"); } + return 0; +} + + +static +int +_compare(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + saidx_t suf, saidx_t *match) { + saidx_t i, j; + saint_t r; + for(i = suf + *match, j = *match, r = 0; + (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } + *match = j; + return (r == 0) ? -(j != Psize) : r; +} + +/* Search for the pattern P in the string T. */ +saidx_t +sa_search(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + const saidx_t *SA, saidx_t SAsize, + saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t match, lmatch, rmatch; + saidx_t llmatch, lrmatch, rlmatch, rrmatch; + saidx_t i, j, k; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (P == NULL) || (SA == NULL) || + (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } + + for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + match = MIN(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + lmatch = match; + } else if(r > 0) { + rmatch = match; + } else { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + lmatch = MIN(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + llmatch = lmatch; + } else { + lrmatch = lmatch; + } + } + + /* right part */ + for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + 
rmatch = MIN(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + rlmatch = rmatch; + } else { + rrmatch = rmatch; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} + +/* Search for the character c in the string T. */ +saidx_t +sa_simplesearch(const sauchar_t *T, saidx_t Tsize, + const saidx_t *SA, saidx_t SAsize, + saint_t c, saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t i, j, k, p; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + + for(i = j = k = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + p = SA[i + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + } else if(r == 0) { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + p = SA[j + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + } + } + + /* right part */ + for(half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + p = SA[k + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} From f6b5dde25aaff1b31270a863ed5f971e1cc694a3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:14:55 -0500 Subject: [PATCH 052/325] Begin converting utils --- src/DeltaQ.SuffixSorting.LibDivSufSort/{utils.c => Utils.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/DeltaQ.SuffixSorting.LibDivSufSort/{utils.c => Utils.cs} (100%) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs similarity index 100% rename from src/DeltaQ.SuffixSorting.LibDivSufSort/utils.c rename to src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs From 2b292e8b1623cb1b314ad41a4b96eee61bee8212 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 09:15:40 -0500 Subject: [PATCH 053/325] Roll into C# helper class --- .../Utils.cs | 773 ++++++++++-------- 1 file changed, 415 insertions(+), 358 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 90fb23e..463f755 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -1,381 +1,438 @@ -/* - * utils.c for libdivsufsort - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "divsufsort_private.h" - - -/*- Private Function -*/ - -/* Binary search for inverse bwt. */ -static -saidx_t -binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { - saidx_t half, i; - for(i = 0, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - if(A[i + half] < value) { - i += half + 1; - half -= (size & 1) ^ 1; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + class Utils + { + /*- Private Function -*/ + + /* Binary search for inverse bwt. */ + static + saidx_t + binarysearch_lower(const saidx_t* A, saidx_t size, saidx_t value) { + saidx_t half, i; + for(i = 0, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + if(A[i + half] < value) { + i += half + 1; + half -= (size & 1) ^ 1; + } + } + return i; } - } - return i; -} -/*- Functions -*/ - -/* Burrows-Wheeler transform. */ -saint_t -bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, - saidx_t n, saidx_t *idx) { - saidx_t *A, i, j, p, t; - saint_t c; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } - if(n <= 1) { - if(n == 1) { U[0] = T[0]; } - *idx = n; - return 0; - } - - if((A = SA) == NULL) { - i = divbwt(T, U, NULL, n); - if(0 <= i) { *idx = i; i = 0; } - return (saint_t)i; - } - - /* BW transform. */ - if(T == U) { - t = n; - for(i = 0, j = 0; i < n; ++i) { - p = t - 1; - t = A[i]; - if(0 <= p) { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - j++; - } else { - *idx = i; - } - } - p = t - 1; - if(0 <= p) { - c = T[j]; - U[j] = (j <= p) ? 
T[p] : (sauchar_t)A[p]; - A[j] = c; - } else { - *idx = i; - } - } else { - U[0] = T[n - 1]; - for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } - *idx = i + 1; - for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } - } - - if(SA == NULL) { - /* Deallocate memory. */ - free(A); - } - - return 0; -} + /*- Functions -*/ -/* Inverse Burrows-Wheeler transform. */ -saint_t -inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, - saidx_t n, saidx_t idx) { - saidx_t C[ALPHABET_SIZE]; - sauchar_t D[ALPHABET_SIZE]; - saidx_t *B; - saidx_t i, p; - saint_t c, d; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || - (n < idx) || ((0 < n) && (idx == 0))) { - return -1; - } - if(n <= 1) { return 0; } - - if((B = A) == NULL) { - /* Allocate n*sizeof(saidx_t) bytes of memory. */ - if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } - } - - /* Inverse BW transform. */ - for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } - for(i = 0; i < n; ++i) { ++C[T[i]]; } - for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { - p = C[c]; - if(0 < p) { - C[c] = i; - D[d++] = (sauchar_t)c; - i += p; - } - } - for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } - for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } - for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } - for(i = 0, p = idx; i < n; ++i) { - U[i] = D[binarysearch_lower(C, d, p)]; - p = B[p - 1]; - } - - if(A == NULL) { - /* Deallocate memory. */ - free(B); - } - - return 0; -} + /* Burrows-Wheeler transform. */ + saint_t + bw_transform(const sauchar_t* T, sauchar_t* U, saidx_t* SA, + saidx_t n, saidx_t* idx) + { + saidx_t* A, i, j, p, t; + saint_t c; -/* Checks the suffix array SA of the string T. */ -saint_t -sufcheck(const sauchar_t *T, const saidx_t *SA, - saidx_t n, saint_t verbose) { - saidx_t C[ALPHABET_SIZE]; - saidx_t i, p, q, t; - saint_t c; - - if(verbose) { fprintf(stderr, "sufcheck: "); } - - /* Check arguments. 
*/ - if((T == NULL) || (SA == NULL) || (n < 0)) { - if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } - return -1; - } - if(n == 0) { - if(verbose) { fprintf(stderr, "Done.\n"); } - return 0; - } - - /* check range: [0..n-1] */ - for(i = 0; i < n; ++i) { - if((SA[i] < 0) || (n <= SA[i])) { - if(verbose) { - fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - n - 1, i, SA[i]); - } - return -2; - } - } - - /* check first characters. */ - for(i = 1; i < n; ++i) { - if(T[SA[i - 1]] > T[SA[i]]) { - if(verbose) { - fprintf(stderr, "Suffixes in wrong order.\n" - " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" - " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", - i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); - } - return -3; - } - } - - /* check suffixes. */ - for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } - for(i = 0; i < n; ++i) { ++C[T[i]]; } - for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { - t = C[i]; - C[i] = p; - p += t; - } - - q = C[T[n - 1]]; - C[T[n - 1]] += 1; - for(i = 0; i < n; ++i) { - p = SA[i]; - if(0 < p) { - c = T[--p]; - t = C[c]; - } else { - c = T[p = n - 1]; - t = q; - } - if((t < 0) || (p != SA[t])) { - if(verbose) { - fprintf(stderr, "Suffix in wrong position.\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - t, (0 <= t) ? SA[t] : -1, i, SA[i]); - } - return -4; + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + *idx = n; + return 0; + } + + if ((A = SA) == NULL) + { + i = divbwt(T, U, NULL, n); + if (0 <= i) { *idx = i; i = 0; } + return (saint_t)i; + } + + /* BW transform. */ + if (T == U) + { + t = n; + for (i = 0, j = 0; i < n; ++i) + { + p = t - 1; + t = A[i]; + if (0 <= p) + { + c = T[j]; + U[j] = (j <= p) ? 
T[p] : (sauchar_t)A[p]; + A[j] = c; + j++; + } + else + { + *idx = i; + } + } + p = t - 1; + if (0 <= p) + { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + } + else + { + *idx = i; + } + } + else + { + U[0] = T[n - 1]; + for (i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } + *idx = i + 1; + for (++i; i < n; ++i) { U[i] = T[A[i] - 1]; } + } + + if (SA == NULL) + { + /* Deallocate memory. */ + free(A); + } + + return 0; } - if(t != q) { - ++C[c]; - if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + + /* Inverse Burrows-Wheeler transform. */ + saint_t + inverse_bw_transform(const sauchar_t* T, sauchar_t* U, saidx_t* A, + saidx_t n, saidx_t idx) + { + saidx_t C[ALPHABET_SIZE]; + sauchar_t D[ALPHABET_SIZE]; + saidx_t* B; + saidx_t i, p; + saint_t c, d; + + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + (n < idx) || ((0 < n) && (idx == 0))) + { + return -1; + } + if (n <= 1) { return 0; } + + if ((B = A) == NULL) + { + /* Allocate n*sizeof(saidx_t) bytes of memory. */ + if ((B = (saidx_t*)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + } + + /* Inverse BW transform. */ + for (c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) + { + p = C[c]; + if (0 < p) + { + C[c] = i; + D[d++] = (sauchar_t)c; + i += p; + } + } + for (i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } + for (; i < n; ++i) { B[C[T[i]]++] = i + 1; } + for (c = 0; c < d; ++c) { C[c] = C[D[c]]; } + for (i = 0, p = idx; i < n; ++i) + { + U[i] = D[binarysearch_lower(C, d, p)]; + p = B[p - 1]; + } + + if (A == NULL) + { + /* Deallocate memory. */ + free(B); + } + + return 0; } - } - if(1 <= verbose) { fprintf(stderr, "Done.\n"); } - return 0; -} + /* Checks the suffix array SA of the string T. 
*/ + saint_t + sufcheck(const sauchar_t* T, const saidx_t* SA, + saidx_t n, saint_t verbose) + { + saidx_t C[ALPHABET_SIZE]; + saidx_t i, p, q, t; + saint_t c; + + if (verbose) { fprintf(stderr, "sufcheck: "); } + + /* Check arguments. */ + if ((T == NULL) || (SA == NULL) || (n < 0)) + { + if (verbose) { fprintf(stderr, "Invalid arguments.\n"); } + return -1; + } + if (n == 0) + { + if (verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } + /* check range: [0..n-1] */ + for (i = 0; i < n; ++i) + { + if ((SA[i] < 0) || (n <= SA[i])) + { + if (verbose) + { + fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + + + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + n - 1, i, SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for (i = 1; i < n; ++i) + { + if (T[SA[i - 1]] > T[SA[i]]) + { + if (verbose) + { + fprintf(stderr, "Suffixes in wrong order.\n" -static -int -_compare(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - saidx_t suf, saidx_t *match) { - saidx_t i, j; - saint_t r; - for(i = suf + *match, j = *match, r = 0; - (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } - *match = j; - return (r == 0) ? -(j != Psize) : r; -} -/* Search for the pattern P in the string T. 
*/ -saidx_t -sa_search(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t *idx) { - saidx_t size, lsize, rsize, half; - saidx_t match, lmatch, rmatch; - saidx_t llmatch, lrmatch, rlmatch, rrmatch; - saidx_t i, j, k; - saint_t r; - - if(idx != NULL) { *idx = -1; } - if((T == NULL) || (P == NULL) || (SA == NULL) || - (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } - if((Tsize == 0) || (SAsize == 0)) { return 0; } - if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } - - for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - match = MIN(lmatch, rmatch); - r = _compare(T, Tsize, P, Psize, SA[i + half], &match); - if(r < 0) { - i += half + 1; - half -= (size & 1) ^ 1; - lmatch = match; - } else if(r > 0) { - rmatch = match; - } else { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) { - lmatch = MIN(llmatch, lrmatch); - r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); - if(r < 0) { - j += half + 1; - half -= (lsize & 1) ^ 1; - llmatch = lmatch; - } else { - lrmatch = lmatch; + " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + + + " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", + i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. 
*/ + for (i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (i = 0, p = 0; i < ALPHABET_SIZE; ++i) + { + t = C[i]; + C[i] = p; + p += t; } - } - - /* right part */ - for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) { - rmatch = MIN(rlmatch, rrmatch); - r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); - if(r <= 0) { - k += half + 1; - half -= (rsize & 1) ^ 1; - rlmatch = rmatch; - } else { - rrmatch = rmatch; + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for (i = 0; i < n; ++i) + { + p = SA[i]; + if (0 < p) + { + c = T[--p]; + t = C[c]; + } + else + { + c = T[p = n - 1]; + t = q; + } + if ((t < 0) || (p != SA[t])) + { + if (verbose) + { + fprintf(stderr, "Suffix in wrong position.\n" + + + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + + + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + t, (0 <= t) ? SA[t] : -1, i, SA[i]); + } + return -4; + } + if (t != q) + { + ++C[c]; + if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } } - } - break; + if (1 <= verbose) { fprintf(stderr, "Done.\n"); } + return 0; } - } - if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; -} -/* Search for the character c in the string T. */ -saidx_t -sa_simplesearch(const sauchar_t *T, saidx_t Tsize, - const saidx_t *SA, saidx_t SAsize, - saint_t c, saidx_t *idx) { - saidx_t size, lsize, rsize, half; - saidx_t i, j, k, p; - saint_t r; - - if(idx != NULL) { *idx = -1; } - if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } - if((Tsize == 0) || (SAsize == 0)) { return 0; } - - for(i = j = k = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - p = SA[i + half]; - r = (p < Tsize) ? 
T[p] - c : -1; - if(r < 0) { - i += half + 1; - half -= (size & 1) ^ 1; - } else if(r == 0) { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for(half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) { - p = SA[j + half]; - r = (p < Tsize) ? T[p] - c : -1; - if(r < 0) { - j += half + 1; - half -= (lsize & 1) ^ 1; + static + int + _compare(const sauchar_t* T, saidx_t Tsize, + const sauchar_t* P, saidx_t Psize, + saidx_t suf, saidx_t* match) + { + saidx_t i, j; + saint_t r; + for (i = suf + *match, j = *match, r = 0; + (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } + *match = j; + return (r == 0) ? -(j != Psize) : r; + } + + /* Search for the pattern P in the string T. */ + saidx_t + sa_search(const sauchar_t* T, saidx_t Tsize, + const sauchar_t* P, saidx_t Psize, + const saidx_t* SA, saidx_t SAsize, + saidx_t* idx) + { + saidx_t size, lsize, rsize, half; + saidx_t match, lmatch, rmatch; + saidx_t llmatch, lrmatch, rlmatch, rrmatch; + saidx_t i, j, k; + saint_t r; + + if (idx != NULL) { *idx = -1; } + if ((T == NULL) || (P == NULL) || (SA == NULL) || + (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } + if ((Tsize == 0) || (SAsize == 0)) { return 0; } + if (Psize == 0) { if (idx != NULL) { *idx = 0; } return SAsize; } + + for (i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) + { + match = MIN(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + if (r < 0) + { + i += half + 1; + half -= (size & 1) ^ 1; + lmatch = match; + } + else if (r > 0) + { + rmatch = match; + } + else + { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for (llmatch = lmatch, lrmatch = match, half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) + { + lmatch = MIN(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + if (r < 0) + { + j += half + 1; + half -= (lsize & 
1) ^ 1; + llmatch = lmatch; + } + else + { + lrmatch = lmatch; + } + } + + /* right part */ + for (rlmatch = match, rrmatch = rmatch, half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) + { + rmatch = MIN(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + if (r <= 0) + { + k += half + 1; + half -= (rsize & 1) ^ 1; + rlmatch = rmatch; + } + else + { + rrmatch = rmatch; + } + } + + break; + } } - } - - /* right part */ - for(half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) { - p = SA[k + half]; - r = (p < Tsize) ? T[p] - c : -1; - if(r <= 0) { - k += half + 1; - half -= (rsize & 1) ^ 1; + + if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; + } + + /* Search for the character c in the string T. */ + saidx_t + sa_simplesearch(const sauchar_t* T, saidx_t Tsize, + const saidx_t* SA, saidx_t SAsize, + saint_t c, saidx_t* idx) + { + saidx_t size, lsize, rsize, half; + saidx_t i, j, k, p; + saint_t r; + + if (idx != NULL) { *idx = -1; } + if ((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if ((Tsize == 0) || (SAsize == 0)) { return 0; } + + for (i = j = k = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) + { + p = SA[i + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r < 0) + { + i += half + 1; + half -= (size & 1) ^ 1; + } + else if (r == 0) + { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for (half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) + { + p = SA[j + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r < 0) + { + j += half + 1; + half -= (lsize & 1) ^ 1; + } + } + + /* right part */ + for (half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) + { + p = SA[k + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r <= 0) + { + k += half + 1; + half -= (rsize & 1) ^ 1; + } + } + + break; + } } - } - break; + if (idx != NULL) { *idx = (0 < (k - j)) ? 
j : i; } + return k - j; } - } - if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; + } } From e8970be3bdf6501d1beaacb52a16015a492d4189 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 09:16:20 -0500 Subject: [PATCH 054/325] Add typedefs --- src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 463f755..06948ed 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -3,6 +3,9 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using sauchar_t = System.Byte; +using saint_t = System.Int32; +using saidx_t = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort { From 26b130bdcfe036deff06ddce7fe1fef8fbf6d20f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 09:16:46 -0500 Subject: [PATCH 055/325] Add constants --- src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 06948ed..c40ba96 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -11,6 +11,8 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { class Utils { + private const int ALPHABET_SIZE = sizeof(byte) + 1; + /*- Private Function -*/ /* Binary search for inverse bwt. */ From 965caeefce2270ee9ad2b0685af098c1725c1db1 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:17:53 -0500 Subject: [PATCH 056/325] Fix signatures and formatting --- .../Utils.cs | 667 +++++++++--------- 1 file changed, 340 insertions(+), 327 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index c40ba96..dd0e971 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -18,426 +18,439 @@ class Utils /* Binary search for inverse bwt. */ static saidx_t - binarysearch_lower(const saidx_t* A, saidx_t size, saidx_t value) { - saidx_t half, i; - for(i = 0, half = size >> 1; - 0 < size; - size = half, half >>= 1) { - if(A[i + half] < value) { - i += half + 1; - half -= (size & 1) ^ 1; + binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t value) + { + saidx_t half, i; + for (i = 0, half = size >> 1; + 0 < size; + size = half, half >>= 1) + { + if (A[i + half] < value) + { + i += half + 1; + half -= (size & 1) ^ 1; + } + } + return i; } - } - return i; - } - /*- Functions -*/ + /*- Functions -*/ - /* Burrows-Wheeler transform. */ - saint_t - bw_transform(const sauchar_t* T, sauchar_t* U, saidx_t* SA, - saidx_t n, saidx_t* idx) - { - saidx_t* A, i, j, p, t; - saint_t c; - - /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } - if (n <= 1) + /* Burrows-Wheeler transform. */ + saint_t + bw_transform(ReadOnlySpan T, sauchar_t* U, saidx_t* SA, + saidx_t n, saidx_t* idx) { - if (n == 1) { U[0] = T[0]; } - *idx = n; - return 0; - } + saidx_t* A, i, j, p, t; + saint_t c; - if ((A = SA) == NULL) - { - i = divbwt(T, U, NULL, n); - if (0 <= i) { *idx = i; i = 0; } - return (saint_t)i; - } + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if (n <= 1) + { + if (n == 1) { U[0] = T[0]; } + *idx = n; + return 0; + } - /* BW transform. 
*/ - if (T == U) - { - t = n; - for (i = 0, j = 0; i < n; ++i) + if ((A = SA) == NULL) { + i = divbwt(T, U, NULL, n); + if (0 <= i) { *idx = i; i = 0; } + return (saint_t)i; + } + + /* BW transform. */ + if (T == U) + { + t = n; + for (i = 0, j = 0; i < n; ++i) + { + p = t - 1; + t = A[i]; + if (0 <= p) + { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + j++; + } + else + { + *idx = i; + } + } p = t - 1; - t = A[i]; if (0 <= p) { c = T[j]; U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; A[j] = c; - j++; } else { *idx = i; } } - p = t - 1; - if (0 <= p) - { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - } else { - *idx = i; + U[0] = T[n - 1]; + for (i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } + *idx = i + 1; + for (++i; i < n; ++i) { U[i] = T[A[i] - 1]; } } - } - else - { - U[0] = T[n - 1]; - for (i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } - *idx = i + 1; - for (++i; i < n; ++i) { U[i] = T[A[i] - 1]; } - } - if (SA == NULL) - { - /* Deallocate memory. */ - free(A); - } - - return 0; - } - - /* Inverse Burrows-Wheeler transform. */ - saint_t - inverse_bw_transform(const sauchar_t* T, sauchar_t* U, saidx_t* A, - saidx_t n, saidx_t idx) - { - saidx_t C[ALPHABET_SIZE]; - sauchar_t D[ALPHABET_SIZE]; - saidx_t* B; - saidx_t i, p; - saint_t c, d; - - /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || - (n < idx) || ((0 < n) && (idx == 0))) - { - return -1; - } - if (n <= 1) { return 0; } + if (SA == NULL) + { + /* Deallocate memory. */ + free(A); + } - if ((B = A) == NULL) - { - /* Allocate n*sizeof(saidx_t) bytes of memory. */ - if ((B = (saidx_t*)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + return 0; } - /* Inverse BW transform. */ - for (c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } - for (i = 0; i < n; ++i) { ++C[T[i]]; } - for (c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) + /* Inverse Burrows-Wheeler transform. 
*/ + saint_t + inverse_bw_transform(ReadOnlySpan T, sauchar_t* U, saidx_t* A, + saidx_t n, saidx_t idx) { - p = C[c]; - if (0 < p) + saidx_t C[ALPHABET_SIZE]; + sauchar_t D[ALPHABET_SIZE]; + saidx_t* B; + saidx_t i, p; + saint_t c, d; + + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + (n < idx) || ((0 < n) && (idx == 0))) { - C[c] = i; - D[d++] = (sauchar_t)c; - i += p; + return -1; } - } - for (i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } - for (; i < n; ++i) { B[C[T[i]]++] = i + 1; } - for (c = 0; c < d; ++c) { C[c] = C[D[c]]; } - for (i = 0, p = idx; i < n; ++i) - { - U[i] = D[binarysearch_lower(C, d, p)]; - p = B[p - 1]; - } - - if (A == NULL) - { - /* Deallocate memory. */ - free(B); - } + if (n <= 1) { return 0; } - return 0; - } + if ((B = A) == NULL) + { + /* Allocate n*sizeof(saidx_t) bytes of memory. */ + if ((B = (saidx_t*)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + } - /* Checks the suffix array SA of the string T. */ - saint_t - sufcheck(const sauchar_t* T, const saidx_t* SA, - saidx_t n, saint_t verbose) - { - saidx_t C[ALPHABET_SIZE]; - saidx_t i, p, q, t; - saint_t c; + /* Inverse BW transform. */ + for (c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) + { + p = C[c]; + if (0 < p) + { + C[c] = i; + D[d++] = (sauchar_t)c; + i += p; + } + } + for (i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } + for (; i < n; ++i) { B[C[T[i]]++] = i + 1; } + for (c = 0; c < d; ++c) { C[c] = C[D[c]]; } + for (i = 0, p = idx; i < n; ++i) + { + U[i] = D[binarysearch_lower(C, d, p)]; + p = B[p - 1]; + } - if (verbose) { fprintf(stderr, "sufcheck: "); } + if (A == NULL) + { + /* Deallocate memory. */ + free(B); + } - /* Check arguments. 
*/ - if ((T == NULL) || (SA == NULL) || (n < 0)) - { - if (verbose) { fprintf(stderr, "Invalid arguments.\n"); } - return -1; - } - if (n == 0) - { - if (verbose) { fprintf(stderr, "Done.\n"); } return 0; } - /* check range: [0..n-1] */ - for (i = 0; i < n; ++i) + /* Checks the suffix array SA of the string T. */ + saint_t + sufcheck(ReadOnlySpan T, ReadOnlySpan SA, + saidx_t n, saint_t verbose) { - if ((SA[i] < 0) || (n <= SA[i])) + saidx_t C[ALPHABET_SIZE]; + saidx_t i, p, q, t; + saint_t c; + + if (verbose) { fprintf(stderr, "sufcheck: "); } + + /* Check arguments. */ + if ((T == NULL) || (SA == NULL) || (n < 0)) + { + if (verbose) { fprintf(stderr, "Invalid arguments.\n"); } + return -1; + } + if (n == 0) { - if (verbose) + if (verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } + + /* check range: [0..n-1] */ + for (i = 0; i < n; ++i) + { + if ((SA[i] < 0) || (n <= SA[i])) { - fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + if (verbose) + { + fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + + - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - n - 1, i, SA[i]); + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + n - 1, i, SA[i]); + } + return -2; } - return -2; } - } - /* check first characters. */ - for (i = 1; i < n; ++i) - { - if (T[SA[i - 1]] > T[SA[i]]) + /* check first characters. */ + for (i = 1; i < n; ++i) { - if (verbose) + if (T[SA[i - 1]] > T[SA[i]]) { - fprintf(stderr, "Suffixes in wrong order.\n" + if (verbose) + { + fprintf(stderr, "Suffixes in wrong order.\n" + - " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + - " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", - i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); - } - return -3; - } - } - /* check suffixes. 
*/ - for (i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } - for (i = 0; i < n; ++i) { ++C[T[i]]; } - for (i = 0, p = 0; i < ALPHABET_SIZE; ++i) - { - t = C[i]; - C[i] = p; - p += t; - } - q = C[T[n - 1]]; - C[T[n - 1]] += 1; - for (i = 0; i < n; ++i) - { - p = SA[i]; - if (0 < p) - { - c = T[--p]; - t = C[c]; + " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", + i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); + } + return -3; + } } - else + + /* check suffixes. */ + for (i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (i = 0, p = 0; i < ALPHABET_SIZE; ++i) { - c = T[p = n - 1]; - t = q; + t = C[i]; + C[i] = p; + p += t; } - if ((t < 0) || (p != SA[t])) + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for (i = 0; i < n; ++i) { - if (verbose) + p = SA[i]; + if (0 < p) + { + c = T[--p]; + t = C[c]; + } + else { - fprintf(stderr, "Suffix in wrong position.\n" + c = T[p = n - 1]; + t = q; + } + if ((t < 0) || (p != SA[t])) + { + if (verbose) + { + fprintf(stderr, "Suffix in wrong position.\n" + + - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - t, (0 <= t) ? SA[t] : -1, i, SA[i]); + + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + t, (0 <= t) ? SA[t] : -1, i, SA[i]); + } + return -4; + } + if (t != q) + { + ++C[c]; + if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } } - return -4; - } - if (t != q) - { - ++C[c]; - if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } } - } - if (1 <= verbose) { fprintf(stderr, "Done.\n"); } - return 0; - } + if (1 <= verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } - static - int - _compare(const sauchar_t* T, saidx_t Tsize, - const sauchar_t* P, saidx_t Psize, - saidx_t suf, saidx_t* match) - { - saidx_t i, j; - saint_t r; - for (i = suf + *match, j = *match, r = 0; - (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } - *match = j; - return (r == 0) ? 
-(j != Psize) : r; - } + static + int + _compare(ReadOnlySpan T, saidx_t Tsize, + ReadOnlySpan P, saidx_t Psize, + saidx_t suf, saidx_t* match) + { + saidx_t i, j; + saint_t r; + for (i = suf + *match, j = *match, r = 0; + (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } + *match = j; + return (r == 0) ? -(j != Psize) : r; + } - /* Search for the pattern P in the string T. */ - saidx_t - sa_search(const sauchar_t* T, saidx_t Tsize, - const sauchar_t* P, saidx_t Psize, - const saidx_t* SA, saidx_t SAsize, - saidx_t* idx) - { - saidx_t size, lsize, rsize, half; - saidx_t match, lmatch, rmatch; - saidx_t llmatch, lrmatch, rlmatch, rrmatch; - saidx_t i, j, k; - saint_t r; - - if (idx != NULL) { *idx = -1; } - if ((T == NULL) || (P == NULL) || (SA == NULL) || - (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } - if ((Tsize == 0) || (SAsize == 0)) { return 0; } - if (Psize == 0) { if (idx != NULL) { *idx = 0; } return SAsize; } - - for (i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) + /* Search for the pattern P in the string T. 
*/ + saidx_t + sa_search(ReadOnlySpan T, saidx_t Tsize, + ReadOnlySpan P, saidx_t Psize, + ReadOnlySpan SA, saidx_t SAsize, + saidx_t* idx) { - match = MIN(lmatch, rmatch); - r = _compare(T, Tsize, P, Psize, SA[i + half], &match); - if (r < 0) - { - i += half + 1; - half -= (size & 1) ^ 1; - lmatch = match; - } - else if (r > 0) + saidx_t size, lsize, rsize, half; + saidx_t match, lmatch, rmatch; + saidx_t llmatch, lrmatch, rlmatch, rrmatch; + saidx_t i, j, k; + saint_t r; + + if (idx != NULL) { *idx = -1; } + if ((T == NULL) || (P == NULL) || (SA == NULL) || + (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } + if ((Tsize == 0) || (SAsize == 0)) { return 0; } + if (Psize == 0) { if (idx != NULL) { *idx = 0; } return SAsize; } + + for (i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { - rmatch = match; - } - else - { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for (llmatch = lmatch, lrmatch = match, half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) + match = MIN(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + if (r < 0) { - lmatch = MIN(llmatch, lrmatch); - r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); - if (r < 0) - { - j += half + 1; - half -= (lsize & 1) ^ 1; - llmatch = lmatch; - } - else - { - lrmatch = lmatch; - } + i += half + 1; + half -= (size & 1) ^ 1; + lmatch = match; } - - /* right part */ - for (rlmatch = match, rrmatch = rmatch, half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) + else if (r > 0) { - rmatch = MIN(rlmatch, rrmatch); - r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); - if (r <= 0) + rmatch = match; + } + else + { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for (llmatch = lmatch, lrmatch = match, half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { - k += half + 1; - half -= (rsize & 1) ^ 1; - rlmatch = 
rmatch; + lmatch = MIN(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + if (r < 0) + { + j += half + 1; + half -= (lsize & 1) ^ 1; + llmatch = lmatch; + } + else + { + lrmatch = lmatch; + } } - else + + /* right part */ + for (rlmatch = match, rrmatch = rmatch, half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { - rrmatch = rmatch; + rmatch = MIN(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + if (r <= 0) + { + k += half + 1; + half -= (rsize & 1) ^ 1; + rlmatch = rmatch; + } + else + { + rrmatch = rmatch; + } } - } - break; + break; + } } - } - if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; - } + if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; + } - /* Search for the character c in the string T. */ - saidx_t - sa_simplesearch(const sauchar_t* T, saidx_t Tsize, - const saidx_t* SA, saidx_t SAsize, - saint_t c, saidx_t* idx) - { - saidx_t size, lsize, rsize, half; - saidx_t i, j, k, p; - saint_t r; + /* Search for the character c in the string T. */ + saidx_t + sa_simplesearch(ReadOnlySpan T, saidx_t Tsize, + ReadOnlySpan SA, saidx_t SAsize, + saint_t c, saidx_t* idx) + { + saidx_t size, lsize, rsize, half; + saidx_t i, j, k, p; + saint_t r; - if (idx != NULL) { *idx = -1; } - if ((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } - if ((Tsize == 0) || (SAsize == 0)) { return 0; } + if (idx != NULL) { *idx = -1; } + if ((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if ((Tsize == 0) || (SAsize == 0)) { return 0; } - for (i = j = k = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) - { - p = SA[i + half]; - r = (p < Tsize) ? 
T[p] - c : -1; - if (r < 0) - { - i += half + 1; - half -= (size & 1) ^ 1; - } - else if (r == 0) + for (i = j = k = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; - - /* left part */ - for (half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) + p = SA[i + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r < 0) { - p = SA[j + half]; - r = (p < Tsize) ? T[p] - c : -1; - if (r < 0) + i += half + 1; + half -= (size & 1) ^ 1; + } + else if (r == 0) + { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for (half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { - j += half + 1; - half -= (lsize & 1) ^ 1; + p = SA[j + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r < 0) + { + j += half + 1; + half -= (lsize & 1) ^ 1; + } } - } - /* right part */ - for (half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) - { - p = SA[k + half]; - r = (p < Tsize) ? T[p] - c : -1; - if (r <= 0) + /* right part */ + for (half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { - k += half + 1; - half -= (rsize & 1) ^ 1; + p = SA[k + half]; + r = (p < Tsize) ? T[p] - c : -1; + if (r <= 0) + { + k += half + 1; + half -= (rsize & 1) ^ 1; + } } - } - break; + break; + } } - } - if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } - return k - j; - } + if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; + } } } From 1fb56ad7d8ba79ece3949d5b1ed05b9a38c6d646 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:19:02 -0500 Subject: [PATCH 057/325] Add the rest of the owl --- .../Utils.cs | 181 ++++++++---------- 1 file changed, 80 insertions(+), 101 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index dd0e971..6e65d28 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -6,6 +6,7 @@ using sauchar_t = System.Byte; using saint_t = System.Int32; using saidx_t = System.Int32; +using System.Diagnostics; namespace DeltaQ.SuffixSorting.LibDivSufSort { @@ -13,12 +14,8 @@ class Utils { private const int ALPHABET_SIZE = sizeof(byte) + 1; - /*- Private Function -*/ - /* Binary search for inverse bwt. */ - static - saidx_t - binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t value) + static saidx_t binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t value) { saidx_t half, i; for (i = 0, half = size >> 1; @@ -39,32 +36,32 @@ class Utils /* Burrows-Wheeler transform. */ saint_t - bw_transform(ReadOnlySpan T, sauchar_t* U, saidx_t* SA, - saidx_t n, saidx_t* idx) + bw_transform(ReadOnlySpan T, Span U, Span SA, + saidx_t n, ref saidx_t idx) { - saidx_t* A, i, j, p, t; + Span A; saint_t c; /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if ((T == null) || (U == null) || (n < 0) || (idx == null)) { return -1; } if (n <= 1) { if (n == 1) { U[0] = T[0]; } - *idx = n; + idx = n; return 0; } - if ((A = SA) == NULL) + if ((A = SA) == null) { - i = divbwt(T, U, NULL, n); - if (0 <= i) { *idx = i; i = 0; } + saidx_t i = divbwt(T, U, null, n); + if (0 <= i) { idx = i; i = 0; } return (saint_t)i; } /* BW transform. 
*/ if (T == U) { - t = n; + saidx_t i, j, p, t = n; for (i = 0, j = 0; i < n; ++i) { p = t - 1; @@ -78,7 +75,7 @@ class Utils } else { - *idx = i; + idx = i; } } p = t - 1; @@ -90,49 +87,55 @@ class Utils } else { - *idx = i; + idx = i; } } else { + saidx_t i; U[0] = T[n - 1]; for (i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } - *idx = i + 1; + idx = i + 1; for (++i; i < n; ++i) { U[i] = T[A[i] - 1]; } } - if (SA == NULL) - { - /* Deallocate memory. */ - free(A); - } - return 0; } /* Inverse Burrows-Wheeler transform. */ saint_t - inverse_bw_transform(ReadOnlySpan T, sauchar_t* U, saidx_t* A, + inverse_bw_transform(ReadOnlySpan T, Span U, Span A, saidx_t n, saidx_t idx) { - saidx_t C[ALPHABET_SIZE]; - sauchar_t D[ALPHABET_SIZE]; - saidx_t* B; + Span C = new saidx_t[ALPHABET_SIZE]; + Span D = new sauchar_t[ALPHABET_SIZE]; + //saidx_t C[ALPHABET_SIZE]; + //sauchar_t D[ALPHABET_SIZE]; + Span B; saidx_t i, p; saint_t c, d; /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + if ((T == null) || (U == null) || (n < 0) || (idx < 0) || (n < idx) || ((0 < n) && (idx == 0))) { return -1; } if (n <= 1) { return 0; } - if ((B = A) == NULL) + if ((B = A) == null) { /* Allocate n*sizeof(saidx_t) bytes of memory. */ - if ((B = (saidx_t*)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + try + { + B = new saidx_t[n];// (saidx_t*)malloc((size_t)n * sizeof(saidx_t)); + //if (B == null) { return -2; } + } + //TODO: fixme + catch (Exception) + { + return -2; + } } /* Inverse BW transform. */ @@ -157,7 +160,7 @@ class Utils p = B[p - 1]; } - if (A == NULL) + if (A == null) { /* Deallocate memory. */ free(B); @@ -169,23 +172,24 @@ class Utils /* Checks the suffix array SA of the string T. 
*/ saint_t sufcheck(ReadOnlySpan T, ReadOnlySpan SA, - saidx_t n, saint_t verbose) + saidx_t n) { - saidx_t C[ALPHABET_SIZE]; + Span C = new saidx_t[ALPHABET_SIZE]; + //saidx_t C[ALPHABET_SIZE]; saidx_t i, p, q, t; saint_t c; - if (verbose) { fprintf(stderr, "sufcheck: "); } + Debug.Write("sufcheck: "); /* Check arguments. */ - if ((T == NULL) || (SA == NULL) || (n < 0)) + if ((T == null) || (SA == null) || (n < 0)) { - if (verbose) { fprintf(stderr, "Invalid arguments.\n"); } + Debug.WriteLine("Invalid arguments."); return -1; } if (n == 0) { - if (verbose) { fprintf(stderr, "Done.\n"); } + Debug.WriteLine("Done."); return 0; } @@ -194,16 +198,8 @@ class Utils { if ((SA[i] < 0) || (n <= SA[i])) { - if (verbose) - { - fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" - - - - - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - n - 1, i, SA[i]); - } + Debug.WriteLine("Out of the range [0,{0}]", n - 1); + Debug.WriteLine("SA[{0}]={1}", i, SA[i]); return -2; } } @@ -213,21 +209,9 @@ class Utils { if (T[SA[i - 1]] > T[SA[i]]) { - if (verbose) - { - fprintf(stderr, "Suffixes in wrong order.\n" - - - - - " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" - - - - - " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", - i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); - } + Debug.WriteLine("Suffixes in wrong order."); + Debug.WriteLine(" T[SA[{0}]={1}]={2}", i - 1, SA[i - 1], T[SA[i - 1]]); + Debug.WriteLine(" > T[SA[{0}]={1}]={2}", i, SA[i], T[SA[i]]); return -3; } } @@ -259,21 +243,9 @@ class Utils } if ((t < 0) || (p != SA[t])) { - if (verbose) - { - fprintf(stderr, "Suffix in wrong position.\n" - - - - - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" - - - - - " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", - t, (0 <= t) ? SA[t] : -1, i, SA[i]); - } + Debug.WriteLine("Suffix in wrong position."); + Debug.WriteLine(" SA[{0}]={1} or", t, 0 <= t ? 
SA[t] : -1); + Debug.WriteLine(" SA[{0}]={1}", i, SA[i]); return -4; } if (t != q) @@ -283,7 +255,7 @@ class Utils } } - if (1 <= verbose) { fprintf(stderr, "Done.\n"); } + Debug.WriteLine("Done."); return 0; } @@ -291,23 +263,24 @@ class Utils static int _compare(ReadOnlySpan T, saidx_t Tsize, - ReadOnlySpan P, saidx_t Psize, - saidx_t suf, saidx_t* match) + ReadOnlySpan P, saidx_t Psize, + saidx_t suf, ref saidx_t match) { saidx_t i, j; saint_t r; - for (i = suf + *match, j = *match, r = 0; + for (i = suf + match, j = match, r = 0; (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } - *match = j; - return (r == 0) ? -(j != Psize) : r; + match = j; + //TODO: checkme + return (r == 0) ? (j != Psize ? -1 : 0) : r; } /* Search for the pattern P in the string T. */ saidx_t sa_search(ReadOnlySpan T, saidx_t Tsize, - ReadOnlySpan P, saidx_t Psize, + ReadOnlySpan P, saidx_t Psize, ReadOnlySpan SA, saidx_t SAsize, - saidx_t* idx) + ref saidx_t idx) { saidx_t size, lsize, rsize, half; saidx_t match, lmatch, rmatch; @@ -315,18 +288,18 @@ class Utils saidx_t i, j, k; saint_t r; - if (idx != NULL) { *idx = -1; } - if ((T == NULL) || (P == NULL) || (SA == NULL) || + if (idx != null) { idx = -1; } + if ((T == null) || (P == null) || (SA == null) || (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } if ((Tsize == 0) || (SAsize == 0)) { return 0; } - if (Psize == 0) { if (idx != NULL) { *idx = 0; } return SAsize; } + if (Psize == 0) { if (idx != null) { idx = 0; } return SAsize; } for (i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; 0 < size; size = half, half >>= 1) { - match = MIN(lmatch, rmatch); - r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + match = Math.Min(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], ref match); if (r < 0) { i += half + 1; @@ -339,15 +312,18 @@ class Utils } else { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + lsize = half; + j = i; + rsize = size - half - 
1; + k = i + half + 1; /* left part */ for (llmatch = lmatch, lrmatch = match, half = lsize >> 1; 0 < lsize; lsize = half, half >>= 1) { - lmatch = MIN(llmatch, lrmatch); - r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + lmatch = Math.Min(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], ref lmatch); if (r < 0) { j += half + 1; @@ -365,8 +341,8 @@ class Utils 0 < rsize; rsize = half, half >>= 1) { - rmatch = MIN(rlmatch, rrmatch); - r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + rmatch = Math.Min(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], ref rmatch); if (r <= 0) { k += half + 1; @@ -383,7 +359,7 @@ class Utils } } - if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + if (idx != null) { idx = (0 < (k - j)) ? j : i; } return k - j; } @@ -391,14 +367,14 @@ class Utils saidx_t sa_simplesearch(ReadOnlySpan T, saidx_t Tsize, ReadOnlySpan SA, saidx_t SAsize, - saint_t c, saidx_t* idx) + saint_t c, ref saidx_t idx) { saidx_t size, lsize, rsize, half; saidx_t i, j, k, p; saint_t r; - if (idx != NULL) { *idx = -1; } - if ((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if (idx != null) { idx = -1; } + if ((T == null) || (SA == null) || (Tsize < 0) || (SAsize < 0)) { return -1; } if ((Tsize == 0) || (SAsize == 0)) { return 0; } for (i = j = k = 0, size = SAsize, half = size >> 1; @@ -414,7 +390,10 @@ class Utils } else if (r == 0) { - lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + lsize = half; + j = i; + rsize = size - half - 1; + k = i + half + 1; /* left part */ for (half = lsize >> 1; @@ -448,7 +427,7 @@ class Utils } } - if (idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + if (idx != null) { idx = (0 < (k - j)) ? j : i; } return k - j; } From bb741556754003554d05fcfd03429b4a6eb1a74c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 09:19:57 -0500 Subject: [PATCH 058/325] Add LibDivSufSort stub --- .../LibDivSufSort.cs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs new file mode 100644 index 0000000..15a43bb --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -0,0 +1,18 @@ +using System; +using System.Buffers; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + public class LibDivSufSort : ISuffixSort + { + public IMemoryOwner Sort(ReadOnlySpan textBuffer) + { + throw new NotImplementedException(); + } + + public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) + { + throw new NotImplementedException(); + } + } +} From dc1b7eb9cfabaae78ea9ac19aa5ac520699a3ec3 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 13:06:29 -0500 Subject: [PATCH 059/325] Begin converting divsufsort --- .../{divsufsort.c => divsufsort.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/DeltaQ.SuffixSorting.LibDivSufSort/{divsufsort.c => divsufsort.cs} (100%) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs similarity index 100% rename from src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.c rename to src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs From 9496b7c1f1df49298bc345992e0763b4326ab504 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 13:08:22 -0500 Subject: [PATCH 060/325] Roll DivSufSort into cs --- .../LibDivSufSort.cs | 3 +- .../divsufsort.cs | 658 ++++++++++-------- 2 files changed, 352 insertions(+), 309 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index 15a43bb..2efcbea 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -3,7 +3,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { - public class LibDivSufSort : ISuffixSort + public partial class LibDivSufSort : ISuffixSort { public IMemoryOwner Sort(ReadOnlySpan textBuffer) { @@ -14,5 +14,6 @@ public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { throw new NotImplementedException(); } + } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index 9f64b4f..74b97e4 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -1,100 +1,88 @@ -/* - * divsufsort.c for libdivsufsort - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "divsufsort_private.h" -#ifdef _OPENMP -# include -#endif - - -/*- Private Functions -*/ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using sauchar_t = System.Byte; +using saint_t = System.Int32; +using saidx_t = System.Int32; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + public partial class LibDivSufSort + { + /*- Private Functions -*/ -/* Sorts suffixes of type B*. */ -static -saidx_t -sort_typeBstar(const sauchar_t *T, saidx_t *SA, - saidx_t *bucket_A, saidx_t *bucket_B, + /* Sorts suffixes of type B*. */ + static + saidx_t + sort_typeBstar(const sauchar_t* T, saidx_t *SA, + saidx_t* bucket_A, saidx_t *bucket_B, saidx_t n) { - saidx_t *PAb, *ISAb, *buf; + saidx_t* PAb, *ISAb, *buf; #ifdef _OPENMP - saidx_t *curbuf; - saidx_t l; + saidx_t* curbuf; + saidx_t l; #endif - saidx_t i, j, k, t, m, bufsize; - saint_t c0, c1; -#ifdef _OPENMP - saint_t d0, d1; - int tmp; + saidx_t i, j, k, t, m, bufsize; + saint_t c0, c1; +# ifdef _OPENMP + saint_t d0, d1; + int tmp; #endif /* Initialize bucket arrays. */ - for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } - for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } - - /* Count the number of occurrences of the first one or two characters of each - type A, B and B* suffix. Moreover, store the beginning position of all - type B* suffixes into the array SA. 
*/ - for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + for(i = 0; i= c1)); - if(0 <= i) { - /* type B* suffix. */ - ++BUCKET_BSTAR(c0, c1); - SA[--m] = i; - /* type B suffix. */ - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { - ++BUCKET_B(c0, c1); - } + do { ++BUCKET_A(c1 = c0); } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + if (0 <= i) + { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) + { + ++BUCKET_B(c0, c1); + } } - } - m = n - m; +} +m = n - m; /* note: A type B* suffix is lexicographically smaller than a type B suffix that begins with the same first two characters. */ - /* Calculate the index of start/end point of each bucket. */ - for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { +/* Calculate the index of start/end point of each bucket. */ +for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) +{ t = i + BUCKET_A(c0); BUCKET_A(c0) = i + j; /* start point */ i = t + BUCKET_B(c0, c0); - for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { - j += BUCKET_BSTAR(c0, c1); - BUCKET_BSTAR(c0, c1) = j; /* end point */ - i += BUCKET_B(c0, c1); + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) + { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); } - } +} - if(0 < m) { +if (0 < m) +{ /* Sort the type B* suffixes by their first two characters. */ PAb = SA + n - m; ISAb = SA + m; - for(i = m - 2; 0 <= i; --i) { - t = PAb[i], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = i; + for (i = m - 2; 0 <= i; --i) + { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; } t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; SA[--BUCKET_BSTAR(c0, c1)] = m - 1; @@ -106,221 +94,264 @@ begins with the same first two characters. 
c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) { - tmp = omp_get_thread_num(); - curbuf = buf + tmp * bufsize; - k = 0; - for(;;) { - #pragma omp critical(sssort_lock) + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for (; ; ) { - if(0 < (l = j)) { - d0 = c0, d1 = c1; - do { - k = BUCKET_BSTAR(d0, d1); - if(--d1 <= d0) { - d1 = ALPHABET_SIZE - 1; - if(--d0 < 0) { break; } - } - } while(((l - k) <= 1) && (0 < (l = k))); - c0 = d0, c1 = d1, j = k; - } +#pragma omp critical(sssort_lock) + { + if (0 < (l = j)) + { + d0 = c0, d1 = c1; + do + { + k = BUCKET_BSTAR(d0, d1); + if (--d1 <= d0) + { + d1 = ALPHABET_SIZE - 1; + if (--d0 < 0) { break; } + } + } while (((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if (l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); } - if(l == 0) { break; } - sssort(T, PAb, SA + k, SA + l, - curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); - } } #else buf = SA + m, bufsize = n - (2 * m); - for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { - for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { - i = BUCKET_BSTAR(c0, c1); - if(1 < (j - i)) { - sssort(T, PAb, SA + i, SA + j, - buf, bufsize, 2, n, *(SA + i) == (m - 1)); + for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) + { + for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) + { + i = BUCKET_BSTAR(c0, c1); + if (1 < (j - i)) + { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } } - } } #endif /* Compute ranks of type B* substrings. 
*/ - for(i = m - 1; 0 <= i; --i) { - if(0 <= SA[i]) { + for (i = m - 1; 0 <= i; --i) + { + if (0 <= SA[i]) + { + j = i; + do { ISAb[SA[i]] = i; } while ((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if (i <= 0) { break; } + } j = i; - do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); - SA[i + 1] = i - j; - if(i <= 0) { break; } - } - j = i; - do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); - ISAb[SA[i]] = j; + do { ISAb[SA[i] = ~SA[i]] = j; } while (SA[--i] < 0); + ISAb[SA[i]] = j; } /* Construct the inverse suffix array of type B* suffixes using trsort. */ trsort(ISAb, SA, m, 1); /* Set the sorted order of tyoe B* suffixes. */ - for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } - if(0 <= i) { - t = i; - for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } - SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; - } + for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) + { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if (0 <= i) + { + t = i; + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } } /* Calculate the index of start/end point of each bucket. */ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ - for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { - i = BUCKET_A(c0 + 1) - 1; - for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { - t = i - BUCKET_B(c0, c1); - BUCKET_B(c0, c1) = i; /* end point */ - - /* Move all type B* suffixes to the correct position. 
*/ - for(i = t, j = BUCKET_BSTAR(c0, c1); - j <= k; - --i, --k) { SA[i] = SA[k]; } - } - BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ - BUCKET_B(c0, c0) = i; /* end point */ + for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) + { + i = BUCKET_A(c0 + 1) - 1; + for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) + { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for (i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ } - } +} - return m; +return m; } /* Constructs the suffix array by using the sorted order of type B* suffixes. */ static void -construct_SA(const sauchar_t *T, saidx_t *SA, - saidx_t *bucket_A, saidx_t *bucket_B, - saidx_t n, saidx_t m) { - saidx_t *i, *j, *k; - saidx_t s; - saint_t c0, c1, c2; - - if(0 < m) { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { - /* Scan the suffix array from right to left. */ - for(i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) { - if(0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - *j = ~s; - c0 = T[--s]; - if((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else { - assert(((s == 0) && (T[s] == c1)) || (s < 0)); - *j = ~s; +construct_SA(const sauchar_t* T, saidx_t* SA, + saidx_t* bucket_A, saidx_t* bucket_B, + saidx_t n, saidx_t m) +{ + saidx_t* i, *j, *k; + saidx_t s; + saint_t c0, c1, c2; + + if (0 < m) + { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. 
*/ + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + /* Scan the suffix array from right to left. */ + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if (c0 != c2) + { + if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else + { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } } - } } - } - - /* Construct the suffix array by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); - /* Scan the suffix array from left to right. */ - for(i = SA, j = SA + n; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - if((s == 0) || (T[s - 1] < c0)) { s = ~s; } - if(c0 != c2) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } else { - assert(s < 0); - *i = ~s; + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for (i = SA, j = SA + n; i < j; ++i) + { + if (0 < (s = *i)) + { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if ((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if (c0 != c2) + { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } + else + { + assert(s < 0); + *i = ~s; + } } - } } /* Constructs the burrows-wheeler transformed string directly by using the sorted order of type B* suffixes. 
*/ static saidx_t -construct_BWT(const sauchar_t *T, saidx_t *SA, - saidx_t *bucket_A, saidx_t *bucket_B, - saidx_t n, saidx_t m) { - saidx_t *i, *j, *k, *orig; - saidx_t s; - saint_t c0, c1, c2; - - if(0 < m) { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { - /* Scan the suffix array from right to left. */ - for(i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) { - if(0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - c0 = T[--s]; - *j = ~((saidx_t)c0); - if((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if(c0 != c2) { - if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - assert(k < j); - *k-- = s; - } else if(s != 0) { - *j = ~s; -#ifndef NDEBUG - } else { - assert(T[s] == c1); +construct_BWT(const sauchar_t* T, saidx_t* SA, + saidx_t* bucket_A, saidx_t* bucket_B, + saidx_t n, saidx_t m) +{ + saidx_t* i, *j, *k, *orig; + saidx_t s; + saint_t c0, c1, c2; + + if (0 < m) + { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + /* Scan the suffix array from right to left. */ + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if (c0 != c2) + { + if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else if (s != 0) + { + *j = ~s; +# ifndef NDEBUG + } + else + { + assert(T[s] == c1); #endif + } + } } - } } - } - - /* Construct the BWTed string by using - the sorted order of type B suffixes. 
*/ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); - /* Scan the suffix array from left to right. */ - for(i = SA, j = SA + n, orig = SA; i < j; ++i) { - if(0 < (s = *i)) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - *i = c0; - if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } - if(c0 != c2) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } else if(s != 0) { - *i = ~s; - } else { - orig = i; + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for (i = SA, j = SA + n, orig = SA; i < j; ++i) + { + if (0 < (s = *i)) + { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } + if (c0 != c2) + { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } + else if (s != 0) + { + *i = ~s; + } + else + { + orig = i; + } } - } - return orig - SA; + return orig - SA; } @@ -329,70 +360,81 @@ the sorted order of type B suffixes. */ /*- Function -*/ saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { - saidx_t *bucket_A, *bucket_B; - saidx_t m; - saint_t err = 0; - - /* Check arguments. */ - if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } - else if(n == 0) { return 0; } - else if(n == 1) { SA[0] = 0; return 0; } - else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } - - bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - - /* Suffixsort. 
*/ - if((bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); - construct_SA(T, SA, bucket_A, bucket_B, n, m); - } else { - err = -2; - } - - free(bucket_B); - free(bucket_A); - - return err; +divsufsort(const sauchar_t* T, saidx_t* SA, saidx_t n) +{ + saidx_t* bucket_A, *bucket_B; + saidx_t m; + saint_t err = 0; + + /* Check arguments. */ + if ((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if (n == 0) { return 0; } + else if (n == 1) { SA[0] = 0; return 0; } + else if (n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Suffixsort. */ + if ((bucket_A != NULL) && (bucket_B != NULL)) + { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } + else + { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; } saidx_t -divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { - saidx_t *B; - saidx_t *bucket_A, *bucket_B; - saidx_t m, pidx, i; - - /* Check arguments. */ - if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } - else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } - - if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } - bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - - /* Burrows-Wheeler Transform. */ - if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n); - pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); - - /* Copy to output string. 
*/ - U[0] = T[n - 1]; - for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } - for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } - pidx += 1; - } else { - pidx = -2; - } - - free(bucket_B); - free(bucket_A); - if(A == NULL) { free(B); } - - return pidx; +divbwt(const sauchar_t* T, sauchar_t* U, saidx_t* A, saidx_t n) +{ + saidx_t* B; + saidx_t* bucket_A, *bucket_B; + saidx_t m, pidx, i; + + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } + + if ((B = A) == NULL) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Burrows-Wheeler Transform. */ + if ((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) + { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. */ + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } + for (i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } + pidx += 1; + } + else + { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if (A == NULL) { free(B); } + + return pidx; } -const char * +const char* divsufsort_version(void) { - return PROJECT_VERSION_FULL; + return PROJECT_VERSION_FULL; +} + + } } From 2b5736e5aecfb8e0e903070bffa8439492f859f3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 9 May 2021 13:13:06 -0500 Subject: [PATCH 061/325] Fix const signatures and preprocessor directives --- .../divsufsort.cs | 695 +++++++++--------- 1 file changed, 345 insertions(+), 350 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index 74b97e4..50db6fe 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -16,425 +16,420 @@ public partial class LibDivSufSort /* Sorts suffixes of type B*. */ static saidx_t - sort_typeBstar(const sauchar_t* T, saidx_t *SA, - saidx_t* bucket_A, saidx_t *bucket_B, - saidx_t n) { - saidx_t* PAb, *ISAb, *buf; -#ifdef _OPENMP - saidx_t* curbuf; - saidx_t l; + sort_typeBstar(ReadOnlySpan T, saidx_t* SA, + saidx_t* bucket_A, saidx_t* bucket_B, + saidx_t n) + { + saidx_t* PAb, *ISAb, *buf; +#if _OPENMP + saidx_t* curbuf; + saidx_t l; #endif - saidx_t i, j, k, t, m, bufsize; - saint_t c0, c1; -# ifdef _OPENMP - saint_t d0, d1; - int tmp; + saidx_t i, j, k, t, m, bufsize; + saint_t c0, c1; +#if _OPENMP + saint_t d0, d1; + int tmp; #endif - /* Initialize bucket arrays. */ - for(i = 0; i= c1)); - if (0 <= i) - { - /* type B* suffix. */ - ++BUCKET_BSTAR(c0, c1); - SA[--m] = i; - /* type B suffix. */ - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) - { - ++BUCKET_B(c0, c1); - } - } -} -m = n - m; -/* -note: - A type B* suffix is lexicographically smaller than a type B suffix that - begins with the same first two characters. -*/ - -/* Calculate the index of start/end point of each bucket. 
*/ -for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) -{ - t = i + BUCKET_A(c0); - BUCKET_A(c0) = i + j; /* start point */ - i = t + BUCKET_B(c0, c0); - for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) - { - j += BUCKET_BSTAR(c0, c1); - BUCKET_BSTAR(c0, c1) = j; /* end point */ - i += BUCKET_B(c0, c1); - } -} + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for (i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) + { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + if (0 <= i) + { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) + { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; + /* + note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. + */ + + /* Calculate the index of start/end point of each bucket. */ + for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) + { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) + { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } -if (0 < m) -{ - /* Sort the type B* suffixes by their first two characters. */ - PAb = SA + n - m; ISAb = SA + m; - for (i = m - 2; 0 <= i; --i) - { - t = PAb[i], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = i; - } - t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = m - 1; - - /* Sort the type B* substrings using sssort. 
*/ -#ifdef _OPENMP - tmp = omp_get_max_threads(); - buf = SA + m, bufsize = (n - (2 * m)) / tmp; - c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; -#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) - { - tmp = omp_get_thread_num(); - curbuf = buf + tmp * bufsize; - k = 0; - for (; ; ) - { -#pragma omp critical(sssort_lock) + if (0 < m) { - if (0 < (l = j)) + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for (i = m - 2; 0 <= i; --i) + { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. */ +#if _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) { - d0 = c0, d1 = c1; - do + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for (; ; ) { - k = BUCKET_BSTAR(d0, d1); - if (--d1 <= d0) +#pragma omp critical(sssort_lock) { - d1 = ALPHABET_SIZE - 1; - if (--d0 < 0) { break; } + if (0 < (l = j)) + { + d0 = c0, d1 = c1; + do + { + k = BUCKET_BSTAR(d0, d1); + if (--d1 <= d0) + { + d1 = ALPHABET_SIZE - 1; + if (--d0 < 0) { break; } + } + } while (((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } } - } while (((l - k) <= 1) && (0 < (l = k))); - c0 = d0, c1 = d1, j = k; + if (l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } } - } - if (l == 0) { break; } - sssort(T, PAb, SA + k, SA + l, - curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); - } - } #else - buf = SA + m, bufsize = n - (2 * m); - for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) - { - for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) - { - i = BUCKET_BSTAR(c0, c1); - if (1 < (j - i)) - { - sssort(T, PAb, SA + i, SA + j, - buf, 
bufsize, 2, n, *(SA + i) == (m - 1)); - } - } - } + buf = SA + m, bufsize = n - (2 * m); + for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) + { + for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) + { + i = BUCKET_BSTAR(c0, c1); + if (1 < (j - i)) + { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } #endif - /* Compute ranks of type B* substrings. */ - for (i = m - 1; 0 <= i; --i) - { - if (0 <= SA[i]) - { - j = i; - do { ISAb[SA[i]] = i; } while ((0 <= --i) && (0 <= SA[i])); - SA[i + 1] = i - j; - if (i <= 0) { break; } - } - j = i; - do { ISAb[SA[i] = ~SA[i]] = j; } while (SA[--i] < 0); - ISAb[SA[i]] = j; - } + /* Compute ranks of type B* substrings. */ + for (i = m - 1; 0 <= i; --i) + { + if (0 <= SA[i]) + { + j = i; + do { ISAb[SA[i]] = i; } while ((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if (i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while (SA[--i] < 0); + ISAb[SA[i]] = j; + } - /* Construct the inverse suffix array of type B* suffixes using trsort. */ - trsort(ISAb, SA, m, 1); + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); - /* Set the sorted order of tyoe B* suffixes. */ - for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) - { - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } - if (0 <= i) - { - t = i; - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } - SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; - } - } + /* Set the sorted order of tyoe B* suffixes. */ + for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) + { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if (0 <= i) + { + t = i; + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } - /* Calculate the index of start/end point of each bucket. 
*/ - BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ - for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) - { - i = BUCKET_A(c0 + 1) - 1; - for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) - { - t = i - BUCKET_B(c0, c1); - BUCKET_B(c0, c1) = i; /* end point */ + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) + { + i = BUCKET_A(c0 + 1) - 1; + for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) + { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ - /* Move all type B* suffixes to the correct position. */ - for (i = t, j = BUCKET_BSTAR(c0, c1); - j <= k; - --i, --k) { SA[i] = SA[k]; } + /* Move all type B* suffixes to the correct position. */ + for (i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; } - BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ - BUCKET_B(c0, c0) = i; /* end point */ - } -} -return m; -} + /* Constructs the suffix array by using the sorted order of type B* suffixes. */ + static + void + construct_SA(ReadOnlySpan T, saidx_t* SA, + saidx_t* bucket_A, saidx_t* bucket_B, + saidx_t n, saidx_t m) + { + saidx_t* i, *j, *k; + saidx_t s; + saint_t c0, c1, c2; -/* Constructs the suffix array by using the sorted order of type B* suffixes. */ -static -void -construct_SA(const sauchar_t* T, saidx_t* SA, - saidx_t* bucket_A, saidx_t* bucket_B, - saidx_t n, saidx_t m) -{ - saidx_t* i, *j, *k; - saidx_t s; - saint_t c0, c1, c2; + if (0 < m) + { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + /* Scan the suffix array from right to left. 
*/ + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if (c0 != c2) + { + if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else + { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } - if (0 < m) - { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) - { - /* Scan the suffix array from right to left. */ - for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for (i = SA, j = SA + n; i < j; ++i) { - if (0 < (s = *j)) + if (0 < (s = *i)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - *j = ~s; + assert(T[s - 1] >= T[s]); c0 = T[--s]; - if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if ((s == 0) || (T[s - 1] < c0)) { s = ~s; } if (c0 != c2) { - if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); } - assert(k < j); - *k-- = s; + assert(i < k); + *k++ = s; } else { - assert(((s == 0) && (T[s] == c1)) || (s < 0)); - *j = ~s; + assert(s < 0); + *i = ~s; } } } - } - /* Construct the suffix array by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); - /* Scan the suffix array from left to right. 
*/ - for (i = SA, j = SA + n; i < j; ++i) - { - if (0 < (s = *i)) + /* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ + static + saidx_t + construct_BWT(ReadOnlySpan T, saidx_t* SA, + saidx_t* bucket_A, saidx_t* bucket_B, + saidx_t n, saidx_t m) { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - if ((s == 0) || (T[s - 1] < c0)) { s = ~s; } - if (c0 != c2) + saidx_t* i, *j, *k, *orig; + saidx_t s; + saint_t c0, c1, c2; + + if (0 < m) { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + /* Scan the suffix array from right to left. */ + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if (c0 != c2) + { + if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else if (s != 0) + { + *j = ~s; +#if !NDEBUG + } + else + { + assert(T[s] == c1); +#endif + } + } + } } - assert(i < k); - *k++ = s; - } - else - { - assert(s < 0); - *i = ~s; - } - } -} -/* Constructs the burrows-wheeler transformed string directly - by using the sorted order of type B* suffixes. */ -static -saidx_t -construct_BWT(const sauchar_t* T, saidx_t* SA, - saidx_t* bucket_A, saidx_t* bucket_B, - saidx_t n, saidx_t m) -{ - saidx_t* i, *j, *k, *orig; - saidx_t s; - saint_t c0, c1, c2; - - if (0 < m) - { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) - { - /* Scan the suffix array from right to left. 
*/ - for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; - i <= j; - --j) + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for (i = SA, j = SA + n, orig = SA; i < j; ++i) { - if (0 < (s = *j)) + if (0 < (s = *i)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); + assert(T[s - 1] >= T[s]); c0 = T[--s]; - *j = ~((saidx_t)c0); - if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } + *i = c0; + if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } if (c0 != c2) { - if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); } - assert(k < j); - *k-- = s; + assert(i < k); + *k++ = s; } else if (s != 0) { - *j = ~s; -# ifndef NDEBUG + *i = ~s; } else { - assert(T[s] == c1); -#endif + orig = i; } } - } - } - /* Construct the BWTed string by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); - /* Scan the suffix array from left to right. 
*/ - for (i = SA, j = SA + n, orig = SA; i < j; ++i) - { - if (0 < (s = *i)) - { - assert(T[s - 1] >= T[s]); - c0 = T[--s]; - *i = c0; - if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } - if (c0 != c2) - { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - assert(i < k); - *k++ = s; - } - else if (s != 0) - { - *i = ~s; - } - else - { - orig = i; + return orig - SA; } - } - - return orig - SA; -} - -/*---------------------------------------------------------------------------*/ -/*- Function -*/ + /*---------------------------------------------------------------------------*/ -saint_t -divsufsort(const sauchar_t* T, saidx_t* SA, saidx_t n) -{ - saidx_t* bucket_A, *bucket_B; - saidx_t m; - saint_t err = 0; + /*- Function -*/ - /* Check arguments. */ - if ((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } - else if (n == 0) { return 0; } - else if (n == 1) { SA[0] = 0; return 0; } - else if (n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } - - bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + saint_t + divsufsort(ReadOnlySpan T, saidx_t* SA, saidx_t n) + { + saidx_t* bucket_A, *bucket_B; + saidx_t m; + saint_t err = 0; - /* Suffixsort. */ - if ((bucket_A != NULL) && (bucket_B != NULL)) - { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); - construct_SA(T, SA, bucket_A, bucket_B, n, m); - } - else - { - err = -2; - } + /* Check arguments. */ + if ((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if (n == 0) { return 0; } + else if (n == 1) { SA[0] = 0; return 0; } + else if (n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } - free(bucket_B); - free(bucket_A); + bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - return err; -} + /* Suffixsort. 
*/ + if ((bucket_A != NULL) && (bucket_B != NULL)) + { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } + else + { + err = -2; + } -saidx_t -divbwt(const sauchar_t* T, sauchar_t* U, saidx_t* A, saidx_t n) -{ - saidx_t* B; - saidx_t* bucket_A, *bucket_B; - saidx_t m, pidx, i; + free(bucket_B); + free(bucket_A); - /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0)) { return -1; } - else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } + return err; + } - if ((B = A) == NULL) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } - bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + saidx_t + divbwt(ReadOnlySpan T, sauchar_t* U, saidx_t* A, saidx_t n) + { + saidx_t* B; + saidx_t* bucket_A, *bucket_B; + saidx_t m, pidx, i; - /* Burrows-Wheeler Transform. */ - if ((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) - { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n); - pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); - - /* Copy to output string. */ - U[0] = T[n - 1]; - for (i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } - for (i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } - pidx += 1; - } - else - { - pidx = -2; - } + /* Check arguments. */ + if ((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } - free(bucket_B); - free(bucket_A); - if (A == NULL) { free(B); } + if ((B = A) == NULL) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - return pidx; -} + /* Burrows-Wheeler Transform. */ + if ((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) + { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. 
*/ + U[0] = T[n - 1]; + for (i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } + for (i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } + pidx += 1; + } + else + { + pidx = -2; + } -const char* -divsufsort_version(void) { - return PROJECT_VERSION_FULL; -} + free(bucket_B); + free(bucket_A); + if (A == NULL) { free(B); } + return pidx; + } } } From 37ecae3891adb7633ce7d2925b95a8ee3b902c36 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 9 May 2021 13:14:09 -0500 Subject: [PATCH 062/325] Fix nulls and some spans --- .../divsufsort.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index 50db6fe..f558392 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -210,7 +210,7 @@ the sorted order of type B* suffixes. */ { /* Scan the suffix array from right to left. */ for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + j = SA + BUCKET_A(c1 + 1) - 1, k = null, c2 = -1; i <= j; --j) { @@ -287,7 +287,7 @@ the sorted order of type B* suffixes. */ { /* Scan the suffix array from right to left. */ for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + j = SA + BUCKET_A(c1 + 1) - 1, k = null, c2 = -1; i <= j; --j) { @@ -361,14 +361,14 @@ the sorted order of type B suffixes. */ /*- Function -*/ saint_t - divsufsort(ReadOnlySpan T, saidx_t* SA, saidx_t n) + divsufsort(ReadOnlySpan T, Span SA, saidx_t n) { - saidx_t* bucket_A, *bucket_B; + Span bucket_A, bucket_B; saidx_t m; saint_t err = 0; /* Check arguments. 
*/ - if ((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + if ((T == null) || (SA == null) || (n < 0)) { return -1; } else if (n == 0) { return 0; } else if (n == 1) { SA[0] = 0; return 0; } else if (n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } @@ -377,7 +377,7 @@ the sorted order of type B suffixes. */ bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); /* Suffixsort. */ - if ((bucket_A != NULL) && (bucket_B != NULL)) + if ((bucket_A != null) && (bucket_B != null)) { m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); construct_SA(T, SA, bucket_A, bucket_B, n, m); @@ -401,15 +401,15 @@ the sorted order of type B suffixes. */ saidx_t m, pidx, i; /* Check arguments. */ - if ((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + if ((T == null) || (U == null) || (n < 0)) { return -1; } else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } - if ((B = A) == NULL) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + if ((B = A) == null) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); /* Burrows-Wheeler Transform. */ - if ((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) + if ((B != null) && (bucket_A != null) && (bucket_B != null)) { m = sort_typeBstar(T, B, bucket_A, bucket_B, n); pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); @@ -427,7 +427,7 @@ the sorted order of type B suffixes. */ free(bucket_B); free(bucket_A); - if (A == NULL) { free(B); } + if (A == null) { free(B); } return pidx; } From 10cca207517f99757a8b2e7090407274005082cb Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 11 May 2021 12:34:05 -0500 Subject: [PATCH 063/325] Add SpanOwners and BUCKET_A/BUCKET_B/BUCKET_BSTAR impls --- .../divsufsort.cs | 91 ++++++++++--------- 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index f558392..048c11f 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -6,21 +6,26 @@ using sauchar_t = System.Byte; using saint_t = System.Int32; using saidx_t = System.Int32; +using Microsoft.Toolkit.HighPerformance.Buffers; +using System.Diagnostics; +using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.LibDivSufSort { public partial class LibDivSufSort { - /*- Private Functions -*/ + private const int ALPHABET_SIZE = sizeof(byte) + 1; + private const int BUCKET_A_SIZE = ALPHABET_SIZE; + private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; /* Sorts suffixes of type B*. */ static saidx_t - sort_typeBstar(ReadOnlySpan T, saidx_t* SA, - saidx_t* bucket_A, saidx_t* bucket_B, + sort_typeBstar(ReadOnlySpan T, Span SA, + Span bucket_A, Span bucket_B, saidx_t n) { - saidx_t* PAb, *ISAb, *buf; + saidx_t PAb, ISAb, buf; #if _OPENMP saidx_t* curbuf; saidx_t l; @@ -33,8 +38,10 @@ public partial class LibDivSufSort #endif /* Initialize bucket arrays. */ - for (i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } - for (i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + Debug.Assert(bucket_A.Length == BUCKET_A_SIZE); + Debug.Assert(bucket_B.Length == BUCKET_B_SIZE); + bucket_A.Clear(); + bucket_B.Clear(); /* Count the number of occurrences of the first one or two characters of each type A, B and B* suffix. Moreover, store the beginning position of all @@ -42,16 +49,16 @@ type B* suffixes into the array SA. */ for (i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { /* type A suffix. 
*/ - do { ++BUCKET_A(c1 = c0); } while ((0 <= --i) && ((c0 = T[i]) >= c1)); + do { ++BUCKET_A(bucket_A, c1 = c0); } while ((0 <= --i) && ((c0 = T[i]) >= c1)); if (0 <= i) { /* type B* suffix. */ - ++BUCKET_BSTAR(c0, c1); + ++BUCKET_BSTAR(bucket_B, c0, c1); SA[--m] = i; /* type B suffix. */ for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { - ++BUCKET_B(c0, c1); + ++BUCKET_B(bucket_B, c0, c1); } } } @@ -65,14 +72,14 @@ begins with the same first two characters. /* Calculate the index of start/end point of each bucket. */ for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { - t = i + BUCKET_A(c0); - BUCKET_A(c0) = i + j; /* start point */ - i = t + BUCKET_B(c0, c0); + t = i + BUCKET_A(bucket_A, c0); + BUCKET_A(bucket_A, c0) = i + j; /* start point */ + i = t + BUCKET_B(bucket_B, c0, c0); for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { - j += BUCKET_BSTAR(c0, c1); - BUCKET_BSTAR(c0, c1) = j; /* end point */ - i += BUCKET_B(c0, c1); + j += BUCKET_BSTAR(bucket_B, c0, c1); + BUCKET_BSTAR(bucket_B, c0, c1) = j; /* end point */ + i += BUCKET_B(bucket_B, c0, c1); } } @@ -83,10 +90,10 @@ begins with the same first two characters. for (i = m - 2; 0 <= i; --i) { t = PAb[i], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = i; + SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = i; } t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; - SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = m - 1; /* Sort the type B* substrings using sssort. */ #if _OPENMP @@ -128,7 +135,7 @@ begins with the same first two characters. { for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { - i = BUCKET_BSTAR(c0, c1); + i = BUCKET_BSTAR(bucket_B, c0, c1); if (1 < (j - i)) { sssort(T, PAb, SA + i, SA + j, @@ -169,26 +176,33 @@ begins with the same first two characters. } /* Calculate the index of start/end point of each bucket. 
*/ - BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + BUCKET_B(bucket_B, ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { - i = BUCKET_A(c0 + 1) - 1; + i = BUCKET_A(bucket_A, c0 + 1) - 1; for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { - t = i - BUCKET_B(c0, c1); - BUCKET_B(c0, c1) = i; /* end point */ + t = i - BUCKET_B(bucket_B, c0, c1); + BUCKET_B(bucket_B, c0, c1) = i; /* end point */ /* Move all type B* suffixes to the correct position. */ - for (i = t, j = BUCKET_BSTAR(c0, c1); + for (i = t, j = BUCKET_BSTAR(bucket_B, c0, c1); j <= k; --i, --k) { SA[i] = SA[k]; } } - BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ - BUCKET_B(c0, c0) = i; /* end point */ + BUCKET_BSTAR(bucket_B, c0, c0 + 1) = i - BUCKET_B(bucket_B, c0, c0) + 1; /* start point */ + BUCKET_B(bucket_B, c0, c0) = i; /* end point */ } } return m; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_A(Span bucket_A, int c0) => ref bucket_A[c0]; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_B(Span bucket_B, int c0, int c1) => ref bucket_B[((c1) << 8) | (c0)]; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_BSTAR(Span bucket_B, int c0, int c1) => ref bucket_B[((c0) << 8) | (c1)]; } /* Constructs the suffix array by using the sorted order of type B* suffixes. */ @@ -363,34 +377,29 @@ the sorted order of type B suffixes. */ saint_t divsufsort(ReadOnlySpan T, Span SA, saidx_t n) { - Span bucket_A, bucket_B; saidx_t m; - saint_t err = 0; /* Check arguments. */ if ((T == null) || (SA == null) || (n < 0)) { return -1; } else if (n == 0) { return 0; } else if (n == 1) { SA[0] = 0; return 0; } - else if (n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + else if (n == 2) { /*TODO: checkme*/m = T[0] < T[1] ? 
1 : 0; SA[m ^ 1] = 0; SA[m] = 1; return 0; } - bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE); + using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE); + + Span bucket_A = owner_A.Span; + Span bucket_B = owner_B.Span; /* Suffixsort. */ - if ((bucket_A != null) && (bucket_B != null)) - { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); - construct_SA(T, SA, bucket_A, bucket_B, n, m); - } - else + if (bucket_A == null || bucket_B == null) { - err = -2; + return -2; } - free(bucket_B); - free(bucket_A); - - return err; + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + return 0; } saidx_t From 84cd2a327c2f0267a79742c1d8af5c37b82c93de Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 11 May 2021 13:10:54 -0500 Subject: [PATCH 064/325] Begin converting sssort --- src/DeltaQ.SuffixSorting.LibDivSufSort/{sssort.c => sssort.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/DeltaQ.SuffixSorting.LibDivSufSort/{sssort.c => sssort.cs} (100%) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs similarity index 100% rename from src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.c rename to src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs From 314b268a97af84705fd5a182b44f4e5eb5910b44 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 11 May 2021 13:11:55 -0500 Subject: [PATCH 065/325] Move into partial class --- .../sssort.cs | 598 ++++++++++-------- 1 file changed, 336 insertions(+), 262 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs index 4a18fd2..d1ea8b8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs @@ -1,4 +1,14 @@ -/* +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + public partial class LibDivSufSort + { + /* * sssort.c for libdivsufsort * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. * @@ -24,12 +34,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "divsufsort_private.h" +# include "divsufsort_private.h" -/*- Private Functions -*/ + /*- Private Functions -*/ -static const saint_t lg_table[256]= { + static const saint_t lg_table[256] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -42,11 +52,12 @@ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) -static INLINE -saint_t -ss_ilg(saidx_t n) { + static INLINE + saint_t +ss_ilg(saidx_t n) + { #if SS_BLOCKSIZE == 0 -# if defined(BUILD_DIVSUFSORT64) +#if defined(BUILD_DIVSUFSORT64) return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? @@ -62,15 +73,15 @@ static INLINE ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff])); -# else - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -# endif +#else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? 
+ 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#endif #elif SS_BLOCKSIZE < 256 return lg_table[n]; #else @@ -78,7 +89,7 @@ static INLINE 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]; #endif -} + } #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ @@ -133,31 +144,31 @@ static INLINE #endif /* SS_BLOCKSIZE != 0 */ -/*---------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------*/ -/* Compares two suffixes. */ -static INLINE -saint_t -ss_compare(const sauchar_t *T, - const saidx_t *p1, const saidx_t *p2, + /* Compares two suffixes. */ + static INLINE + saint_t +ss_compare(const sauchar_t* T, + const saidx_t* p1, const saidx_t* p2, saidx_t depth) { - const sauchar_t *U1, *U2, *U1n, *U2n; + const sauchar_t* U1, *U2, *U1n, *U2n; for(U1 = T + depth + *p1, U2 = T + depth + *p2, U1n = T + *(p1 + 1) + 2, U2n = T + *(p2 + 1) + 2; - (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + (U1 Td[PA[*v2]]) { SWAP(v1, v2); } - if(Td[PA[*v2]] > Td[PA[*v3]]) { - if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } - else { return v3; } - } - return v2; -} + /* Simple top-down heapsort. */ + static + void + ss_heapsort(const sauchar_t* Td, const saidx_t* PA, saidx_t* SA, saidx_t size) + { + saidx_t i, m; + saidx_t t; + + m = size; + if ((size % 2) == 0) + { + m--; + if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } -/* Returns the median of five elements. 
*/ -static INLINE -saidx_t * -ss_median5(const sauchar_t *Td, const saidx_t *PA, - saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { - saidx_t *t; - if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } - if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } - if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } - if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } - if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } - if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } - return v3; -} + for (i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if ((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for (i = m - 1; 0 < i; --i) + { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } + } -/* Returns the pivot element. */ -static INLINE -saidx_t * -ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { - saidx_t *middle; - saidx_t t; - t = last - first; - middle = first + t / 2; + /*---------------------------------------------------------------------------*/ + + /* Returns the median of three elements. */ + static INLINE +saidx_t* +ss_median3(const sauchar_t* Td, const saidx_t* PA, + saidx_t* v1, saidx_t* v2, saidx_t* v3) + { + saidx_t* t; + if (Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if (Td[PA[*v2]] > Td[PA[*v3]]) + { + if (Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; + } - if(t <= 512) { - if(t <= 32) { - return ss_median3(Td, PA, first, middle, last - 1); - } else { - t >>= 2; - return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + /* Returns the median of five elements. 
*/ + static INLINE +saidx_t* +ss_median5(const sauchar_t* Td, const saidx_t* PA, + saidx_t* v1, saidx_t* v2, saidx_t* v3, saidx_t* v4, saidx_t* v5) + { + saidx_t* t; + if (Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if (Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if (Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if (Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if (Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if (Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; } - } - t >>= 3; - first = ss_median3(Td, PA, first, first + t, first + (t << 1)); - middle = ss_median3(Td, PA, middle - t, middle, middle + t); - last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - return ss_median3(Td, PA, first, middle, last); -} + /* Returns the pivot element. */ + static INLINE +saidx_t* +ss_pivot(const sauchar_t* Td, const saidx_t* PA, saidx_t* first, saidx_t* last) + { + saidx_t* middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if (t <= 512) + { + if (t <= 32) + { + return ss_median3(Td, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); + } -/*---------------------------------------------------------------------------*/ -/* Binary partition for substrings. 
*/ -static INLINE -saidx_t * -ss_partition(const saidx_t *PA, - saidx_t *first, saidx_t *last, saidx_t depth) { - saidx_t *a, *b; - saidx_t t; - for(a = first - 1, b = last;;) { - for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } - for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } - if(b <= a) { break; } - t = ~*b; - *b = *a; - *a = t; - } - if(first < a) { *first = ~*first; } - return a; -} + /*---------------------------------------------------------------------------*/ + + /* Binary partition for substrings. */ + static INLINE +saidx_t* +ss_partition(const saidx_t* PA, + saidx_t* first, saidx_t* last, saidx_t depth) + { + saidx_t* a, *b; + saidx_t t; + for (a = first - 1, b = last; ;) + { + for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if (b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if (first < a) { *first = ~*first; } + return a; + } -/* Multikey introsort for medium size groups. */ -static -void -ss_mintrosort(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *last, - saidx_t depth) { + /* Multikey introsort for medium size groups. 
*/ + static + void + ss_mintrosort(const sauchar_t* T, const saidx_t* PA, + saidx_t* first, saidx_t* last, + saidx_t depth) + { #define STACK_SIZE SS_MISORT_STACKSIZE - struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; - const sauchar_t *Td; - saidx_t *a, *b, *c, *d, *e, *f; - saidx_t s, t; - saint_t ssize; - saint_t limit; - saint_t v, x = 0; - - for(ssize = 0, limit = ss_ilg(last - first);;) { - - if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { + struct { saidx_t* a, * b, c; saint_t d; +} +stack[STACK_SIZE]; +const sauchar_t* Td; +saidx_t* a, *b, *c, *d, *e, *f; +saidx_t s, t; +saint_t ssize; +saint_t limit; +saint_t v, x = 0; + +for (ssize = 0, limit = ss_ilg(last - first); ;) +{ + + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + { #if 1 < SS_INSERTIONSORT_THRESHOLD if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } #endif - STACK_POP(first, last, depth, limit); - continue; + STACK_POP(first, last, depth, limit); + continue; } Td = T + depth; - if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } - if(limit < 0) { - for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { - if((x = Td[PA[*a]]) != v) { - if(1 < (a - first)) { break; } - v = x; - first = a; + if (limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if (limit < 0) + { + for (a = first + 1, v = Td[PA[*first]]; a < last; ++a) + { + if ((x = Td[PA[*a]]) != v) + { + if (1 < (a - first)) { break; } + v = x; + first = a; + } } - } - if(Td[PA[*first] - 1] < v) { - first = ss_partition(PA, first, a, depth); - } - if((a - first) <= (last - a)) { - if(1 < (a - first)) { - STACK_PUSH(a, last, depth, -1); - last = a, depth += 1, limit = ss_ilg(a - first); - } else { - first = a, limit = -1; + if (Td[PA[*first] - 1] < v) + { + first = ss_partition(PA, first, a, depth); } - } else { - if(1 < (last - a)) { - STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); - first = a, limit = -1; - } else { - last = a, depth += 1, limit = ss_ilg(a - first); + if ((a - 
first) <= (last - a)) + { + if (1 < (a - first)) + { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } + else + { + first = a, limit = -1; + } } - } - continue; + else + { + if (1 < (last - a)) + { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } + else + { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; } /* choose pivot */ @@ -366,77 +410,101 @@ struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; SWAP(*first, *a); /* partition */ - for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } - if(((a = b) < last) && (x < v)) { - for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } + for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if (((a = b) < last) && (x < v)) + { + for (; (++b < last) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) { SWAP(*b, *a); ++a; } + } } - for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } - if((b < (d = c)) && (x > v)) { - for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } + for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if ((b < (d = c)) && (x > v)) + { + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) { SWAP(*c, *d); --d; } + } } - for(; b < c;) { - SWAP(*b, *c); - for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } + for (; b < c;) + { + SWAP(*b, *c); + for (; (++b < c) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) { SWAP(*b, *a); ++a; } + } + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) { SWAP(*c, *d); --d; } + } } - if(a <= d) { - c = b - 1; - - if((s = a - first) > (t = b - a)) { s = t; } - for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if((s = d - c) > (t = last - d - 1)) { s = t; } - for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, 
*f); } - - a = first + (b - a), c = last - (d - c); - b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); - - if((a - first) <= (last - c)) { - if((last - c) <= (c - b)) { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(c, last, depth, limit); - last = a; - } else if((a - first) <= (c - b)) { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } else { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(first, a, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); + if (a <= d) + { + c = b - 1; + + if ((s = a - first) > (t = b - a)) { s = t; } + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if ((s = d - c) > (t = last - d - 1)) { s = t; } + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if ((a - first) <= (last - c)) + { + if ((last - c) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } + else if ((a - first) <= (c - b)) + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } + else + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } } - } else { - if((a - first) <= (c - b)) { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(first, a, depth, limit); - first = c; - } else if((last - c) <= (c - b)) { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } else { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(c, last, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); + else + { + if ((a - first) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } + else if ((last - c) <= (c - b)) + { + 
STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } + else + { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } } - } - } else { - limit += 1; - if(Td[PA[*first] - 1] < v) { - first = ss_partition(PA, first, last, depth); - limit = ss_ilg(last - first); - } - depth += 1; } - } + else + { + limit += 1; + if (Td[PA[*first] - 1] < v) + { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } +} #undef STACK_SIZE } @@ -744,21 +812,22 @@ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; /* Substring sort */ void -sssort(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *last, - saidx_t *buf, saidx_t bufsize, - saidx_t depth, saidx_t n, saint_t lastsuffix) { - saidx_t *a; +sssort(const sauchar_t* T, const saidx_t* PA, + saidx_t* first, saidx_t* last, + saidx_t* buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix) +{ + saidx_t* a; #if SS_BLOCKSIZE != 0 saidx_t *b, *middle, *curbuf; saidx_t j, k, curbufsize, limit; #endif - saidx_t i; + saidx_t i; - if(lastsuffix != 0) { ++first; } + if (lastsuffix != 0) { ++first; } #if SS_BLOCKSIZE == 0 - ss_mintrosort(T, PA, first, last, depth); + ss_mintrosort(T, PA, first, last, depth); #else if((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && @@ -802,14 +871,19 @@ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; } #endif - if(lastsuffix != 0) { - /* Insert last type B* suffix. */ - saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; - for(a = first, i = *(first - 1); - (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); - ++a) { - *(a - 1) = *a; + if (lastsuffix != 0) + { + /* Insert last type B* suffix. 
*/ + saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for (a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) + { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + } - *(a - 1) = i; - } } From d989d2bff69047dfadec66ecf85d08994d31d2ba Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 11 May 2021 14:32:02 -0500 Subject: [PATCH 066/325] Convert more of sssort --- .../sssort.cs | 916 ++++++++---------- 1 file changed, 421 insertions(+), 495 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs index d1ea8b8..48b3767 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs @@ -3,43 +3,21 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using sauchar_t = System.Byte; +using saint_t = System.Int32; +using saidx_t = System.Int32; +using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.LibDivSufSort { public partial class LibDivSufSort { - /* - * sssort.c for libdivsufsort - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -# include "divsufsort_private.h" - - - /*- Private Functions -*/ - - static const saint_t lg_table[256] = { + //# define SS_BLOCKSIZE (1024) + private const int SS_BLOCKSIZE = 1024; + //# define SS_INSERTIONSORT_THRESHOLD (8) + private const int SS_INSERTIONSORT_THRESHOLD = 8; + + private static readonly saint_t[] lg_table_array = new[] { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -49,53 +27,17 @@ public partial class LibDivSufSort 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; + internal static ReadOnlySpan lg_table => lg_table_array; -#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) - - static INLINE - saint_t -ss_ilg(saidx_t n) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saint_t ss_ilg(saidx_t n) { -#if SS_BLOCKSIZE == 0 -#if defined(BUILD_DIVSUFSORT64) - return (n >> 32) ? - ((n >> 48) ? - ((n >> 56) ? - 56 + lg_table[(n >> 56) & 0xff] : - 48 + lg_table[(n >> 48) & 0xff]) : - ((n >> 40) ? - 40 + lg_table[(n >> 40) & 0xff] : - 32 + lg_table[(n >> 32) & 0xff])) : - ((n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? 
- 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff])); -#else - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -#endif -#elif SS_BLOCKSIZE < 256 - return lg_table[n]; -#else - return (n & 0xff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]; -#endif + return (n & 0xff00) != 0 ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; } -#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ - -#if SS_BLOCKSIZE != 0 - -static const saint_t sqq_table[256] = { + private static readonly saint_t[] sqq_table_array = new[] { 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, @@ -113,405 +55,399 @@ static INLINE 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 }; + private static ReadOnlySpan sqq_table => sqq_table_array; -static INLINE -saidx_t -ss_isqrt(saidx_t x) { - saidx_t y, e; - - if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } - e = (x & 0xffff0000) ? - ((x & 0xff000000) ? - 24 + lg_table[(x >> 24) & 0xff] : - 16 + lg_table[(x >> 16) & 0xff]) : - ((x & 0x0000ff00) ? - 8 + lg_table[(x >> 8) & 0xff] : - 0 + lg_table[(x >> 0) & 0xff]); - - if(e >= 16) { - y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); - if(e >= 24) { y = (y + 1 + x / y) >> 1; } - y = (y + 1 + x / y) >> 1; - } else if(e >= 8) { - y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; - } else { - return sqq_table[x] >> 4; - } - - return (x < (y * y)) ? 
y - 1 : y; -} - -#endif /* SS_BLOCKSIZE != 0 */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saidx_t ss_isqrt(saidx_t x) + { + saidx_t y, e; + + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) != 0 ? + ((x & 0xff000000) != 0 ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) != 0 ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if (e >= 16) + { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if (e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + return (x < (y * y)) ? y - 1 : y; + } /*---------------------------------------------------------------------------*/ /* Compares two suffixes. */ - static INLINE - saint_t -ss_compare(const sauchar_t* T, - const saidx_t* p1, const saidx_t* p2, - saidx_t depth) { - const sauchar_t* U1, *U2, *U1n, *U2n; - - for(U1 = T + depth + *p1, - U2 = T + depth + *p2, - U1n = T + *(p1 + 1) + 2, - U2n = T + *(p2 + 1) + 2; - (U1 T, + ReadOnlySpan p1, ReadOnlySpan p2, + saidx_t depth) + { + ReadOnlySpan U1, *U2, *U1n, *U2n; + + for (U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) + { + } - /*---------------------------------------------------------------------------*/ + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); + } -#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) - static INLINE -void -ss_fixdown(const sauchar_t* Td, const saidx_t* PA, - saidx_t* SA, saidx_t i, saidx_t size) - { - saidx_t j, k; - saidx_t v; - saint_t c, d, e; + /*---------------------------------------------------------------------------*/ - for (v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) + /* Insertionsort for small size groups */ + static void ss_insertionsort(ReadOnlySpan T, ReadOnlySpan PA, + saidx_t* first, saidx_t* last, saidx_t depth) { - d = Td[PA[SA[k = j++]]]; - if (d < (e = Td[PA[SA[j]]])) { k = j; d = e; } - if (d <= c) { break; } + saidx_t* i, *j; + saidx_t t; + saint_t r; + + for (i = last - 2; first <= i; --i) + { + for (t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) + { + do { *(j - 1) = *j; } while ((++j < last) && (*j < 0)); + if (last <= j) { break; } + } + if (r == 0) { *j = ~*j; } + *(j - 1) = t; + } } - SA[i] = v; - } - /* Simple top-down heapsort. 
*/ - static - void - ss_heapsort(const sauchar_t* Td, const saidx_t* PA, saidx_t* SA, saidx_t size) - { - saidx_t i, m; - saidx_t t; + /*---------------------------------------------------------------------------*/ - m = size; - if ((size % 2) == 0) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ void ss_fixdown(ReadOnlySpan Td, ReadOnlySpan PA, + saidx_t* SA, saidx_t i, saidx_t size) { - m--; - if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } - } + saidx_t j, k; + saidx_t v; + saint_t c, d, e; - for (i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } - if ((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } - for (i = m - 1; 0 < i; --i) - { - t = SA[0], SA[0] = SA[i]; - ss_fixdown(Td, PA, SA, 0, i); - SA[i] = t; + for (v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) + { + d = Td[PA[SA[k = j++]]]; + if (d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if (d <= c) { break; } + } + SA[i] = v; } - } - - /*---------------------------------------------------------------------------*/ - - /* Returns the median of three elements. */ - static INLINE -saidx_t* -ss_median3(const sauchar_t* Td, const saidx_t* PA, - saidx_t* v1, saidx_t* v2, saidx_t* v3) - { - saidx_t* t; - if (Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } - if (Td[PA[*v2]] > Td[PA[*v3]]) + /* Simple top-down heapsort. */ + static void ss_heapsort(ReadOnlySpan Td, ReadOnlySpan PA, saidx_t* SA, saidx_t size) { - if (Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } - else { return v3; } - } - return v2; - } - - /* Returns the median of five elements. 
*/ - static INLINE -saidx_t* -ss_median5(const sauchar_t* Td, const saidx_t* PA, - saidx_t* v1, saidx_t* v2, saidx_t* v3, saidx_t* v4, saidx_t* v5) - { - saidx_t* t; - if (Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } - if (Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } - if (Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } - if (Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } - if (Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } - if (Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } - return v3; - } - - /* Returns the pivot element. */ - static INLINE -saidx_t* -ss_pivot(const sauchar_t* Td, const saidx_t* PA, saidx_t* first, saidx_t* last) - { - saidx_t* middle; - saidx_t t; + saidx_t i, m; + saidx_t t; - t = last - first; - middle = first + t / 2; - - if (t <= 512) - { - if (t <= 32) + m = size; + if ((size % 2) == 0) { - return ss_median3(Td, PA, first, middle, last - 1); + m--; + if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } } - else + + for (i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if ((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for (i = m - 1; 0 < i; --i) { - t >>= 2; - return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; } } - t >>= 3; - first = ss_median3(Td, PA, first, first + t, first + (t << 1)); - middle = ss_median3(Td, PA, middle - t, middle, middle + t); - last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - return ss_median3(Td, PA, first, middle, last); - } - - - /*---------------------------------------------------------------------------*/ - - /* Binary partition for substrings. 
*/ - static INLINE -saidx_t* -ss_partition(const saidx_t* PA, - saidx_t* first, saidx_t* last, saidx_t depth) - { - saidx_t* a, *b; - saidx_t t; - for (a = first - 1, b = last; ;) - { - for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } - for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } - if (b <= a) { break; } - t = ~*b; - *b = *a; - *a = t; - } - if (first < a) { *first = ~*first; } - return a; - } - /* Multikey introsort for medium size groups. */ - static - void - ss_mintrosort(const sauchar_t* T, const saidx_t* PA, - saidx_t* first, saidx_t* last, - saidx_t depth) - { -#define STACK_SIZE SS_MISORT_STACKSIZE - struct { saidx_t* a, * b, c; saint_t d; -} -stack[STACK_SIZE]; -const sauchar_t* Td; -saidx_t* a, *b, *c, *d, *e, *f; -saidx_t s, t; -saint_t ssize; -saint_t limit; -saint_t v, x = 0; - -for (ssize = 0, limit = ss_ilg(last - first); ;) -{ - if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) - { -#if 1 < SS_INSERTIONSORT_THRESHOLD - if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } -#endif - STACK_POP(first, last, depth, limit); - continue; - } + /*---------------------------------------------------------------------------*/ - Td = T + depth; - if (limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } - if (limit < 0) - { - for (a = first + 1, v = Td[PA[*first]]; a < last; ++a) + /* Returns the median of three elements. */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saidx_t* ss_median3(ReadOnlySpan Td, ReadOnlySpan PA, + saidx_t* v1, saidx_t* v2, saidx_t* v3) { - if ((x = Td[PA[*a]]) != v) + saidx_t* t; + if (Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if (Td[PA[*v2]] > Td[PA[*v3]]) { - if (1 < (a - first)) { break; } - v = x; - first = a; + if (Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } } + return v2; } - if (Td[PA[*first] - 1] < v) + + /* Returns the median of five elements. 
*/ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saidx_t* ss_median5(ReadOnlySpan Td, ReadOnlySpan PA, + saidx_t* v1, saidx_t* v2, saidx_t* v3, saidx_t* v4, saidx_t* v5) { - first = ss_partition(PA, first, a, depth); + saidx_t* t; + if (Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if (Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if (Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if (Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if (Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if (Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; } - if ((a - first) <= (last - a)) + + /* Returns the pivot element. */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saidx_t* ss_pivot(ReadOnlySpan Td, ReadOnlySpan PA, saidx_t* first, saidx_t* last) { - if (1 < (a - first)) - { - STACK_PUSH(a, last, depth, -1); - last = a, depth += 1, limit = ss_ilg(a - first); - } - else + saidx_t* middle; + saidx_t t; + + t = last - first; + middle = first + t / 2; + + if (t <= 512) { - first = a, limit = -1; + if (t <= 32) + { + return ss_median3(Td, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); } - else + + + /*---------------------------------------------------------------------------*/ + + /* Binary partition for substrings. 
*/ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static /*INLINE*/ saidx_t* ss_partition(ReadOnlySpan PA, + saidx_t* first, saidx_t* last, saidx_t depth) { - if (1 < (last - a)) + saidx_t* a, *b; + saidx_t t; + for (a = first - 1, b = last; ;) { - STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); - first = a, limit = -1; - } - else - { - last = a, depth += 1, limit = ss_ilg(a - first); + for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if (b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; } + if (first < a) { *first = ~*first; } + return a; } - continue; - } - /* choose pivot */ - a = ss_pivot(Td, PA, first, last); - v = Td[PA[*a]]; - SWAP(*first, *a); + //#define STACK_SIZE SS_MISORT_STACKSIZE + //#define SS_MISORT_STACKSIZE (16) + private const int STACK_SIZE = 16; - /* partition */ - for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } - if (((a = b) < last) && (x < v)) - { - for (; (++b < last) && ((x = Td[PA[*b]]) <= v);) + private struct stack { - if (x == v) { SWAP(*b, *a); ++a; } + ref saidx_t a; + ref saidx_t b; + saidx_t c; + saint_t d; } - } - for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } - if ((b < (d = c)) && (x > v)) - { - for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) - { - if (x == v) { SWAP(*c, *d); --d; } - } - } - for (; b < c;) - { - SWAP(*b, *c); - for (; (++b < c) && ((x = Td[PA[*b]]) <= v);) - { - if (x == v) { SWAP(*b, *a); ++a; } - } - for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) - { - if (x == v) { SWAP(*c, *d); --d; } - } - } - if (a <= d) - { - c = b - 1; - - if ((s = a - first) > (t = b - a)) { s = t; } - for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if ((s = d - c) > (t = last - d - 1)) { s = t; } - for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + /* Multikey introsort for medium size groups. 
*/ + static void ss_mintrosort(ReadOnlySpan T, ReadOnlySpan PA, + ref saidx_t first, ref saidx_t last, + saidx_t depth) + { + //struct { saidx_t* a, * b, c; saint_t d; } stack[STACK_SIZE]; + Span stack = stackalloc stack[STACK_SIZE]; - a = first + (b - a), c = last - (d - c); - b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + ReadOnlySpan Td; + ref saidx_t a, b, c, d, e, f; + saidx_t s, t; + saint_t ssize; + saint_t limit; + saint_t v, x = 0; - if ((a - first) <= (last - c)) - { - if ((last - c) <= (c - b)) - { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(c, last, depth, limit); - last = a; - } - else if ((a - first) <= (c - b)) - { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } - else - { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(first, a, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); - } - } - else - { - if ((a - first) <= (c - b)) - { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(first, a, depth, limit); - first = c; - } - else if ((last - c) <= (c - b)) - { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } - else + for (ssize = 0, limit = ss_ilg(last - first); ;) { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(c, last, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); + + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + { + if (1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if (limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if (limit < 0) + { + for (a = first + 1, v = Td[PA[ref first]]; a < last; ++a) + { + if ((x = Td[PA[ref a]]) != v) + { + if (1 < (a - first)) { break; } + v = x; + first = a; + } + } + if (Td[PA[ref first] - 1] < v) + { + first = ss_partition(PA, first, a, depth); + } + if ((a - first) <= (last - a)) + { + if (1 < (a - 
first)) + { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } + else + { + first = a, limit = -1; + } + } + else + { + if (1 < (last - a)) + { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } + else + { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[ref a]]; + SWAP(ref first, ref a); + + /* partition */ + for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if (((a = b) < last) && (x < v)) + { + for (; (++b < last) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) { SWAP(*b, *a); ++a; } + } + } + for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if ((b < (d = c)) && (x > v)) + { + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) { SWAP(*c, *d); --d; } + } + } + for (; b < c;) + { + SWAP(*b, *c); + for (; (++b < c) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) { SWAP(*b, *a); ++a; } + } + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) { SWAP(*c, *d); --d; } + } + } + + if (a <= d) + { + c = b - 1; + + if ((s = a - first) > (t = b - a)) { s = t; } + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if ((s = d - c) > (t = last - d - 1)) { s = t; } + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); + + if ((a - first) <= (last - c)) + { + if ((last - c) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } + else if ((a - first) <= (c - b)) + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } + else + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + else + { + if ((a - first) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } + else if ((last - c) <= (c - b)) + { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } + else + { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } + else + { + limit += 1; + if (Td[PA[*first] - 1] < v) + { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } } } - } - else - { - limit += 1; - if (Td[PA[*first] - 1] < v) - { - first = ss_partition(PA, first, last, depth); - limit = ss_ilg(last - first); - } - depth += 1; - } -} -#undef STACK_SIZE -} - -#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ - -/*---------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------*/ #if SS_BLOCKSIZE != 0 @@ -806,84 +742,74 @@ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; #endif /* SS_BLOCKSIZE != 0 */ -/*---------------------------------------------------------------------------*/ + /*---------------------------------------------------------------------------*/ -/*- Function -*/ + /*- Function -*/ -/* Substring sort */ -void -sssort(const sauchar_t* T, const saidx_t* PA, - saidx_t* first, saidx_t* last, - saidx_t* buf, 
saidx_t bufsize, - saidx_t depth, saidx_t n, saint_t lastsuffix) -{ - saidx_t* a; -#if SS_BLOCKSIZE != 0 - saidx_t *b, *middle, *curbuf; - saidx_t j, k, curbufsize, limit; -#endif - saidx_t i; - - if (lastsuffix != 0) { ++first; } - -#if SS_BLOCKSIZE == 0 - ss_mintrosort(T, PA, first, last, depth); -#else - if((bufsize < SS_BLOCKSIZE) && - (bufsize < (last - first)) && - (bufsize < (limit = ss_isqrt(last - first)))) { - if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } - buf = middle = last - limit, bufsize = limit; - } else { - middle = last, limit = 0; - } - for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); -#endif - curbufsize = last - (a + SS_BLOCKSIZE); - curbuf = a + SS_BLOCKSIZE; - if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } - for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { - ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); - } - } -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, a, middle, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, a, middle, depth); -#endif - for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { - if(i & 1) { - ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); - a -= k; - } - } - if(limit != 0) { -#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE - ss_mintrosort(T, PA, middle, last, depth); -#elif 1 < SS_BLOCKSIZE - ss_insertionsort(T, PA, middle, last, depth); -#endif - ss_inplacemerge(T, PA, first, middle, last, depth); - } -#endif - - if (lastsuffix != 0) - { - /* Insert last type B* suffix. 
*/ - saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; - for (a = first, i = *(first - 1); - (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); - ++a) + /* Substring sort */ + void + sssort(ReadOnlySpan T, ReadOnlySpan PA, + ref saidx_t first, ref saidx_t last, + Span buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix) { - *(a - 1) = *a; + saidx_t* a; + saidx_t* b, *middle, *curbuf; + saidx_t j, k, curbufsize, limit; + saidx_t i; + + if (lastsuffix != 0) { ++first; } + + if ((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) + { + if (SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } + else + { + middle = last, limit = 0; + } + for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) + { + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if (curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for (b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) + { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } + ss_mintrosort(T, PA, a, middle, depth); + for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) + { + if (i & 1) + { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if (limit != 0) + { + ss_mintrosort(T, PA, middle, last, depth); + ss_inplacemerge(T, PA, first, middle, last, depth); + } + + if (lastsuffix != 0) + { + /* Insert last type B* suffix. */ + saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for (a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) + { + *(a - 1) = *a; + } + *(a - 1) = i; + } } - *(a - 1) = i; - } -} } } From 3da3d4524d6cb72c1ce5ffaf2df75235fd20884f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 7 Jun 2021 13:51:50 -0500 Subject: [PATCH 067/325] Interim commit --- .../Utils.cs | 21 --- .../divsufsort.cs | 152 ++++++++---------- .../sssort.cs | 19 +-- 3 files changed, 73 insertions(+), 119 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 6e65d28..ac0b023 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -123,21 +123,6 @@ static saidx_t binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t } if (n <= 1) { return 0; } - if ((B = A) == null) - { - /* Allocate n*sizeof(saidx_t) bytes of memory. */ - try - { - B = new saidx_t[n];// (saidx_t*)malloc((size_t)n * sizeof(saidx_t)); - //if (B == null) { return -2; } - } - //TODO: fixme - catch (Exception) - { - return -2; - } - } - /* Inverse BW transform. */ for (c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } for (i = 0; i < n; ++i) { ++C[T[i]]; } @@ -160,12 +145,6 @@ static saidx_t binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t p = B[p - 1]; } - if (A == null) - { - /* Deallocate memory. 
*/ - free(B); - } - return 0; } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index 048c11f..2238504 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -9,11 +9,19 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace DeltaQ.SuffixSorting.LibDivSufSort { public partial class LibDivSufSort { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_A(Span bucket_A, int c0) => ref bucket_A[c0]; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_B(Span bucket_B, int c0, int c1) => ref bucket_B[((c1) << 8) | (c0)]; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static ref int BUCKET_BSTAR(Span bucket_B, int c0, int c1) => ref bucket_B[((c0) << 8) | (c1)]; + private const int ALPHABET_SIZE = sizeof(byte) + 1; private const int BUCKET_A_SIZE = ALPHABET_SIZE; private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; @@ -25,7 +33,7 @@ public partial class LibDivSufSort Span bucket_A, Span bucket_B, saidx_t n) { - saidx_t PAb, ISAb, buf; + Span PAb, ISAb, buf; #if _OPENMP saidx_t* curbuf; saidx_t l; @@ -86,51 +94,24 @@ begins with the same first two characters. if (0 < m) { /* Sort the type B* suffixes by their first two characters. */ - PAb = SA + n - m; ISAb = SA + m; + //PAb = SA + n - m; ISAb = SA + m; + PAb = SA[(n - m)..]; + ISAb = SA[m..]; for (i = m - 2; 0 <= i; --i) { - t = PAb[i], c0 = T[t], c1 = T[t + 1]; + t = PAb[i]; + c0 = T[t]; + c1 = T[t + 1]; SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = i; } - t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + t = PAb[m - 1]; + c0 = T[t]; + c1 = T[t + 1]; SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = m - 1; /* Sort the type B* substrings using sssort. 
*/ -#if _OPENMP - tmp = omp_get_max_threads(); - buf = SA + m, bufsize = (n - (2 * m)) / tmp; - c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; -#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) - { - tmp = omp_get_thread_num(); - curbuf = buf + tmp * bufsize; - k = 0; - for (; ; ) - { -#pragma omp critical(sssort_lock) - { - if (0 < (l = j)) - { - d0 = c0, d1 = c1; - do - { - k = BUCKET_BSTAR(d0, d1); - if (--d1 <= d0) - { - d1 = ALPHABET_SIZE - 1; - if (--d0 < 0) { break; } - } - } while (((l - k) <= 1) && (0 < (l = k))); - c0 = d0, c1 = d1, j = k; - } - } - if (l == 0) { break; } - sssort(T, PAb, SA + k, SA + l, - curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); - } - } -#else - buf = SA + m, bufsize = n - (2 * m); + buf = SA[m..]; + bufsize = n - (2 * m); for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) @@ -138,12 +119,10 @@ begins with the same first two characters. i = BUCKET_BSTAR(bucket_B, c0, c1); if (1 < (j - i)) { - sssort(T, PAb, SA + i, SA + j, - buf, bufsize, 2, n, *(SA + i) == (m - 1)); + sssort(T, PAb, ref SA[i], ref SA[j], buf, bufsize, 2, n, SA[i] == (m - 1)); } } } -#endif /* Compute ranks of type B* substrings. */ for (i = m - 1; 0 <= i; --i) @@ -196,23 +175,15 @@ begins with the same first two characters. } return m; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_A(Span bucket_A, int c0) => ref bucket_A[c0]; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_B(Span bucket_B, int c0, int c1) => ref bucket_B[((c1) << 8) | (c0)]; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_BSTAR(Span bucket_B, int c0, int c1) => ref bucket_B[((c0) << 8) | (c1)]; } /* Constructs the suffix array by using the sorted order of type B* suffixes. 
*/ static void - construct_SA(ReadOnlySpan T, saidx_t* SA, - saidx_t* bucket_A, saidx_t* bucket_B, + construct_SA(ReadOnlySpan T, Span SA, + Span bucket_A, Span bucket_B, saidx_t n, saidx_t m) { - saidx_t* i, *j, *k; saidx_t s; saint_t c0, c1, c2; @@ -223,31 +194,34 @@ the sorted order of type B* suffixes. */ for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { /* Scan the suffix array from right to left. */ - for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = null, c2 = -1; + c2 = -1; + for (ref saidx_t i = ref Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_BSTAR(bucket_B, c1, c1 + 1)), + j = ref Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_A(bucket_A, c1 + 1) - 1), + k = ref Unsafe.NullRef(); i <= j; --j) { - if (0 < (s = *j)) + if (0 < (s = j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); - *j = ~s; + Debug.Assert(T[s] == c1); + Debug.Assert(((s + 1) < n) && (T[s] <= T[s + 1])); + Debug.Assert(T[s - 1] <= T[s]); + j = ~s; c0 = T[--s]; if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } if (c0 != c2) { - if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); + if (0 <= c2) { BUCKET_B(bucket_B, c2, c1) = k - MemoryMarshal.GetReference(SA); } + k = Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_B(bucket_B, c2 = c0, c1)); } - assert(k < j); - *k-- = s; + Debug.Assert(k < j); + k = ref Unsafe.Subtract(ref k, 1); + k = s; } else { - assert(((s == 0) && (T[s] == c1)) || (s < 0)); - *j = ~s; + Debug.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); + j = ~s; } } } @@ -262,7 +236,7 @@ the sorted order of type B suffixes. */ { if (0 < (s = *i)) { - assert(T[s - 1] >= T[s]); + Debug.Assert(T[s - 1] >= T[s]); c0 = T[--s]; if ((s == 0) || (T[s - 1] < c0)) { s = ~s; } if (c0 != c2) @@ -270,12 +244,12 @@ the sorted order of type B suffixes. 
*/ BUCKET_A(c2) = k - SA; k = SA + BUCKET_A(c2 = c0); } - assert(i < k); + Debug.Assert(i < k); *k++ = s; } else { - assert(s < 0); + Debug.Assert(s < 0); *i = ~s; } } @@ -285,11 +259,11 @@ the sorted order of type B suffixes. */ by using the sorted order of type B* suffixes. */ static saidx_t - construct_BWT(ReadOnlySpan T, saidx_t* SA, - saidx_t* bucket_A, saidx_t* bucket_B, + construct_BWT(ReadOnlySpan T, Span SA, + Span bucket_A, Span bucket_B, saidx_t n, saidx_t m) { - saidx_t* i, *j, *k, *orig; + saidx_t i, j, k, orig; saidx_t s; saint_t c0, c1, c2; @@ -307,9 +281,9 @@ the sorted order of type B* suffixes. */ { if (0 < (s = *j)) { - assert(T[s] == c1); - assert(((s + 1) < n) && (T[s] <= T[s + 1])); - assert(T[s - 1] <= T[s]); + Debug.Assert(T[s] == c1); + Debug.Assert(((s + 1) < n) && (T[s] <= T[s + 1])); + Debug.Assert(T[s - 1] <= T[s]); c0 = T[--s]; *j = ~((saidx_t)c0); if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } @@ -318,17 +292,17 @@ the sorted order of type B* suffixes. */ if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } - assert(k < j); + Debug.Assert(k < j); *k-- = s; } else if (s != 0) { *j = ~s; -#if !NDEBUG +#if DEBUG } else { - assert(T[s] == c1); + Debug.Assert(T[s] == c1); #endif } } @@ -344,7 +318,7 @@ the sorted order of type B suffixes. */ { if (0 < (s = *i)) { - assert(T[s - 1] >= T[s]); + Debug.Assert(T[s - 1] >= T[s]); c0 = T[--s]; *i = c0; if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } @@ -353,7 +327,7 @@ the sorted order of type B suffixes. */ BUCKET_A(c2) = k - SA; k = SA + BUCKET_A(c2 = c0); } - assert(i < k); + Debug.Assert(i < k); *k++ = s; } else if (s != 0) @@ -403,20 +377,20 @@ the sorted order of type B suffixes. */ } saidx_t - divbwt(ReadOnlySpan T, sauchar_t* U, saidx_t* A, saidx_t n) + divbwt(ReadOnlySpan T, Span U, Span A, saidx_t n) { - saidx_t* B; - saidx_t* bucket_A, *bucket_B; + Span B; + Span bucket_A, bucket_B; saidx_t m, pidx, i; /* Check arguments. 
*/ if ((T == null) || (U == null) || (n < 0)) { return -1; } else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } - if ((B = A) == null) { B = (saidx_t*)malloc((size_t)(n + 1) * sizeof(saidx_t)); } - bucket_A = (saidx_t*)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t*)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - + if ((B = A) == null) { B = new saidx_t[n + 1]; } + bucket_A = new saidx_t[BUCKET_A_SIZE]; + bucket_B = new saidx_t[BUCKET_B_SIZE]; + /* Burrows-Wheeler Transform. */ if ((B != null) && (bucket_A != null) && (bucket_B != null)) { @@ -434,9 +408,9 @@ the sorted order of type B suffixes. */ pidx = -2; } - free(bucket_B); - free(bucket_A); - if (A == null) { free(B); } + //free(bucket_B); + //free(bucket_A); + //if (A == null) { free(B); } return pidx; } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs index 48b3767..097bf86 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs @@ -747,18 +747,17 @@ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; /*- Function -*/ /* Substring sort */ - void - sssort(ReadOnlySpan T, ReadOnlySpan PA, - ref saidx_t first, ref saidx_t last, + void sssort(ReadOnlySpan T, ReadOnlySpan PA, + saidx_t first, saidx_t last, Span buf, saidx_t bufsize, - saidx_t depth, saidx_t n, saint_t lastsuffix) + saidx_t depth, saidx_t n, bool lastsuffix) { - saidx_t* a; - saidx_t* b, *middle, *curbuf; + ref saidx_t a; + ref saidx_t b, middle, curbuf; saidx_t j, k, curbufsize, limit; saidx_t i; - if (lastsuffix != 0) { ++first; } + if (lastsuffix) { ++first; } if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && @@ -797,10 +796,12 @@ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; ss_inplacemerge(T, PA, first, middle, last, depth); } - if (lastsuffix != 0) + if (lastsuffix) { /* Insert last type B* suffix. 
*/ - saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + Span PAi = stackalloc saidx_t[2]; + PAi[0] = PA[*(first - 1)]; + PAi[1] = n - 2; for (a = first, i = *(first - 1); (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); ++a) From 3d075130417eece65ac927fd4a4877f99df10b7c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 15 Jul 2021 02:48:30 -0500 Subject: [PATCH 068/325] interim commit --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 2 +- .../LibDivSufSort.cs | 16 +- .../RsDivSufSort.cs | 349 ++++++++++++++++++ .../divsufsort.cs | 155 -------- 4 files changed, 365 insertions(+), 157 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index 2def991..ccd0faf 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -1,7 +1,7 @@ - net5.0;netstandard2.0 + net5.0 DeltaQ jzebedee true diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index 2efcbea..cc0627e 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -12,7 +12,21 @@ public IMemoryOwner Sort(ReadOnlySpan textBuffer) public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { - throw new NotImplementedException(); + if(textBuffer.Length != suffixBuffer.Length) + { + throw new ArgumentException($"{nameof(textBuffer)} and {nameof(suffixBuffer)} should have the same length"); + } + + //TODO: add 0/1/2 fast cases + + //let T = Text(T); + //let mut SA = SuffixArray(SA); + + //// Suffixsort. 
+ //construct_SA(&T, &mut SA, res.A, res.B, res.m); + var res = sort_typeBstar(textBuffer, SA); + //construct_SA(&T, &mut SA, res.A, res.B, res.m); + construct_SA(textBuffer, suffixBuffer, res.A, res.B, res.m); } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs new file mode 100644 index 0000000..02d58c4 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -0,0 +1,349 @@ +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + public partial class DivSufSort + { + private const int ALPHABET_SIZE = sizeof(byte) + 1; + private const int BUCKET_A_SIZE = ALPHABET_SIZE; + private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; + + public ref struct SortTypeBstarResult + { + public Span A; + public Span B; + public int m; + } + + public ref struct BBucket + { + public readonly Span B; + public BBucket(Span B) + { + this.B = B; + } + + public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; + } + + //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { + public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) + { + var n = T.Length; + + using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE); + using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE); + + Span A = owner_A.Span; + BBucket B = new(owner_B.Span); + + int c0, c1, i, j, k, t, m; + + // Count the number of occurences of the first one or two characters of each + // type A, B and B* suffix. Moreover, store the beginning position of all + // type B* suffixes into the array SA. 
+ i = n - 1; + m = n; + c0 = T[n - 1]; + + while (0 <= i) + { + // type A suffix (originally do..while) + while (true) + { + c1 = c0; + A[c1] += 1; + + // original loop condition + i -= 1; + if (0 > i) + { + break; + } + + c0 = T[i]; + if (c0 < c1) + { + break; + } + } + + if (0 <= i) + { + // type B* suffix + B.bstar()[(c0, c1)] += 1; + + m -= 1; + SA[m] = i; + + // type B suffix + + // init + i -= 1; + c1 = c0; + + while (true) + { + // cond + if (0 > i) + { + break; + } + c0 = T[i]; + if (c0 > c1) + { + break; + } + + // body + B[(c0, c1)] += 1; + + // iter + i -= 1; + c1 = c0; + } + } + } + m = n - m; + + // // Note: A type B* suffix is lexicographically smaller than a type B suffix + // // that beings with the same first two characters. + + // // Calculate the index of start/end point of each bucket. + // { + // i = 0; + // j = 0; + // for c0 in 0..(ALPHABET_SIZE as Idx) { + // // body + // t = i + A[c0]; + // A[c0] = i + j; // start point + // i = t + B.b()[(c0, c0)]; + + // for c1 in (c0 + 1)..(ALPHABET_SIZE as Idx) { + // j += B.bstar()[(c0, c1)]; + // B.bstar()[(c0, c1)] = j; // end point + // i += B.b()[(c0, c1)]; + // } + // } + // } + + // if (0 < m) { + // // Sort the type B* suffixes by their first two characters + // let PAb = SAPtr(n - m); + // let ISAb = SAPtr(m); + + // for i in (0..=(m - 2)).rev() { + // t = SA[PAb + i]; + // c0 = T.get(t); + // c1 = T.get(t + 1); + // B.bstar()[(c0, c1)] -= 1; + // SA[B.bstar()[(c0, c1)]] = i; + // } + // t = SA[PAb + m - 1]; + // c0 = T.get(t); + // c1 = T.get(t + 1); + // B.bstar()[(c0, c1)] -= 1; + // SA[B.bstar()[(c0, c1)]] = m - 1; + + // // Sort the type B* substrings using sssort. 
+ // let buf = SAPtr(m); + // let bufsize = n - (2 * m); + + // // init (outer) + // c0 = ALPHABET_SIZE as Idx - 2; + // j = m; + // while 0 < j { + // // init (inner) + // c1 = ALPHABET_SIZE as Idx - 1; + // while c0 < c1 { + // // body (inner) + // i = B.bstar()[(c0, c1)]; + + // if (1 < (j - i)) { + // SA_dump!(&SA.range(i..j), "sssort(A)"); + // sssort::sssort( + // T, + // SA, + // PAb, + // SAPtr(i), + // SAPtr(j), + // buf, + // bufsize, + // 2, + // n, + // SA[i] == (m - 1), + // ); + // SA_dump!(&SA.range(i..j), "sssort(B)"); + // } + + // // iter (inner) + // j = i; + // c1 -= 1; + // } + + // // iter (outer) + // c0 -= 1; + // } + + // // Compute ranks of type B* substrings + // i = m - 1; + // while 0 <= i { + // if (0 <= SA[i]) { + // j = i; + // loop { + // { + // let SAi = SA[i]; + // SA[ISAb + SAi] = i; + // } + + // i -= 1; + // if !((0 <= i) && (0 <= SA[i])) { + // break; + // } + // } + + // SA[i + 1] = i - j; + // if (i <= 0) { + // break; + // } + // } + // j = i; + // loop { + // SA[i] = !SA[i]; + // { + // let idx = ISAb + SA[i]; + // SA[idx] = j; + // } + + // i -= 1; + // if !(SA[i] < 0) { + // break; + // } + // } + // { + // let idx = ISAb + SA[i]; + // SA[idx] = j; + // } + + // i -= 1; + // } + + // // Construct the inverse suffix array of type B* suffixes using trsort. 
+ // trsort::trsort(ISAb, SA, m, 1); + + // // Set the sorted order of type B* suffixes + // { + // // init + // i = n - 1; + // j = m; + // c0 = T.get(n - 1); + // while 0 <= i { + // // init + // i -= 1; + // c1 = c0; + + // loop { + // // cond + // if !(0 <= i) { + // break; + // } + // c0 = T.get(i); + // if !(c0 >= c1) { + // break; + // } + + // // body (empty) + + // // iter + // i -= 1; + // c1 = c0; + // } + + // if 0 <= i { + // t = i; + + // // init + // i -= 1; + // c1 = c0; + + // loop { + // // cond + // if !(0 <= i) { + // break; + // } + // c0 = T.get(i); + // if !(c0 <= c1) { + // break; + // } + + // // body (empty) + + // // iter + // i -= 1; + // c1 = c0; + // } + + // j -= 1; + // { + // let pos = SA[ISAb + j]; + // SA[pos] = if (t == 0) || (1 < (t - i)) { t } else { !t }; + // } + // } + // } + // } // End: Set the sorted order of type B* suffixes + + // // Calculate the index of start/end point of each bucket + // { + // B.b()[(ALPHABET_SIZE as Idx - 1, ALPHABET_SIZE as Idx - 1)] = n; // end point + + // // init + // c0 = ALPHABET_SIZE as Idx - 2; + // k = m - 1; + + // while 0 <= c0 { + // i = A[c0 + 1] - 1; + + // // init + // c1 = ALPHABET_SIZE as Idx - 1; + // while c0 < c1 { + // t = i - B.b()[(c0, c1)]; + // B.b()[(c0, c1)] = i; // end point + + // // Move all type B* suffixes to the correct position + // { + // // init + // i = t; + // j = B.bstar()[(c0, c1)]; + + // while j <= k { + // SA[i] = SA[k]; + + // // iter + // i -= 1; + // k -= 1; + // } + // } // End: Move all type B* suffixes to the correct position + + // // iter + // c1 -= 1; + // } + // B.bstar()[(c0, c0 + 1)] = i - B.b()[(c0, c0)] + 1; + // B.b()[(c0, c0)] = i; // end point + + // // iter + // c0 -= 1; + // } + // } // End: Calculate the index of start/end point of each bucket + // } + + // SortTypeBstarResult { A, B, m } + } + //} + + } +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs index 
2238504..c90feb6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs @@ -22,161 +22,6 @@ public partial class LibDivSufSort [MethodImpl(MethodImplOptions.AggressiveInlining)] static ref int BUCKET_BSTAR(Span bucket_B, int c0, int c1) => ref bucket_B[((c0) << 8) | (c1)]; - private const int ALPHABET_SIZE = sizeof(byte) + 1; - private const int BUCKET_A_SIZE = ALPHABET_SIZE; - private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; - - /* Sorts suffixes of type B*. */ - static - saidx_t - sort_typeBstar(ReadOnlySpan T, Span SA, - Span bucket_A, Span bucket_B, - saidx_t n) - { - Span PAb, ISAb, buf; -#if _OPENMP - saidx_t* curbuf; - saidx_t l; -#endif - saidx_t i, j, k, t, m, bufsize; - saint_t c0, c1; -#if _OPENMP - saint_t d0, d1; - int tmp; -#endif - - /* Initialize bucket arrays. */ - Debug.Assert(bucket_A.Length == BUCKET_A_SIZE); - Debug.Assert(bucket_B.Length == BUCKET_B_SIZE); - bucket_A.Clear(); - bucket_B.Clear(); - - /* Count the number of occurrences of the first one or two characters of each - type A, B and B* suffix. Moreover, store the beginning position of all - type B* suffixes into the array SA. */ - for (i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) - { - /* type A suffix. */ - do { ++BUCKET_A(bucket_A, c1 = c0); } while ((0 <= --i) && ((c0 = T[i]) >= c1)); - if (0 <= i) - { - /* type B* suffix. */ - ++BUCKET_BSTAR(bucket_B, c0, c1); - SA[--m] = i; - /* type B suffix. */ - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) - { - ++BUCKET_B(bucket_B, c0, c1); - } - } - } - m = n - m; - /* - note: - A type B* suffix is lexicographically smaller than a type B suffix that - begins with the same first two characters. - */ - - /* Calculate the index of start/end point of each bucket. 
*/ - for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) - { - t = i + BUCKET_A(bucket_A, c0); - BUCKET_A(bucket_A, c0) = i + j; /* start point */ - i = t + BUCKET_B(bucket_B, c0, c0); - for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) - { - j += BUCKET_BSTAR(bucket_B, c0, c1); - BUCKET_BSTAR(bucket_B, c0, c1) = j; /* end point */ - i += BUCKET_B(bucket_B, c0, c1); - } - } - - if (0 < m) - { - /* Sort the type B* suffixes by their first two characters. */ - //PAb = SA + n - m; ISAb = SA + m; - PAb = SA[(n - m)..]; - ISAb = SA[m..]; - for (i = m - 2; 0 <= i; --i) - { - t = PAb[i]; - c0 = T[t]; - c1 = T[t + 1]; - SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = i; - } - t = PAb[m - 1]; - c0 = T[t]; - c1 = T[t + 1]; - SA[--BUCKET_BSTAR(bucket_B, c0, c1)] = m - 1; - - /* Sort the type B* substrings using sssort. */ - buf = SA[m..]; - bufsize = n - (2 * m); - for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) - { - for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) - { - i = BUCKET_BSTAR(bucket_B, c0, c1); - if (1 < (j - i)) - { - sssort(T, PAb, ref SA[i], ref SA[j], buf, bufsize, 2, n, SA[i] == (m - 1)); - } - } - } - - /* Compute ranks of type B* substrings. */ - for (i = m - 1; 0 <= i; --i) - { - if (0 <= SA[i]) - { - j = i; - do { ISAb[SA[i]] = i; } while ((0 <= --i) && (0 <= SA[i])); - SA[i + 1] = i - j; - if (i <= 0) { break; } - } - j = i; - do { ISAb[SA[i] = ~SA[i]] = j; } while (SA[--i] < 0); - ISAb[SA[i]] = j; - } - - /* Construct the inverse suffix array of type B* suffixes using trsort. */ - trsort(ISAb, SA, m, 1); - - /* Set the sorted order of tyoe B* suffixes. */ - for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) - { - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } - if (0 <= i) - { - t = i; - for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } - SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; - } - } - - /* Calculate the index of start/end point of each bucket. 
*/ - BUCKET_B(bucket_B, ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ - for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) - { - i = BUCKET_A(bucket_A, c0 + 1) - 1; - for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) - { - t = i - BUCKET_B(bucket_B, c0, c1); - BUCKET_B(bucket_B, c0, c1) = i; /* end point */ - - /* Move all type B* suffixes to the correct position. */ - for (i = t, j = BUCKET_BSTAR(bucket_B, c0, c1); - j <= k; - --i, --k) { SA[i] = SA[k]; } - } - BUCKET_BSTAR(bucket_B, c0, c0 + 1) = i - BUCKET_B(bucket_B, c0, c0) + 1; /* start point */ - BUCKET_B(bucket_B, c0, c0) = i; /* end point */ - } - } - - return m; - } - /* Constructs the suffix array by using the sorted order of type B* suffixes. */ static void From e30b38d6f32487e9b755c90b29d95859f276b176 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 15 Jul 2021 03:09:25 -0500 Subject: [PATCH 069/325] interim commit --- .../RsDivSufSort.cs | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 02d58c4..2176372 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1,6 +1,7 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; @@ -13,6 +14,35 @@ public partial class DivSufSort private const int BUCKET_A_SIZE = ALPHABET_SIZE; private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; + public void divsufsort(ReadOnlySpan T, Span SA) + { + Debug.Assert(T.Length == SA.Length); + + var n = T.Length; + + switch(n) + { + case 0: return; + case 1: + SA[0] = 0; + return; + //case 2: + // if(T[0] < T[1]) + // { + // SA.copy + // } + // break; + } + + var result = sort_typeBstar(T, SA); + construct_SA(T, SA, result.A, result.B, 
result.m); + } + + private void construct_SA(ReadOnlySpan t, Span sA, Span a, Span b, int m) + { + throw new NotImplementedException(); + } + public ref struct SortTypeBstarResult { public Span A; @@ -20,19 +50,24 @@ public ref struct SortTypeBstarResult public int m; } + public ref struct BStarBucket + { + public readonly Span B; + public BStarBucket(Span B) => this.B = B; + + public ref int this[(int c0, int c1) index] => ref B[(index.c0 << 8) | index.c1]; + } + public ref struct BBucket { public readonly Span B; - public BBucket(Span B) - { - this.B = B; - } + public BBucket(Span B) => this.B = B; public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; } //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { - public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) + public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) { var n = T.Length; @@ -40,7 +75,10 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE); Span A = owner_A.Span; - BBucket B = new(owner_B.Span); + Span B = owner_B.Span; + + BBucket Bb = new(B); + BStarBucket Bstar = new(B); int c0, c1, i, j, k, t, m; @@ -76,7 +114,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA if (0 <= i) { // type B* suffix - B.bstar()[(c0, c1)] += 1; + Bstar[(c0, c1)] += 1; m -= 1; SA[m] = i; @@ -101,7 +139,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA } // body - B[(c0, c1)] += 1; + Bb[(c0, c1)] += 1; // iter i -= 1; From b339b7dd17a472f5904d1a82ce6c3aff3f6275e7 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 15 Jul 2021 03:11:43 -0500 Subject: [PATCH 070/325] interim commit --- .../RsDivSufSort.cs | 489 +++++++++--------- 1 file changed, 248 insertions(+), 241 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2176372..485b220 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -20,18 +20,18 @@ public void divsufsort(ReadOnlySpan T, Span SA) var n = T.Length; - switch(n) + switch (n) { case 0: return; case 1: SA[0] = 0; return; - //case 2: - // if(T[0] < T[1]) - // { - // SA.copy - // } - // break; + //case 2: + // if(T[0] < T[1]) + // { + // SA.copy + // } + // break; } var result = sort_typeBstar(T, SA); @@ -149,239 +149,246 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } m = n - m; - // // Note: A type B* suffix is lexicographically smaller than a type B suffix - // // that beings with the same first two characters. - - // // Calculate the index of start/end point of each bucket. - // { - // i = 0; - // j = 0; - // for c0 in 0..(ALPHABET_SIZE as Idx) { - // // body - // t = i + A[c0]; - // A[c0] = i + j; // start point - // i = t + B.b()[(c0, c0)]; - - // for c1 in (c0 + 1)..(ALPHABET_SIZE as Idx) { - // j += B.bstar()[(c0, c1)]; - // B.bstar()[(c0, c1)] = j; // end point - // i += B.b()[(c0, c1)]; - // } - // } - // } - - // if (0 < m) { - // // Sort the type B* suffixes by their first two characters - // let PAb = SAPtr(n - m); - // let ISAb = SAPtr(m); - - // for i in (0..=(m - 2)).rev() { - // t = SA[PAb + i]; - // c0 = T.get(t); - // c1 = T.get(t + 1); - // B.bstar()[(c0, c1)] -= 1; - // SA[B.bstar()[(c0, c1)]] = i; - // } - // t = SA[PAb + m - 1]; - // c0 = T.get(t); - // c1 = T.get(t + 1); - // B.bstar()[(c0, c1)] -= 1; - // SA[B.bstar()[(c0, c1)]] = m - 1; - - // // Sort the type B* substrings using sssort. 
- // let buf = SAPtr(m); - // let bufsize = n - (2 * m); - - // // init (outer) - // c0 = ALPHABET_SIZE as Idx - 2; - // j = m; - // while 0 < j { - // // init (inner) - // c1 = ALPHABET_SIZE as Idx - 1; - // while c0 < c1 { - // // body (inner) - // i = B.bstar()[(c0, c1)]; - - // if (1 < (j - i)) { - // SA_dump!(&SA.range(i..j), "sssort(A)"); - // sssort::sssort( - // T, - // SA, - // PAb, - // SAPtr(i), - // SAPtr(j), - // buf, - // bufsize, - // 2, - // n, - // SA[i] == (m - 1), - // ); - // SA_dump!(&SA.range(i..j), "sssort(B)"); - // } - - // // iter (inner) - // j = i; - // c1 -= 1; - // } - - // // iter (outer) - // c0 -= 1; - // } - - // // Compute ranks of type B* substrings - // i = m - 1; - // while 0 <= i { - // if (0 <= SA[i]) { - // j = i; - // loop { - // { - // let SAi = SA[i]; - // SA[ISAb + SAi] = i; - // } - - // i -= 1; - // if !((0 <= i) && (0 <= SA[i])) { - // break; - // } - // } - - // SA[i + 1] = i - j; - // if (i <= 0) { - // break; - // } - // } - // j = i; - // loop { - // SA[i] = !SA[i]; - // { - // let idx = ISAb + SA[i]; - // SA[idx] = j; - // } - - // i -= 1; - // if !(SA[i] < 0) { - // break; - // } - // } - // { - // let idx = ISAb + SA[i]; - // SA[idx] = j; - // } - - // i -= 1; - // } - - // // Construct the inverse suffix array of type B* suffixes using trsort. 
- // trsort::trsort(ISAb, SA, m, 1); - - // // Set the sorted order of type B* suffixes - // { - // // init - // i = n - 1; - // j = m; - // c0 = T.get(n - 1); - // while 0 <= i { - // // init - // i -= 1; - // c1 = c0; - - // loop { - // // cond - // if !(0 <= i) { - // break; - // } - // c0 = T.get(i); - // if !(c0 >= c1) { - // break; - // } - - // // body (empty) - - // // iter - // i -= 1; - // c1 = c0; - // } - - // if 0 <= i { - // t = i; - - // // init - // i -= 1; - // c1 = c0; - - // loop { - // // cond - // if !(0 <= i) { - // break; - // } - // c0 = T.get(i); - // if !(c0 <= c1) { - // break; - // } - - // // body (empty) - - // // iter - // i -= 1; - // c1 = c0; - // } - - // j -= 1; - // { - // let pos = SA[ISAb + j]; - // SA[pos] = if (t == 0) || (1 < (t - i)) { t } else { !t }; - // } - // } - // } - // } // End: Set the sorted order of type B* suffixes - - // // Calculate the index of start/end point of each bucket - // { - // B.b()[(ALPHABET_SIZE as Idx - 1, ALPHABET_SIZE as Idx - 1)] = n; // end point - - // // init - // c0 = ALPHABET_SIZE as Idx - 2; - // k = m - 1; - - // while 0 <= c0 { - // i = A[c0 + 1] - 1; - - // // init - // c1 = ALPHABET_SIZE as Idx - 1; - // while c0 < c1 { - // t = i - B.b()[(c0, c1)]; - // B.b()[(c0, c1)] = i; // end point - - // // Move all type B* suffixes to the correct position - // { - // // init - // i = t; - // j = B.bstar()[(c0, c1)]; - - // while j <= k { - // SA[i] = SA[k]; - - // // iter - // i -= 1; - // k -= 1; - // } - // } // End: Move all type B* suffixes to the correct position - - // // iter - // c1 -= 1; - // } - // B.bstar()[(c0, c0 + 1)] = i - B.b()[(c0, c0)] + 1; - // B.b()[(c0, c0)] = i; // end point - - // // iter - // c0 -= 1; - // } - // } // End: Calculate the index of start/end point of each bucket - // } - - // SortTypeBstarResult { A, B, m } - } - //} + // Note: A type B* suffix is lexicographically smaller than a type B suffix + // that beings with the same first two characters. 
+ + // Calculate the index of start/end point of each bucket. + { + i = 0; + j = 0; + for (c0 = 0; c0 < ALPHABET_SIZE; c0++) + { + // body + t = i + A[c0]; + A[c0] = i + j; // start point + i = t + Bb[(c0, c0)]; + + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; c1++) + { + j += Bstar[(c0, c1)]; + Bstar[(c0, c1)] = j; // end point + i += Bb[(c0, c1)]; + } + } + } + + if (0 < m) + { + // Sort the type B* suffixes by their first two characters + let PAb = SAPtr(n - m); + let ISAb = SAPtr(m); + + for i in (0.. = (m - 2)).rev() { + t = SA[PAb + i]; + c0 = T.get(t); + c1 = T.get(t + 1); + B.bstar()[(c0, c1)] -= 1; + SA[B.bstar()[(c0, c1)]] = i; + } + t = SA[PAb + m - 1]; + c0 = T.get(t); + c1 = T.get(t + 1); + B.bstar()[(c0, c1)] -= 1; + SA[B.bstar()[(c0, c1)]] = m - 1; + + // Sort the type B* substrings using sssort. + let buf = SAPtr(m); + let bufsize = n - (2 * m); + + // init (outer) + c0 = ALPHABET_SIZE as Idx - 2; + j = m; + while 0 < j { + // init (inner) + c1 = ALPHABET_SIZE as Idx - 1; + while c0 < c1 { + // body (inner) + i = B.bstar()[(c0, c1)]; + + if (1 < (j - i)) + { + SA_dump!(&SA.range(i..j), "sssort(A)"); + sssort::sssort( + T, + SA, + PAb, + SAPtr(i), + SAPtr(j), + buf, + bufsize, + 2, + n, + SA[i] == (m - 1), + + ); + SA_dump!(&SA.range(i..j), "sssort(B)"); + } + + // iter (inner) + j = i; + c1 -= 1; + } + + // iter (outer) + c0 -= 1; + } + + // Compute ranks of type B* substrings + i = m - 1; + while 0 <= i { + if (0 <= SA[i]) + { + j = i; + loop { + { + let SAi = SA[i]; + SA[ISAb + SAi] = i; + } + + i -= 1; + if !((0 <= i) && (0 <= SA[i])) { + break; + } + } + SA[i + 1] = i - j; + if (i <= 0) + { + break; + } + } + j = i; + loop { + SA[i] = !SA[i]; + { + let idx = ISAb + SA[i]; + SA[idx] = j; + } + + i -= 1; + if !(SA[i] < 0) { + break; + } + } + { + let idx = ISAb + SA[i]; + SA[idx] = j; + } + + i -= 1; + } + + // // Construct the inverse suffix array of type B* suffixes using trsort. 
+ // trsort::trsort(ISAb, SA, m, 1); + + // // Set the sorted order of type B* suffixes + // { + // // init + // i = n - 1; + // j = m; + // c0 = T.get(n - 1); + // while 0 <= i { + // // init + // i -= 1; + // c1 = c0; + + // loop { + // // cond + // if !(0 <= i) { + // break; + // } + // c0 = T.get(i); + // if !(c0 >= c1) { + // break; + // } + + // // body (empty) + + // // iter + // i -= 1; + // c1 = c0; + // } + + // if 0 <= i { + // t = i; + + // // init + // i -= 1; + // c1 = c0; + + // loop { + // // cond + // if !(0 <= i) { + // break; + // } + // c0 = T.get(i); + // if !(c0 <= c1) { + // break; + // } + + // // body (empty) + + // // iter + // i -= 1; + // c1 = c0; + // } + + // j -= 1; + // { + // let pos = SA[ISAb + j]; + // SA[pos] = if (t == 0) || (1 < (t - i)) { t } else { !t }; + // } + // } + // } + // } // End: Set the sorted order of type B* suffixes + + // // Calculate the index of start/end point of each bucket + // { + // B.b()[(ALPHABET_SIZE as Idx - 1, ALPHABET_SIZE as Idx - 1)] = n; // end point + + // // init + // c0 = ALPHABET_SIZE as Idx - 2; + // k = m - 1; + + // while 0 <= c0 { + // i = A[c0 + 1] - 1; + + // // init + // c1 = ALPHABET_SIZE as Idx - 1; + // while c0 < c1 { + // t = i - B.b()[(c0, c1)]; + // B.b()[(c0, c1)] = i; // end point + + // // Move all type B* suffixes to the correct position + // { + // // init + // i = t; + // j = B.bstar()[(c0, c1)]; + + // while j <= k { + // SA[i] = SA[k]; + + // // iter + // i -= 1; + // k -= 1; + // } + // } // End: Move all type B* suffixes to the correct position + + // // iter + // c1 -= 1; + // } + // B.bstar()[(c0, c0 + 1)] = i - B.b()[(c0, c0)] + 1; + // B.b()[(c0, c0)] = i; // end point + + // // iter + // c0 -= 1; + // } + // } // End: Calculate the index of start/end point of each bucket + // } + + // SortTypeBstarResult { A, B, m } + } + //} + + } } -} From a8d401c94f6cc421fd1a86b9d0a2d22bbd8ea532 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 15 Jul 2021 19:33:54 -0500 Subject: [PATCH 071/325] interim commit --- .../RsDivSufSort.cs | 115 +++++++++++------- 1 file changed, 69 insertions(+), 46 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 485b220..7e72743 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -5,6 +5,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using SAPtr = System.Index; namespace DeltaQ.SuffixSorting.LibDivSufSort { @@ -66,6 +67,16 @@ public ref struct BBucket public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; } + //[DebuggerDisplay("")] + //public ref struct SAPtr + //{ + // public readonly Index Index; + // public SAPtr(Index idx) + // { + // this.Index = idx; + // } + //} + //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) { @@ -175,53 +186,58 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) if (0 < m) { // Sort the type B* suffixes by their first two characters - let PAb = SAPtr(n - m); - let ISAb = SAPtr(m); - - for i in (0.. = (m - 2)).rev() { - t = SA[PAb + i]; - c0 = T.get(t); - c1 = T.get(t + 1); - B.bstar()[(c0, c1)] -= 1; - SA[B.bstar()[(c0, c1)]] = i; + SAPtr PAb = new(n - m); + SAPtr ISAb = new(m); + + for (i = m - 2; i > 0; i--) + { + //for i in (0.. = (m - 2)).rev() { + t = SA[PAb.Value + i]; + c0 = T[t]; + c1 = T[t + 1]; + Bstar[(c0, c1)] -= 1; + SA[Bstar[(c0, c1)]] = i; } - t = SA[PAb + m - 1]; - c0 = T.get(t); - c1 = T.get(t + 1); - B.bstar()[(c0, c1)] -= 1; - SA[B.bstar()[(c0, c1)]] = m - 1; + t = SA[PAb.Value + m - 1]; + c0 = T[t]; + c1 = T[t + 1]; + Bstar[(c0, c1)] -= 1; + SA[Bstar[(c0, c1)]] = m - 1; // Sort the type B* substrings using sssort. 
- let buf = SAPtr(m); - let bufsize = n - (2 * m); + SAPtr buf = new(m); + var bufsize = n - (2 * m); // init (outer) - c0 = ALPHABET_SIZE as Idx - 2; + c0 = ALPHABET_SIZE - 2; j = m; - while 0 < j { + while (0 < j) + { // init (inner) - c1 = ALPHABET_SIZE as Idx - 1; - while c0 < c1 { + c1 = ALPHABET_SIZE - 1; + while (c0 < c1) + { // body (inner) - i = B.bstar()[(c0, c1)]; + i = Bstar[(c0, c1)]; if (1 < (j - i)) { - SA_dump!(&SA.range(i..j), "sssort(A)"); - sssort::sssort( - T, - SA, - PAb, - SAPtr(i), - SAPtr(j), - buf, - bufsize, - 2, - n, - SA[i] == (m - 1), - - ); - SA_dump!(&SA.range(i..j), "sssort(B)"); + Debugger.Break(); + //SA_dump!(&SA.range(i..j), "sssort(A)"); + //sssort::sssort( + // T, + // SA, + // PAb, + // SAPtr(i), + // SAPtr(j), + // buf, + // bufsize, + // 2, + // n, + // SA[i] == (m - 1), + + //); + //SA_dump!(&SA.range(i..j), "sssort(B)"); } // iter (inner) @@ -235,18 +251,21 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) // Compute ranks of type B* substrings i = m - 1; - while 0 <= i { + while (0 <= i) + { if (0 <= SA[i]) { j = i; - loop { + while (true) + { { - let SAi = SA[i]; - SA[ISAb + SAi] = i; + var SAi = SA[i]; + SA[ISAb.Value + SAi] = i; } i -= 1; - if !((0 <= i) && (0 <= SA[i])) { + if (!((0 <= i) && (0 <= SA[i]))) + { break; } } @@ -258,20 +277,24 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } } j = i; - loop { - SA[i] = !SA[i]; + while (true) + { + //TODO: check this + //SA[i] = !SA[i]; + SA[i] = ~SA[i]; { - let idx = ISAb + SA[i]; + var idx = ISAb.Value + SA[i]; SA[idx] = j; } i -= 1; - if !(SA[i] < 0) { + if (!(SA[i] < 0)) + { break; } } { - let idx = ISAb + SA[i]; + var idx = ISAb.Value + SA[i]; SA[idx] = j; } From 717b7171b2afc2630e9fb37263e8ca43732a51d6 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 15 Jul 2021 19:36:19 -0500 Subject: [PATCH 072/325] interim commit --- .../RsDivSufSort.cs | 124 ++++++++++-------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 7e72743..b0fcbb0 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -301,69 +301,78 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) i -= 1; } - // // Construct the inverse suffix array of type B* suffixes using trsort. - // trsort::trsort(ISAb, SA, m, 1); + // Construct the inverse suffix array of type B* suffixes using trsort. + trsort(ISAb, SA, m, 1); - // // Set the sorted order of type B* suffixes - // { - // // init - // i = n - 1; - // j = m; - // c0 = T.get(n - 1); - // while 0 <= i { - // // init - // i -= 1; - // c1 = c0; - - // loop { - // // cond - // if !(0 <= i) { - // break; - // } - // c0 = T.get(i); - // if !(c0 >= c1) { - // break; - // } - - // // body (empty) + // Set the sorted order of type B* suffixes + { + // init + i = n - 1; + j = m; + c0 = T[n - 1]; + while (0 <= i) + { + // init + i -= 1; + c1 = c0; - // // iter - // i -= 1; - // c1 = c0; - // } + while (true) + { + // cond + if (!(0 <= i)) + { + break; + } + c0 = T[i]; + if (!(c0 >= c1)) + { + break; + } - // if 0 <= i { - // t = i; + // body (empty) - // // init - // i -= 1; - // c1 = c0; + // iter + i -= 1; + c1 = c0; + } - // loop { - // // cond - // if !(0 <= i) { - // break; - // } - // c0 = T.get(i); - // if !(c0 <= c1) { - // break; - // } + if (0 <= i) + { + t = i; - // // body (empty) + // init + i -= 1; + c1 = c0; - // // iter - // i -= 1; - // c1 = c0; - // } + while (true) + { + // cond + if (!(0 <= i)) + { + break; + } + c0 = T[i]; + if (!(c0 <= c1)) + { + break; + } + + // body (empty) + + // iter + i -= 1; + c1 = c0; + } - // j -= 1; - // { - // let pos = 
SA[ISAb + j]; - // SA[pos] = if (t == 0) || (1 < (t - i)) { t } else { !t }; - // } - // } - // } - // } // End: Set the sorted order of type B* suffixes + j -= 1; + { + var pos = SA[ISAb.Value + j]; + //TODO: check complement + SA[pos] = (t == 0 || (1 < (t - i))) ? t : ~t; + } + } + } + } // End: Set the sorted order of type B* suffixes // // Calculate the index of start/end point of each bucket // { @@ -414,4 +423,9 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) //} } + + private void trsort(SAPtr iSAb, Span sA, int m, int v) + { + throw new NotImplementedException(); + } } From dae5642627532d98bb57c8a1d8948cddcfad73bb Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 15 Jul 2021 19:38:35 -0500 Subject: [PATCH 073/325] interim commit --- .../RsDivSufSort.cs | 94 ++++++++++--------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index b0fcbb0..2339437 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -374,58 +374,60 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } } // End: Set the sorted order of type B* suffixes - // // Calculate the index of start/end point of each bucket - // { - // B.b()[(ALPHABET_SIZE as Idx - 1, ALPHABET_SIZE as Idx - 1)] = n; // end point - - // // init - // c0 = ALPHABET_SIZE as Idx - 2; - // k = m - 1; - - // while 0 <= c0 { - // i = A[c0 + 1] - 1; - - // // init - // c1 = ALPHABET_SIZE as Idx - 1; - // while c0 < c1 { - // t = i - B.b()[(c0, c1)]; - // B.b()[(c0, c1)] = i; // end point - - // // Move all type B* suffixes to the correct position - // { - // // init - // i = t; - // j = B.bstar()[(c0, c1)]; - - // while j <= k { - // SA[i] = SA[k]; - - // // iter - // i -= 1; - // k -= 1; - // } - // } // End: Move all type B* suffixes to the correct position - - // // iter - 
// c1 -= 1; - // } - // B.bstar()[(c0, c0 + 1)] = i - B.b()[(c0, c0)] + 1; - // B.b()[(c0, c0)] = i; // end point - - // // iter - // c0 -= 1; - // } - // } // End: Calculate the index of start/end point of each bucket - // } - - // SortTypeBstarResult { A, B, m } + // Calculate the index of start/end point of each bucket + { + Bb[(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1)] = n; // end point + + // init + c0 = ALPHABET_SIZE - 2; + k = m - 1; + + while (0 <= c0) + { + i = A[c0 + 1] - 1; + + // init + c1 = ALPHABET_SIZE - 1; + while (c0 < c1) + { + t = i - Bb[(c0, c1)]; + Bb[(c0, c1)] = i; // end point + + // Move all type B* suffixes to the correct position + { + // init + i = t; + j = Bstar[(c0, c1)]; + + while (j <= k) + { + SA[i] = SA[k]; + + // iter + i -= 1; + k -= 1; + } + } // End: Move all type B* suffixes to the correct position + + // iter + c1 -= 1; + } + Bstar[(c0, c0 + 1)] = i - Bb[(c0, c0)] + 1; + Bb[(c0, c0)] = i; // end point + + // iter + c0 -= 1; + } + } // End: Calculate the index of start/end point of each bucket } - //} + return new SortTypeBstarResult { A = A, B = B, m = m }; } + //} private void trsort(SAPtr iSAb, Span sA, int m, int v) { throw new NotImplementedException(); } } +} From f27f182ab169ee2af3c87fb8a558bf3099a34822 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 15 Jul 2021 19:48:45 -0500 Subject: [PATCH 074/325] interim commit --- .../RsDivSufSort.cs | 133 +++++++++++++++++- .../RsTrsort.cs | 12 ++ 2 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2339437..97947cc 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -425,9 +425,136 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } //} - private void trsort(SAPtr iSAb, Span sA, int m, int v) + private static readonly int[] lg_table_array = new[] { - throw new NotImplementedException(); + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + }; + private static ReadOnlySpan lg_table => lg_table_array; + + private static int tr_ilg(int n) + { + if ((n & 0xffff_0000) > 0) + { + if ((n & 0xff00_0000) > 0) + { + return 24 + lg_table[((n >> 24) & 0xff)]; + } + else + { + return 16 + lg_table[((n >> 16) & 0xff)]; + } + } + else + { + if ((n & 0x0000_ff00) > 0) + { + return 8 + lg_table[((n >> 8) & 0xff)]; + } + else + { + return 0 + lg_table[((n >> 0) & 0xff)]; + } + } + } + + private ref struct Budget + { + public int Chance; + public int Remain; + public int IncVal; + public int Count; + + public Budget(int chance, int incVal) : this() + { + Chance = chance; + IncVal = incVal; + } + } 
+ + /// Tandem repeat sort + private void trsort(SAPtr ISA, Span SA, int n, int depth) + { + SAPtr ISAd; + SAPtr first; + SAPtr last; + Index t; + Index skip; + Index unsorted; + Budget budget = new(tr_ilg(n) * 2 / 3, n); + + macro_rules! ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + + // JERRY + ISAd = ISA + depth; + while (-n < SA[0]) + { + first = SAPtr(0); + skip = 0; + unsorted = 0; + + // PETER + loop { + t = SA[first]; + if (t < 0) + { + first -= t; + skip += t; + } + else + { + if (skip != 0) + { + SA[first + skip] = skip; + skip = 0; + } + last = SAPtr(ISA!(t) + 1); + if (1 < (last - first)) + { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &mut budget); + if (budget.count != 0) + { + unsorted += budget.count; + } + else + { + skip = (first - last).0; + } + } + else if (last - first) == 1 { + skip = -1; + } + first = last; + } + + // cond (PETER) + if !(first < n) { + break; + } + } + + if (skip != 0) + { + SA[first + skip] = skip; + } + if (unsorted == 0) + { + break; + } + + // iter + ISAd += ISAd - ISA; + } } } -} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs new file mode 100644 index 0000000..cd19069 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + class RsTrsort + { + } +} From 4e448b02155af8272b3fb930ccca8765f1753c91 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 15 Jul 2021 19:55:44 -0500 Subject: [PATCH 075/325] interim commit --- .../RsDivSufSort.cs | 69 +++++++++++-------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 97947cc..f0484b4 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -5,7 +5,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; -using SAPtr = System.Index; +using SAPtr = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort { @@ -186,26 +186,26 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) if (0 < m) { // Sort the type B* suffixes by their first two characters - SAPtr PAb = new(n - m); - SAPtr ISAb = new(m); + SAPtr PAb = n - m; + SAPtr ISAb = m; for (i = m - 2; i > 0; i--) { //for i in (0.. = (m - 2)).rev() { - t = SA[PAb.Value + i]; + t = SA[PAb + i]; c0 = T[t]; c1 = T[t + 1]; Bstar[(c0, c1)] -= 1; SA[Bstar[(c0, c1)]] = i; } - t = SA[PAb.Value + m - 1]; + t = SA[PAb + m - 1]; c0 = T[t]; c1 = T[t + 1]; Bstar[(c0, c1)] -= 1; SA[Bstar[(c0, c1)]] = m - 1; // Sort the type B* substrings using sssort. 
- SAPtr buf = new(m); + SAPtr buf = m; var bufsize = n - (2 * m); // init (outer) @@ -260,7 +260,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) { { var SAi = SA[i]; - SA[ISAb.Value + SAi] = i; + SA[ISAb + SAi] = i; } i -= 1; @@ -283,7 +283,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) //SA[i] = !SA[i]; SA[i] = ~SA[i]; { - var idx = ISAb.Value + SA[i]; + var idx = ISAb + SA[i]; SA[idx] = j; } @@ -294,7 +294,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } } { - var idx = ISAb.Value + SA[i]; + var idx = ISAb + SA[i]; SA[idx] = j; } @@ -366,7 +366,7 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) j -= 1; { - var pos = SA[ISAb.Value + j]; + var pos = SA[ISAb + j]; //TODO: check complement SA[pos] = (t == 0 || (1 < (t - i))) ? t : ~t; } @@ -484,27 +484,33 @@ private void trsort(SAPtr ISA, Span SA, int n, int depth) SAPtr ISAd; SAPtr first; SAPtr last; - Index t; - Index skip; - Index unsorted; + /*Index*/ + int t; + /*Index*/ + int skip; + /*Index*/ + int unsorted; Budget budget = new(tr_ilg(n) * 2 / 3, n); - macro_rules! ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } + //macro_rules! 
ISA { + // ($x: expr) => { + // SA[ISA + $x] + // }; + //} + + //ref int getISA(int x) => ref SA[ISA + x]; // JERRY ISAd = ISA + depth; while (-n < SA[0]) { - first = SAPtr(0); + first = 0; skip = 0; unsorted = 0; // PETER - loop { + while (true) + { t = SA[first]; if (t < 0) { @@ -518,28 +524,30 @@ private void trsort(SAPtr ISA, Span SA, int n, int depth) SA[first + skip] = skip; skip = 0; } - last = SAPtr(ISA!(t) + 1); + last = SA[ISA + (t)] + 1; if (1 < (last - first)) { - budget.count = 0; - tr_introsort(ISA, ISAd, SA, first, last, &mut budget); - if (budget.count != 0) + budget.Count = 0; + tr_introsort(ISA, ISAd, SA, first, last, budget); + if (budget.Count != 0) { - unsorted += budget.count; + unsorted += budget.Count; } else { - skip = (first - last).0; + skip = first - last; } } - else if (last - first) == 1 { + else if ((last - first) == 1) + { skip = -1; } first = last; } // cond (PETER) - if !(first < n) { + if (!(first < n)) + { break; } } @@ -557,4 +565,9 @@ private void trsort(SAPtr ISA, Span SA, int n, int depth) ISAd += ISAd - ISA; } } + + private void tr_introsort(int iSA, int iSAd, Span sA, int first, int last, Budget budget) + { + throw new NotImplementedException(); + } } From c69d9d227bf56c460e0611e998e724b1e9d9c0c4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 16 Jul 2021 18:56:45 -0500 Subject: [PATCH 076/325] interim commit --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index f0484b4..cf2f1d6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -571,3 +571,4 @@ private void tr_introsort(int iSA, int iSAd, Span sA, int first, int last, throw new NotImplementedException(); } } +} From ef83513a25d4bc0f20c69c66d121a3d663e8d53b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 19:22:44 -0500 Subject: [PATCH 077/325] interim commit --- .../RsDivSufSort.cs | 605 +++++++++++++++++- 1 file changed, 603 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index cf2f1d6..259ba4b 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -6,6 +6,7 @@ using System.Text; using System.Threading.Tasks; using SAPtr = System.Int32; +using Idx = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort { @@ -566,9 +567,609 @@ private void trsort(SAPtr ISA, Span SA, int n, int depth) } } - private void tr_introsort(int iSA, int iSAd, Span sA, int first, int last, Budget budget) + private struct StackItem { - throw new NotImplementedException(); + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + public Idx e; + } + + private const int STACK_SIZE = 64; + private ref struct TrStack + { + public readonly Span Items; + + public int Size => _size; + private int _size; + + public TrStack(Span items) + { + Items = items; + _size = 0; + } + + public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) + { + Debug.Assert(_size < Items.Length); + ref StackItem item = ref Items[_size++]; + item.a = a; + item.b = b; + item.c = c; + item.d = d; + item.e = e; + } + public void Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) + { + Debug.Assert(_size > 0); + ref StackItem item = ref Items[--_size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + e = item.e; + } + } + + private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr first, ref SAPtr last, Budget budget) + { + SAPtr a = 0; + SAPtr b = 0; + SAPtr c; + Idx t, v, x; + Idx incr = ISAd - ISA; + Idx limit; + Idx next; + Idx trlink = -1; + + TrStack stack = new(); + } + /* + macro_rules! ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + macro_rules! 
ISAd { + ($x: expr) => { + SA[ISAd + $x] + }; + } + + let mut limit = tr_ilg(last - first); + // PASCAL + loop { + crosscheck!("pascal limit={} first={} last={}", limit, first, last); + if (limit < 0) { + if (limit == -1) { + // tandem repeat partition + tr_partition( + SA, + ISAd - incr, + first, + first, + last, + &mut a, + &mut b, + (last - 1).0, + ); + + // update ranks + if a < last { + crosscheck!("ranks a Date: Fri, 16 Jul 2021 19:31:14 -0500 Subject: [PATCH 078/325] interim commit --- .../RsDivSufSort.cs | 1155 +++++++++-------- 1 file changed, 610 insertions(+), 545 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 259ba4b..06953c9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -529,7 +529,7 @@ private void trsort(SAPtr ISA, Span SA, int n, int depth) if (1 < (last - first)) { budget.Count = 0; - tr_introsort(ISA, ISAd, SA, first, last, budget); + tr_introsort(ISA, ref ISAd, SA, ref first, ref last, budget); if (budget.Count != 0) { unsorted += budget.Count; @@ -619,557 +619,622 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir SAPtr c; Idx t, v, x; Idx incr = ISAd - ISA; - Idx limit; Idx next; Idx trlink = -1; - TrStack stack = new(); + TrStack stack = new(stackalloc StackItem[STACK_SIZE]); - } - /* - macro_rules! ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } - macro_rules! 
ISAd { - ($x: expr) => { - SA[ISAd + $x] - }; - } - - let mut limit = tr_ilg(last - first); - // PASCAL - loop { - crosscheck!("pascal limit={} first={} last={}", limit, first, last); - if (limit < 0) { - if (limit == -1) { - // tandem repeat partition - tr_partition( - SA, - ISAd - incr, - first, - first, - last, - &mut a, - &mut b, - (last - 1).0, - ); - - // update ranks - if a < last { - crosscheck!("ranks a { + SA[ISA + $x] + }; } - - let old_limit = limit; - limit -= 1; - if (old_limit == 0) { - crosscheck!( - "heapsort ISAd={} first={} last={} last-first={}", - ISAd, - first, - last, - last - first - ); - SA_dump!(&SA.range(first..last), "before tr_heapsort"); - tr_heapsort(ISAd, SA, first, (last - first).0); - SA_dump!(&SA.range(first..last), "after tr_heapsort"); - - // YOHAN - a = last - 1; - while first < a { - // VINCENT - x = ISAd!(SA[a]); - b = a - 1; - while (first <= b) && (ISAd!(SA[b])) == x { - SA[b] = !SA[b]; - - // iter (VINCENT) - b -= 1; - } - - // iter (YOHAN) - a = b; - } - limit = -3; - crosscheck!("post-vincent continue"); - continue; + macro_rules! 
ISAd { + ($x: expr) => { + SA[ISAd + $x] + }; } + */ - // choose pivot - a = tr_pivot(SA, ISAd, first, last); - crosscheck!("picked pivot {}", a); - SA.swap(first, a); - v = ISAd!(SA[first]); - - // partition - tr_partition(SA, ISAd, first, first + 1, last, &mut a, &mut b, v); - if (last - first) != (b - a) { - crosscheck!("pre-nolwenn"); - next = if ISA!(SA[a]) != v { tr_ilg(b - a) } else { -1 }; - - // update ranks - // NOLWENN - c = first; - v = (a - 1).0; - while c < a { - { - let SAc = SA[c]; - ISA!(SAc) = v; - } - c += 1; - } - if b < last { - // ARTHUR - c = a; - v = (b - 1).0; - while c < b { - { - let SAc = SA[c]; - ISA!(SAc) = v; - } - c += 1; - } - } - - // push - if (1 < (b - a)) && budget.check(b - a) { - crosscheck!("a"); - if (a - first) <= (last - b) { - crosscheck!("aa"); - if (last - b) <= (b - a) { - crosscheck!("aaa"); - if 1 < (a - first) { - crosscheck!("aaaa"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - last = a; - } else if 1 < (last - b) { - crosscheck!("aaab"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - first = b; - } else { - crosscheck!("aaac"); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } else if (a - first) <= (b - a) { - crosscheck!("aab"); - if 1 < (a - first) { - crosscheck!("aaba"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - last = a; - } else { - crosscheck!("aabb"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } else { - 
crosscheck!("aac"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } else { - crosscheck!("ab"); - if (a - first) <= (b - a) { - crosscheck!("aba"); - if 1 < (last - b) { - crosscheck!("abaa"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - first = b; - } else if 1 < (a - first) { - crosscheck!("abab"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - last = a; - } else { - crosscheck!("abac"); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } else if (last - b) <= (b - a) { - crosscheck!("abb"); - if 1 < (last - b) { - crosscheck!("abba"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); - first = b; - } else { - crosscheck!("abbb"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } else { - crosscheck!("abc"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - ISAd += incr; - first = a; - last = b; - limit = next; - } - } - } else { - crosscheck!("b"); - if (1 < (b - a)) && (0 <= trlink) { - crosscheck!("ba"); - stack.items[trlink as usize].d = -1; - } - if (a - first) <= (last 
- b) { - crosscheck!("bb"); - if 1 < (a - first) { - crosscheck!("bba"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); - last = a; - } else if 1 < (last - b) { - crosscheck!("bbb"); - first = b; - } else { - crosscheck!("bbc"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() - { - return; - } - } - } else { - crosscheck!("bc"); - if 1 < (last - b) { - crosscheck!("bca"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); - first = b; - } else if 1 < (a - first) { - crosscheck!("bcb"); - last = a; - } else { - crosscheck!("bcc"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() - { - return; - } - crosscheck!("bcc post"); - } - } - } - } else { - crosscheck!("c"); - if budget.check(last - first) { - crosscheck!("ca"); - limit = tr_ilg(last - first); - ISAd += incr; - } else { - crosscheck!("cb"); - if 0 <= trlink { - crosscheck!("cba"); - stack.items[trlink as usize].d = -1; - } - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() - { - return; - } - crosscheck!("cb post"); - } - } - } // end PASCAL - } + Idx limit = tr_ilg(last - first); + + // PASCAL + while (true) + { + //TODO: crosscheck + //crosscheck!("pascal limit={} first={} last={}", limit, first, last); + if (limit < 0) + { + if (limit == -1) + { + // tandem repeat partition + tr_partition( + SA, + ISAd - incr, + first, + first, + last, + ref a, + ref b, + (last - 1) + ); + + // update ranks + if (a < last) + { + //TODO: crosscheck + //crosscheck!("ranks a sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) + { + throw new NotImplementedException(); + } } } From 47b581f1115972f93ee3b26b6222194d383d55cb Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:14:52 -0500 Subject: [PATCH 079/325] interim commit --- .../RsDivSufSort.cs | 99 ++++++++++--------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 06953c9..3e0dc05 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -580,35 +580,36 @@ private struct StackItem private ref struct TrStack { public readonly Span Items; - - public int Size => _size; - private int _size; + public int Size; public TrStack(Span items) { Items = items; - _size = 0; + Size = 0; } public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) { - Debug.Assert(_size < Items.Length); - ref StackItem item = ref Items[_size++]; + Debug.Assert(Size < Items.Length); + ref StackItem item = ref Items[Size++]; item.a = a; item.b = b; item.c = c; item.d = d; item.e = e; } - public void Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) { - Debug.Assert(_size > 0); - ref StackItem item = ref Items[--_size]; + //Debug.Assert(Size > 0); + if (Size == 0) return false; + + ref StackItem item = ref Items[--Size]; a = item.a; b = item.b; c = item.c; d = item.d; e = item.e; + return true; } } @@ -710,70 +711,70 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir trlink = stack.Size - 2; } - if (a - first) <= (last - b) { - crosscheck!("star"); - if 1 < (a - first) { - crosscheck!("board"); - crosscheck!( - "push {} {} {} {} {}", - ISAd, - b, - last, - tr_ilg(last - b), - trlink - ); - stack.push(ISAd, b, last, tr_ilg(last - b), trlink); + if((a - first) <= (last - b)) { + //TODO: crosscheck + //crosscheck!("star"); + if(1 < (a - first)) { + //TODO: crosscheck + //crosscheck!("board"); + //crosscheck!( + // "push {} {} {} {} {}", + // ISAd, + // b, + // 
last, + // tr_ilg(last - b), + // trlink + //); + stack.Push(ISAd, b, last, tr_ilg(last - b), trlink); last = a; limit = tr_ilg(a - first); } - else if 1 < (last - b) { - crosscheck!("north"); + else if(1 < (last - b)) { + //TODO: crosscheck + //crosscheck!("north"); first = b; limit = tr_ilg(last - b); } else { - crosscheck!("denny"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + //TODO: crosscheck + //crosscheck!("denny"); + if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } - crosscheck!("denny-post"); + //crosscheck!("denny-post"); } } else { - crosscheck!("moon"); - if 1 < (last - b) { - crosscheck!("land"); - crosscheck!( - "push {} {} {} {} {}", - ISAd, - first, - a, - tr_ilg(a - first), - trlink - ); - stack.push(ISAd, first, a, tr_ilg(a - first), trlink); + //crosscheck!("moon"); + if(1 < (last - b)) { + //crosscheck!("land"); + //crosscheck!( + // "push {} {} {} {} {}", + // ISAd, + // first, + // a, + // tr_ilg(a - first), + // trlink + //); + stack.Push(ISAd, first, a, tr_ilg(a - first), trlink); first = b; limit = tr_ilg(last - b); } - else if 1 < (a - first) { - crosscheck!("ship"); + else if(1 < (a - first)) { + //crosscheck!("ship"); last = a; limit = tr_ilg(a - first); } else { - crosscheck!("clap"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + //crosscheck!("clap"); + if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } - crosscheck!("clap-post"); + //crosscheck!("clap-post"); } } } @@ -782,7 +783,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // end if limit == -1 // tandem repeat copy - stack.size -= 1; + stack.Size -= 1; a = stack.items[stack.size].b; b = stack.items[stack.size].c; if stack.items[stack.size].d == 0 { From c398c67c1ca811e446302f2efba772e782ceefc4 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:17:13 -0500 Subject: [PATCH 080/325] interim commit --- .../RsDivSufSort.cs | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 3e0dc05..6826121 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -711,10 +711,12 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir trlink = stack.Size - 2; } - if((a - first) <= (last - b)) { + if ((a - first) <= (last - b)) + { //TODO: crosscheck //crosscheck!("star"); - if(1 < (a - first)) { + if (1 < (a - first)) + { //TODO: crosscheck //crosscheck!("board"); //crosscheck!( @@ -729,7 +731,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = a; limit = tr_ilg(a - first); } - else if(1 < (last - b)) { + else if (1 < (last - b)) + { //TODO: crosscheck //crosscheck!("north"); first = b; @@ -739,16 +742,18 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir { //TODO: crosscheck //crosscheck!("denny"); - if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } //crosscheck!("denny-post"); } - } else + } + else { //crosscheck!("moon"); - if(1 < (last - b)) { + if (1 < (last - b)) + { //crosscheck!("land"); //crosscheck!( // "push {} {} {} {} {}", @@ -762,7 +767,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir first = b; limit = tr_ilg(last - b); } - else if(1 < (a - first)) { + else if (1 < (a - first)) + { //crosscheck!("ship"); last = a; limit = tr_ilg(a - first); @@ -770,7 +776,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { //crosscheck!("clap"); - if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + 
if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -783,22 +789,21 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // end if limit == -1 // tandem repeat copy - stack.Size -= 1; - a = stack.items[stack.size].b; - b = stack.items[stack.size].c; - if stack.items[stack.size].d == 0 { - tr_copy(ISA, SA, first, a, b, last, (ISAd - ISA).0); + ref StackItem item = ref stack.Items[--stack.Size]; + a = item.b; + b = item.c; + if (item.d == 0) + { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); } else { - if 0 <= trlink { - stack.items[trlink as usize].d = -1; + if(0 <= trlink) { + stack.Items[trlink].d = -1; } - tr_partialcopy(ISA, SA, first, a, b, last, (ISAd - ISA).0); + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); } - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } From 94a3ba6ca03da2426165af9b5b2c17b97c57ab14 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:22:58 -0500 Subject: [PATCH 081/325] interim commit --- .../RsDivSufSort.cs | 250 +++++++++--------- 1 file changed, 132 insertions(+), 118 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 6826121..ef9e591 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -644,7 +644,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir while (true) { //TODO: crosscheck - //crosscheck!("pascal limit={} first={} last={}", limit, first, last); + crosscheck("pascal limit={} first={} last={}", limit, first, last); if (limit < 0) { if (limit == -1) @@ -665,7 +665,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if (a < last) { //TODO: crosscheck - //crosscheck!("ranks a SA, ref SAPtr fir if (b < last) { //TODO: crosscheck - //crosscheck!("ranks b SA, ref SAPtr fir if (1 < (b - a)) { //TODO: crosscheck - //crosscheck!("1<(b-a)"); - //crosscheck!("push NULL {} {} {} {}", a, b, 0, 0); + crosscheck("1<(b-a)"); + crosscheck("push NULL {} {} {} {}", a, b, 0, 0); stack.Push(0, a, b, 0, 0); - //crosscheck!("push {} {} {} {} {}", ISAd - incr, first, last, -2, trlink); + crosscheck("push {} {} {} {} {}", ISAd - incr, first, last, -2, trlink); stack.Push(ISAd - incr, first, last, -2, trlink); trlink = stack.Size - 2; } @@ -714,19 +714,19 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if ((a - first) <= (last - b)) { //TODO: crosscheck - //crosscheck!("star"); + crosscheck("star"); if (1 < (a - first)) { //TODO: crosscheck - //crosscheck!("board"); - //crosscheck!( - // "push {} {} {} {} {}", - // ISAd, - // b, - // last, - // tr_ilg(last - b), - // trlink - //); + crosscheck("board"); + crosscheck( + "push {} {} {} {} {}", + ISAd, + b, + last, + tr_ilg(last - b), + trlink + ); stack.Push(ISAd, b, 
last, tr_ilg(last - b), trlink); last = a; limit = tr_ilg(a - first); @@ -734,53 +734,53 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else if (1 < (last - b)) { //TODO: crosscheck - //crosscheck!("north"); + crosscheck("north"); first = b; limit = tr_ilg(last - b); } else { //TODO: crosscheck - //crosscheck!("denny"); + crosscheck("denny"); if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } - //crosscheck!("denny-post"); + crosscheck("denny-post"); } } else { - //crosscheck!("moon"); + crosscheck("moon"); if (1 < (last - b)) { - //crosscheck!("land"); - //crosscheck!( - // "push {} {} {} {} {}", - // ISAd, - // first, - // a, - // tr_ilg(a - first), - // trlink - //); + crosscheck("land"); + crosscheck( + "push {} {} {} {} {}", + ISAd, + first, + a, + tr_ilg(a - first), + trlink + ); stack.Push(ISAd, first, a, tr_ilg(a - first), trlink); first = b; limit = tr_ilg(last - b); } else if (1 < (a - first)) { - //crosscheck!("ship"); + crosscheck("ship"); last = a; limit = tr_ilg(a - first); } else { - //crosscheck!("clap"); + crosscheck("clap"); if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } - //crosscheck!("clap-post"); + crosscheck("clap-post"); } } } @@ -798,12 +798,13 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } else { - if(0 <= trlink) { + if (0 <= trlink) + { stack.Items[trlink].d = -1; } tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); } - if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -813,34 +814,40 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // end if limit == -2 // sorted partition - if 0 <= SA[first] { - crosscheck!("0<=*first"); + if (0 <= SA[first]) + { + crosscheck("0<=*first"); a = first; // GEMINI - loop { + while (true) + { { - let SA_a = SA[a]; + var SA_a = SA[a]; 
ISA!(SA_a) = a.0; } // cond (GEMINI) a += 1; - if !((a < last) && (0 <= SA[a])) { + if (!((a < last) && (0 <= SA[a]))) + { break; } } first = a; } - if first < last { - crosscheck!("first SA, ref SAPtr fir -1 }; a += 1; - if a < last { - crosscheck!("++a SA, ref SAPtr fir // push if (budget.check((a - first).0)) { - crosscheck!("budget pass"); + crosscheck("budget pass"); if (a - first) <= (last - a) { - crosscheck!("push {} {} {} {} {}", ISAd, a, last, -3, trlink); + crosscheck("push {} {} {} {} {}", ISAd, a, last, -3, trlink); stack.push(ISAd, a, last, -3, trlink); ISAd += incr; last = a; @@ -880,7 +889,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } else { if 1 < (last - a) { - crosscheck!( + crosscheck( "push {} {} {} {} {}", ISAd + incr, first, @@ -902,27 +911,27 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } else { - crosscheck!("budget fail"); + crosscheck("budget fail"); if 0 <= trlink { - crosscheck!("0<=trlink"); + crosscheck("0<=trlink"); stack.items[trlink as usize].d = -1; } if 1 < (last - a) { - crosscheck!("1<(last-a)"); + crosscheck("1<(last-a)"); first = a; limit = -3; } else { - crosscheck!("1<(last-a) not"); + crosscheck("1<(last-a) not"); if !stack .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) .is_ok() { return; } - crosscheck!("1<(last-a) not post"); - crosscheck!( + crosscheck("1<(last-a) not post"); + crosscheck( "were popped: ISAd={} first={} last={} limit={} trlink={}", ISAd, first, @@ -935,15 +944,15 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } else { - crosscheck!("times pop"); + crosscheck("times pop"); if !stack .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) .is_ok() { return; } - crosscheck!("times pop-post"); - crosscheck!( + crosscheck("times pop-post"); + crosscheck( "were popped: ISAd={} first={} last={} limit={} trlink={}", ISAd, first, @@ -957,7 +966,7 @@ private void tr_introsort(SAPtr ISA, 
ref SAPtr ISAd, Span SA, ref SAPtr fir } // end if limit < 0 if (last - first) <= TR_INSERTIONSORT_THRESHOLD { - crosscheck!("insertionsort last-first={}", last - first); + crosscheck("insertionsort last-first={}", last - first); tr_insertionsort(SA, ISAd, first, last); limit = -3; continue; @@ -967,7 +976,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir limit -= 1; if (old_limit == 0) { - crosscheck!( + crosscheck( "heapsort ISAd={} first={} last={} last-first={}", ISAd, first, @@ -995,20 +1004,20 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir a = b; } limit = -3; - crosscheck!("post-vincent continue"); + crosscheck("post-vincent continue"); continue; } // choose pivot a = tr_pivot(SA, ISAd, first, last); - crosscheck!("picked pivot {}", a); + crosscheck("picked pivot {}", a); SA.swap(first, a); v = ISAd!(SA[first]); // partition tr_partition(SA, ISAd, first, first + 1, last, &mut a, &mut b, v); if (last - first) != (b - a) { - crosscheck!("pre-nolwenn"); + crosscheck("pre-nolwenn"); next = if ISA!(SA[a]) != v { tr_ilg(b - a) } else { -1 }; // update ranks @@ -1037,47 +1046,47 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // push if (1 < (b - a)) && budget.check(b - a) { - crosscheck!("a"); + crosscheck("a"); if (a - first) <= (last - b) { - crosscheck!("aa"); + crosscheck("aa"); if (last - b) <= (b - a) { - crosscheck!("aaa"); + crosscheck("aaa"); if 1 < (a - first) { - crosscheck!("aaaa"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); + crosscheck("aaaa"); + crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); last = a; } else if 1 < (last - b) { - crosscheck!("aaab"); - crosscheck!("push {} {} {} {} {}", ISAd 
+ incr, a, b, next, trlink); + crosscheck("aaab"); + crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); first = b; } else { - crosscheck!("aaac"); + crosscheck("aaac"); ISAd += incr; first = a; last = b; limit = next; } } else if (a - first) <= (b - a) { - crosscheck!("aab"); + crosscheck("aab"); if 1 < (a - first) { - crosscheck!("aaba"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("aaba"); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); last = a; } else { - crosscheck!("aabb"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("aabb"); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); ISAd += incr; first = a; @@ -1086,10 +1095,10 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } else { - crosscheck!("aac"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("aac"); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); ISAd += incr; first = a; @@ -1098,45 +1107,45 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } else { - crosscheck!("ab"); + crosscheck("ab"); if (a - first) <= (b - a) { - crosscheck!("aba"); + crosscheck("aba"); if 1 < (last - b) { - crosscheck!("abaa"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); + crosscheck("abaa"); + crosscheck("push {} {} {} {} {}", ISAd + incr, 
a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); first = b; } else if 1 < (a - first) { - crosscheck!("abab"); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); + crosscheck("abab"); + crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); last = a; } else { - crosscheck!("abac"); + crosscheck("abac"); ISAd += incr; first = a; last = b; limit = next; } } else if (last - b) <= (b - a) { - crosscheck!("abb"); + crosscheck("abb"); if 1 < (last - b) { - crosscheck!("abba"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("abba"); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.push(ISAd + incr, a, b, next, trlink); first = b; } else { - crosscheck!("abbb"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("abbb"); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); ISAd += incr; first = a; @@ -1145,10 +1154,10 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } else { - crosscheck!("abc"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("abc"); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); ISAd += incr; first = a; @@ -1158,26 +1167,26 @@ private void 
tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } else { - crosscheck!("b"); + crosscheck("b"); if (1 < (b - a)) && (0 <= trlink) { - crosscheck!("ba"); + crosscheck("ba"); stack.items[trlink as usize].d = -1; } if (a - first) <= (last - b) { - crosscheck!("bb"); + crosscheck("bb"); if 1 < (a - first) { - crosscheck!("bba"); - crosscheck!("push {} {} {} {} {}", ISAd, b, last, limit, trlink); + crosscheck("bba"); + crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.push(ISAd, b, last, limit, trlink); last = a; } else if 1 < (last - b) { - crosscheck!("bbb"); + crosscheck("bbb"); first = b; } else { - crosscheck!("bbc"); + crosscheck("bbc"); if !stack .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) .is_ok() @@ -1187,43 +1196,43 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } else { - crosscheck!("bc"); + crosscheck("bc"); if 1 < (last - b) { - crosscheck!("bca"); - crosscheck!("push {} {} {} {} {}", ISAd, first, a, limit, trlink); + crosscheck("bca"); + crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.push(ISAd, first, a, limit, trlink); first = b; } else if 1 < (a - first) { - crosscheck!("bcb"); + crosscheck("bcb"); last = a; } else { - crosscheck!("bcc"); + crosscheck("bcc"); if !stack .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) .is_ok() { return; } - crosscheck!("bcc post"); + crosscheck("bcc post"); } } } } else { - crosscheck!("c"); + crosscheck("c"); if budget.check(last - first) { - crosscheck!("ca"); + crosscheck("ca"); limit = tr_ilg(last - first); ISAd += incr; } else { - crosscheck!("cb"); + crosscheck("cb"); if 0 <= trlink { - crosscheck!("cba"); + crosscheck("cba"); stack.items[trlink as usize].d = -1; } if !stack @@ -1232,12 +1241,17 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir { return; } - crosscheck!("cb post"); + crosscheck("cb post"); } } } // end PASCAL } + private void 
crosscheck(string v, params object[] args) + { + throw new NotImplementedException(); + } + private void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) { throw new NotImplementedException(); From a1f6d2eee370f7282d22573d72013523d613c125 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 16 Jul 2021 20:31:09 -0500 Subject: [PATCH 082/325] interim commit --- .../RsDivSufSort.cs | 59 +++++++++++-------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index ef9e591..074f0eb 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -880,15 +880,18 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if (budget.check((a - first).0)) { crosscheck("budget pass"); - if (a - first) <= (last - a) { + if ((a - first) <= (last - a)) + { crosscheck("push {} {} {} {} {}", ISAd, a, last, -3, trlink); - stack.push(ISAd, a, last, -3, trlink); + stack.Push(ISAd, a, last, -3, trlink); ISAd += incr; last = a; limit = next; - } else + } + else { - if 1 < (last - a) { + if (1 < (last - a)) + { crosscheck( "push {} {} {} {} {}", ISAd + incr, @@ -897,7 +900,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir next, trlink ); - stack.push(ISAd + incr, first, a, next, trlink); + stack.Push(ISAd + incr, first, a, next, trlink); first = a; limit = -3; } @@ -1054,15 +1057,15 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (a - first) { crosscheck("aaaa"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, 
trlink); last = a; } else if 1 < (last - b) { crosscheck("aaab"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); first = b; } else @@ -1078,16 +1081,16 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (a - first) { crosscheck("aaba"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, trlink); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); last = a; } else { crosscheck("aabb"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, trlink); ISAd += incr; first = a; last = b; @@ -1097,9 +1100,9 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir { crosscheck("aac"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, trlink); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); ISAd += incr; first = a; last = b; @@ -1113,15 +1116,15 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (last - b) { crosscheck("abaa"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); first = b; } else if 1 < (a - first) { crosscheck("abab"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + 
incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); last = a; } else @@ -1137,16 +1140,16 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (last - b) { crosscheck("abba"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.push(ISAd + incr, a, b, next, trlink); + stack.Push(ISAd + incr, a, b, next, trlink); first = b; } else { crosscheck("abbb"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); ISAd += incr; first = a; last = b; @@ -1156,9 +1159,9 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir { crosscheck("abc"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, trlink); ISAd += incr; first = a; last = b; @@ -1177,7 +1180,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (a - first) { crosscheck("bba"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.push(ISAd, b, last, limit, trlink); + stack.Push(ISAd, b, last, limit, trlink); last = a; } else if 1 < (last - b) { @@ -1200,7 +1203,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if 1 < (last - b) { crosscheck("bca"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.push(ISAd, first, a, limit, trlink); + stack.Push(ISAd, first, a, limit, trlink); first = b; } else if 1 < (a - first) { @@ -1247,6 +1250,16 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr 
fir } // end PASCAL } + private void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) + { + throw new NotImplementedException(); + } + + private void tr_copy(int iSA, Span sA, int first, int a, int b, int last, int v) + { + throw new NotImplementedException(); + } + private void crosscheck(string v, params object[] args) { throw new NotImplementedException(); From 7097d57b9f52845ff515ca64f48fd547e2b6a52b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 16 Jul 2021 20:34:31 -0500 Subject: [PATCH 083/325] interim commit --- .../RsDivSufSort.cs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 074f0eb..c6d8cf2 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -822,8 +822,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir while (true) { { - var SA_a = SA[a]; - ISA!(SA_a) = a.0; + SA[ISA + SA[a]] = a; } // cond (GEMINI) @@ -843,7 +842,9 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // MONSTRO while (true) { - SA[a] = !SA[a]; + //TODO: checkme + //SA[a] = !SA[a]; + SA[a] = ~SA[a]; a += 1; if (!(SA[a] < 0)) @@ -852,13 +853,9 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } - next = if ISA!(SA[a]) != ISAd!(SA[a]) { - tr_ilg(a - first + 1) - } - else - { - -1 - }; + var xisa = SA[ISA + SA[a]];//ISA!(SA[a]); + var xisad = SA[ISAd + SA[a]];//ISAd!(SA[a]); + next = xisa != xisad ? tr_ilg(a - first + 1) : -1; a += 1; if (a < last) { From 5361b954cf75ab361ccc4732709e461140972795 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:35:33 -0500 Subject: [PATCH 084/325] interim commit --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index c6d8cf2..0e716f8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -853,9 +853,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } - var xisa = SA[ISA + SA[a]];//ISA!(SA[a]); - var xisad = SA[ISAd + SA[a]];//ISAd!(SA[a]); - next = xisa != xisad ? tr_ilg(a - first + 1) : -1; + next = SA[ISA + SA[a]] != SA[ISAd + SA[a]] ? tr_ilg(a - first + 1) : -1; a += 1; if (a < last) { @@ -866,8 +864,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir while (b < a) { { - var SA_b = SA[b]; - ISA!(SA_b) = v; + SA[ISA + SA[b]] = v; } b += 1; } From 05d6563b1f86419e9e679d4e9552dd9002600149 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:37:13 -0500 Subject: [PATCH 085/325] interim commit --- .../RsDivSufSort.cs | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 0e716f8..1628a39 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -477,6 +477,25 @@ public Budget(int chance, int incVal) : this() Chance = chance; IncVal = incVal; } + + public bool Check(int size) + { + if (size <= Remain) + { + Remain -= size; + return true; + } + + if (Chance == 0) + { + Count += size; + return false; + } + + Remain += IncVal - size; + Chance -= 1; + return true; + } } /// Tandem repeat sort @@ -871,7 +890,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } // push - if (budget.check((a - first).0)) + if (budget.Check(a - first)) { crosscheck("budget pass"); if ((a - first) <= (last - a)) From f87388f27b9eb2902b2857ee2d99d826a770814f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 20:38:32 -0500 Subject: [PATCH 086/325] interim commit --- .../RsDivSufSort.cs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 1628a39..a3c82a9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -928,11 +928,11 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("budget fail"); - if 0 <= trlink { + if(0 <= trlink) { crosscheck("0<=trlink"); - stack.items[trlink as usize].d = -1; + stack.Items[trlink].d = -1; } - if 1 < (last - a) { + if(1 < (last - a)) { crosscheck("1<(last-a)"); first = a; limit = -3; @@ -940,9 +940,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("1<(last-a) not"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -961,9 +959,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("times pop"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } From faca6d1bc69804bec90b08e8f992fb8e9d4b5c2f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 21:59:52 -0500 Subject: [PATCH 087/325] interim commit --- .../RsDivSufSort.cs | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index a3c82a9..78a4a95 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -928,11 +928,13 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("budget fail"); - if(0 <= trlink) { + if (0 <= trlink) + { crosscheck("0<=trlink"); stack.Items[trlink].d = -1; } - if(1 < (last - a)) { + if (1 < (last - a)) + { crosscheck("1<(last-a)"); first = a; limit = -3; @@ -940,7 +942,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("1<(last-a) not"); - if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -959,7 +961,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("times pop"); - if(!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -977,7 +979,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir continue; } // end if limit < 0 - if (last - first) <= TR_INSERTIONSORT_THRESHOLD { + if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) + { crosscheck("insertionsort last-first={}", last - first); tr_insertionsort(SA, ISAd, first, last); limit = -3; @@ -996,16 +999,18 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last - first ); SA_dump!(&SA.range(first..last), "before tr_heapsort"); - tr_heapsort(ISAd, SA, first, (last - first).0); + tr_heapsort(ISAd, SA, first, (last - first)); 
SA_dump!(&SA.range(first..last), "after tr_heapsort"); // YOHAN a = last - 1; - while first < a { + while (first < a) + { // VINCENT x = ISAd!(SA[a]); b = a - 1; - while (first <= b) && (ISAd!(SA[b])) == x { + while ((first <= b) && (ISAd!(SA[b])) == x) + { SA[b] = !SA[b]; // iter (VINCENT) @@ -1259,6 +1264,21 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } // end PASCAL } + private int tr_pivot(Span sA, int iSAd, int first, int last) + { + throw new NotImplementedException(); + } + + private void tr_heapsort(int iSAd, Span sA, int first, int v) + { + throw new NotImplementedException(); + } + + private void tr_insertionsort(Span sA, int iSAd, int first, int last) + { + throw new NotImplementedException(); + } + private void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) { throw new NotImplementedException(); From 8f49ba14bc256667c8567db3a0b4be45155947b4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 16 Jul 2021 22:12:32 -0500 Subject: [PATCH 088/325] interim commit --- .../RsDivSufSort.cs | 55 +++++++++++-------- .../SpanExtensions.cs | 15 +++++ 2 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 78a4a95..cd3dcdb 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -998,20 +998,21 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last, last - first ); - SA_dump!(&SA.range(first..last), "before tr_heapsort"); + SA_dump(SA[first..last], "before tr_heapsort"); tr_heapsort(ISAd, SA, first, (last - first)); - SA_dump!(&SA.range(first..last), "after tr_heapsort"); + SA_dump(SA[first..last], "after tr_heapsort"); // YOHAN a = last - 1; while (first < a) { // VINCENT - x = ISAd!(SA[a]); + x = 
SA[ISAd + SA[a]]; b = a - 1; - while ((first <= b) && (ISAd!(SA[b])) == x) + while ((first <= b) && (SA[ISAd + SA[b]]) == x) { - SA[b] = !SA[b]; + //! + SA[b] = ~SA[b]; // iter (VINCENT) b -= 1; @@ -1028,41 +1029,44 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir // choose pivot a = tr_pivot(SA, ISAd, first, last); crosscheck("picked pivot {}", a); - SA.swap(first, a); - v = ISAd!(SA[first]); + SA.Swap(first, a); + v = SA[ISAd + (SA[first])]; // partition - tr_partition(SA, ISAd, first, first + 1, last, &mut a, &mut b, v); - if (last - first) != (b - a) { + tr_partition(SA, ISAd, first, first + 1, last, ref a, ref b, v); + if ((last - first) != (b - a)) + { crosscheck("pre-nolwenn"); - next = if ISA!(SA[a]) != v { tr_ilg(b - a) } else { -1 }; + next = SA[ISA + (SA[a])] != v ? tr_ilg(b - a) : -1; // update ranks // NOLWENN c = first; - v = (a - 1).0; - while c < a { + v = (a - 1); + while (c < a) + { { - let SAc = SA[c]; - ISA!(SAc) = v; + SA[ISA + (SA[c])] = v; } c += 1; } - if b < last { + if (b < last) + { // ARTHUR c = a; - v = (b - 1).0; - while c < b { + v = (b - 1); + while (c < b) + { { - let SAc = SA[c]; - ISA!(SAc) = v; + SA[ISA + (SA[c])] = v; } c += 1; } } // push - if (1 < (b - a)) && budget.check(b - a) { + if ((1 < (b - a)) && budget.Check(b - a)) + { crosscheck("a"); if (a - first) <= (last - b) { crosscheck("aa"); @@ -1182,7 +1186,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir limit = next; } } - } else + } + else { crosscheck("b"); if (1 < (b - a)) && (0 <= trlink) { @@ -1237,7 +1242,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } } } - } else + } + else { crosscheck("c"); if budget.check(last - first) { @@ -1264,6 +1270,11 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } // end PASCAL } + private void SA_dump(Span span, string v) + { + throw new NotImplementedException(); + } + private int tr_pivot(Span sA, int iSAd, 
int first, int last) { throw new NotImplementedException(); diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs new file mode 100644 index 0000000..f1faf1c --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs @@ -0,0 +1,15 @@ +using System; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + public static class SpanExtensions + { + public static void Swap(this Span span, int indexA, int indexB) + { + ref var itemA = ref span[indexA]; + ref var itemB = ref span[indexB]; + span[indexA] = itemB; + span[indexB] = itemA; + } + } +} From 532185e87a24dd6ebcbd4c9a5c3e289757ababb0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 16 Jul 2021 22:13:33 -0500 Subject: [PATCH 089/325] interim commit --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index cd3dcdb..ac3c9ca 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -632,6 +632,7 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) } } + private const Idx TR_INSERTIONSORT_THRESHOLD = 8; private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr first, ref SAPtr last, Budget budget) { SAPtr a = 0; @@ -987,7 +988,7 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir continue; } - let old_limit = limit; + var old_limit = limit; limit -= 1; if (old_limit == 0) { From 2ec05f5b80c7b031b0ff9b383a63f0111f6ec733 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 16 Jul 2021 22:17:43 -0500 Subject: [PATCH 090/325] interim commit --- .../RsDivSufSort.cs | 87 +++++++++++-------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index ac3c9ca..754cf55 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1069,11 +1069,14 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir if ((1 < (b - a)) && budget.Check(b - a)) { crosscheck("a"); - if (a - first) <= (last - b) { + if ((a - first) <= (last - b)) + { crosscheck("aa"); - if (last - b) <= (b - a) { + if ((last - b) <= (b - a)) + { crosscheck("aaa"); - if 1 < (a - first) { + if (1 < (a - first)) + { crosscheck("aaaa"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.Push(ISAd + incr, a, b, next, trlink); @@ -1081,7 +1084,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir stack.Push(ISAd, b, last, limit, trlink); last = a; } - else if 1 < (last - b) { + else if (1 < (last - b)) + { crosscheck("aaab"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.Push(ISAd + incr, a, b, next, trlink); @@ -1095,9 +1099,12 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = b; limit = next; } - } else if (a - first) <= (b - a) { + } + else if ((a - first) <= (b - a)) + { crosscheck("aab"); - if 1 < (a - first) { + if (1 < (a - first)) + { crosscheck("aaba"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.Push(ISAd, b, last, limit, trlink); @@ -1115,7 +1122,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = b; limit = next; } - } else + } + else { crosscheck("aac"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); @@ -1127,12 +1135,15 @@ private void 
tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = b; limit = next; } - } else + } + else { crosscheck("ab"); - if (a - first) <= (b - a) { + if ((a - first) <= (b - a)) + { crosscheck("aba"); - if 1 < (last - b) { + if (1 < (last - b)) + { crosscheck("abaa"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.Push(ISAd + incr, a, b, next, trlink); @@ -1140,7 +1151,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir stack.Push(ISAd, first, a, limit, trlink); first = b; } - else if 1 < (a - first) { + else if (1 < (a - first)) + { crosscheck("abab"); crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); stack.Push(ISAd + incr, a, b, next, trlink); @@ -1154,9 +1166,12 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = b; limit = next; } - } else if (last - b) <= (b - a) { + } + else if ((last - b) <= (b - a)) + { crosscheck("abb"); - if 1 < (last - b) { + if (1 < (last - b)) + { crosscheck("abba"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.Push(ISAd, first, a, limit, trlink); @@ -1174,7 +1189,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir last = b; limit = next; } - } else + } + else { crosscheck("abc"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); @@ -1191,51 +1207,54 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("b"); - if (1 < (b - a)) && (0 <= trlink) { + if ((1 < (b - a)) && (0 <= trlink)) + { crosscheck("ba"); - stack.items[trlink as usize].d = -1; + stack.Items[trlink].d = -1; } - if (a - first) <= (last - b) { + if ((a - first) <= (last - b)) + { crosscheck("bb"); - if 1 < (a - first) { + if (1 < (a - first)) + { crosscheck("bba"); crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); stack.Push(ISAd, b, last, limit, trlink); last = a; } - else if 1 < (last - b) { + else if (1 < (last - b)) + { 
crosscheck("bbb"); first = b; } else { crosscheck("bbc"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } } - } else + } + else { crosscheck("bc"); - if 1 < (last - b) { + if (1 < (last - b)) + { crosscheck("bca"); crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); stack.Push(ISAd, first, a, limit, trlink); first = b; } - else if 1 < (a - first) { + else if (1 < (a - first)) + { crosscheck("bcb"); last = a; } else { crosscheck("bcc"); - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } @@ -1247,7 +1266,8 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("c"); - if budget.check(last - first) { + if (budget.Check(last - first)) + { crosscheck("ca"); limit = tr_ilg(last - first); ISAd += incr; @@ -1255,13 +1275,12 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir else { crosscheck("cb"); - if 0 <= trlink { + if (0 <= trlink) + { crosscheck("cba"); - stack.items[trlink as usize].d = -1; + stack.Items[trlink].d = -1; } - if !stack - .pop(&mut ISAd, &mut first, &mut last, &mut limit, &mut trlink) - .is_ok() + if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) { return; } From dfd55f2aca99cce4c5cc3b32da8cbb23fbb2fd60 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 28 Oct 2021 03:58:58 -0500 Subject: [PATCH 091/325] Target .NET 6 everywhere --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 2 +- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 4 ++-- .../DeltaQ.SuffixSorting.Abstractions.csproj | 2 +- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 2 +- .../DeltaQ.SuffixSorting.SAIS.csproj | 2 +- src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj | 4 ++-- src/deltaq/deltaq.csproj | 2 +- test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj | 6 +++--- .../DeltaQ.SuffixSorting.SAIS.Tests.csproj | 6 +++--- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 6 +++--- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 705a609..c098b13 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -1,7 +1,7 @@  - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee 0.2.0 diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index 780edd6..c602d2b 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -2,7 +2,7 @@ Exe - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee true @@ -11,7 +11,7 @@ - + diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 96341f1..5f91da1 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -1,7 +1,7 @@ - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee true diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index ccd0faf..e194dfa 100644 --- 
a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -1,7 +1,7 @@ - net5.0 + net6.0 DeltaQ jzebedee true diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index f914286..86a73cf 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -1,7 +1,7 @@  - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee true diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 08ccea7..d093e70 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -1,7 +1,7 @@  - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee true @@ -11,7 +11,7 @@ - + diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index de82084..3d4aa29 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -1,7 +1,7 @@  - net5.0;netstandard2.0 + net6.0;netstandard2.0 DeltaQ jzebedee 0.2.0 diff --git a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj index e7bd74c..6a6dbba 100644 --- a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj +++ b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj @@ -1,19 +1,19 @@  - net5.0;net461 + net6.0;net461 false latest - + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj index 6ca859f..9b54fd2 100644 --- 
a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj @@ -1,19 +1,19 @@  - net5.0;net461 + net6.0;net461 false latest - + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index e836fcc..8579195 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -1,19 +1,19 @@  - net5.0;net461 + net6.0;net461 false latest - + runtime; build; native; contentfiles; analyzers; buildtransitive all - + runtime; build; native; contentfiles; analyzers; buildtransitive all From ea5d669f74eaff377a7b21791a09afca3a80d5dc Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 28 Oct 2021 03:59:13 -0500 Subject: [PATCH 092/325] Add LibDivSufSort test stub --- deltaq.sln | 11 +++++++-- ...Q.SuffixSorting.LivDivSufSort.Tests.csproj | 23 +++++++++++++++++++ .../UnitTest1.cs | 13 +++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs diff --git a/deltaq.sln b/deltaq.sln index 69b5405..01ff79d 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.30914.41 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31825.309 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" EndProject @@ -29,6 +29,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Utility.Memory", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") 
= "DeltaQ.SuffixSorting.LibDivSufSort", "src\DeltaQ.SuffixSorting.LibDivSufSort\DeltaQ.SuffixSorting.LibDivSufSort.csproj", "{E89B007E-0BDE-4642-B40F-CCB7569F88B8}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.LivDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LivDivSufSort.Tests\DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -79,6 +81,10 @@ Global {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Debug|Any CPU.Build.0 = Debug|Any CPU {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Release|Any CPU.ActiveCfg = Release|Any CPU {E89B007E-0BDE-4642-B40F-CCB7569F88B8}.Release|Any CPU.Build.0 = Release|Any CPU + {5486E391-BFF9-4ED9-8383-032AE249C588}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5486E391-BFF9-4ED9-8383-032AE249C588}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5486E391-BFF9-4ED9-8383-032AE249C588}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5486E391-BFF9-4ED9-8383-032AE249C588}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -95,6 +101,7 @@ Global {2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {E89B007E-0BDE-4642-B40F-CCB7569F88B8} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} + {5486E391-BFF9-4ED9-8383-032AE249C588} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj new file mode 100644 index 0000000..5f88768 --- /dev/null +++ 
b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj @@ -0,0 +1,23 @@ + + + + net6.0 + enable + + false + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs new file mode 100644 index 0000000..bc8a1a0 --- /dev/null +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs @@ -0,0 +1,13 @@ +using Xunit; + +namespace DeltaQ.SuffixSorting.LivDivSufSort.Tests +{ + public class UnitTest1 + { + [Fact] + public void Test1() + { + + } + } +} \ No newline at end of file From ac4240c8fb2d568b3248e66df913348e61480b6b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 12:05:15 -0500 Subject: [PATCH 093/325] Update RsDivSufSort Implement construct_SA Fix ALPHABET_SIZE having the wrong value Add IntAccessor (probably not needed) Fix Remain not being set in Budget ctor --- .../RsDivSufSort.cs | 151 +++++++++++++++++- 1 file changed, 144 insertions(+), 7 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 754cf55..54e6ec9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -12,7 +12,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { public partial class DivSufSort { - private const int ALPHABET_SIZE = sizeof(byte) + 1; + private const int ALPHABET_SIZE = byte.MaxValue + 1; private const int BUCKET_A_SIZE = ALPHABET_SIZE; private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; @@ -36,13 +36,135 @@ public void divsufsort(ReadOnlySpan T, Span SA) // break; } - var result = sort_typeBstar(T, SA); + var result = sort_typeBstar(new IntAccessor(T), SA); construct_SA(T, SA, result.A, result.B, 
result.m); } - private void construct_SA(ReadOnlySpan t, Span sA, Span a, Span b, int m) + private static void construct_SA(ReadOnlySpan T, Span SA, Span A, Span B, int m) { - throw new NotImplementedException(); + Idx n = T.Length; + + BBucket Bb = new(B); + BStarBucket Bstar = new(B); + + SAPtr i; + SAPtr j; + Idx k; + Idx s; + Idx c0; + Idx c2; + if (0 < m) + { + // Construct the sorted order of type B suffixes by using the + // sorted order of type B* suffixes + Idx c1 = ALPHABET_SIZE - 2; + while (0 <= c1) + { + // Scan the suffix array from right to left + i = Bstar[(c1, c1 + 1)]; + j = A[c1 + 1] - 1; + k = 0; + c2 = -1; + + while (i <= j) + { + s = SA[j]; + if (0 < s) + { + Trace.Assert(T[s] == c1); + Trace.Assert((s + 1) < n); + Trace.Assert(T[s] <= T[s + 1]); + + //TODO: check this + //SA[j] = !s; + SA[j] = ~s; + s -= 1; + c0 = T[s]; + if ((0 < s) && (T[s - 1] > c0)) + { + //TODO: check this + //s = !s; + s = ~s; + } + if (c0 != c2) + { + if (0 <= c2) + { + Bb[(c2, c1)] = k; + } + c2 = c0; + k = Bb[(c2, c1)]; + } + Trace.Assert(k < j); + SA[k] = s; + k -= 1; + } + else + { + Trace.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); + //TODO: check this + //SA[j] = !s; + SA[j] = ~s; + } + + // iter + j -= 1; + } + + // iter + c1 -= 1; + } + } + + // Construct the suffix array by using the sorted order of type B suffixes + c2 = T[n - 1]; + k = A[c2]; + //TODO: check this + //SA[k] = T[n - 2] < c2 ? !(n - 1) : n - 1; + SA[k] = T[n - 2] < c2 ? 
~(n - 1) : n - 1; + k += 1; + // Scan the suffix array from left to right + { + // init + i = 0; + j = n; + + while (i < j) + { + s = SA[i]; + if (0 < s) + { + Trace.Assert(T[s - 1] >= T[s]); + s -= 1; + c0 = T[s]; + if ((s == 0) || (T[s - 1] < c0)) + { + //TODO: check this + //s = !s; + s = ~s; + } + if (c0 != c2) + { + A[c2] = k; + c2 = c0; + k = A[c2]; + } + Trace.Assert(i < k); + SA[k] = s; + k += 1; + } + else + { + Trace.Assert(s < 0); + //TODO: check this + //SA[i] = !s; + SA[i] = ~s; + } + + // iter + i += 1; + } + } } public ref struct SortTypeBstarResult @@ -78,8 +200,17 @@ public ref struct BBucket // } //} + public ref struct IntAccessor + { + public readonly ReadOnlySpan span; + public IntAccessor(ReadOnlySpan span) => this.span = span; + + public int this[Idx index] => span[index]; + public int Length => span.Length; + } + //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { - public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) + public SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) { var n = T.Length; @@ -161,6 +292,8 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) } m = n - m; + //JZ: so far, so good + // Note: A type B* suffix is lexicographically smaller than a type B suffix // that beings with the same first two characters. @@ -190,9 +323,11 @@ public SortTypeBstarResult sort_typeBstar(in ReadOnlySpan T, Span SA) SAPtr PAb = n - m; SAPtr ISAb = m; - for (i = m - 2; i > 0; i--) + //for i in (0.. = (m - 2)).rev() { + //TODO: get rid of this Enumerable + //foreach(var ini in Enumerable.Range(0, m - 2).Reverse()) + for(i = m - 2; i > 0; i--) { - //for i in (0.. 
= (m - 2)).rev() { t = SA[PAb + i]; c0 = T[t]; c1 = T[t + 1]; @@ -475,7 +610,9 @@ private ref struct Budget public Budget(int chance, int incVal) : this() { Chance = chance; + Remain = incVal; IncVal = incVal; + Count = 0; } public bool Check(int size) From 0aae182ca63a43d62ff4756086ef74d536d39a2f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 15:27:15 -0500 Subject: [PATCH 094/325] Fix A and B bucket spans not being zeroed --- .../RsDivSufSort.cs | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 54e6ec9..8b000a3 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1,24 +1,20 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using SAPtr = System.Int32; using Idx = System.Int32; +using SAPtr = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort { - public partial class DivSufSort + public static class DivSufSort { private const int ALPHABET_SIZE = byte.MaxValue + 1; private const int BUCKET_A_SIZE = ALPHABET_SIZE; private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; - public void divsufsort(ReadOnlySpan T, Span SA) + public static void divsufsort(ReadOnlySpan T, Span SA) { - Debug.Assert(T.Length == SA.Length); + Trace.Assert(T.Length == SA.Length); var n = T.Length; @@ -210,12 +206,13 @@ public ref struct IntAccessor } //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { - public SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) + public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) { var n = T.Length; - using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE); - using var owner_B = 
SpanOwner.Allocate(BUCKET_B_SIZE); + //These MUST be zeroed first + using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE, AllocationMode.Clear); + using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE, AllocationMode.Clear); Span A = owner_A.Span; Span B = owner_B.Span; @@ -358,7 +355,7 @@ public SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) if (1 < (j - i)) { - Debugger.Break(); + throw new NotImplementedException(); //SA_dump!(&SA.range(i..j), "sssort(A)"); //sssort::sssort( // T, @@ -607,7 +604,7 @@ private ref struct Budget public int IncVal; public int Count; - public Budget(int chance, int incVal) : this() + public Budget(int chance, int incVal) { Chance = chance; Remain = incVal; @@ -636,7 +633,7 @@ public bool Check(int size) } /// Tandem repeat sort - private void trsort(SAPtr ISA, Span SA, int n, int depth) + private static void trsort(SAPtr ISA, Span SA, int n, int depth) { SAPtr ISAd; SAPtr first; @@ -770,7 +767,7 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) } private const Idx TR_INSERTIONSORT_THRESHOLD = 8; - private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr first, ref SAPtr last, Budget budget) + private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr first, ref SAPtr last, Budget budget) { SAPtr a = 0; SAPtr b = 0; @@ -1427,42 +1424,42 @@ private void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr fir } // end PASCAL } - private void SA_dump(Span span, string v) + private static void SA_dump(Span span, string v) { throw new NotImplementedException(); } - private int tr_pivot(Span sA, int iSAd, int first, int last) + private static int tr_pivot(Span sA, int iSAd, int first, int last) { throw new NotImplementedException(); } - private void tr_heapsort(int iSAd, Span sA, int first, int v) + private static void tr_heapsort(int iSAd, Span sA, int first, int v) { throw new NotImplementedException(); } - private void tr_insertionsort(Span sA, int 
iSAd, int first, int last) + private static void tr_insertionsort(Span sA, int iSAd, int first, int last) { throw new NotImplementedException(); } - private void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) + private static void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) { throw new NotImplementedException(); } - private void tr_copy(int iSA, Span sA, int first, int a, int b, int last, int v) + private static void tr_copy(int iSA, Span sA, int first, int a, int b, int last, int v) { throw new NotImplementedException(); } - private void crosscheck(string v, params object[] args) + private static void crosscheck(string v, params object[] args) { throw new NotImplementedException(); } - private void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) + private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) { throw new NotImplementedException(); } From ef1d0a3b3998eaa401a113c5d52b5dcb09dca0fd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 15:57:25 -0500 Subject: [PATCH 095/325] Add sssort stub --- .../RsDivSufSort.cs | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 8b000a3..1cca101 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -323,7 +323,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) //for i in (0.. 
= (m - 2)).rev() { //TODO: get rid of this Enumerable //foreach(var ini in Enumerable.Range(0, m - 2).Reverse()) - for(i = m - 2; i > 0; i--) + for (i = m - 2; i > 0; i--) { t = SA[PAb + i]; c0 = T[t]; @@ -357,19 +357,17 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) { throw new NotImplementedException(); //SA_dump!(&SA.range(i..j), "sssort(A)"); - //sssort::sssort( - // T, - // SA, - // PAb, - // SAPtr(i), - // SAPtr(j), - // buf, - // bufsize, - // 2, - // n, - // SA[i] == (m - 1), - - //); + sssort( + T, + SA, + PAb, + ref i, + (SAPtr)j, + ref buf, + ref bufsize, + 2, + n, + SA[i] == (m - 1)); //SA_dump!(&SA.range(i..j), "sssort(B)"); } @@ -556,7 +554,14 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) return new SortTypeBstarResult { A = A, B = B, m = m }; } - //} + + /// + /// Substring sort + /// + private static void sssort(IntAccessor T, Span SA, SAPtr PA, ref SAPtr first, SAPtr last, ref SAPtr buf, ref Idx bufsize, Idx depth, Idx n, bool lastsuffix) + { + throw new NotImplementedException(); + } private static readonly int[] lg_table_array = new[] { From 8f87b3f4cbc4694d00dea277967aa3d6e169ceee Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 16:18:10 -0500 Subject: [PATCH 096/325] Add stub implementation of sssort --- .../RsDivSufSort.cs | 494 +++++++++++++++++- 1 file changed, 490 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 1cca101..e2181fc 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -555,11 +555,496 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) return new SortTypeBstarResult { A = A, B = B, m = m }; } - /// + private const Idx SS_BLOCKSIZE = 1024; + + /// /// Substring sort /// private static void sssort(IntAccessor T, Span SA, SAPtr PA, ref SAPtr first, SAPtr last, ref SAPtr buf, ref Idx bufsize, Idx depth, Idx n, bool lastsuffix) { + // Note: in most of this file "PA" seems to mean "Partition Array" - we're + // working on a slice of SA. This is also why SA (or a mutable reference to it) + // is passed around, so we don't run into lifetime issues. 
+ + SAPtr a; + SAPtr b; + SAPtr middle; + SAPtr curbuf; + Idx j; + Idx k; + Idx curbufsize; + Idx limit; + Idx i; + + if (lastsuffix) + { + first += 1; + } + + limit = ss_isqrt(last - first); + if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < limit)) + { + if (SS_BLOCKSIZE < limit) + { + limit = SS_BLOCKSIZE; + } + middle = last - limit; + buf = middle; + bufsize = limit; + } + else + { + middle = last; + limit = 0; + } + + // ESPRESSO + a = first; + i = 0; + while (SS_BLOCKSIZE < (middle - a)) + { + crosscheck("ss_mintrosort (espresso) a={} depth={}", a - PA, depth); + ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); + + curbufsize = (last - (a + SS_BLOCKSIZE)); + curbuf = a + SS_BLOCKSIZE; + if (curbufsize <= bufsize) + { + curbufsize = bufsize; + curbuf = buf; + } + + // FRESCO + b = a; + k = SS_BLOCKSIZE; + j = i; + while ((j & 1) > 0) + { + crosscheck("ss_swapmerge {}", k); + ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); + + // iter + b -= k; + k <<= 1; + j >>= 1; + } + + // iter + a += SS_BLOCKSIZE; + i += 1; + } + + crosscheck("ss_mintrosort (pre-mariachi) a={} depth={}", a - PA, depth); + ss_mintrosort(T, SA, PA, a, middle, depth); + + //SA_dump!(&SA.range(first..last), "pre-mariachi"); + + // MARIACHI + k = SS_BLOCKSIZE; + while (i != 0) + { + if ((i & 1) > 0) + { + //SA_dump!(&SA.range(first..last), "in-mariachi pre-swap"); + crosscheck( + "a={} middle={} bufsize={} depth={}", + a - first, + middle - first, + bufsize, + depth + ); + ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); + //SA_dump!(&SA.range(first..last), "in-mariachi post-swap"); + a -= k; + } + + // iter + k <<= 1; + i >>= 1; + } + //SA_dump!(&SA.range(first..last), "post-mariachi"); + + if (limit != 0) + { + crosscheck("ss_mintrosort limit!=0"); + ss_mintrosort(T, SA, PA, middle, last, depth); + //SA_dump!(&SA.range(first..last), "post-mintrosort limit!=0"); + ss_inplacemerge(T, SA, PA, first, middle, last, depth); + 
//SA_dump!(&SA.range(first..last), "post-inplacemerge limit!=0"); + } + //SA_dump!(&SA.range(first..last), "post-limit!=0"); + + if (lastsuffix) + { + crosscheck("lastsuffix!"); + + // Insert last type B* suffix + Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; + //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; + //let SAI = SuffixArray(&mut PAi); + + a = first; + i = SA[first - 1]; + + // CELINE + while ((a < last) && ((SA[a] < 0) || (0 < ss_compare(T, PAi, (SAPtr)0, SA, PA + SA[a], depth)))) + { + // body + SA[a - 1] = SA[a]; + + // iter + a += 1; + } + SA[a - 1] = i; + } + } + + private static int ss_compare(IntAccessor t, Span pAi, int v1, Span sA, int v2, int depth) + { + throw new NotImplementedException(); + } + + private static void ss_inplacemerge(IntAccessor t, Span sA, int pA, int first, int middle, int last, int depth) + { + throw new NotImplementedException(); + } + + private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, int b, int v2, int curbuf, int curbufsize, int depth) + { + throw new NotImplementedException(); + } + + /// + /// Multikey introsort for medium size groups + /// + private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) + { + //macro_rules! PA { + // ($x: expr) => { + // SA[PA + $x] + // }; + //}; + + let mut stack = Stack::new(); + + let mut a: SAPtr; + let mut b: SAPtr; + let mut c: SAPtr; + let mut d: SAPtr; + let mut e: SAPtr; + let mut f: SAPtr; + + let mut s: Idx; + let mut t: Idx; + + let mut limit: Idx; + let mut v: Idx; + let mut x: Idx = 0; + + // RENEE + limit = ss_ilg(last - first); + loop { + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + { + if (1 < (last - first)) + { + ss_insertionsort(T, SA, PA, first, last, depth); + } + if !stack + .pop(&mut first, &mut last, &mut depth, &mut limit) + .is_ok() + { + return; + } + continue; + } + + let Td = depth; + macro_rules! 
Td { + ($x: expr) => { + T.get(Td + $x) + }; + } + macro_rules! TdPAStar { + ($x: expr) => { + Td!(PA!(SA[$x])) + }; + } + + let old_limit = limit; + limit -= 1; + if (old_limit == 0) + { + SA_dump!(&SA.range(first..last), "before heapsort"); + ss_heapsort(T, Td, SA, PA, first, (last - first).into()); + SA_dump!(&SA.range(first..last), "after heapsort"); + } + + if (limit < 0) + { + a = first + 1; + v = TdPAStar!(first); + + // DAVE + while a < last { + x = TdPAStar!(a); + if (x != v) + { + if (1 < (a - first)) + { + break; + } + v = x; + first = a; + } + + // loop iter + a += 1; + } + + if Td!(PA!(SA[first]) - 1) < v { + first = ss_partition(SA, PA, first, a, depth); + } + if (a - first) <= (last - a) { + if 1 < (a - first) { + stack.push(a, last, depth, -1); + last = a; + depth += 1; + limit = ss_ilg(a - first); + } + else + { + first = a; + limit = -1; + } + } else + { + if 1 < (last - a) { + stack.push(first, a, depth + 1, ss_ilg(a - first)); + first = a; + limit = -1; + } + else + { + last = a; + depth += 1; + limit = ss_ilg(a - first); + } + } + continue; + } + + // choose pivot + a = ss_pivot(T, Td, SA, PA, first, last); + v = TdPAStar!(a); + SA.swap(first, a); + + // partition + // NORA + b = first; + loop { + b += 1; + if !(b < last) { + break; + } + x = TdPAStar!(b); + if !(x == v) { + break; + } + // body + } + a = b; + if (a < last) && (x < v) { + // STAN + loop { + b += 1; + if !(b < last) { + break; + } + x = TdPAStar!(b); + if !(x <= v) { + break; + } + // body + if x == v { + SA.swap(b, a); + a += 1; + } + } + } + + // NATHAN + c = last; + loop { + c -= 1; + if !(b < c) { + break; + } + x = TdPAStar!(c); + if !(x == v) { + break; + } + // body + } + d = c; + if (b < d) && (x > v) { + // JACOB + loop { + c -= 1; + if !(b < c) { + break; + } + x = TdPAStar!(c); + if !(x >= v) { + break; + } + // body + if x == v { + SA.swap(c, d); + d -= 1; + } + } + } + + // RITA + while b < c { + SA.swap(b, c); + // ROMEO + loop { + b += 1; + if !(b < c) { + break; 
+ } + x = TdPAStar!(b); + if !(x <= v) { + break; + } + // body + if x == v { + SA.swap(b, a); + a += 1; + } + } + // JULIET + loop { + c -= 1; + if !(b < c) { + break; + } + x = TdPAStar!(c); + if !(x >= v) { + break; + } + // body + if x == v { + SA.swap(c, d); + d -= 1; + } + } + } + + if a <= d { + c = b - 1; + s = (a - first).0; + t = (b - a).0; + if s > t { + s = t; + } + + // JOSHUA + e = first; + f = b - s; + while 0 < s { + SA.swap(e, f); + s -= 1; + e += 1; + f += 1; + } + s = (d - c).0; + t = (last - d - 1).0; + if s > t { + s = t; + } + // BERENICE + e = b; + f = last - s; + while 0 < s { + SA.swap(e, f); + s -= 1; + e += 1; + f += 1; + } + + a = first + (b - a); + c = last - (d - c); + b = if v <= Td!(PA!(SA[a]) - 1) { + a + } + else + { + let res = ss_partition(SA, PA, a, c, depth); + res + }; + + if (a - first) <= (last - c) { + if (last - c) <= (c - b) { + stack.push(b, c, depth + 1, ss_ilg(c - b)); + stack.push(c, last, depth, limit); + last = a; + } else if (a - first) <= (c - b) { + stack.push(c, last, depth, limit); + stack.push(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else + { + stack.push(c, last, depth, limit); + stack.push(first, a, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); + } + } else + { + if (a - first) <= (c - b) { + stack.push(b, c, depth + 1, ss_ilg(c - b)); + stack.push(first, a, depth, limit); + first = c; + } else if (last - c) <= (c - b) { + stack.push(first, a, depth, limit); + stack.push(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else + { + stack.push(first, a, depth, limit); + stack.push(c, last, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); + } + } + } + else + { + limit += 1; + if Td!(PA!(SA[first]) - 1) < v { + first = ss_partition(SA, PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } + } + + /// + /// Fast sqrt, using lookup tables + /// + private static int ss_isqrt(int v) + { + //TODO: implement me + 
return (Idx)Math.Sqrt(v); + throw new NotImplementedException(); } @@ -737,8 +1222,8 @@ private struct StackItem private const int STACK_SIZE = 64; private ref struct TrStack { - public readonly Span Items; - public int Size; + private readonly Span Items; + private int Size; public TrStack(Span items) { @@ -1459,9 +1944,10 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int throw new NotImplementedException(); } + [Conditional("DEBUG")] private static void crosscheck(string v, params object[] args) { - throw new NotImplementedException(); + Debug.WriteLine(format: v, args: args); } private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) From 7453168397e2b0f810ff8916ab8f487f659ce998 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 16:21:34 -0500 Subject: [PATCH 097/325] Fixup RsDivSufSort Make TrStack fields public again Remove NIE throw before calling out to sssort --- .../RsDivSufSort.cs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index e2181fc..7f598c4 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -355,7 +355,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) if (1 < (j - i)) { - throw new NotImplementedException(); //SA_dump!(&SA.range(i..j), "sssort(A)"); sssort( T, @@ -557,7 +556,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) private const Idx SS_BLOCKSIZE = 1024; - /// + /// /// Substring sort /// private static void sssort(IntAccessor T, Span SA, SAPtr PA, ref SAPtr first, SAPtr last, ref SAPtr buf, ref Idx bufsize, Idx depth, Idx n, bool lastsuffix) @@ -719,11 +718,11 @@ private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, in /// private 
static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) { - //macro_rules! PA { - // ($x: expr) => { - // SA[PA + $x] - // }; - //}; + macro_rules! PA { + ($x: expr) => { + SA[PA + $x] + }; + }; let mut stack = Stack::new(); @@ -1222,8 +1221,8 @@ private struct StackItem private const int STACK_SIZE = 64; private ref struct TrStack { - private readonly Span Items; - private int Size; + public readonly Span Items; + public int Size; public TrStack(Span items) { @@ -1947,7 +1946,7 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int [Conditional("DEBUG")] private static void crosscheck(string v, params object[] args) { - Debug.WriteLine(format: v, args: args); + //Debug.WriteLine(format: v, args: args); } private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) From 4b9f0d1a13e44bd3118d0c57432e89f06608331b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 16:53:19 -0500 Subject: [PATCH 098/325] Perform first round of fixups for ss_mintrosort Add sssort stack implementation --- .../RsDivSufSort.cs | 329 ++++++++++++------ 1 file changed, 213 insertions(+), 116 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 7f598c4..666f7c2 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -713,70 +713,105 @@ private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, in throw new NotImplementedException(); } + + private struct SsStackItem + { + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + } + + private const int SS_STACK_SIZE = 16; + private ref struct SsStack + { + public readonly Span Items; + public int Size; + + public SsStack(Span items) + { + Items = items; + Size = 0; + } + + public void Push(SAPtr a, 
SAPtr b, SAPtr c, Idx d) + { + Debug.Assert(Size < Items.Length); + ref SsStackItem item = ref Items[Size++]; + item.a = a; + item.b = b; + item.c = c; + item.d = d; + } + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) + { + //Debug.Assert(Size > 0); + if (Size == 0) return false; + + ref SsStackItem item = ref Items[--Size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + return true; + } + } + + private const Idx SS_INSERTIONSORT_THRESHOLD = 8; + /// /// Multikey introsort for medium size groups /// private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) { - macro_rules! PA { - ($x: expr) => { - SA[PA + $x] - }; - }; + //PA($x) => SA[PA + $x] - let mut stack = Stack::new(); + var stack = new SsStack(); - let mut a: SAPtr; - let mut b: SAPtr; - let mut c: SAPtr; - let mut d: SAPtr; - let mut e: SAPtr; - let mut f: SAPtr; + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr d; + SAPtr e; + SAPtr f; - let mut s: Idx; - let mut t: Idx; + Idx s; + Idx t; - let mut limit: Idx; - let mut v: Idx; - let mut x: Idx = 0; + Idx limit; + Idx v; + Idx x = 0; // RENEE limit = ss_ilg(last - first); - loop { + while (true) + { if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) { if (1 < (last - first)) { ss_insertionsort(T, SA, PA, first, last, depth); } - if !stack - .pop(&mut first, &mut last, &mut depth, &mut limit) - .is_ok() + if (!stack.Pop(ref first, ref last, ref depth, ref limit)) { return; } continue; } - let Td = depth; - macro_rules! Td { - ($x: expr) => { - T.get(Td + $x) - }; - } - macro_rules! 
TdPAStar { - ($x: expr) => { - Td!(PA!(SA[$x])) - }; - } + /*readonly*/ + var Td = depth; + //Td!($x) => T[Td + $x] + //TdPAStar!($x) => Td!(PA!(SA[$x])) - let old_limit = limit; + /*readonly*/ + var old_limit = limit; limit -= 1; if (old_limit == 0) { - SA_dump!(&SA.range(first..last), "before heapsort"); - ss_heapsort(T, Td, SA, PA, first, (last - first).into()); - SA_dump!(&SA.range(first..last), "after heapsort"); + //SA_dump!(&SA.range(first..last), "before heapsort"); + ss_heapsort(T, Td, SA, PA, first, (last - first)); + //SA_dump!(&SA.range(first..last), "after heapsort"); } if (limit < 0) @@ -801,12 +836,15 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ a += 1; } - if Td!(PA!(SA[first]) - 1) < v { + if (Td!(PA!(SA[first]) - 1) < v) + { first = ss_partition(SA, PA, first, a, depth); } - if (a - first) <= (last - a) { - if 1 < (a - first) { - stack.push(a, last, depth, -1); + if ((a - first) <= (last - a)) + { + if (1 < (a - first)) + { + stack.Push(a, last, depth, -1); last = a; depth += 1; limit = ss_ilg(a - first); @@ -816,10 +854,12 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ first = a; limit = -1; } - } else + } + else { - if 1 < (last - a) { - stack.push(first, a, depth + 1, ss_ilg(a - first)); + if (1 < (last - a)) + { + stack.Push(first, a, depth + 1, ss_ilg(a - first)); first = a; limit = -1; } @@ -836,37 +876,45 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ // choose pivot a = ss_pivot(T, Td, SA, PA, first, last); v = TdPAStar!(a); - SA.swap(first, a); + SA.Swap(first, a); // partition // NORA b = first; - loop { + while (true) + { b += 1; - if !(b < last) { + if (!(b < last)) + { break; } x = TdPAStar!(b); - if !(x == v) { + if (!(x == v)) + { break; } // body } a = b; - if (a < last) && (x < v) { + if ((a < last) && (x < v)) + { // STAN - loop { + while (true) + { b += 1; - if !(b < last) { + if (!(b < last)) + { break; } x = TdPAStar!(b); - if !(x 
<= v) { + if (!(x <= v)) + { break; } // body - if x == v { - SA.swap(b, a); + if (x == v) + { + SA.Swap(b, a); a += 1; } } @@ -874,69 +922,86 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ // NATHAN c = last; - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = TdPAStar!(c); - if !(x == v) { + if (!(x == v)) + { break; } // body } d = c; - if (b < d) && (x > v) { + if ((b < d) && (x > v)) + { // JACOB - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = TdPAStar!(c); - if !(x >= v) { + if (!(x >= v)) + { break; } // body - if x == v { - SA.swap(c, d); + if (x == v) + { + SA.Swap(c, d); d -= 1; } } } // RITA - while b < c { - SA.swap(b, c); + while (b < c) + { + SA.Swap(b, c); // ROMEO - loop { + while (true) + { b += 1; - if !(b < c) { + if (!(b < c)) + { break; } x = TdPAStar!(b); - if !(x <= v) { + if (!(x <= v)) + { break; } // body - if x == v { - SA.swap(b, a); + if (x == v) + { + SA.Swap(b, a); a += 1; } } // JULIET - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = TdPAStar!(c); - if !(x >= v) { + if (!(x >= v)) + { break; } // body - if x == v { - SA.swap(c, d); + if (x == v) + { + SA.Swap(c, d); d -= 1; } } @@ -953,22 +1018,25 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ // JOSHUA e = first; f = b - s; - while 0 < s { - SA.swap(e, f); + while (0 < s) + { + SA.Swap(e, f); s -= 1; e += 1; f += 1; } s = (d - c).0; t = (last - d - 1).0; - if s > t { + if (s > t) + { s = t; } // BERENICE e = b; f = last - s; - while 0 < s { - SA.swap(e, f); + while (0 < s) + { + SA.Swap(e, f); s -= 1; e += 1; f += 1; @@ -976,47 +1044,50 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ a = first + (b - a); c = last - (d - c); - b = if v <= Td!(PA!(SA[a]) - 1) { - a - } - else + b = v <= Td!(PA!(SA[a]) - 1) ? 
a : ss_partition(SA, PA, a, c, depth); + + if ((a - first) <= (last - c)) { - let res = ss_partition(SA, PA, a, c, depth); - res - }; - - if (a - first) <= (last - c) { - if (last - c) <= (c - b) { - stack.push(b, c, depth + 1, ss_ilg(c - b)); - stack.push(c, last, depth, limit); + if ((last - c) <= (c - b)) + { + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(c, last, depth, limit); last = a; - } else if (a - first) <= (c - b) { - stack.push(c, last, depth, limit); - stack.push(b, c, depth + 1, ss_ilg(c - b)); + } + else if ((a - first) <= (c - b)) + { + stack.Push(c, last, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); last = a; - } else + } + else { - stack.push(c, last, depth, limit); - stack.push(first, a, depth, limit); + stack.Push(c, last, depth, limit); + stack.Push(first, a, depth, limit); first = b; last = c; depth += 1; limit = ss_ilg(c - b); } - } else + } + else { - if (a - first) <= (c - b) { - stack.push(b, c, depth + 1, ss_ilg(c - b)); - stack.push(first, a, depth, limit); + if ((a - first) <= (c - b)) + { + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(first, a, depth, limit); first = c; - } else if (last - c) <= (c - b) { - stack.push(first, a, depth, limit); - stack.push(b, c, depth + 1, ss_ilg(c - b)); + } + else if ((last - c) <= (c - b)) + { + stack.Push(first, a, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); first = c; - } else + } + else { - stack.push(first, a, depth, limit); - stack.push(c, last, depth, limit); + stack.Push(first, a, depth, limit); + stack.Push(c, last, depth, limit); first = b; last = c; depth += 1; @@ -1027,7 +1098,8 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ else { limit += 1; - if Td!(PA!(SA[first]) - 1) < v { + if (Td!(PA!(SA[first]) - 1) < v) + { first = ss_partition(SA, PA, first, last, depth); limit = ss_ilg(last - first); } @@ -1036,6 +1108,31 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ } } + 
private static int ss_pivot(IntAccessor t, int td, Span sA, int pA, int first, int last) + { + throw new NotImplementedException(); + } + + private static int ss_partition(Span sA, int pA, int first, int a, int depth) + { + throw new NotImplementedException(); + } + + private static void ss_insertionsort(IntAccessor t, Span sA, int pA, int first, int last, int depth) + { + throw new NotImplementedException(); + } + + private static int ss_ilg(int v) + { + throw new NotImplementedException(); + } + + private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int first, object p) + { + throw new NotImplementedException(); + } + /// /// Fast sqrt, using lookup tables /// @@ -1209,7 +1306,7 @@ private static void trsort(SAPtr ISA, Span SA, int n, int depth) } } - private struct StackItem + private struct TrStackItem { public SAPtr a; public SAPtr b; @@ -1218,13 +1315,13 @@ private struct StackItem public Idx e; } - private const int STACK_SIZE = 64; + private const int TR_STACK_SIZE = 64; private ref struct TrStack { - public readonly Span Items; + public readonly Span Items; public int Size; - public TrStack(Span items) + public TrStack(Span items) { Items = items; Size = 0; @@ -1233,7 +1330,7 @@ public TrStack(Span items) public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) { Debug.Assert(Size < Items.Length); - ref StackItem item = ref Items[Size++]; + ref TrStackItem item = ref Items[Size++]; item.a = a; item.b = b; item.c = c; @@ -1245,7 +1342,7 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) //Debug.Assert(Size > 0); if (Size == 0) return false; - ref StackItem item = ref Items[--Size]; + ref TrStackItem item = ref Items[--Size]; a = item.a; b = item.b; c = item.c; @@ -1266,7 +1363,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA Idx next; Idx trlink = -1; - TrStack stack = new(stackalloc StackItem[STACK_SIZE]); + TrStack stack = new(stackalloc TrStackItem[TR_STACK_SIZE]); /* 
macro_rules! ISA { @@ -1432,7 +1529,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA // end if limit == -1 // tandem repeat copy - ref StackItem item = ref stack.Items[--stack.Size]; + ref TrStackItem item = ref stack.Items[--stack.Size]; a = item.b; b = item.c; if (item.d == 0) From 53b4d854aef108d939882d7ab9698e02d4c985c4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 18:52:29 -0500 Subject: [PATCH 099/325] Fixup ss_mintrosort with accessors --- .../RsDivSufSort.cs | 94 +++++++++++++------ 1 file changed, 66 insertions(+), 28 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 666f7c2..400849e 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -759,12 +759,43 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) private const Idx SS_INSERTIONSORT_THRESHOLD = 8; + private ref struct SpanOffsetAccessor + { + private readonly Span _span; + private readonly int _offset; + + public SpanOffsetAccessor(Span span, int offset) + { + _span = span; + _offset = offset; + } + + public ref T this[int index] => ref _span[_offset + index]; + } + + private ref struct TdPAStarAccessor + { + private readonly Span _SA; + private readonly Span _PA; + private readonly IntAccessor _TD; + + public TdPAStarAccessor(ReadOnlySpan T, Span SA, int partitionOffset, int tdOffset) + { + _SA = SA; + _PA = SA[partitionOffset..]; + _TD = new(T[tdOffset..]); + } + + public int this[int index] => _TD[_PA[_SA[index]]]; + } + /// /// Multikey introsort for medium size groups /// - private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) + private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) { - //PA($x) => 
SA[PA + $x] + //PA($x) => + var PA = SA[partitionOffset..];//new SpanOffsetAccessor(SA, PA); var stack = new SsStack(); @@ -790,7 +821,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { if (1 < (last - first)) { - ss_insertionsort(T, SA, PA, first, last, depth); + ss_insertionsort(T, SA, partitionOffset, first, last, depth); } if (!stack.Pop(ref first, ref last, ref depth, ref limit)) { @@ -799,10 +830,14 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ continue; } - /*readonly*/ - var Td = depth; //Td!($x) => T[Td + $x] + var tdOffset = depth; + var Td = T.span[tdOffset..]; + //TdPAStar!($x) => Td!(PA!(SA[$x])) + //TdPAStar!($x) => T[Td + SA[PA + SA[$x]]] + //var TdPAStar = Td[PA[SA[$x]]]; + var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); /*readonly*/ var old_limit = limit; @@ -810,18 +845,19 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ if (old_limit == 0) { //SA_dump!(&SA.range(first..last), "before heapsort"); - ss_heapsort(T, Td, SA, PA, first, (last - first)); + ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); //SA_dump!(&SA.range(first..last), "after heapsort"); } if (limit < 0) { a = first + 1; - v = TdPAStar!(first); + v = TdPAStar[first]; // DAVE - while a < last { - x = TdPAStar!(a); + while (a < last) + { + x = TdPAStar[a]; if (x != v) { if (1 < (a - first)) @@ -836,9 +872,9 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ a += 1; } - if (Td!(PA!(SA[first]) - 1) < v) + if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, PA, first, a, depth); + first = ss_partition(SA, partitionOffset, first, a, depth); } if ((a - first) <= (last - a)) { @@ -874,8 +910,8 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ } // choose pivot - a = ss_pivot(T, Td, SA, PA, first, last); - v = TdPAStar!(a); + a = ss_pivot(T, tdOffset, SA, partitionOffset, first, last); + 
v = TdPAStar[a]; SA.Swap(first, a); // partition @@ -888,7 +924,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(b); + x = TdPAStar[b]; if (!(x == v)) { break; @@ -906,7 +942,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(b); + x = TdPAStar[b]; if (!(x <= v)) { break; @@ -929,7 +965,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(c); + x = TdPAStar[c]; if (!(x == v)) { break; @@ -947,7 +983,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(c); + x = TdPAStar[c]; if (!(x >= v)) { break; @@ -973,7 +1009,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(b); + x = TdPAStar[b]; if (!(x <= v)) { break; @@ -993,7 +1029,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ { break; } - x = TdPAStar!(c); + x = TdPAStar[c]; if (!(x >= v)) { break; @@ -1007,11 +1043,13 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ } } - if a <= d { + if (a <= d) + { c = b - 1; - s = (a - first).0; - t = (b - a).0; - if s > t { + s = (a - first)/*.0*/; + t = (b - a)/*.0*/; + if (s > t) + { s = t; } @@ -1025,8 +1063,8 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ e += 1; f += 1; } - s = (d - c).0; - t = (last - d - 1).0; + s = (d - c)/*.0*/; + t = (last - d - 1)/*.0*/; if (s > t) { s = t; @@ -1044,7 +1082,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ a = first + (b - a); c = last - (d - c); - b = v <= Td!(PA!(SA[a]) - 1) ? a : ss_partition(SA, PA, a, c, depth); + b = v <= Td[PA[SA[a]] - 1] ? 
a : ss_partition(SA, partitionOffset, a, c, depth); if ((a - first) <= (last - c)) { @@ -1098,9 +1136,9 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr PA, /*ref*/ else { limit += 1; - if (Td!(PA!(SA[first]) - 1) < v) + if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, PA, first, last, depth); + first = ss_partition(SA, partitionOffset, first, last, depth); limit = ss_ilg(last - first); } depth += 1; From f320dec73502828291241832d258bf8c89f941d0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 19:00:42 -0500 Subject: [PATCH 100/325] Implement ss_isqrt --- .../RsDivSufSort.cs | 81 ++++++++++++++++++- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 400849e..d937175 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1171,15 +1171,88 @@ private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int throw new NotImplementedException(); } + private static readonly Idx[] sqq_table_array = new[] + { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, + 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, + 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, + 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, + 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, + 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, + 202, 203, 203, 204, 204, 205, 206, 206, 
207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, + 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, + 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, + 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, + 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 + }; + private static ReadOnlySpan sqq_table => sqq_table_array; + /// /// Fast sqrt, using lookup tables /// - private static int ss_isqrt(int v) + private static int ss_isqrt(int x) { - //TODO: implement me - return (Idx)Math.Sqrt(v); + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } - throw new NotImplementedException(); + Idx e; + if ((x & 0xffff_0000) > 0) + { + if ((x & 0xff00_0000) > 0) + { + e = 24 + lg_table[((x >> 24) & 0xff)]; + } + else + { + e = 16 + lg_table[((x >> 16) & 0xff)]; + } + } + else + { + if ((x & 0x0000_ff00) > 0) + { + e = 8 + lg_table[(((x >> 8) & 0xff))]; + } + else + { + e = 0 + lg_table[(((x >> 0) & 0xff))]; + } + }; + + Idx y; + if (e >= 16) + { + y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); + if (e >= 24) + { + y = (y + 1 + x / y) >> 1; + } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + + if (x < (y * y)) + { + return y - 1; + } + else + { + return y; + } } private static readonly int[] lg_table_array = new[] From 00a6d81d93177a0e43c67679f3cd7e1859ac090b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 19:03:50 -0500 Subject: [PATCH 101/325] Implement ss_ilg --- .../RsDivSufSort.cs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index d937175..d6496ae 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1161,9 +1161,19 @@ private static void ss_insertionsort(IntAccessor t, Span sA, int pA, int fi throw new NotImplementedException(); } - private static int ss_ilg(int v) + /// + /// Fast log2, using lookup tables + /// + private static int ss_ilg(int n) { - throw new NotImplementedException(); + if ((n & 0xff00) > 0) + { + return 8 + lg_table[((n >> 8) & 0xff)]; + } + else + { + return 0 + lg_table[((n >> 0) & 0xff)]; + } } private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int first, object p) From 72594bcaa3592cd7b1a6144b5a681918d45c5daa Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 19:06:34 -0500 Subject: [PATCH 102/325] Implement ss_insertionsort --- .../RsDivSufSort.cs | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index d6496ae..00e0072 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1156,9 +1156,61 @@ private static int ss_partition(Span sA, int pA, int first, int a, int dept throw new NotImplementedException(); } - private static void ss_insertionsort(IntAccessor t, Span sA, int pA, int first, int last, int depth) + private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) { - throw new NotImplementedException(); + SAPtr i; + SAPtr j; + Idx t; + Idx r; + + i = last - 2; + // for 1 + while (first <= i) + { + t = SA[i]; + j = i + 1; + + // for 2 + while (true) + { + // cond for 2 + r = ss_compare(T, SA, PA + t, SA, PA + SA[j], depth); + if (!(0 < r)) + { + break; + } + + // body for 2 + + // do while + while (true) + { + SA[j - 1] = SA[j]; + + j += 1; + if (!((j < last) && SA[j] < 0)) + { + break; + } + } + + if (last <= j) + { + break; + } + + // iter for 2 (empty) + } + + if (r == 0) + { + SA[j] = ~SA[j]; + } + SA[j - 1] = t; + + // iter + i -= 1; + } } /// From f7b8769a7da9ac995ad1578f847984f2b0859c93 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 19:16:23 -0500 Subject: [PATCH 103/325] Implement ss_compare --- .../RsDivSufSort.cs | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 00e0072..2c0b0a6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -698,9 +698,46 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, ref SAPtr firs } } - private static int ss_compare(IntAccessor t, Span pAi, int v1, Span sA, int v2, int depth) + /// + /// Compare two suffixes + /// + private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) { - throw new NotImplementedException(); + //TODO: possible perf improvement - JZ + + var U1 = depth + SAp1[p1]; + var U2 = depth + SAp2[p2]; + var U1n = SAp1[p1 + 1] + 2; + var U2n = SAp2[p2 + 1] + 2; + + while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) + { + U1 += 1; + U2 += 1; + } + + if (U1 < U1n) + { + if (U2 < U2n) + { + return T[U1] - T[U2]; + } + else + { + return 1; + } + } + else + { + if (U2 < U2n) + { + return -1; + } + else + { + return 0; + } + } } private static void ss_inplacemerge(IntAccessor t, Span sA, int pA, int first, int middle, int last, int depth) From 259881125c799413f378cd4cb6538da81a89bf3a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 22:20:12 -0500 Subject: [PATCH 104/325] Implement more of ldss --- .../RsDivSufSort.cs | 268 +++++++++++++++++- 1 file changed, 264 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2c0b0a6..a263c24 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -740,9 +740,193 @@ private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span } } - private static void ss_inplacemerge(IntAccessor t, Span sA, int pA, int first, int middle, int last, int depth) + private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) { - throw new NotImplementedException(); + SAPtr p; + SAPtr a; + SAPtr b; + Idx len; + Idx half; + Idx q; + Idx r; + Idx x; + + var original_first = first; + var original_last = last; + + //SA_dump!( + // &SA.range(original_first..original_last), + // "inplacemerge start" + //); + + // FERRIS + while (true) + { + if (SA[last - 1] < 0) + { + x = 1; + p = PA + ~SA[last - 1]; + } + else + { + x = 0; + p = PA + SA[last - 1]; + } + + // LOIS + a = first; + len = (middle - first)/*.0*/; + half = len >> 1; + r = -1; + while (0 < len) + { + b = a + half; + q = ss_compare(T, SA, PA + (0 <= SA[b] ? 
SA[b] : ~SA[b]), SA, p, depth); + if (q < 0) + { + a = b + 1; + half -= (len & 1) ^ 1; + } + else + { + r = q; + } + + // iter + len = half; + half >>= 1; + } + //SA_dump!(&SA.range(original_first..original_last), "post-lois"); + + if (a < middle) + { + if (r == 0) + { + SA[a] = ~SA[a]; + } + ss_rotate(SA, a, middle, last); + //SA_dump!(&SA.range(original_first..original_last), "post-rotate"); + last -= middle - a; + middle = a; + if (first == middle) + { + break; + } + } + + last -= 1; + if (x != 0) + { + // TIMMY + last -= 1; + while (SA[last] < 0) + { + last -= 1; + } + //SA_dump!(&SA.range(original_first..original_last), "post-timmy"); + } + if (middle == last) + { + break; + } + + //SA_dump!(&SA.range(original_first..original_last), "ferris-wrap"); + } + } + + private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) + { + SAPtr a; + SAPtr b; + Idx t; + Idx l; + Idx r; + + var original_first = first; + var original_last = last; + + l = (middle - first)/*.0*/; + r = (last - middle)/*.0*/; + + //SA_dump!(&SA.range(original_first..original_last), "pre-brendan"); + + // BRENDAN + while ((0 < l) && (0 < r)) + { + if (l == r) + { + ss_blockswap(SA, first, middle, l); + //SA_dump!(&SA.range(original_first..original_last), "post-blockswap"); + break; + } + + if (l < r) + { + a = last - 1; + b = middle - 1; + t = SA[a]; + + // ALICE + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + if (b < first) + { + SA[a] = t; + last = a; + r -= l + 1; + if (r <= l) + { + break; + } + a -= 1; + b = middle - 1; + t = SA[a]; + } + } + //SA_dump!(&SA.range(original_first..original_last), "post-alice"); + } + else + { + a = first; + b = middle; + t = SA[a]; + // ROBERT + while (true) + { + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + if (last <= b) + { + SA[a] = t; + first = a + 1; + + l -= r + 1; + if (l <= r) + { + break; + } + a += 1; + b = middle; + t = SA[a]; + } + } + //SA_dump!(&SA.range(original_first..original_last), 
"post-robert"); + } + } + } + + private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) + { + for (int i = 0; i < n; i++) + { + SA.Swap(a + i, b + i); + } } private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, int b, int v2, int curbuf, int curbufsize, int depth) @@ -834,7 +1018,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf //PA($x) => var PA = SA[partitionOffset..];//new SpanOffsetAccessor(SA, PA); - var stack = new SsStack(); + var stack = new SsStack(stackalloc SsStackItem[SS_STACK_SIZE]); SAPtr a; SAPtr b; @@ -1183,11 +1367,87 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf } } - private static int ss_pivot(IntAccessor t, int td, Span sA, int pA, int first, int last) + /// + /// Returns the pivot element + /// + private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) + { + Idx t = (last - first)/*.0*/; + SAPtr middle = first + (t / 2); + + if (t <= 512) + { + if (t <= 32) + { + return ss_median3(T, Td, SA, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5( + T, + Td, + SA, + PA, + first, + first + t, + middle, + last - 1 - t, + last - 1); + } + } + + t >>= 3; + first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); + middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); + last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + + return ss_median3(T, Td, SA, PA, first, middle, last); + } + + private static int ss_median5(IntAccessor t, int td, Span sA, int pA, int first, int v1, int middle, int v2, int v3) { throw new NotImplementedException(); } + static void Swap(ref T lhs, ref T rhs) + { + T temp; + temp = lhs; + lhs = rhs; + rhs = temp; + } + + /// + /// Returns the median of three elements + /// + private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) + { + //int get(int x) => T[Td + SA[PA + 
SA[x]]] + var get = new TdPAStarAccessor(T.span, SA, PA, Td); + + if (get[v1] > get[v2]) + { + Swap(ref v1, ref v2); + } + + if (get[v2] > get[v3]) + { + if (get[v1] > get[v3]) + { + return v1; + } + else + { + return v3; + } + } + else + { + return v2; + } + } + private static int ss_partition(Span sA, int pA, int first, int a, int depth) { throw new NotImplementedException(); From 1db6e79dea23c852dbb94792e6b651e7e57cbe49 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 30 Oct 2021 22:50:01 -0500 Subject: [PATCH 105/325] Fix ref parameters and add faux macros for ISA and ISAd --- .../RsDivSufSort.cs | 191 +++++++++--------- 1 file changed, 92 insertions(+), 99 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index a263c24..5a8f8e0 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1738,7 +1738,7 @@ private static void trsort(SAPtr ISA, Span SA, int n, int depth) if (1 < (last - first)) { budget.Count = 0; - tr_introsort(ISA, ref ISAd, SA, ref first, ref last, budget); + tr_introsort(ISA, ISAd, SA, first, last, ref budget); if (budget.Count != 0) { unsorted += budget.Count; @@ -1823,13 +1823,13 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) } private const Idx TR_INSERTIONSORT_THRESHOLD = 8; - private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SAPtr first, ref SAPtr last, Budget budget) + private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA, SAPtr first, SAPtr last, ref Budget budget) { SAPtr a = 0; SAPtr b = 0; SAPtr c; Idx t, v, x; - Idx incr = ISAd - ISA; + Idx incr = isadOffset - isaOffset; Idx next; Idx trlink = -1; @@ -1847,13 +1847,14 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA }; } */ + var ISA = SA[isaOffset..]; + var ISAd = SA[isadOffset..]; - Idx limit = tr_ilg(last - 
first); + var limit = tr_ilg(last - first); // PASCAL while (true) { - //TODO: crosscheck crosscheck("pascal limit={} first={} last={}", limit, first, last); if (limit < 0) { @@ -1862,7 +1863,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA // tandem repeat partition tr_partition( SA, - ISAd - incr, + isadOffset - incr, first, first, last, @@ -1883,7 +1884,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (c < a) { { - SA[ISA + SA[c]] = v; + ISA[SA[c]] = v; } // iter (JONAS) @@ -1901,7 +1902,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (c < b) { { - SA[ISA + (SA[c])] = v; + ISA[SA[c]] = v; } // iter (AHAB) @@ -1912,47 +1913,42 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA // push if (1 < (b - a)) { - //TODO: crosscheck crosscheck("1<(b-a)"); crosscheck("push NULL {} {} {} {}", a, b, 0, 0); stack.Push(0, a, b, 0, 0); - crosscheck("push {} {} {} {} {}", ISAd - incr, first, last, -2, trlink); - stack.Push(ISAd - incr, first, last, -2, trlink); + crosscheck("push {} {} {} {} {}", isadOffset - incr, first, last, -2, trlink); + stack.Push(isadOffset - incr, first, last, -2, trlink); trlink = stack.Size - 2; } if ((a - first) <= (last - b)) { - //TODO: crosscheck crosscheck("star"); if (1 < (a - first)) { - //TODO: crosscheck crosscheck("board"); crosscheck( "push {} {} {} {} {}", - ISAd, + isadOffset, b, last, tr_ilg(last - b), trlink ); - stack.Push(ISAd, b, last, tr_ilg(last - b), trlink); + stack.Push(isadOffset, b, last, tr_ilg(last - b), trlink); last = a; limit = tr_ilg(a - first); } else if (1 < (last - b)) { - //TODO: crosscheck crosscheck("north"); first = b; limit = tr_ilg(last - b); } else { - //TODO: crosscheck crosscheck("denny"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } @@ -1967,13 +1963,13 @@ 
private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA crosscheck("land"); crosscheck( "push {} {} {} {} {}", - ISAd, + isadOffset, first, a, tr_ilg(a - first), trlink ); - stack.Push(ISAd, first, a, tr_ilg(a - first), trlink); + stack.Push(isadOffset, first, a, tr_ilg(a - first), trlink); first = b; limit = tr_ilg(last - b); } @@ -1986,7 +1982,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("clap"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } @@ -2004,7 +2000,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA b = item.c; if (item.d == 0) { - tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + tr_copy(isaOffset, SA, first, a, b, last, isadOffset - isaOffset); } else { @@ -2012,9 +2008,9 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA { stack.Items[trlink].d = -1; } - tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + tr_partialcopy(isaOffset, SA, first, a, b, last, isadOffset - isaOffset); } - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } @@ -2032,7 +2028,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (true) { { - SA[ISA + SA[a]] = a; + ISA[SA[a]] = a; } // cond (GEMINI) @@ -2052,8 +2048,6 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA // MONSTRO while (true) { - //TODO: checkme - //SA[a] = !SA[a]; SA[a] = ~SA[a]; a += 1; @@ -2063,7 +2057,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA } } - next = SA[ISA + SA[a]] != SA[ISAd + SA[a]] ? tr_ilg(a - first + 1) : -1; + next = ISA[SA[a]] != ISAd[SA[a]] ? 
tr_ilg(a - first + 1) : -1; a += 1; if (a < last) { @@ -2074,7 +2068,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (b < a) { { - SA[ISA + SA[b]] = v; + ISA[SA[b]] = v; } b += 1; } @@ -2086,9 +2080,9 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA crosscheck("budget pass"); if ((a - first) <= (last - a)) { - crosscheck("push {} {} {} {} {}", ISAd, a, last, -3, trlink); - stack.Push(ISAd, a, last, -3, trlink); - ISAd += incr; + crosscheck("push {} {} {} {} {}", isadOffset, a, last, -3, trlink); + stack.Push(isadOffset, a, last, -3, trlink); + isadOffset += incr; last = a; limit = next; } @@ -2098,19 +2092,19 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA { crosscheck( "push {} {} {} {} {}", - ISAd + incr, + isadOffset + incr, first, a, next, trlink ); - stack.Push(ISAd + incr, first, a, next, trlink); + stack.Push(isadOffset + incr, first, a, next, trlink); first = a; limit = -3; } else { - ISAd += incr; + isadOffset += incr; last = a; limit = next; } @@ -2133,14 +2127,14 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("1<(last-a) not"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } crosscheck("1<(last-a) not post"); crosscheck( "were popped: ISAd={} first={} last={} limit={} trlink={}", - ISAd, + isadOffset, first, last, limit, @@ -2152,14 +2146,14 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("times pop"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } crosscheck("times pop-post"); crosscheck( "were popped: ISAd={} first={} last={} limit={} trlink={}", - ISAd, + isadOffset, first, last, limit, @@ -2173,7 +2167,7 @@ private static void 
tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) { crosscheck("insertionsort last-first={}", last - first); - tr_insertionsort(SA, ISAd, first, last); + tr_insertionsort(SA, isadOffset, first, last); limit = -3; continue; } @@ -2184,25 +2178,24 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA { crosscheck( "heapsort ISAd={} first={} last={} last-first={}", - ISAd, + isadOffset, first, last, last - first ); - SA_dump(SA[first..last], "before tr_heapsort"); - tr_heapsort(ISAd, SA, first, (last - first)); - SA_dump(SA[first..last], "after tr_heapsort"); + //SA_dump(SA[first..last], "before tr_heapsort"); + tr_heapsort(isadOffset, SA, first, (last - first)); + //SA_dump(SA[first..last], "after tr_heapsort"); // YOHAN a = last - 1; while (first < a) { // VINCENT - x = SA[ISAd + SA[a]]; + x = ISAd[SA[a]]; b = a - 1; - while ((first <= b) && (SA[ISAd + SA[b]]) == x) + while ((first <= b) && (ISAd[SA[b]]) == x) { - //! SA[b] = ~SA[b]; // iter (VINCENT) @@ -2218,17 +2211,17 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA } // choose pivot - a = tr_pivot(SA, ISAd, first, last); + a = tr_pivot(SA, isadOffset, first, last); crosscheck("picked pivot {}", a); SA.Swap(first, a); - v = SA[ISAd + (SA[first])]; + v = ISAd[SA[first]]; // partition - tr_partition(SA, ISAd, first, first + 1, last, ref a, ref b, v); + tr_partition(SA, isadOffset, first, first + 1, last, ref a, ref b, v); if ((last - first) != (b - a)) { crosscheck("pre-nolwenn"); - next = SA[ISA + (SA[a])] != v ? tr_ilg(b - a) : -1; + next = ISA[SA[a]] != v ? 
tr_ilg(b - a) : -1; // update ranks // NOLWENN @@ -2237,7 +2230,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (c < a) { { - SA[ISA + (SA[c])] = v; + ISA[SA[c]] = v; } c += 1; } @@ -2249,7 +2242,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA while (c < b) { { - SA[ISA + (SA[c])] = v; + ISA[SA[c]] = v; } c += 1; } @@ -2268,23 +2261,23 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (a - first)) { crosscheck("aaaa"); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, last, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + stack.Push(isadOffset + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); last = a; } else if (1 < (last - b)) { crosscheck("aaab"); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + stack.Push(isadOffset + incr, a, b, next, trlink); first = b; } else { crosscheck("aaac"); - ISAd += incr; + isadOffset += incr; first = a; last = b; limit = next; @@ -2296,18 +2289,18 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (a - first)) { crosscheck("aaba"); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, last, limit, trlink); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); 
+ stack.Push(isadOffset + incr, a, b, next, trlink); last = a; } else { crosscheck("aabb"); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, last, limit, trlink); - ISAd += incr; + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); + isadOffset += incr; first = a; last = b; limit = next; @@ -2316,11 +2309,11 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("aac"); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, last, limit, trlink); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); - ISAd += incr; + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); + isadOffset += incr; first = a; last = b; limit = next; @@ -2335,23 +2328,23 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (last - b)) { crosscheck("abaa"); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + stack.Push(isadOffset + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); first = b; } else if (1 < (a - first)) { crosscheck("abab"); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + stack.Push(isadOffset + incr, a, b, next, 
trlink); last = a; } else { crosscheck("abac"); - ISAd += incr; + isadOffset += incr; first = a; last = b; limit = next; @@ -2363,18 +2356,18 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (last - b)) { crosscheck("abba"); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); - crosscheck("push {} {} {} {} {}", ISAd + incr, a, b, next, trlink); - stack.Push(ISAd + incr, a, b, next, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + stack.Push(isadOffset + incr, a, b, next, trlink); first = b; } else { crosscheck("abbb"); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); - ISAd += incr; + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); + isadOffset += incr; first = a; last = b; limit = next; @@ -2383,11 +2376,11 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("abc"); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, last, limit, trlink); - ISAd += incr; + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); + isadOffset += incr; first = a; last = b; limit = next; @@ -2408,8 +2401,8 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (a - first)) { crosscheck("bba"); - crosscheck("push {} {} {} {} {}", ISAd, b, last, limit, trlink); - stack.Push(ISAd, b, 
last, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + stack.Push(isadOffset, b, last, limit, trlink); last = a; } else if (1 < (last - b)) @@ -2420,7 +2413,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("bbc"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } @@ -2432,8 +2425,8 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA if (1 < (last - b)) { crosscheck("bca"); - crosscheck("push {} {} {} {} {}", ISAd, first, a, limit, trlink); - stack.Push(ISAd, first, a, limit, trlink); + crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + stack.Push(isadOffset, first, a, limit, trlink); first = b; } else if (1 < (a - first)) @@ -2444,7 +2437,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA else { crosscheck("bcc"); - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } @@ -2460,7 +2453,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA { crosscheck("ca"); limit = tr_ilg(last - first); - ISAd += incr; + isadOffset += incr; } else { @@ -2470,7 +2463,7 @@ private static void tr_introsort(SAPtr ISA, ref SAPtr ISAd, Span SA, ref SA crosscheck("cba"); stack.Items[trlink].d = -1; } - if (!stack.Pop(ref ISAd, ref first, ref last, ref limit, ref trlink)) + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) { return; } From 2ab157870bc688bc8bbc58bd9bf35499a8deca7a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 30 Oct 2021 22:59:22 -0500 Subject: [PATCH 106/325] Implement tr_insertionsort --- .../RsDivSufSort.cs | 58 ++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 5a8f8e0..41cb6bf 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2488,9 +2488,63 @@ private static void tr_heapsort(int iSAd, Span sA, int first, int v) throw new NotImplementedException(); } - private static void tr_insertionsort(Span sA, int iSAd, int first, int last) + /// + /// Simple insertionsort for small size groups + /// + private static void tr_insertionsort(Span SA, SAPtr isadOffset, SAPtr first, SAPtr last) { - throw new NotImplementedException(); + SAPtr a; + SAPtr b; + Idx t; + Idx r; + + var ISAd = SA[isadOffset..]; + + a = first + 1; + // KAREN + while (a < last) + { + // JEZEBEL + t = SA[a]; + b = a - 1; + while (true) + { + // cond (JEZEBEL) + r = ISAd[t] - ISAd[SA[b]]; + if (!(0 > r)) + { + break; + } + + // LILITH + while (true) + { + SA[b + 1] = SA[b]; + + // cond (LILITH) + b -= 1; + if (!((first <= b) && (SA[b] < 0))) + { + break; + } + } + + // body (JEZEBEL) + if (b < first) + { + break; + } + } + + if (r == 0) + { + SA[b] = ~SA[b]; + } + SA[b + 1] = t; + + // iter + a += 1; + } } private static void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) From d0ea52dfab186fc7552012ed6cf1bb3a9f6dedb7 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 31 Oct 2021 16:05:36 -0500 Subject: [PATCH 107/325] Fix not updating ISAd when isadOffset updates Take readonly ISAd as parameter for tr_insertionsort instead of passing isadOffset --- .../RsDivSufSort.cs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 41cb6bf..554b7d4 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2027,9 +2027,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // GEMINI while (true) { - { - ISA[SA[a]] = a; - } + ISA[SA[a]] = a; // cond (GEMINI) a += 1; @@ -2067,9 +2065,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA v = a - 1; while (b < a) { - { - ISA[SA[b]] = v; - } + ISA[SA[b]] = v; b += 1; } } @@ -2083,6 +2079,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck("push {} {} {} {} {}", isadOffset, a, last, -3, trlink); stack.Push(isadOffset, a, last, -3, trlink); isadOffset += incr; + ISAd = ISAd[incr..]; last = a; limit = next; } @@ -2105,6 +2102,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA else { isadOffset += incr; + ISAd = ISAd[incr..]; last = a; limit = next; } @@ -2167,7 +2165,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) { crosscheck("insertionsort last-first={}", last - first); - tr_insertionsort(SA, isadOffset, first, last); + tr_insertionsort(SA, ISAd, first, last); limit = -3; continue; } @@ -2491,15 +2489,13 @@ private static void tr_heapsort(int iSAd, Span sA, int first, int v) /// /// Simple insertionsort for small size groups /// - private static void tr_insertionsort(Span SA, SAPtr isadOffset, SAPtr first, SAPtr last) + private static void 
tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr first, SAPtr last) { SAPtr a; SAPtr b; Idx t; Idx r; - var ISAd = SA[isadOffset..]; - a = first + 1; // KAREN while (a < last) From 29c52b92adc1cda78b4e7e0b0a4dc34ec4a6351d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 31 Oct 2021 16:16:45 -0500 Subject: [PATCH 108/325] Remove sssort parameters being passed by reference --- .../RsDivSufSort.cs | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 554b7d4..a84f15c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -356,17 +356,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) if (1 < (j - i)) { //SA_dump!(&SA.range(i..j), "sssort(A)"); - sssort( - T, - SA, - PAb, - ref i, - (SAPtr)j, - ref buf, - ref bufsize, - 2, - n, - SA[i] == (m - 1)); + sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); //SA_dump!(&SA.range(i..j), "sssort(B)"); } @@ -409,13 +399,8 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) j = i; while (true) { - //TODO: check this - //SA[i] = !SA[i]; SA[i] = ~SA[i]; - { - var idx = ISAb + SA[i]; - SA[idx] = j; - } + SA[ISAb + SA[i]] = j; i -= 1; if (!(SA[i] < 0)) @@ -423,11 +408,8 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) break; } } - { - var idx = ISAb + SA[i]; - SA[idx] = j; - } + SA[ISAb + SA[i]] = j; i -= 1; } @@ -559,7 +541,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) /// /// Substring sort /// - private static void sssort(IntAccessor T, Span SA, SAPtr PA, ref SAPtr first, SAPtr last, ref SAPtr buf, ref Idx bufsize, Idx depth, Idx n, bool lastsuffix) + private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx 
depth, Idx n, bool lastsuffix) { // Note: in most of this file "PA" seems to mean "Partition Array" - we're // working on a slice of SA. This is also why SA (or a mutable reference to it) @@ -2027,6 +2009,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // GEMINI while (true) { + //Debug.Assert(SA[isaOffset..] == ISA); ISA[SA[a]] = a; // cond (GEMINI) From 763970ea6b744ef19af58ba2b52cc2fa8d6155b2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 31 Oct 2021 16:51:47 -0500 Subject: [PATCH 109/325] Reenable SA_dump --- .../RsDivSufSort.cs | 58 +++++++++++-------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index a84f15c..4ce001c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -355,9 +355,9 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) if (1 < (j - i)) { - //SA_dump!(&SA.range(i..j), "sssort(A)"); + SA_dump(SA[i..j], "sssort(A)"); sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); - //SA_dump!(&SA.range(i..j), "sssort(B)"); + SA_dump(SA[i..j], "sssort(B)"); } // iter (inner) @@ -618,7 +618,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S crosscheck("ss_mintrosort (pre-mariachi) a={} depth={}", a - PA, depth); ss_mintrosort(T, SA, PA, a, middle, depth); - //SA_dump!(&SA.range(first..last), "pre-mariachi"); + SA_dump(SA[first..last], "pre-mariachi"); // MARIACHI k = SS_BLOCKSIZE; @@ -626,7 +626,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S { if ((i & 1) > 0) { - //SA_dump!(&SA.range(first..last), "in-mariachi pre-swap"); + SA_dump(SA[first..last], "in-mariachi pre-swap"); crosscheck( "a={} middle={} bufsize={} depth={}", a - first, @@ -635,7 +635,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, 
SAPtr first, S depth ); ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); - //SA_dump!(&SA.range(first..last), "in-mariachi post-swap"); + SA_dump(SA[first..last], "in-mariachi post-swap"); a -= k; } @@ -643,17 +643,17 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S k <<= 1; i >>= 1; } - //SA_dump!(&SA.range(first..last), "post-mariachi"); + SA_dump(SA[first..last], "post-mariachi"); if (limit != 0) { crosscheck("ss_mintrosort limit!=0"); ss_mintrosort(T, SA, PA, middle, last, depth); - //SA_dump!(&SA.range(first..last), "post-mintrosort limit!=0"); + SA_dump(SA[first..last], "post-mintrosort limit!=0"); ss_inplacemerge(T, SA, PA, first, middle, last, depth); - //SA_dump!(&SA.range(first..last), "post-inplacemerge limit!=0"); + SA_dump(SA[first..last], "post-inplacemerge limit!=0"); } - //SA_dump!(&SA.range(first..last), "post-limit!=0"); + SA_dump(SA[first..last], "post-limit!=0"); if (lastsuffix) { @@ -736,10 +736,7 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr var original_first = first; var original_last = last; - //SA_dump!( - // &SA.range(original_first..original_last), - // "inplacemerge start" - //); + SA_dump(SA[original_first..original_last], "inplacemerge start"); // FERRIS while (true) @@ -778,7 +775,7 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr len = half; half >>= 1; } - //SA_dump!(&SA.range(original_first..original_last), "post-lois"); + SA_dump(SA[original_first..original_last], "post-lois"); if (a < middle) { @@ -787,7 +784,7 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr SA[a] = ~SA[a]; } ss_rotate(SA, a, middle, last); - //SA_dump!(&SA.range(original_first..original_last), "post-rotate"); + SA_dump(SA[original_first..original_last], "post-rotate"); last -= middle - a; middle = a; if (first == middle) @@ -805,14 +802,14 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr { last 
-= 1; } - //SA_dump!(&SA.range(original_first..original_last), "post-timmy"); + SA_dump(SA[original_first..original_last], "post-timmy"); } if (middle == last) { break; } - //SA_dump!(&SA.range(original_first..original_last), "ferris-wrap"); + SA_dump(SA[original_first..original_last], "ferris-wrap"); } } @@ -830,7 +827,7 @@ private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr las l = (middle - first)/*.0*/; r = (last - middle)/*.0*/; - //SA_dump!(&SA.range(original_first..original_last), "pre-brendan"); + SA_dump(SA[original_first..original_last], "pre-brendan"); // BRENDAN while ((0 < l) && (0 < r)) @@ -838,7 +835,7 @@ private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr las if (l == r) { ss_blockswap(SA, first, middle, l); - //SA_dump!(&SA.range(original_first..original_last), "post-blockswap"); + SA_dump(SA[original_first..original_last], "post-blockswap"); break; } @@ -869,7 +866,7 @@ private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr las t = SA[a]; } } - //SA_dump!(&SA.range(original_first..original_last), "post-alice"); + SA_dump(SA[original_first..original_last], "post-alice"); } else { @@ -898,7 +895,7 @@ private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr las t = SA[a]; } } - //SA_dump!(&SA.range(original_first..original_last), "post-robert"); + SA_dump(SA[original_first..original_last], "post-robert"); } } } @@ -1047,9 +1044,9 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf limit -= 1; if (old_limit == 0) { - //SA_dump!(&SA.range(first..last), "before heapsort"); + SA_dump(SA[first..last], "before heapsort"); ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); - //SA_dump!(&SA.range(first..last), "after heapsort"); + SA_dump(SA[first..last], "after heapsort"); } if (limit < 0) @@ -2454,9 +2451,20 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA } // end PASCAL } - private static void SA_dump(Span 
span, string v) + [Conditional("SA_DUMP")] + private static void SA_dump(ReadOnlySpan span, string v) { - throw new NotImplementedException(); + Debug.Write($"{v} - {span.ToString()}: "); + Debug.Write("["); + for(int i = 0; i < span.Length; i++) + { + Debug.Write($"{i}"); + if(i != span.Length - 1) + { + Debug.Write(","); + } + } + Debug.WriteLine("]"); } private static int tr_pivot(Span sA, int iSAd, int first, int last) From 176c3b569fc566f568857bc7e624772b1f40148f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 31 Oct 2021 16:56:47 -0500 Subject: [PATCH 110/325] Implement crosscheck Make SA_Dump and crosscheck debug conditional --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 4ce001c..123617a 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2451,7 +2451,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA } // end PASCAL } - [Conditional("SA_DUMP")] + [Conditional("DEBUG")] private static void SA_dump(ReadOnlySpan span, string v) { Debug.Write($"{v} - {span.ToString()}: "); @@ -2547,7 +2547,11 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int [Conditional("DEBUG")] private static void crosscheck(string v, params object[] args) { - //Debug.WriteLine(format: v, args: args); + Debug.WriteLine(v); + Debug.Indent(); + foreach (var arg in args) + Debug.WriteLine(arg); + Debug.Unindent(); } private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) From 679d603cf486cbda85df5e6fb6bc791e79a32a1f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 31 Oct 2021 16:57:24 -0500 Subject: [PATCH 111/325] Reenable SA_dumps around tr_heapsort --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 123617a..630f4e5 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2161,9 +2161,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA last, last - first ); - //SA_dump(SA[first..last], "before tr_heapsort"); + SA_dump(SA[first..last], "before tr_heapsort"); tr_heapsort(isadOffset, SA, first, (last - first)); - //SA_dump(SA[first..last], "after tr_heapsort"); + SA_dump(SA[first..last], "after tr_heapsort"); // YOHAN a = last - 1; From 56a905a1d0fe456156dea1715086e56842520c51 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 11:43:31 -0500 Subject: [PATCH 112/325] Fix some crosscheck output --- .../RsDivSufSort.cs | 58 +++++-------------- 1 file changed, 15 insertions(+), 43 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 630f4e5..168e238 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -584,7 +584,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S i = 0; while (SS_BLOCKSIZE < (middle - a)) { - crosscheck("ss_mintrosort (espresso) a={} depth={}", a - PA, depth); + crosscheck("ss_mintrosort (espresso) a={a} depth={depth}", a - PA, depth); ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); curbufsize = (last - (a + SS_BLOCKSIZE)); @@ -601,7 +601,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S j = i; while ((j & 1) > 0) { - crosscheck("ss_swapmerge {}", 
k); + crosscheck("ss_swapmerge {k}", k); ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); // iter @@ -615,7 +615,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S i += 1; } - crosscheck("ss_mintrosort (pre-mariachi) a={} depth={}", a - PA, depth); + crosscheck($"ss_mintrosort (pre-mariachi) a={a - PA} depth={depth}"); ss_mintrosort(T, SA, PA, a, middle, depth); SA_dump(SA[first..last], "pre-mariachi"); @@ -627,13 +627,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S if ((i & 1) > 0) { SA_dump(SA[first..last], "in-mariachi pre-swap"); - crosscheck( - "a={} middle={} bufsize={} depth={}", - a - first, - middle - first, - bufsize, - depth - ); + crosscheck($"a={a - first} middle={middle - first} bufsize={bufsize} depth={depth}"); ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); SA_dump(SA[first..last], "in-mariachi post-swap"); a -= k; @@ -1834,7 +1828,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // PASCAL while (true) { - crosscheck("pascal limit={} first={} last={}", limit, first, last); + crosscheck($"pascal limit={limit} first={first} last={last}"); if (limit < 0) { if (limit == -1) @@ -1893,9 +1887,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (b - a)) { crosscheck("1<(b-a)"); - crosscheck("push NULL {} {} {} {}", a, b, 0, 0); + crosscheck($"push NULL {a} {b} {0} {0}"); stack.Push(0, a, b, 0, 0); - crosscheck("push {} {} {} {} {}", isadOffset - incr, first, last, -2, trlink); + crosscheck($"push {isadOffset - incr} {first} {last} {-2} {trlink}"); stack.Push(isadOffset - incr, first, last, -2, trlink); trlink = stack.Size - 2; } @@ -1906,14 +1900,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (a - first)) { crosscheck("board"); - crosscheck( - "push {} {} {} {} {}", - isadOffset, - b, - last, - tr_ilg(last - b), - trlink - ); + crosscheck($"push 
{isadOffset} {b} {last} {tr_ilg(last - b)} {trlink}"); stack.Push(isadOffset, b, last, tr_ilg(last - b), trlink); last = a; limit = tr_ilg(a - first); @@ -2144,7 +2131,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) { - crosscheck("insertionsort last-first={}", last - first); + crosscheck($"insertionsort last-first={last - first}"); tr_insertionsort(SA, ISAd, first, last); limit = -3; continue; @@ -2154,13 +2141,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA limit -= 1; if (old_limit == 0) { - crosscheck( - "heapsort ISAd={} first={} last={} last-first={}", - isadOffset, - first, - last, - last - first - ); + crosscheck($"heapsort ISAd={isadOffset} first={first} last={last} last-first={last - first}"); SA_dump(SA[first..last], "before tr_heapsort"); tr_heapsort(isadOffset, SA, first, (last - first)); SA_dump(SA[first..last], "after tr_heapsort"); @@ -2454,17 +2435,12 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA [Conditional("DEBUG")] private static void SA_dump(ReadOnlySpan span, string v) { - Debug.Write($"{v} - {span.ToString()}: "); - Debug.Write("["); - for(int i = 0; i < span.Length; i++) + Debug.WriteLine($":: {v}"); + foreach(var i in span) { - Debug.Write($"{i}"); - if(i != span.Length - 1) - { - Debug.Write(","); - } + Debug.Write($"{i} "); } - Debug.WriteLine("]"); + Debug.WriteLine(""); } private static int tr_pivot(Span sA, int iSAd, int first, int last) @@ -2547,11 +2523,7 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int [Conditional("DEBUG")] private static void crosscheck(string v, params object[] args) { - Debug.WriteLine(v); - Debug.Indent(); - foreach (var arg in args) - Debug.WriteLine(arg); - Debug.Unindent(); + Debug.WriteLine(v, args); } private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) From 
419599c3a924ba36d1a64a4cabb3b7c508d2820e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 12:31:53 -0500 Subject: [PATCH 113/325] Add additional SA_dump points --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 168e238..529ae23 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -414,7 +414,9 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) } // Construct the inverse suffix array of type B* suffixes using trsort. + SA_dump(SA, "trsort(A)"); trsort(ISAb, SA, m, 1); + SA_dump(SA, "trsort(B)"); // Set the sorted order of type B* suffixes { @@ -486,6 +488,8 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) } } // End: Set the sorted order of type B* suffixes + SA_dump(SA, "JZ-post-sortOrderBstar"); + // Calculate the index of start/end point of each bucket { Bb[(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1)] = n; // end point @@ -531,6 +535,8 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) c0 -= 1; } } // End: Calculate the index of start/end point of each bucket + + SA_dump(SA, "JZ-final-bStar"); } return new SortTypeBstarResult { A = A, B = B, m = m }; From 7aa4711742cc52f0435051fa0dfb55a7cf16d7ee Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 14:16:07 -0500 Subject: [PATCH 114/325] Add wrapping in SA_dump and a few more checkpoints --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 529ae23..ce108ac 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -489,6 +489,8 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) } // End: Set the sorted order of type B* suffixes SA_dump(SA, "JZ-post-sortOrderBstar"); + SA_dump(A, "JZ-A"); + SA_dump(B, "JZ-B"); // Calculate the index of start/end point of each bucket { @@ -2442,9 +2444,10 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA private static void SA_dump(ReadOnlySpan span, string v) { Debug.WriteLine($":: {v}"); - foreach(var i in span) + for (int i = 0; i < span.Length; i++) { - Debug.Write($"{i} "); + Debug.Write($"{span[i]} "); + Debug.WriteLineIf((i + 1) % 25 == 0, ""); } Debug.WriteLine(""); } From 96ab60d58841a839c21256c96d70a155dccab1bd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 14:40:46 -0500 Subject: [PATCH 115/325] Fix off-by-one while calculating B* suffix sorts --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index ce108ac..2d30d53 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -321,9 +321,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) SAPtr ISAb = m; //for i in (0.. 
= (m - 2)).rev() { - //TODO: get rid of this Enumerable - //foreach(var ini in Enumerable.Range(0, m - 2).Reverse()) - for (i = m - 2; i > 0; i--) + for (i = m - 2; i >= 0; i--) { t = SA[PAb + i]; c0 = T[t]; @@ -592,7 +590,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S i = 0; while (SS_BLOCKSIZE < (middle - a)) { - crosscheck("ss_mintrosort (espresso) a={a} depth={depth}", a - PA, depth); + crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); curbufsize = (last - (a + SS_BLOCKSIZE)); @@ -609,7 +607,7 @@ private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, S j = i; while ((j & 1) > 0) { - crosscheck("ss_swapmerge {k}", k); + crosscheck($"ss_swapmerge {k}"); ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); // iter From 711faa4e37a3ab894356b87044be89ba2ff8c6cb Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 14:45:59 -0500 Subject: [PATCH 116/325] Add SAISChecker --- .../SAISChecker.cs | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs new file mode 100644 index 0000000..718df25 --- /dev/null +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs @@ -0,0 +1,107 @@ +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; + +namespace DeltaQ.Tests +{ + internal class SAISChecker + { + internal static int Check(ReadOnlySpan T, ReadOnlySpan SA, int n, bool verbose = false) + { + using var owned_C = SpanOwner.Allocate(256); + Span C = owned_C.Span; + int i, p, q, t; + int c; + + if (verbose) { Console.Write(@"sufcheck: "); } + if (n == 0) + { + if (verbose) { Console.WriteLine("Done."); } + return 0; + } + + /* Check arguments. 
*/ + if ((T == null) || (SA == null) || (n < 0)) + { + if (verbose) { Console.WriteLine("Invalid arguments."); } + return -1; + } + + /* check range: [0..n-1] */ + for (i = 0; i < n; ++i) + { + if ((SA[i] < 0) || (n <= SA[i])) + { + if (verbose) + { + Console.WriteLine("Out of the range [0," + (n - 1) + "]."); + Console.WriteLine(" SA[" + i + "]=" + SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for (i = 1; i < n; ++i) + { + if (T[SA[i - 1]] > T[SA[i]]) + { + if (verbose) + { + Console.WriteLine("Suffixes in wrong order."); + Console.Write(" T[SA[" + (i - 1) + "]=" + SA[i - 1] + "]=" + T[SA[i - 1]]); + Console.WriteLine(" > T[SA[" + i + "]=" + SA[i] + "]=" + T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. */ + + //for (i = 0; i < 256; ++i) { C[i] = 0; } + C.Clear(); + + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (i = 0, p = 0; i < 256; ++i) + { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for (i = 0; i < n; ++i) + { + p = SA[i]; + if (0 < p) + { + c = T[--p]; + t = C[c]; + } + else + { + c = T[p = n - 1]; + t = q; + } + if ((t < 0) || (p != SA[t])) + { + if (verbose) + { + Console.WriteLine("Suffixes in wrong position."); + Console.WriteLine(" SA[" + t + "]=" + ((0 <= t) ? SA[t] : -1) + " or"); + Console.WriteLine(" SA[" + i + "]=" + SA[i]); + } + return -4; + } + if (t != q) + { + ++C[c]; + if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if (verbose) { Console.WriteLine("Done."); } + return 0; + } + } +} From 407798cc320a2e126b72bd9bed64b50136aa0bac Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 14:46:33 -0500 Subject: [PATCH 117/325] Remove test stub --- .../UnitTest1.cs | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs deleted file mode 100644 index bc8a1a0..0000000 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/UnitTest1.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Xunit; - -namespace DeltaQ.SuffixSorting.LivDivSufSort.Tests -{ - public class UnitTest1 - { - [Fact] - public void Test1() - { - - } - } -} \ No newline at end of file From 2e26a10d04a60b19401a9449f7b2914476b3684b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 14:57:54 -0500 Subject: [PATCH 118/325] Remove comments and move debugging methods to the bottom --- .../RsDivSufSort.cs | 44 ++++++------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2d30d53..53ae16c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -71,15 +71,11 @@ private static void construct_SA(ReadOnlySpan T, Span SA, Span A Trace.Assert((s + 1) < n); Trace.Assert(T[s] <= T[s + 1]); - //TODO: check this - //SA[j] = !s; SA[j] = ~s; s -= 1; c0 = T[s]; if ((0 < s) && (T[s - 1] > c0)) { - //TODO: check this - //s = !s; s = ~s; } if (c0 != c2) @@ -98,8 +94,6 @@ private static void construct_SA(ReadOnlySpan T, Span SA, Span A else { Trace.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); - //TODO: check this - //SA[j] = !s; SA[j] = ~s; } @@ -135,8 +129,6 @@ private static void construct_SA(ReadOnlySpan T, Span SA, Span A c0 = T[s]; if ((s == 0) || (T[s - 1] < c0)) { - //TODO: check this - //s = !s; s = ~s; } if (c0 != c2) @@ -152,8 +144,6 @@ private static 
void construct_SA(ReadOnlySpan T, Span SA, Span A else { Trace.Assert(s < 0); - //TODO: check this - //SA[i] = !s; SA[i] = ~s; } @@ -205,7 +195,6 @@ public ref struct IntAccessor public int Length => span.Length; } - //fn sort_typeBstar(T: &Text, SA: &mut SuffixArray) -> SortTypeBstarResult { public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) { var n = T.Length; @@ -289,8 +278,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) } m = n - m; - //JZ: so far, so good - // Note: A type B* suffix is lexicographically smaller than a type B suffix // that beings with the same first two characters. @@ -2438,18 +2425,6 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA } // end PASCAL } - [Conditional("DEBUG")] - private static void SA_dump(ReadOnlySpan span, string v) - { - Debug.WriteLine($":: {v}"); - for (int i = 0; i < span.Length; i++) - { - Debug.Write($"{span[i]} "); - Debug.WriteLineIf((i + 1) % 25 == 0, ""); - } - Debug.WriteLine(""); - } - private static int tr_pivot(Span sA, int iSAd, int first, int last) { throw new NotImplementedException(); @@ -2527,15 +2502,24 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int throw new NotImplementedException(); } - [Conditional("DEBUG")] - private static void crosscheck(string v, params object[] args) + private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) { - Debug.WriteLine(v, args); + throw new NotImplementedException(); } - private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) + [Conditional("DEBUG")] + private static void SA_dump(ReadOnlySpan span, string v) { - throw new NotImplementedException(); + Debug.WriteLine($":: {v}"); + for (int i = 0; i < span.Length; i++) + { + Debug.Write($"{span[i]} "); + Debug.WriteLineIf((i + 1) % 25 == 0, ""); + } + Debug.WriteLine(""); } + + 
[Conditional("DEBUG")] + private static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); } } From a9630a32b6994faa55d2d4b99ab12cb1872f25ab Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 15:08:25 -0500 Subject: [PATCH 119/325] Update more crosscheck formats --- .../RsDivSufSort.cs | 76 ++++++------------- 1 file changed, 24 insertions(+), 52 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 53ae16c..fc78d42 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1920,14 +1920,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (last - b)) { crosscheck("land"); - crosscheck( - "push {} {} {} {} {}", - isadOffset, - first, - a, - tr_ilg(a - first), - trlink - ); + crosscheck($"push {isadOffset} {first} {a} {tr_ilg(a - first)} {trlink}"); stack.Push(isadOffset, first, a, tr_ilg(a - first), trlink); first = b; limit = tr_ilg(last - b); @@ -2036,7 +2029,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck("budget pass"); if ((a - first) <= (last - a)) { - crosscheck("push {} {} {} {} {}", isadOffset, a, last, -3, trlink); + crosscheck($"push {isadOffset} {a} {last} {-3} {trlink}"); stack.Push(isadOffset, a, last, -3, trlink); isadOffset += incr; ISAd = ISAd[incr..]; @@ -2047,14 +2040,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { if (1 < (last - a)) { - crosscheck( - "push {} {} {} {} {}", - isadOffset + incr, - first, - a, - next, - trlink - ); + crosscheck($"push {isadOffset + incr} {first} {a} {next} {trlink}"); stack.Push(isadOffset + incr, first, a, next, trlink); first = a; limit = -3; @@ -2090,14 +2076,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA return; } crosscheck("1<(last-a) not post"); - crosscheck( 
- "were popped: ISAd={} first={} last={} limit={} trlink={}", - isadOffset, - first, - last, - limit, - trlink - ); + crosscheck($"were popped: ISAd={isadOffset} first={first} last={last} limit={limit} trlink={trlink}"); } } } @@ -2109,14 +2088,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA return; } crosscheck("times pop-post"); - crosscheck( - "were popped: ISAd={} first={} last={} limit={} trlink={}", - isadOffset, - first, - last, - limit, - trlink - ); + crosscheck($"were popped: ISAd={isadOffset} first={first} last={last} limit={limit} trlink={trlink}"); } // end if first < last } // end if limit == -1, -2, or something else continue; @@ -2164,7 +2136,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // choose pivot a = tr_pivot(SA, isadOffset, first, last); - crosscheck("picked pivot {}", a); + crosscheck($"picked pivot {a}"); SA.Swap(first, a); v = ISAd[SA[first]]; @@ -2213,16 +2185,16 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (a - first)) { crosscheck("aaaa"); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); last = a; } else if (1 < (last - b)) { crosscheck("aaab"); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); first = b; } @@ -2241,16 +2213,16 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (a - first)) { crosscheck("aaba"); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} 
{limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); last = a; } else { crosscheck("aabb"); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); isadOffset += incr; first = a; @@ -2261,9 +2233,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA else { crosscheck("aac"); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); - crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); isadOffset += incr; first = a; @@ -2280,16 +2252,16 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (last - b)) { crosscheck("abaa"); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); - crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); first = b; } else if (1 < (a - first)) { crosscheck("abab"); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); last = a; } @@ -2308,16 +2280,16 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (last - b)) { crosscheck("abba"); - crosscheck("push {} 
{} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); - crosscheck("push {} {} {} {} {}", isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); stack.Push(isadOffset + incr, a, b, next, trlink); first = b; } else { crosscheck("abbb"); - crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); isadOffset += incr; first = a; @@ -2328,9 +2300,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA else { crosscheck("abc"); - crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); isadOffset += incr; first = a; @@ -2353,7 +2325,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (a - first)) { crosscheck("bba"); - crosscheck("push {} {} {} {} {}", isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); last = a; } @@ -2377,7 +2349,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (1 < (last - b)) { crosscheck("bca"); - crosscheck("push {} {} {} {} {}", isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); first = b; } From 545711c3cb123d94c9362cfc8bfb52d8a061832d Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 15:10:38 -0500 Subject: [PATCH 120/325] Unwrap tr_partition call --- .../RsDivSufSort.cs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index fc78d42..132e164 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1827,16 +1827,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if (limit == -1) { // tandem repeat partition - tr_partition( - SA, - isadOffset - incr, - first, - first, - last, - ref a, - ref b, - (last - 1) - ); + tr_partition(SA, isadOffset - incr, first, first, last, ref a, ref b, last - 1); // update ranks if (a < last) From 5731698a363f7cc221c8fdb021ac619a9fb33980 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 15:11:23 -0500 Subject: [PATCH 121/325] Use flat namespace --- .../RsDivSufSort.cs | 3693 ++++++++--------- 1 file changed, 1846 insertions(+), 1847 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 132e164..0419061 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -4,413 +4,441 @@ using Idx = System.Int32; using SAPtr = System.Int32; -namespace DeltaQ.SuffixSorting.LibDivSufSort -{ - public static class DivSufSort - { - private const int ALPHABET_SIZE = byte.MaxValue + 1; - private const int BUCKET_A_SIZE = ALPHABET_SIZE; - private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; - - public static void divsufsort(ReadOnlySpan T, Span SA) - { - Trace.Assert(T.Length == SA.Length); +namespace DeltaQ.SuffixSorting.LibDivSufSort; - var n = T.Length; +public static class DivSufSort +{ + private const int ALPHABET_SIZE = byte.MaxValue + 1; + private const int 
BUCKET_A_SIZE = ALPHABET_SIZE; + private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; - switch (n) - { - case 0: return; - case 1: - SA[0] = 0; - return; - //case 2: - // if(T[0] < T[1]) - // { - // SA.copy - // } - // break; - } + public static void divsufsort(ReadOnlySpan T, Span SA) + { + Trace.Assert(T.Length == SA.Length); - var result = sort_typeBstar(new IntAccessor(T), SA); - construct_SA(T, SA, result.A, result.B, result.m); - } + var n = T.Length; - private static void construct_SA(ReadOnlySpan T, Span SA, Span A, Span B, int m) + switch (n) { - Idx n = T.Length; - - BBucket Bb = new(B); - BStarBucket Bstar = new(B); - - SAPtr i; - SAPtr j; - Idx k; - Idx s; - Idx c0; - Idx c2; - if (0 < m) - { - // Construct the sorted order of type B suffixes by using the - // sorted order of type B* suffixes - Idx c1 = ALPHABET_SIZE - 2; - while (0 <= c1) - { - // Scan the suffix array from right to left - i = Bstar[(c1, c1 + 1)]; - j = A[c1 + 1] - 1; - k = 0; - c2 = -1; - - while (i <= j) - { - s = SA[j]; - if (0 < s) - { - Trace.Assert(T[s] == c1); - Trace.Assert((s + 1) < n); - Trace.Assert(T[s] <= T[s + 1]); - - SA[j] = ~s; - s -= 1; - c0 = T[s]; - if ((0 < s) && (T[s - 1] > c0)) - { - s = ~s; - } - if (c0 != c2) - { - if (0 <= c2) - { - Bb[(c2, c1)] = k; - } - c2 = c0; - k = Bb[(c2, c1)]; - } - Trace.Assert(k < j); - SA[k] = s; - k -= 1; - } - else - { - Trace.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); - SA[j] = ~s; - } - - // iter - j -= 1; - } + case 0: return; + case 1: + SA[0] = 0; + return; + //case 2: + // if(T[0] < T[1]) + // { + // SA.copy + // } + // break; + } - // iter - c1 -= 1; - } - } + var result = sort_typeBstar(new IntAccessor(T), SA); + construct_SA(T, SA, result.A, result.B, result.m); + } - // Construct the suffix array by using the sorted order of type B suffixes - c2 = T[n - 1]; - k = A[c2]; - //TODO: check this - //SA[k] = T[n - 2] < c2 ? !(n - 1) : n - 1; - SA[k] = T[n - 2] < c2 ? 
~(n - 1) : n - 1; - k += 1; - // Scan the suffix array from left to right + private static void construct_SA(ReadOnlySpan T, Span SA, Span A, Span B, int m) + { + Idx n = T.Length; + + BBucket Bb = new(B); + BStarBucket Bstar = new(B); + + SAPtr i; + SAPtr j; + Idx k; + Idx s; + Idx c0; + Idx c2; + if (0 < m) + { + // Construct the sorted order of type B suffixes by using the + // sorted order of type B* suffixes + Idx c1 = ALPHABET_SIZE - 2; + while (0 <= c1) { - // init - i = 0; - j = n; + // Scan the suffix array from right to left + i = Bstar[(c1, c1 + 1)]; + j = A[c1 + 1] - 1; + k = 0; + c2 = -1; - while (i < j) + while (i <= j) { - s = SA[i]; + s = SA[j]; if (0 < s) { - Trace.Assert(T[s - 1] >= T[s]); + Trace.Assert(T[s] == c1); + Trace.Assert((s + 1) < n); + Trace.Assert(T[s] <= T[s + 1]); + + SA[j] = ~s; s -= 1; c0 = T[s]; - if ((s == 0) || (T[s - 1] < c0)) + if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } if (c0 != c2) { - A[c2] = k; + if (0 <= c2) + { + Bb[(c2, c1)] = k; + } c2 = c0; - k = A[c2]; + k = Bb[(c2, c1)]; } - Trace.Assert(i < k); + Trace.Assert(k < j); SA[k] = s; - k += 1; + k -= 1; } else { - Trace.Assert(s < 0); - SA[i] = ~s; + Trace.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); + SA[j] = ~s; } // iter - i += 1; + j -= 1; } + + // iter + c1 -= 1; } } - public ref struct SortTypeBstarResult + // Construct the suffix array by using the sorted order of type B suffixes + c2 = T[n - 1]; + k = A[c2]; + //TODO: check this + //SA[k] = T[n - 2] < c2 ? !(n - 1) : n - 1; + SA[k] = T[n - 2] < c2 ? 
~(n - 1) : n - 1; + k += 1; + // Scan the suffix array from left to right { - public Span A; - public Span B; - public int m; - } + // init + i = 0; + j = n; - public ref struct BStarBucket - { - public readonly Span B; - public BStarBucket(Span B) => this.B = B; + while (i < j) + { + s = SA[i]; + if (0 < s) + { + Trace.Assert(T[s - 1] >= T[s]); + s -= 1; + c0 = T[s]; + if ((s == 0) || (T[s - 1] < c0)) + { + s = ~s; + } + if (c0 != c2) + { + A[c2] = k; + c2 = c0; + k = A[c2]; + } + Trace.Assert(i < k); + SA[k] = s; + k += 1; + } + else + { + Trace.Assert(s < 0); + SA[i] = ~s; + } - public ref int this[(int c0, int c1) index] => ref B[(index.c0 << 8) | index.c1]; + // iter + i += 1; + } } + } - public ref struct BBucket - { - public readonly Span B; - public BBucket(Span B) => this.B = B; + public ref struct SortTypeBstarResult + { + public Span A; + public Span B; + public int m; + } - public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; - } + public ref struct BStarBucket + { + public readonly Span B; + public BStarBucket(Span B) => this.B = B; - //[DebuggerDisplay("")] - //public ref struct SAPtr - //{ - // public readonly Index Index; - // public SAPtr(Index idx) - // { - // this.Index = idx; - // } - //} + public ref int this[(int c0, int c1) index] => ref B[(index.c0 << 8) | index.c1]; + } - public ref struct IntAccessor - { - public readonly ReadOnlySpan span; - public IntAccessor(ReadOnlySpan span) => this.span = span; + public ref struct BBucket + { + public readonly Span B; + public BBucket(Span B) => this.B = B; - public int this[Idx index] => span[index]; - public int Length => span.Length; - } + public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; + } - public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) - { - var n = T.Length; + //[DebuggerDisplay("")] + //public ref struct SAPtr + //{ + // public readonly Index Index; + // public SAPtr(Index idx) + // { + // this.Index = 
idx; + // } + //} + + public ref struct IntAccessor + { + public readonly ReadOnlySpan span; + public IntAccessor(ReadOnlySpan span) => this.span = span; - //These MUST be zeroed first - using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE, AllocationMode.Clear); - using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE, AllocationMode.Clear); + public int this[Idx index] => span[index]; + public int Length => span.Length; + } + + public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) + { + var n = T.Length; - Span A = owner_A.Span; - Span B = owner_B.Span; + //These MUST be zeroed first + using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE, AllocationMode.Clear); + using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE, AllocationMode.Clear); - BBucket Bb = new(B); - BStarBucket Bstar = new(B); + Span A = owner_A.Span; + Span B = owner_B.Span; - int c0, c1, i, j, k, t, m; + BBucket Bb = new(B); + BStarBucket Bstar = new(B); - // Count the number of occurences of the first one or two characters of each - // type A, B and B* suffix. Moreover, store the beginning position of all - // type B* suffixes into the array SA. - i = n - 1; - m = n; - c0 = T[n - 1]; + int c0, c1, i, j, k, t, m; - while (0 <= i) + // Count the number of occurences of the first one or two characters of each + // type A, B and B* suffix. Moreover, store the beginning position of all + // type B* suffixes into the array SA. 
+ i = n - 1; + m = n; + c0 = T[n - 1]; + + while (0 <= i) + { + // type A suffix (originally do..while) + while (true) { - // type A suffix (originally do..while) - while (true) + c1 = c0; + A[c1] += 1; + + // original loop condition + i -= 1; + if (0 > i) { - c1 = c0; - A[c1] += 1; + break; + } - // original loop condition - i -= 1; + c0 = T[i]; + if (c0 < c1) + { + break; + } + } + + if (0 <= i) + { + // type B* suffix + Bstar[(c0, c1)] += 1; + + m -= 1; + SA[m] = i; + + // type B suffix + + // init + i -= 1; + c1 = c0; + + while (true) + { + // cond if (0 > i) { break; } - c0 = T[i]; - if (c0 < c1) + if (c0 > c1) { break; } - } - - if (0 <= i) - { - // type B* suffix - Bstar[(c0, c1)] += 1; - m -= 1; - SA[m] = i; - - // type B suffix + // body + Bb[(c0, c1)] += 1; - // init + // iter i -= 1; c1 = c0; + } + } + } + m = n - m; - while (true) - { - // cond - if (0 > i) - { - break; - } - c0 = T[i]; - if (c0 > c1) - { - break; - } + // Note: A type B* suffix is lexicographically smaller than a type B suffix + // that beings with the same first two characters. - // body - Bb[(c0, c1)] += 1; + // Calculate the index of start/end point of each bucket. + { + i = 0; + j = 0; + for (c0 = 0; c0 < ALPHABET_SIZE; c0++) + { + // body + t = i + A[c0]; + A[c0] = i + j; // start point + i = t + Bb[(c0, c0)]; - // iter - i -= 1; - c1 = c0; - } + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; c1++) + { + j += Bstar[(c0, c1)]; + Bstar[(c0, c1)] = j; // end point + i += Bb[(c0, c1)]; } } - m = n - m; + } - // Note: A type B* suffix is lexicographically smaller than a type B suffix - // that beings with the same first two characters. + if (0 < m) + { + // Sort the type B* suffixes by their first two characters + SAPtr PAb = n - m; + SAPtr ISAb = m; - // Calculate the index of start/end point of each bucket. + //for i in (0.. 
= (m - 2)).rev() { + for (i = m - 2; i >= 0; i--) + { + t = SA[PAb + i]; + c0 = T[t]; + c1 = T[t + 1]; + Bstar[(c0, c1)] -= 1; + SA[Bstar[(c0, c1)]] = i; + } + t = SA[PAb + m - 1]; + c0 = T[t]; + c1 = T[t + 1]; + Bstar[(c0, c1)] -= 1; + SA[Bstar[(c0, c1)]] = m - 1; + + // Sort the type B* substrings using sssort. + SAPtr buf = m; + var bufsize = n - (2 * m); + + // init (outer) + c0 = ALPHABET_SIZE - 2; + j = m; + while (0 < j) { - i = 0; - j = 0; - for (c0 = 0; c0 < ALPHABET_SIZE; c0++) + // init (inner) + c1 = ALPHABET_SIZE - 1; + while (c0 < c1) { - // body - t = i + A[c0]; - A[c0] = i + j; // start point - i = t + Bb[(c0, c0)]; + // body (inner) + i = Bstar[(c0, c1)]; - for (c1 = c0 + 1; c1 < ALPHABET_SIZE; c1++) + if (1 < (j - i)) { - j += Bstar[(c0, c1)]; - Bstar[(c0, c1)] = j; // end point - i += Bb[(c0, c1)]; + SA_dump(SA[i..j], "sssort(A)"); + sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); + SA_dump(SA[i..j], "sssort(B)"); } - } - } - - if (0 < m) - { - // Sort the type B* suffixes by their first two characters - SAPtr PAb = n - m; - SAPtr ISAb = m; - //for i in (0.. = (m - 2)).rev() { - for (i = m - 2; i >= 0; i--) - { - t = SA[PAb + i]; - c0 = T[t]; - c1 = T[t + 1]; - Bstar[(c0, c1)] -= 1; - SA[Bstar[(c0, c1)]] = i; + // iter (inner) + j = i; + c1 -= 1; } - t = SA[PAb + m - 1]; - c0 = T[t]; - c1 = T[t + 1]; - Bstar[(c0, c1)] -= 1; - SA[Bstar[(c0, c1)]] = m - 1; - // Sort the type B* substrings using sssort. 
- SAPtr buf = m; - var bufsize = n - (2 * m); + // iter (outer) + c0 -= 1; + } - // init (outer) - c0 = ALPHABET_SIZE - 2; - j = m; - while (0 < j) + // Compute ranks of type B* substrings + i = m - 1; + while (0 <= i) + { + if (0 <= SA[i]) { - // init (inner) - c1 = ALPHABET_SIZE - 1; - while (c0 < c1) + j = i; + while (true) { - // body (inner) - i = Bstar[(c0, c1)]; + { + var SAi = SA[i]; + SA[ISAb + SAi] = i; + } - if (1 < (j - i)) + i -= 1; + if (!((0 <= i) && (0 <= SA[i]))) { - SA_dump(SA[i..j], "sssort(A)"); - sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); - SA_dump(SA[i..j], "sssort(B)"); + break; } + } - // iter (inner) - j = i; - c1 -= 1; + SA[i + 1] = i - j; + if (i <= 0) + { + break; } + } + j = i; + while (true) + { + SA[i] = ~SA[i]; + SA[ISAb + SA[i]] = j; - // iter (outer) - c0 -= 1; + i -= 1; + if (!(SA[i] < 0)) + { + break; + } } - // Compute ranks of type B* substrings - i = m - 1; + SA[ISAb + SA[i]] = j; + i -= 1; + } + + // Construct the inverse suffix array of type B* suffixes using trsort. + SA_dump(SA, "trsort(A)"); + trsort(ISAb, SA, m, 1); + SA_dump(SA, "trsort(B)"); + + // Set the sorted order of type B* suffixes + { + // init + i = n - 1; + j = m; + c0 = T[n - 1]; while (0 <= i) { - if (0 <= SA[i]) - { - j = i; - while (true) - { - { - var SAi = SA[i]; - SA[ISAb + SAi] = i; - } - - i -= 1; - if (!((0 <= i) && (0 <= SA[i]))) - { - break; - } - } + // init + i -= 1; + c1 = c0; - SA[i + 1] = i - j; - if (i <= 0) + while (true) + { + // cond + if (!(0 <= i)) { break; } - } - j = i; - while (true) - { - SA[i] = ~SA[i]; - SA[ISAb + SA[i]] = j; - - i -= 1; - if (!(SA[i] < 0)) + c0 = T[i]; + if (!(c0 >= c1)) { break; } - } - SA[ISAb + SA[i]] = j; - i -= 1; - } + // body (empty) - // Construct the inverse suffix array of type B* suffixes using trsort. 
- SA_dump(SA, "trsort(A)"); - trsort(ISAb, SA, m, 1); - SA_dump(SA, "trsort(B)"); + // iter + i -= 1; + c1 = c0; + } - // Set the sorted order of type B* suffixes - { - // init - i = n - 1; - j = m; - c0 = T[n - 1]; - while (0 <= i) + if (0 <= i) { + t = i; + // init i -= 1; c1 = c0; @@ -423,7 +451,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) break; } c0 = T[i]; - if (!(c0 >= c1)) + if (!(c0 <= c1)) { break; } @@ -435,675 +463,665 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) c1 = c0; } - if (0 <= i) + j -= 1; { - t = i; - - // init - i -= 1; - c1 = c0; - - while (true) - { - // cond - if (!(0 <= i)) - { - break; - } - c0 = T[i]; - if (!(c0 <= c1)) - { - break; - } - - // body (empty) - - // iter - i -= 1; - c1 = c0; - } - - j -= 1; - { - var pos = SA[ISAb + j]; - //TODO: check complement - SA[pos] = (t == 0 || (1 < (t - i))) ? t : ~t; - } + var pos = SA[ISAb + j]; + //TODO: check complement + SA[pos] = (t == 0 || (1 < (t - i))) ? 
t : ~t; } } - } // End: Set the sorted order of type B* suffixes + } + } // End: Set the sorted order of type B* suffixes - SA_dump(SA, "JZ-post-sortOrderBstar"); - SA_dump(A, "JZ-A"); - SA_dump(B, "JZ-B"); + SA_dump(SA, "JZ-post-sortOrderBstar"); + SA_dump(A, "JZ-A"); + SA_dump(B, "JZ-B"); + + // Calculate the index of start/end point of each bucket + { + Bb[(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1)] = n; // end point + + // init + c0 = ALPHABET_SIZE - 2; + k = m - 1; - // Calculate the index of start/end point of each bucket + while (0 <= c0) { - Bb[(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1)] = n; // end point + i = A[c0 + 1] - 1; // init - c0 = ALPHABET_SIZE - 2; - k = m - 1; - - while (0 <= c0) + c1 = ALPHABET_SIZE - 1; + while (c0 < c1) { - i = A[c0 + 1] - 1; + t = i - Bb[(c0, c1)]; + Bb[(c0, c1)] = i; // end point - // init - c1 = ALPHABET_SIZE - 1; - while (c0 < c1) + // Move all type B* suffixes to the correct position { - t = i - Bb[(c0, c1)]; - Bb[(c0, c1)] = i; // end point + // init + i = t; + j = Bstar[(c0, c1)]; - // Move all type B* suffixes to the correct position + while (j <= k) { - // init - i = t; - j = Bstar[(c0, c1)]; - - while (j <= k) - { - SA[i] = SA[k]; + SA[i] = SA[k]; - // iter - i -= 1; - k -= 1; - } - } // End: Move all type B* suffixes to the correct position - - // iter - c1 -= 1; - } - Bstar[(c0, c0 + 1)] = i - Bb[(c0, c0)] + 1; - Bb[(c0, c0)] = i; // end point + // iter + i -= 1; + k -= 1; + } + } // End: Move all type B* suffixes to the correct position // iter - c0 -= 1; + c1 -= 1; } - } // End: Calculate the index of start/end point of each bucket + Bstar[(c0, c0 + 1)] = i - Bb[(c0, c0)] + 1; + Bb[(c0, c0)] = i; // end point - SA_dump(SA, "JZ-final-bStar"); - } + // iter + c0 -= 1; + } + } // End: Calculate the index of start/end point of each bucket - return new SortTypeBstarResult { A = A, B = B, m = m }; + SA_dump(SA, "JZ-final-bStar"); } - private const Idx SS_BLOCKSIZE = 1024; + return new SortTypeBstarResult { A = A, B = B, m = m 
}; + } + + private const Idx SS_BLOCKSIZE = 1024; - /// - /// Substring sort - /// - private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) + /// + /// Substring sort + /// + private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) + { + // Note: in most of this file "PA" seems to mean "Partition Array" - we're + // working on a slice of SA. This is also why SA (or a mutable reference to it) + // is passed around, so we don't run into lifetime issues. + + SAPtr a; + SAPtr b; + SAPtr middle; + SAPtr curbuf; + Idx j; + Idx k; + Idx curbufsize; + Idx limit; + Idx i; + + if (lastsuffix) { - // Note: in most of this file "PA" seems to mean "Partition Array" - we're - // working on a slice of SA. This is also why SA (or a mutable reference to it) - // is passed around, so we don't run into lifetime issues. - - SAPtr a; - SAPtr b; - SAPtr middle; - SAPtr curbuf; - Idx j; - Idx k; - Idx curbufsize; - Idx limit; - Idx i; - - if (lastsuffix) - { - first += 1; - } + first += 1; + } - limit = ss_isqrt(last - first); - if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < limit)) - { - if (SS_BLOCKSIZE < limit) - { - limit = SS_BLOCKSIZE; - } - middle = last - limit; - buf = middle; - bufsize = limit; - } - else + limit = ss_isqrt(last - first); + if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < limit)) + { + if (SS_BLOCKSIZE < limit) { - middle = last; - limit = 0; + limit = SS_BLOCKSIZE; } + middle = last - limit; + buf = middle; + bufsize = limit; + } + else + { + middle = last; + limit = 0; + } - // ESPRESSO - a = first; - i = 0; - while (SS_BLOCKSIZE < (middle - a)) - { - crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); - ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); - - curbufsize = (last - (a + SS_BLOCKSIZE)); - curbuf = a + 
SS_BLOCKSIZE; - if (curbufsize <= bufsize) - { - curbufsize = bufsize; - curbuf = buf; - } - - // FRESCO - b = a; - k = SS_BLOCKSIZE; - j = i; - while ((j & 1) > 0) - { - crosscheck($"ss_swapmerge {k}"); - ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); - - // iter - b -= k; - k <<= 1; - j >>= 1; - } + // ESPRESSO + a = first; + i = 0; + while (SS_BLOCKSIZE < (middle - a)) + { + crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); + ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); - // iter - a += SS_BLOCKSIZE; - i += 1; + curbufsize = (last - (a + SS_BLOCKSIZE)); + curbuf = a + SS_BLOCKSIZE; + if (curbufsize <= bufsize) + { + curbufsize = bufsize; + curbuf = buf; } - crosscheck($"ss_mintrosort (pre-mariachi) a={a - PA} depth={depth}"); - ss_mintrosort(T, SA, PA, a, middle, depth); - - SA_dump(SA[first..last], "pre-mariachi"); - - // MARIACHI + // FRESCO + b = a; k = SS_BLOCKSIZE; - while (i != 0) + j = i; + while ((j & 1) > 0) { - if ((i & 1) > 0) - { - SA_dump(SA[first..last], "in-mariachi pre-swap"); - crosscheck($"a={a - first} middle={middle - first} bufsize={bufsize} depth={depth}"); - ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); - SA_dump(SA[first..last], "in-mariachi post-swap"); - a -= k; - } + crosscheck($"ss_swapmerge {k}"); + ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); // iter + b -= k; k <<= 1; - i >>= 1; + j >>= 1; } - SA_dump(SA[first..last], "post-mariachi"); - if (limit != 0) + // iter + a += SS_BLOCKSIZE; + i += 1; + } + + crosscheck($"ss_mintrosort (pre-mariachi) a={a - PA} depth={depth}"); + ss_mintrosort(T, SA, PA, a, middle, depth); + + SA_dump(SA[first..last], "pre-mariachi"); + + // MARIACHI + k = SS_BLOCKSIZE; + while (i != 0) + { + if ((i & 1) > 0) { - crosscheck("ss_mintrosort limit!=0"); - ss_mintrosort(T, SA, PA, middle, last, depth); - SA_dump(SA[first..last], "post-mintrosort limit!=0"); - ss_inplacemerge(T, SA, PA, first, middle, last, depth); - 
SA_dump(SA[first..last], "post-inplacemerge limit!=0"); + SA_dump(SA[first..last], "in-mariachi pre-swap"); + crosscheck($"a={a - first} middle={middle - first} bufsize={bufsize} depth={depth}"); + ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); + SA_dump(SA[first..last], "in-mariachi post-swap"); + a -= k; } - SA_dump(SA[first..last], "post-limit!=0"); - if (lastsuffix) - { - crosscheck("lastsuffix!"); + // iter + k <<= 1; + i >>= 1; + } + SA_dump(SA[first..last], "post-mariachi"); - // Insert last type B* suffix - Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; - //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; - //let SAI = SuffixArray(&mut PAi); + if (limit != 0) + { + crosscheck("ss_mintrosort limit!=0"); + ss_mintrosort(T, SA, PA, middle, last, depth); + SA_dump(SA[first..last], "post-mintrosort limit!=0"); + ss_inplacemerge(T, SA, PA, first, middle, last, depth); + SA_dump(SA[first..last], "post-inplacemerge limit!=0"); + } + SA_dump(SA[first..last], "post-limit!=0"); - a = first; - i = SA[first - 1]; + if (lastsuffix) + { + crosscheck("lastsuffix!"); - // CELINE - while ((a < last) && ((SA[a] < 0) || (0 < ss_compare(T, PAi, (SAPtr)0, SA, PA + SA[a], depth)))) - { - // body - SA[a - 1] = SA[a]; + // Insert last type B* suffix + Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; + //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; + //let SAI = SuffixArray(&mut PAi); - // iter - a += 1; - } - SA[a - 1] = i; + a = first; + i = SA[first - 1]; + + // CELINE + while ((a < last) && ((SA[a] < 0) || (0 < ss_compare(T, PAi, (SAPtr)0, SA, PA + SA[a], depth)))) + { + // body + SA[a - 1] = SA[a]; + + // iter + a += 1; } + SA[a - 1] = i; } + } - /// - /// Compare two suffixes - /// - private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) - { - //TODO: possible perf improvement - JZ + /// + /// Compare two suffixes + /// + private static int ss_compare(IntAccessor T, 
Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + { + //TODO: possible perf improvement - JZ + + var U1 = depth + SAp1[p1]; + var U2 = depth + SAp2[p2]; + var U1n = SAp1[p1 + 1] + 2; + var U2n = SAp2[p2 + 1] + 2; - var U1 = depth + SAp1[p1]; - var U2 = depth + SAp2[p2]; - var U1n = SAp1[p1 + 1] + 2; - var U2n = SAp2[p2 + 1] + 2; + while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) + { + U1 += 1; + U2 += 1; + } - while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) + if (U1 < U1n) + { + if (U2 < U2n) { - U1 += 1; - U2 += 1; + return T[U1] - T[U2]; } - - if (U1 < U1n) + else { - if (U2 < U2n) - { - return T[U1] - T[U2]; - } - else - { - return 1; - } + return 1; + } + } + else + { + if (U2 < U2n) + { + return -1; } else { - if (U2 < U2n) - { - return -1; - } - else - { - return 0; - } + return 0; } } + } - private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) + private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) + { + SAPtr p; + SAPtr a; + SAPtr b; + Idx len; + Idx half; + Idx q; + Idx r; + Idx x; + + var original_first = first; + var original_last = last; + + SA_dump(SA[original_first..original_last], "inplacemerge start"); + + // FERRIS + while (true) { - SAPtr p; - SAPtr a; - SAPtr b; - Idx len; - Idx half; - Idx q; - Idx r; - Idx x; - - var original_first = first; - var original_last = last; - - SA_dump(SA[original_first..original_last], "inplacemerge start"); + if (SA[last - 1] < 0) + { + x = 1; + p = PA + ~SA[last - 1]; + } + else + { + x = 0; + p = PA + SA[last - 1]; + } - // FERRIS - while (true) + // LOIS + a = first; + len = (middle - first)/*.0*/; + half = len >> 1; + r = -1; + while (0 < len) { - if (SA[last - 1] < 0) + b = a + half; + q = ss_compare(T, SA, PA + (0 <= SA[b] ? 
SA[b] : ~SA[b]), SA, p, depth); + if (q < 0) { - x = 1; - p = PA + ~SA[last - 1]; + a = b + 1; + half -= (len & 1) ^ 1; } else { - x = 0; - p = PA + SA[last - 1]; + r = q; } - // LOIS - a = first; - len = (middle - first)/*.0*/; - half = len >> 1; - r = -1; - while (0 < len) - { - b = a + half; - q = ss_compare(T, SA, PA + (0 <= SA[b] ? SA[b] : ~SA[b]), SA, p, depth); - if (q < 0) - { - a = b + 1; - half -= (len & 1) ^ 1; - } - else - { - r = q; - } + // iter + len = half; + half >>= 1; + } + SA_dump(SA[original_first..original_last], "post-lois"); - // iter - len = half; - half >>= 1; + if (a < middle) + { + if (r == 0) + { + SA[a] = ~SA[a]; } - SA_dump(SA[original_first..original_last], "post-lois"); - - if (a < middle) + ss_rotate(SA, a, middle, last); + SA_dump(SA[original_first..original_last], "post-rotate"); + last -= middle - a; + middle = a; + if (first == middle) { - if (r == 0) - { - SA[a] = ~SA[a]; - } - ss_rotate(SA, a, middle, last); - SA_dump(SA[original_first..original_last], "post-rotate"); - last -= middle - a; - middle = a; - if (first == middle) - { - break; - } + break; } + } + last -= 1; + if (x != 0) + { + // TIMMY last -= 1; - if (x != 0) + while (SA[last] < 0) { - // TIMMY last -= 1; - while (SA[last] < 0) - { - last -= 1; - } - SA_dump(SA[original_first..original_last], "post-timmy"); - } - if (middle == last) - { - break; } - - SA_dump(SA[original_first..original_last], "ferris-wrap"); + SA_dump(SA[original_first..original_last], "post-timmy"); + } + if (middle == last) + { + break; } + + SA_dump(SA[original_first..original_last], "ferris-wrap"); } + } - private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) - { - SAPtr a; - SAPtr b; - Idx t; - Idx l; - Idx r; + private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) + { + SAPtr a; + SAPtr b; + Idx t; + Idx l; + Idx r; - var original_first = first; - var original_last = last; + var original_first = first; + var original_last = last; - l = 
(middle - first)/*.0*/; - r = (last - middle)/*.0*/; + l = (middle - first)/*.0*/; + r = (last - middle)/*.0*/; - SA_dump(SA[original_first..original_last], "pre-brendan"); + SA_dump(SA[original_first..original_last], "pre-brendan"); - // BRENDAN - while ((0 < l) && (0 < r)) + // BRENDAN + while ((0 < l) && (0 < r)) + { + if (l == r) { - if (l == r) - { - ss_blockswap(SA, first, middle, l); - SA_dump(SA[original_first..original_last], "post-blockswap"); - break; - } + ss_blockswap(SA, first, middle, l); + SA_dump(SA[original_first..original_last], "post-blockswap"); + break; + } - if (l < r) - { - a = last - 1; - b = middle - 1; - t = SA[a]; + if (l < r) + { + a = last - 1; + b = middle - 1; + t = SA[a]; - // ALICE - while (true) + // ALICE + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + if (b < first) { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - if (b < first) + SA[a] = t; + last = a; + r -= l + 1; + if (r <= l) { - SA[a] = t; - last = a; - r -= l + 1; - if (r <= l) - { - break; - } - a -= 1; - b = middle - 1; - t = SA[a]; + break; } + a -= 1; + b = middle - 1; + t = SA[a]; } - SA_dump(SA[original_first..original_last], "post-alice"); } - else + SA_dump(SA[original_first..original_last], "post-alice"); + } + else + { + a = first; + b = middle; + t = SA[a]; + // ROBERT + while (true) { - a = first; - b = middle; - t = SA[a]; - // ROBERT - while (true) + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + if (last <= b) { - SA[a] = SA[b]; - a += 1; - SA[b] = SA[a]; - b += 1; - if (last <= b) - { - SA[a] = t; - first = a + 1; + SA[a] = t; + first = a + 1; - l -= r + 1; - if (l <= r) - { - break; - } - a += 1; - b = middle; - t = SA[a]; + l -= r + 1; + if (l <= r) + { + break; } + a += 1; + b = middle; + t = SA[a]; } - SA_dump(SA[original_first..original_last], "post-robert"); } + SA_dump(SA[original_first..original_last], "post-robert"); } } + } - private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) + private 
static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) + { + for (int i = 0; i < n; i++) { - for (int i = 0; i < n; i++) - { - SA.Swap(a + i, b + i); - } + SA.Swap(a + i, b + i); } + } - private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, int b, int v2, int curbuf, int curbufsize, int depth) - { - throw new NotImplementedException(); - } + private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, int b, int v2, int curbuf, int curbufsize, int depth) + { + throw new NotImplementedException(); + } - private struct SsStackItem + private struct SsStackItem + { + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + } + + private const int SS_STACK_SIZE = 16; + private ref struct SsStack + { + public readonly Span Items; + public int Size; + + public SsStack(Span items) { - public SAPtr a; - public SAPtr b; - public SAPtr c; - public Idx d; + Items = items; + Size = 0; } - private const int SS_STACK_SIZE = 16; - private ref struct SsStack + public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) + { + Debug.Assert(Size < Items.Length); + ref SsStackItem item = ref Items[Size++]; + item.a = a; + item.b = b; + item.c = c; + item.d = d; + } + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) { - public readonly Span Items; - public int Size; + //Debug.Assert(Size > 0); + if (Size == 0) return false; + + ref SsStackItem item = ref Items[--Size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + return true; + } + } - public SsStack(Span items) - { - Items = items; - Size = 0; - } + private const Idx SS_INSERTIONSORT_THRESHOLD = 8; - public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) - { - Debug.Assert(Size < Items.Length); - ref SsStackItem item = ref Items[Size++]; - item.a = a; - item.b = b; - item.c = c; - item.d = d; - } - public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) - { - //Debug.Assert(Size > 0); - if (Size == 0) return false; + private ref struct SpanOffsetAccessor + { + 
private readonly Span _span; + private readonly int _offset; - ref SsStackItem item = ref Items[--Size]; - a = item.a; - b = item.b; - c = item.c; - d = item.d; - return true; - } + public SpanOffsetAccessor(Span span, int offset) + { + _span = span; + _offset = offset; } - private const Idx SS_INSERTIONSORT_THRESHOLD = 8; + public ref T this[int index] => ref _span[_offset + index]; + } + + private ref struct TdPAStarAccessor + { + private readonly Span _SA; + private readonly Span _PA; + private readonly IntAccessor _TD; - private ref struct SpanOffsetAccessor + public TdPAStarAccessor(ReadOnlySpan T, Span SA, int partitionOffset, int tdOffset) { - private readonly Span _span; - private readonly int _offset; + _SA = SA; + _PA = SA[partitionOffset..]; + _TD = new(T[tdOffset..]); + } - public SpanOffsetAccessor(Span span, int offset) - { - _span = span; - _offset = offset; - } + public int this[int index] => _TD[_PA[_SA[index]]]; + } - public ref T this[int index] => ref _span[_offset + index]; - } + /// + /// Multikey introsort for medium size groups + /// + private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) + { + //PA($x) => + var PA = SA[partitionOffset..];//new SpanOffsetAccessor(SA, PA); - private ref struct TdPAStarAccessor - { - private readonly Span _SA; - private readonly Span _PA; - private readonly IntAccessor _TD; + var stack = new SsStack(stackalloc SsStackItem[SS_STACK_SIZE]); - public TdPAStarAccessor(ReadOnlySpan T, Span SA, int partitionOffset, int tdOffset) - { - _SA = SA; - _PA = SA[partitionOffset..]; - _TD = new(T[tdOffset..]); - } + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr d; + SAPtr e; + SAPtr f; - public int this[int index] => _TD[_PA[_SA[index]]]; - } + Idx s; + Idx t; - /// - /// Multikey introsort for medium size groups - /// - private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, /*ref*/ SAPtr first, /*ref*/ SAPtr last, 
/*ref*/ Idx depth) - { - //PA($x) => - var PA = SA[partitionOffset..];//new SpanOffsetAccessor(SA, PA); + Idx limit; + Idx v; + Idx x = 0; - var stack = new SsStack(stackalloc SsStackItem[SS_STACK_SIZE]); + // RENEE + limit = ss_ilg(last - first); + while (true) + { + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + { + if (1 < (last - first)) + { + ss_insertionsort(T, SA, partitionOffset, first, last, depth); + } + if (!stack.Pop(ref first, ref last, ref depth, ref limit)) + { + return; + } + continue; + } - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr d; - SAPtr e; - SAPtr f; + //Td!($x) => T[Td + $x] + var tdOffset = depth; + var Td = T.span[tdOffset..]; - Idx s; - Idx t; + //TdPAStar!($x) => Td!(PA!(SA[$x])) + //TdPAStar!($x) => T[Td + SA[PA + SA[$x]]] + //var TdPAStar = Td[PA[SA[$x]]]; + var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); - Idx limit; - Idx v; - Idx x = 0; + /*readonly*/ + var old_limit = limit; + limit -= 1; + if (old_limit == 0) + { + SA_dump(SA[first..last], "before heapsort"); + ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); + SA_dump(SA[first..last], "after heapsort"); + } - // RENEE - limit = ss_ilg(last - first); - while (true) + if (limit < 0) { - if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + a = first + 1; + v = TdPAStar[first]; + + // DAVE + while (a < last) { - if (1 < (last - first)) - { - ss_insertionsort(T, SA, partitionOffset, first, last, depth); - } - if (!stack.Pop(ref first, ref last, ref depth, ref limit)) + x = TdPAStar[a]; + if (x != v) { - return; + if (1 < (a - first)) + { + break; + } + v = x; + first = a; } - continue; - } - - //Td!($x) => T[Td + $x] - var tdOffset = depth; - var Td = T.span[tdOffset..]; - //TdPAStar!($x) => Td!(PA!(SA[$x])) - //TdPAStar!($x) => T[Td + SA[PA + SA[$x]]] - //var TdPAStar = Td[PA[SA[$x]]]; - var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); + // loop iter + a += 1; + } - /*readonly*/ - var old_limit = limit; - 
limit -= 1; - if (old_limit == 0) + if (Td[PA[SA[first]] - 1] < v) { - SA_dump(SA[first..last], "before heapsort"); - ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); - SA_dump(SA[first..last], "after heapsort"); + first = ss_partition(SA, partitionOffset, first, a, depth); } - - if (limit < 0) + if ((a - first) <= (last - a)) { - a = first + 1; - v = TdPAStar[first]; - - // DAVE - while (a < last) + if (1 < (a - first)) { - x = TdPAStar[a]; - if (x != v) - { - if (1 < (a - first)) - { - break; - } - v = x; - first = a; - } - - // loop iter - a += 1; + stack.Push(a, last, depth, -1); + last = a; + depth += 1; + limit = ss_ilg(a - first); } - - if (Td[PA[SA[first]] - 1] < v) + else { - first = ss_partition(SA, partitionOffset, first, a, depth); + first = a; + limit = -1; } - if ((a - first) <= (last - a)) + } + else + { + if (1 < (last - a)) { - if (1 < (a - first)) - { - stack.Push(a, last, depth, -1); - last = a; - depth += 1; - limit = ss_ilg(a - first); - } - else - { - first = a; - limit = -1; - } + stack.Push(first, a, depth + 1, ss_ilg(a - first)); + first = a; + limit = -1; } else { - if (1 < (last - a)) - { - stack.Push(first, a, depth + 1, ss_ilg(a - first)); - first = a; - limit = -1; - } - else - { - last = a; - depth += 1; - limit = ss_ilg(a - first); - } + last = a; + depth += 1; + limit = ss_ilg(a - first); } - continue; } + continue; + } - // choose pivot - a = ss_pivot(T, tdOffset, SA, partitionOffset, first, last); - v = TdPAStar[a]; - SA.Swap(first, a); + // choose pivot + a = ss_pivot(T, tdOffset, SA, partitionOffset, first, last); + v = TdPAStar[a]; + SA.Swap(first, a); - // partition - // NORA - b = first; + // partition + // NORA + b = first; + while (true) + { + b += 1; + if (!(b < last)) + { + break; + } + x = TdPAStar[b]; + if (!(x == v)) + { + break; + } + // body + } + a = b; + if ((a < last) && (x < v)) + { + // STAN while (true) { b += 1; @@ -1112,39 +1130,39 @@ private static void ss_mintrosort(IntAccessor T, Span 
SA, SAPtr partitionOf break; } x = TdPAStar[b]; - if (!(x == v)) + if (!(x <= v)) { break; } // body - } - a = b; - if ((a < last) && (x < v)) - { - // STAN - while (true) + if (x == v) { - b += 1; - if (!(b < last)) - { - break; - } - x = TdPAStar[b]; - if (!(x <= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(b, a); - a += 1; - } + SA.Swap(b, a); + a += 1; } } + } - // NATHAN - c = last; + // NATHAN + c = last; + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = TdPAStar[c]; + if (!(x == v)) + { + break; + } + // body + } + d = c; + if ((b < d) && (x > v)) + { + // JACOB while (true) { c -= 1; @@ -1153,351 +1171,333 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf break; } x = TdPAStar[c]; - if (!(x == v)) + if (!(x >= v)) { break; } // body - } - d = c; - if ((b < d) && (x > v)) - { - // JACOB - while (true) + if (x == v) { - c -= 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[c]; - if (!(x >= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(c, d); - d -= 1; - } + SA.Swap(c, d); + d -= 1; } } + } - // RITA - while (b < c) + // RITA + while (b < c) + { + SA.Swap(b, c); + // ROMEO + while (true) { - SA.Swap(b, c); - // ROMEO - while (true) + b += 1; + if (!(b < c)) { - b += 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[b]; - if (!(x <= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(b, a); - a += 1; - } + break; } - // JULIET - while (true) + x = TdPAStar[b]; + if (!(x <= v)) { - c -= 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[c]; - if (!(x >= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(c, d); - d -= 1; - } + break; } - } - - if (a <= d) - { - c = b - 1; - s = (a - first)/*.0*/; - t = (b - a)/*.0*/; - if (s > t) + // body + if (x == v) { - s = t; + SA.Swap(b, a); + a += 1; } - - // JOSHUA - e = first; - f = b - s; - while (0 < s) + } + // JULIET + while (true) + { + c -= 1; + if (!(b < c)) { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; + break; } - s = 
(d - c)/*.0*/; - t = (last - d - 1)/*.0*/; - if (s > t) + x = TdPAStar[c]; + if (!(x >= v)) { - s = t; + break; } - // BERENICE - e = b; - f = last - s; - while (0 < s) + // body + if (x == v) { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; + SA.Swap(c, d); + d -= 1; } + } + } + + if (a <= d) + { + c = b - 1; + s = (a - first)/*.0*/; + t = (b - a)/*.0*/; + if (s > t) + { + s = t; + } + + // JOSHUA + e = first; + f = b - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } + s = (d - c)/*.0*/; + t = (last - d - 1)/*.0*/; + if (s > t) + { + s = t; + } + // BERENICE + e = b; + f = last - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } - a = first + (b - a); - c = last - (d - c); - b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, partitionOffset, a, c, depth); + a = first + (b - a); + c = last - (d - c); + b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, partitionOffset, a, c, depth); - if ((a - first) <= (last - c)) + if ((a - first) <= (last - c)) + { + if ((last - c) <= (c - b)) { - if ((last - c) <= (c - b)) - { - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - stack.Push(c, last, depth, limit); - last = a; - } - else if ((a - first) <= (c - b)) - { - stack.Push(c, last, depth, limit); - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } - else - { - stack.Push(c, last, depth, limit); - stack.Push(first, a, depth, limit); - first = b; - last = c; - depth += 1; - limit = ss_ilg(c - b); - } + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(c, last, depth, limit); + last = a; + } + else if ((a - first) <= (c - b)) + { + stack.Push(c, last, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + last = a; } else { - if ((a - first) <= (c - b)) - { - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - stack.Push(first, a, depth, limit); - first = c; - } - else if ((last - c) <= (c - b)) - { - stack.Push(first, a, depth, limit); - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } - else - { - 
stack.Push(first, a, depth, limit); - stack.Push(c, last, depth, limit); - first = b; - last = c; - depth += 1; - limit = ss_ilg(c - b); - } + stack.Push(c, last, depth, limit); + stack.Push(first, a, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); } } else { - limit += 1; - if (Td[PA[SA[first]] - 1] < v) + if ((a - first) <= (c - b)) + { + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(first, a, depth, limit); + first = c; + } + else if ((last - c) <= (c - b)) { - first = ss_partition(SA, partitionOffset, first, last, depth); - limit = ss_ilg(last - first); + stack.Push(first, a, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } + else + { + stack.Push(first, a, depth, limit); + stack.Push(c, last, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); } - depth += 1; } } - } - - /// - /// Returns the pivot element - /// - private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) - { - Idx t = (last - first)/*.0*/; - SAPtr middle = first + (t / 2); - - if (t <= 512) + else { - if (t <= 32) + limit += 1; + if (Td[PA[SA[first]] - 1] < v) { - return ss_median3(T, Td, SA, PA, first, middle, last - 1); - } - else - { - t >>= 2; - return ss_median5( - T, - Td, - SA, - PA, - first, - first + t, - middle, - last - 1 - t, - last - 1); + first = ss_partition(SA, partitionOffset, first, last, depth); + limit = ss_ilg(last - first); } + depth += 1; } - - t >>= 3; - first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); - middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); - last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - - return ss_median3(T, Td, SA, PA, first, middle, last); } + } + + /// + /// Returns the pivot element + /// + private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) + { + Idx t = (last - first)/*.0*/; + SAPtr middle = first 
+ (t / 2); - private static int ss_median5(IntAccessor t, int td, Span sA, int pA, int first, int v1, int middle, int v2, int v3) + if (t <= 512) { - throw new NotImplementedException(); + if (t <= 32) + { + return ss_median3(T, Td, SA, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5( + T, + Td, + SA, + PA, + first, + first + t, + middle, + last - 1 - t, + last - 1); + } } - static void Swap(ref T lhs, ref T rhs) + t >>= 3; + first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); + middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); + last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + + return ss_median3(T, Td, SA, PA, first, middle, last); + } + + private static int ss_median5(IntAccessor t, int td, Span sA, int pA, int first, int v1, int middle, int v2, int v3) + { + throw new NotImplementedException(); + } + + static void Swap(ref T lhs, ref T rhs) + { + T temp; + temp = lhs; + lhs = rhs; + rhs = temp; + } + + /// + /// Returns the median of three elements + /// + private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) + { + //int get(int x) => T[Td + SA[PA + SA[x]]] + var get = new TdPAStarAccessor(T.span, SA, PA, Td); + + if (get[v1] > get[v2]) { - T temp; - temp = lhs; - lhs = rhs; - rhs = temp; + Swap(ref v1, ref v2); } - /// - /// Returns the median of three elements - /// - private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) + if (get[v2] > get[v3]) { - //int get(int x) => T[Td + SA[PA + SA[x]]] - var get = new TdPAStarAccessor(T.span, SA, PA, Td); - - if (get[v1] > get[v2]) - { - Swap(ref v1, ref v2); - } - - if (get[v2] > get[v3]) + if (get[v1] > get[v3]) { - if (get[v1] > get[v3]) - { - return v1; - } - else - { - return v3; - } + return v1; } else { - return v2; + return v3; } } - - private static int ss_partition(Span sA, int pA, int first, int a, int depth) + else { - 
throw new NotImplementedException(); + return v2; } + } + + private static int ss_partition(Span sA, int pA, int first, int a, int depth) + { + throw new NotImplementedException(); + } - private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) + private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) + { + SAPtr i; + SAPtr j; + Idx t; + Idx r; + + i = last - 2; + // for 1 + while (first <= i) { - SAPtr i; - SAPtr j; - Idx t; - Idx r; + t = SA[i]; + j = i + 1; - i = last - 2; - // for 1 - while (first <= i) + // for 2 + while (true) { - t = SA[i]; - j = i + 1; - - // for 2 - while (true) + // cond for 2 + r = ss_compare(T, SA, PA + t, SA, PA + SA[j], depth); + if (!(0 < r)) { - // cond for 2 - r = ss_compare(T, SA, PA + t, SA, PA + SA[j], depth); - if (!(0 < r)) - { - break; - } - - // body for 2 + break; + } - // do while - while (true) - { - SA[j - 1] = SA[j]; + // body for 2 - j += 1; - if (!((j < last) && SA[j] < 0)) - { - break; - } - } + // do while + while (true) + { + SA[j - 1] = SA[j]; - if (last <= j) + j += 1; + if (!((j < last) && SA[j] < 0)) { break; } - - // iter for 2 (empty) } - if (r == 0) + if (last <= j) { - SA[j] = ~SA[j]; + break; } - SA[j - 1] = t; - // iter - i -= 1; + // iter for 2 (empty) } - } - /// - /// Fast log2, using lookup tables - /// - private static int ss_ilg(int n) - { - if ((n & 0xff00) > 0) - { - return 8 + lg_table[((n >> 8) & 0xff)]; - } - else + if (r == 0) { - return 0 + lg_table[((n >> 0) & 0xff)]; + SA[j] = ~SA[j]; } + SA[j - 1] = t; + + // iter + i -= 1; } + } - private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int first, object p) + /// + /// Fast log2, using lookup tables + /// + private static int ss_ilg(int n) + { + if ((n & 0xff00) > 0) { - throw new NotImplementedException(); + return 8 + lg_table[((n >> 8) & 0xff)]; } - - private static readonly Idx[] sqq_table_array = new[] + else { + return 0 + 
lg_table[((n >> 0) & 0xff)]; + } + } + + private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int first, object p) + { + throw new NotImplementedException(); + } + + private static readonly Idx[] sqq_table_array = new[] + { 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, @@ -1515,784 +1515,704 @@ private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 }; - private static ReadOnlySpan sqq_table => sqq_table_array; + private static ReadOnlySpan sqq_table => sqq_table_array; - /// - /// Fast sqrt, using lookup tables - /// - private static int ss_isqrt(int x) + /// + /// Fast sqrt, using lookup tables + /// + private static int ss_isqrt(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { - return SS_BLOCKSIZE; - } + return SS_BLOCKSIZE; + } - Idx e; - if ((x & 0xffff_0000) > 0) + Idx e; + if ((x & 0xffff_0000) > 0) + { + if ((x & 0xff00_0000) > 0) { - if ((x & 0xff00_0000) > 0) - { - e = 24 + lg_table[((x >> 24) & 0xff)]; - } - else - { - e = 16 + lg_table[((x >> 16) & 0xff)]; - } + e = 24 + lg_table[((x >> 24) & 0xff)]; } else { - if ((x & 0x0000_ff00) > 0) - { - e = 8 + lg_table[(((x >> 8) & 0xff))]; - } - else - { - e = 0 + lg_table[(((x >> 0) & 0xff))]; - } - }; - - Idx y; - if (e >= 16) - { - y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); - if (e >= 24) - { - y = (y + 1 + x / y) >> 1; - } - y = (y + 1 + x / y) >> 1; + e = 16 + lg_table[((x >> 16) & 0xff)]; } - else if (e >= 8) + } + else + { + if ((x & 0x0000_ff00) > 0) { - y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + e = 8 + lg_table[(((x >> 8) & 0xff))]; } else { - return 
sqq_table[x] >> 4; + e = 0 + lg_table[(((x >> 0) & 0xff))]; } + }; - if (x < (y * y)) - { - return y - 1; - } - else + Idx y; + if (e >= 16) + { + y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); + if (e >= 24) { - return y; + y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; } - private static readonly int[] lg_table_array = new[] + if (x < (y * y)) { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + return y - 1; + } + else + { + return y; + } + } + + private static readonly int[] lg_table_array = new[] + { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - private static ReadOnlySpan lg_table => lg_table_array; + private static ReadOnlySpan lg_table => lg_table_array; - private static int tr_ilg(int n) + private static int tr_ilg(int n) + { + if ((n & 0xffff_0000) > 0) { - if ((n & 0xffff_0000) > 0) + if ((n & 0xff00_0000) > 0) { - if ((n & 0xff00_0000) > 0) - { - return 24 + lg_table[((n >> 24) & 0xff)]; - } - else - { - return 16 + lg_table[((n >> 16) & 0xff)]; - } + return 24 + lg_table[((n >> 24) & 0xff)]; } else { - if ((n & 0x0000_ff00) > 0) - { 
- return 8 + lg_table[((n >> 8) & 0xff)]; - } - else - { - return 0 + lg_table[((n >> 0) & 0xff)]; - } + return 16 + lg_table[((n >> 16) & 0xff)]; } } - - private ref struct Budget + else { - public int Chance; - public int Remain; - public int IncVal; - public int Count; - - public Budget(int chance, int incVal) + if ((n & 0x0000_ff00) > 0) { - Chance = chance; - Remain = incVal; - IncVal = incVal; - Count = 0; + return 8 + lg_table[((n >> 8) & 0xff)]; } - - public bool Check(int size) + else { - if (size <= Remain) - { - Remain -= size; - return true; - } + return 0 + lg_table[((n >> 0) & 0xff)]; + } + } + } - if (Chance == 0) - { - Count += size; - return false; - } + private ref struct Budget + { + public int Chance; + public int Remain; + public int IncVal; + public int Count; + + public Budget(int chance, int incVal) + { + Chance = chance; + Remain = incVal; + IncVal = incVal; + Count = 0; + } - Remain += IncVal - size; - Chance -= 1; + public bool Check(int size) + { + if (size <= Remain) + { + Remain -= size; return true; } + + if (Chance == 0) + { + Count += size; + return false; + } + + Remain += IncVal - size; + Chance -= 1; + return true; } + } - /// Tandem repeat sort - private static void trsort(SAPtr ISA, Span SA, int n, int depth) + /// Tandem repeat sort + private static void trsort(SAPtr ISA, Span SA, int n, int depth) + { + SAPtr ISAd; + SAPtr first; + SAPtr last; + /*Index*/ + int t; + /*Index*/ + int skip; + /*Index*/ + int unsorted; + Budget budget = new(tr_ilg(n) * 2 / 3, n); + + //macro_rules! ISA { + // ($x: expr) => { + // SA[ISA + $x] + // }; + //} + + //ref int getISA(int x) => ref SA[ISA + x]; + + // JERRY + ISAd = ISA + depth; + while (-n < SA[0]) { - SAPtr ISAd; - SAPtr first; - SAPtr last; - /*Index*/ - int t; - /*Index*/ - int skip; - /*Index*/ - int unsorted; - Budget budget = new(tr_ilg(n) * 2 / 3, n); - - //macro_rules! 
ISA { - // ($x: expr) => { - // SA[ISA + $x] - // }; - //} - - //ref int getISA(int x) => ref SA[ISA + x]; - - // JERRY - ISAd = ISA + depth; - while (-n < SA[0]) - { - first = 0; - skip = 0; - unsorted = 0; - - // PETER - while (true) + first = 0; + skip = 0; + unsorted = 0; + + // PETER + while (true) + { + t = SA[first]; + if (t < 0) { - t = SA[first]; - if (t < 0) + first -= t; + skip += t; + } + else + { + if (skip != 0) { - first -= t; - skip += t; + SA[first + skip] = skip; + skip = 0; } - else + last = SA[ISA + (t)] + 1; + if (1 < (last - first)) { - if (skip != 0) + budget.Count = 0; + tr_introsort(ISA, ISAd, SA, first, last, ref budget); + if (budget.Count != 0) { - SA[first + skip] = skip; - skip = 0; - } - last = SA[ISA + (t)] + 1; - if (1 < (last - first)) - { - budget.Count = 0; - tr_introsort(ISA, ISAd, SA, first, last, ref budget); - if (budget.Count != 0) - { - unsorted += budget.Count; - } - else - { - skip = first - last; - } + unsorted += budget.Count; } - else if ((last - first) == 1) + else { - skip = -1; + skip = first - last; } - first = last; } - - // cond (PETER) - if (!(first < n)) + else if ((last - first) == 1) { - break; + skip = -1; } + first = last; } - if (skip != 0) - { - SA[first + skip] = skip; - } - if (unsorted == 0) + // cond (PETER) + if (!(first < n)) { break; } + } - // iter - ISAd += ISAd - ISA; + if (skip != 0) + { + SA[first + skip] = skip; + } + if (unsorted == 0) + { + break; } - } - private struct TrStackItem - { - public SAPtr a; - public SAPtr b; - public SAPtr c; - public Idx d; - public Idx e; + // iter + ISAd += ISAd - ISA; } + } - private const int TR_STACK_SIZE = 64; - private ref struct TrStack - { - public readonly Span Items; - public int Size; + private struct TrStackItem + { + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + public Idx e; + } - public TrStack(Span items) - { - Items = items; - Size = 0; - } + private const int TR_STACK_SIZE = 64; + private ref struct TrStack + { + 
public readonly Span Items; + public int Size; - public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) - { - Debug.Assert(Size < Items.Length); - ref TrStackItem item = ref Items[Size++]; - item.a = a; - item.b = b; - item.c = c; - item.d = d; - item.e = e; - } - public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) - { - //Debug.Assert(Size > 0); - if (Size == 0) return false; + public TrStack(Span items) + { + Items = items; + Size = 0; + } - ref TrStackItem item = ref Items[--Size]; - a = item.a; - b = item.b; - c = item.c; - d = item.d; - e = item.e; - return true; - } + public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) + { + Debug.Assert(Size < Items.Length); + ref TrStackItem item = ref Items[Size++]; + item.a = a; + item.b = b; + item.c = c; + item.d = d; + item.e = e; } + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) + { + //Debug.Assert(Size > 0); + if (Size == 0) return false; + + ref TrStackItem item = ref Items[--Size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + e = item.e; + return true; + } + } - private const Idx TR_INSERTIONSORT_THRESHOLD = 8; - private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA, SAPtr first, SAPtr last, ref Budget budget) + private const Idx TR_INSERTIONSORT_THRESHOLD = 8; + private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA, SAPtr first, SAPtr last, ref Budget budget) + { + SAPtr a = 0; + SAPtr b = 0; + SAPtr c; + Idx t, v, x; + Idx incr = isadOffset - isaOffset; + Idx next; + Idx trlink = -1; + + TrStack stack = new(stackalloc TrStackItem[TR_STACK_SIZE]); + + /* + macro_rules! ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + macro_rules! 
ISAd { + ($x: expr) => { + SA[ISAd + $x] + }; + } + */ + var ISA = SA[isaOffset..]; + var ISAd = SA[isadOffset..]; + + var limit = tr_ilg(last - first); + + // PASCAL + while (true) { - SAPtr a = 0; - SAPtr b = 0; - SAPtr c; - Idx t, v, x; - Idx incr = isadOffset - isaOffset; - Idx next; - Idx trlink = -1; - - TrStack stack = new(stackalloc TrStackItem[TR_STACK_SIZE]); - - /* - macro_rules! ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } - macro_rules! ISAd { - ($x: expr) => { - SA[ISAd + $x] - }; - } - */ - var ISA = SA[isaOffset..]; - var ISAd = SA[isadOffset..]; - - var limit = tr_ilg(last - first); - - // PASCAL - while (true) + crosscheck($"pascal limit={limit} first={first} last={last}"); + if (limit < 0) { - crosscheck($"pascal limit={limit} first={first} last={last}"); - if (limit < 0) + if (limit == -1) { - if (limit == -1) + // tandem repeat partition + tr_partition(SA, isadOffset - incr, first, first, last, ref a, ref b, last - 1); + + // update ranks + if (a < last) { - // tandem repeat partition - tr_partition(SA, isadOffset - incr, first, first, last, ref a, ref b, last - 1); + //TODO: crosscheck + crosscheck("ranks a SA limit = next; } } + else + { + crosscheck("aac"); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); + stack.Push(isadOffset, b, last, limit, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); + stack.Push(isadOffset, first, a, limit, trlink); + isadOffset += incr; + first = a; + last = b; + limit = next; + } } else { - crosscheck("b"); - if ((1 < (b - a)) && (0 <= trlink)) + crosscheck("ab"); + if ((a - first) <= (b - a)) { - crosscheck("ba"); - stack.Items[trlink].d = -1; - } - if ((a - first) <= (last - b)) - { - crosscheck("bb"); - if (1 < (a - first)) + crosscheck("aba"); + if (1 < (last - b)) { - crosscheck("bba"); - crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); - stack.Push(isadOffset, b, last, limit, trlink); - last = a; + crosscheck("abaa"); + crosscheck($"push {isadOffset + 
incr} {a} {b} {next} {trlink}"); + stack.Push(isadOffset + incr, a, b, next, trlink); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); + stack.Push(isadOffset, first, a, limit, trlink); + first = b; } - else if (1 < (last - b)) + else if (1 < (a - first)) { - crosscheck("bbb"); - first = b; + crosscheck("abab"); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); + stack.Push(isadOffset + incr, a, b, next, trlink); + last = a; } else { - crosscheck("bbc"); - if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) - { - return; - } + crosscheck("abac"); + isadOffset += incr; + first = a; + last = b; + limit = next; } } - else + else if ((last - b) <= (b - a)) { - crosscheck("bc"); + crosscheck("abb"); if (1 < (last - b)) { - crosscheck("bca"); + crosscheck("abba"); crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset + incr} {a} {b} {next} {trlink}"); + stack.Push(isadOffset + incr, a, b, next, trlink); first = b; } - else if (1 < (a - first)) - { - crosscheck("bcb"); - last = a; - } else { - crosscheck("bcc"); - if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) - { - return; - } - crosscheck("bcc post"); + crosscheck("abbb"); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); + stack.Push(isadOffset, first, a, limit, trlink); + isadOffset += incr; + first = a; + last = b; + limit = next; } } + else + { + crosscheck("abc"); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); + stack.Push(isadOffset, first, a, limit, trlink); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); + stack.Push(isadOffset, b, last, limit, trlink); + isadOffset += incr; + first = a; + last = b; + limit = next; + } } } else { - crosscheck("c"); - if (budget.Check(last - first)) + crosscheck("b"); + if ((1 < (b - a)) && (0 <= trlink)) + { + crosscheck("ba"); + stack.Items[trlink].d = -1; 
+ } + if ((a - first) <= (last - b)) { - crosscheck("ca"); - limit = tr_ilg(last - first); - isadOffset += incr; + crosscheck("bb"); + if (1 < (a - first)) + { + crosscheck("bba"); + crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); + stack.Push(isadOffset, b, last, limit, trlink); + last = a; + } + else if (1 < (last - b)) + { + crosscheck("bbb"); + first = b; + } + else + { + crosscheck("bbc"); + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) + { + return; + } + } } else { - crosscheck("cb"); - if (0 <= trlink) + crosscheck("bc"); + if (1 < (last - b)) { - crosscheck("cba"); - stack.Items[trlink].d = -1; + crosscheck("bca"); + crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); + stack.Push(isadOffset, first, a, limit, trlink); + first = b; } - if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) + else if (1 < (a - first)) { - return; + crosscheck("bcb"); + last = a; + } + else + { + crosscheck("bcc"); + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) + { + return; + } + crosscheck("bcc post"); } - crosscheck("cb post"); } } - } // end PASCAL - } + } + else + { + crosscheck("c"); + if (budget.Check(last - first)) + { + crosscheck("ca"); + limit = tr_ilg(last - first); + isadOffset += incr; + } + else + { + crosscheck("cb"); + if (0 <= trlink) + { + crosscheck("cba"); + stack.Items[trlink].d = -1; + } + if (!stack.Pop(ref isadOffset, ref first, ref last, ref limit, ref trlink)) + { + return; + } + crosscheck("cb post"); + } + } + } // end PASCAL + } - private static int tr_pivot(Span sA, int iSAd, int first, int last) - { - throw new NotImplementedException(); - } + private static int tr_pivot(Span sA, int iSAd, int first, int last) + { + throw new NotImplementedException(); + } - private static void tr_heapsort(int iSAd, Span sA, int first, int v) - { - throw new NotImplementedException(); - } + private static void tr_heapsort(int iSAd, Span sA, int first, int 
v) + { + throw new NotImplementedException(); + } - /// - /// Simple insertionsort for small size groups - /// - private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr first, SAPtr last) + /// + /// Simple insertionsort for small size groups + /// + private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr first, SAPtr last) + { + SAPtr a; + SAPtr b; + Idx t; + Idx r; + + a = first + 1; + // KAREN + while (a < last) { - SAPtr a; - SAPtr b; - Idx t; - Idx r; - - a = first + 1; - // KAREN - while (a < last) + // JEZEBEL + t = SA[a]; + b = a - 1; + while (true) { - // JEZEBEL - t = SA[a]; - b = a - 1; - while (true) + // cond (JEZEBEL) + r = ISAd[t] - ISAd[SA[b]]; + if (!(0 > r)) { - // cond (JEZEBEL) - r = ISAd[t] - ISAd[SA[b]]; - if (!(0 > r)) - { - break; - } - - // LILITH - while (true) - { - SA[b + 1] = SA[b]; + break; + } - // cond (LILITH) - b -= 1; - if (!((first <= b) && (SA[b] < 0))) - { - break; - } - } + // LILITH + while (true) + { + SA[b + 1] = SA[b]; - // body (JEZEBEL) - if (b < first) + // cond (LILITH) + b -= 1; + if (!((first <= b) && (SA[b] < 0))) { break; } } - if (r == 0) + // body (JEZEBEL) + if (b < first) { - SA[b] = ~SA[b]; + break; } - SA[b + 1] = t; + } - // iter - a += 1; + if (r == 0) + { + SA[b] = ~SA[b]; } - } + SA[b + 1] = t; - private static void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) - { - throw new NotImplementedException(); + // iter + a += 1; } + } - private static void tr_copy(int iSA, Span sA, int first, int a, int b, int last, int v) - { - throw new NotImplementedException(); - } + private static void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) + { + throw new NotImplementedException(); + } - private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) - { - throw new NotImplementedException(); - } + private static void tr_copy(int iSA, Span sA, int first, int a, int b, int last, 
int v) + { + throw new NotImplementedException(); + } - [Conditional("DEBUG")] - private static void SA_dump(ReadOnlySpan span, string v) + private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) + { + throw new NotImplementedException(); + } + + [Conditional("DEBUG")] + private static void SA_dump(ReadOnlySpan span, string v) + { + Debug.WriteLine($":: {v}"); + for (int i = 0; i < span.Length; i++) { - Debug.WriteLine($":: {v}"); - for (int i = 0; i < span.Length; i++) - { - Debug.Write($"{span[i]} "); - Debug.WriteLineIf((i + 1) % 25 == 0, ""); - } - Debug.WriteLine(""); + Debug.Write($"{span[i]} "); + Debug.WriteLineIf((i + 1) % 25 == 0, ""); } - - [Conditional("DEBUG")] - private static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); + Debug.WriteLine(""); } + + [Conditional("DEBUG")] + private static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); } From de463b2c5834af8f43315609b638f483a03986f2 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 15:14:18 -0500 Subject: [PATCH 122/325] Update tr_partition signature --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 0419061..802cba8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2465,7 +2465,10 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int throw new NotImplementedException(); } - private static void tr_partition(Span sA, int v1, int first1, int first2, int last, ref int a, ref int b, int v2) + /// + /// Tandem repeat partition + /// + private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { throw new NotImplementedException(); } From 95ee1c71839c5d7ef5e93b2dfc66c3aa9e24c9af Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 15:22:19 -0500 Subject: [PATCH 123/325] (interim) Add raw rust --- .../RsDivSufSort.cs | 158 +++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 802cba8..15ccc37 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2470,7 +2470,163 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int /// private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { - throw new NotImplementedException(); + let mut a: SAPtr; + let mut b: SAPtr; + let mut c: SAPtr; + let mut d: SAPtr; + let mut e: SAPtr; + let mut f: SAPtr; + let mut t: Idx; + let mut s: Idx; + let mut x: Idx = 0; + + macro_rules! 
get { + ($x: expr) => { + SA[ISAd + SA[$x]] + }; + } + + // JOSEPH + b = middle - 1; + loop { + // cond + b += 1; + if !(b < last) { + break; + } + x = get!(b); + if !(x == v) { + break; + } + } + a = b; + if (a < last) && (x < v) { + // MARY + loop { + b += 1; + if !(b < last) { + break; + } + x = get!(b); + if !(x <= v) { + break; + } + + // body + if (x == v) + { + SA.swap(b, a); + a += 1; + } + } + } + + // JEREMIAH + c = last; + loop { + c -= 1; + if !(b < c) { + break; + } + x = get!(c); + if !(x == v) { + break; + } + } + d = c; + if (b < d) && (x > v) { + // BEDELIA + loop { + c -= 1; + if !(b < c) { + break; + } + x = get!(c); + if !(x >= v) { + break; + } + if x == v { + SA.swap(c, d); + d -= 1; + } + } + } + + // ALEX + while b < c { + SA.swap(b, c); + // SIMON + loop { + b += 1; + if !(b < c) { + break; + } + x = get!(b); + if !(x <= v) { + break; + } + if x == v { + SA.swap(b, a); + a += 1; + } + } + + // GREGORY + loop { + c -= 1; + if !(b < c) { + break; + } + x = get!(c); + if !(x >= v) { + break; + } + if x == v { + SA.swap(c, d); + d -= 1; + } + } + } // end ALEX + + if a <= d { + c = b - 1; + + s = (a - first).0; + t = (b - a).0; + if (s > t) + { + s = t + } + + // GENEVIEVE + e = first; + f = b - s; + while 0 < s { + SA.swap(e, f); + s -= 1; + e += 1; + f += 1; + } + s = (d - c).0; + t = (last - d - 1).0; + if s > t { + s = t; + } + + // MARISSA + e = b; + f = last - s; + while 0 < s { + SA.swap(e, f); + s -= 1; + e += 1; + f += 1; + } + first += (b - a); + last -= (d - c).0; + } + pa.0 = first.0; + pb.0 = last.0; } [Conditional("DEBUG")] From b296fbd6546782560f6638977b194f32ad77f1a3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 15:23:09 -0500 Subject: [PATCH 124/325] Convert tr_partition syntax --- .../RsDivSufSort.cs | 146 +++++++++++------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 15ccc37..c2b7b80 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2470,52 +2470,61 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int /// private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { - let mut a: SAPtr; - let mut b: SAPtr; - let mut c: SAPtr; - let mut d: SAPtr; - let mut e: SAPtr; - let mut f: SAPtr; - let mut t: Idx; - let mut s: Idx; - let mut x: Idx = 0; - - macro_rules! get { - ($x: expr) => { - SA[ISAd + SA[$x]] - }; - } + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr d; + SAPtr e; + SAPtr f; + Idx t; + Idx s; + Idx x = 0; + + //ref int get(int x) => ref SA[ISAd + SA[x]]; + + //macro_rules! 
get { + // ($x: expr) => { + // SA[ISAd + SA[$x]] + // }; + //} // JOSEPH b = middle - 1; - loop { + while (true) + { // cond b += 1; - if !(b < last) { + if (!(b < last)) + { break; } x = get!(b); - if !(x == v) { + if (!(x == v)) + { break; } } a = b; - if (a < last) && (x < v) { + if ((a < last) && (x < v)) + { // MARY - loop { + while (true) + { b += 1; - if !(b < last) { + if (!(b < last)) + { break; } x = get!(b); - if !(x <= v) { + if (!(x <= v)) + { break; } // body if (x == v) { - SA.swap(b, a); + SA.Swap(b, a); a += 1; } } @@ -2523,110 +2532,131 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi // JEREMIAH c = last; - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = get!(c); - if !(x == v) { + if (!(x == v)) + { break; } } d = c; - if (b < d) && (x > v) { + if ((b < d) && (x > v)) + { // BEDELIA - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = get!(c); - if !(x >= v) { + if (!(x >= v)) + { break; } - if x == v { - SA.swap(c, d); + if (x == v) + { + SA.Swap(c, d); d -= 1; } } } // ALEX - while b < c { - SA.swap(b, c); + while (b < c) + { + SA.Swap(b, c); // SIMON - loop { + while (true) + { b += 1; - if !(b < c) { + if (!(b < c)) + { break; } x = get!(b); - if !(x <= v) { + if (!(x <= v)) + { break; } - if x == v { - SA.swap(b, a); + if (x == v) + { + SA.Swap(b, a); a += 1; } } // GREGORY - loop { + while (true) + { c -= 1; - if !(b < c) { + if (!(b < c)) + { break; } x = get!(c); - if !(x >= v) { + if (!(x >= v)) + { break; } - if x == v { - SA.swap(c, d); + if (x == v) + { + SA.Swap(c, d); d -= 1; } } } // end ALEX - if a <= d { + if (a <= d) + { c = b - 1; - s = (a - first).0; - t = (b - a).0; + s = (a - first); + t = (b - a); if (s > t) { - s = t + s = t; } // GENEVIEVE e = first; f = b - s; - while 0 < s { - SA.swap(e, f); + while (0 < s) + { + SA.Swap(e, f); s -= 1; e += 1; f += 1; } - s = (d - c).0; - t = (last - d - 1).0; - if s > t { + s = (d - c); + 
t = (last - d - 1); + if (s > t) + { s = t; } // MARISSA e = b; f = last - s; - while 0 < s { - SA.swap(e, f); + while (0 < s) + { + SA.Swap(e, f); s -= 1; e += 1; f += 1; } first += (b - a); - last -= (d - c).0; + last -= (d - c); } - pa.0 = first.0; - pb.0 = last.0; + pa = first; + pb = last; } [Conditional("DEBUG")] From 35f715930f7021793fbc24e0a80167d33e4d9cf4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 15:27:35 -0500 Subject: [PATCH 125/325] Finish tr_partition with get! macro port --- .../RsDivSufSort.cs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index c2b7b80..91fdaff 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2468,7 +2468,7 @@ private static void tr_copy(int iSA, Span sA, int first, int a, int b, int /// /// Tandem repeat partition /// - private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) + private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { SAPtr a; SAPtr b; @@ -2481,6 +2481,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi Idx x = 0; //ref int get(int x) => ref SA[ISAd + SA[x]]; + Span ISAd = SA[isadOffset..]; //macro_rules! 
get { // ($x: expr) => { @@ -2498,7 +2499,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(b); + x = ISAd[SA[b]]; if (!(x == v)) { break; @@ -2515,7 +2516,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(b); + x = ISAd[SA[b]]; if (!(x <= v)) { break; @@ -2539,7 +2540,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(c); + x = ISAd[SA[c]]; if (!(x == v)) { break; @@ -2556,7 +2557,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(c); + x = ISAd[SA[c]]; if (!(x >= v)) { break; @@ -2581,7 +2582,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(b); + x = ISAd[SA[b]]; if (!(x <= v)) { break; @@ -2601,7 +2602,7 @@ private static void tr_partition(Span SA, SAPtr ISAd, SAPtr first, SAPtr mi { break; } - x = get!(c); + x = ISAd[SA[c]]; if (!(x >= v)) { break; From d1a28fa131ae2aa4002e1e3404036ba49008c88c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 13 Dec 2021 15:31:49 -0500 Subject: [PATCH 126/325] Remove some SA_dump checkpoints --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 91fdaff..380807b 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -473,10 +473,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) } } // End: Set the sorted order of type B* suffixes - SA_dump(SA, "JZ-post-sortOrderBstar"); - SA_dump(A, "JZ-A"); - SA_dump(B, "JZ-B"); - // Calculate the index of start/end point of each bucket { Bb[(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1)] = n; // end point @@ -522,8 +518,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) c0 -= 1; } } // End: Calculate the index of start/end point of each bucket - - SA_dump(SA, "JZ-final-bStar"); } return new SortTypeBstarResult { A = A, B = B, m = m }; From 1856233e2f8d6c098cc9fb69a90f8722ae7d60ae Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 13 Dec 2021 16:21:25 -0500 Subject: [PATCH 127/325] Add tr_sort and tr_introsort checkpoints --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 380807b..fe97f58 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -400,6 +400,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) // Construct the inverse suffix array of type B* suffixes using trsort. 
SA_dump(SA, "trsort(A)"); + crosscheck($"enter trsort: ISAb={ISAb} m={m} depth={1}"); trsort(ISAb, SA, m, 1); SA_dump(SA, "trsort(B)"); @@ -1698,7 +1699,13 @@ private static void trsort(SAPtr ISA, Span SA, int n, int depth) if (1 < (last - first)) { budget.Count = 0; + crosscheck($"enter tr_introsort: ISA={ISA} ISAd={ISAd} first={first} last={last}"); + crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); + SA_dump(SA, "tr_introsort(A)"); tr_introsort(ISA, ISAd, SA, first, last, ref budget); + SA_dump(SA, "tr_introsort(B)"); + crosscheck($"exit tr_introsort"); + crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); if (budget.Count != 0) { unsorted += budget.Count; From bce070a5df239d449802f8f9131c79cd997d32ab Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 11:53:50 -0500 Subject: [PATCH 128/325] Fix broken swap --- .../SpanExtensions.cs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs index f1faf1c..6cab4b1 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs @@ -1,15 +1,12 @@ using System; +using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.LibDivSufSort { public static class SpanExtensions { - public static void Swap(this Span span, int indexA, int indexB) - { - ref var itemA = ref span[indexA]; - ref var itemB = ref span[indexB]; - span[indexA] = itemB; - span[indexB] = itemA; - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void Swap(this Span span, int i, int j) + => (span[j], span[i]) = (span[i], span[j]); } } From 3584ec6eea84741a6703010ada608c38e12502ca Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:13:36 -0500 Subject: [PATCH 129/325] Fix ISAd not updating Mark all isadOffset updates in tr_introsort Use SpanOwner to allocate TrStackItem buffer instead of stackalloc Add additional checkpoints --- .../RsDivSufSort.cs | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index fe97f58..26d014d 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1800,7 +1800,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA Idx next; Idx trlink = -1; - TrStack stack = new(stackalloc TrStackItem[TR_STACK_SIZE]); + using var stackOwner = SpanOwner.Allocate(TR_STACK_SIZE, AllocationMode.Clear); + TrStack stack = new(stackOwner.Span); /* macro_rules! ISA { @@ -1903,6 +1904,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("denny-post"); } } @@ -1930,6 +1933,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("clap-post"); } } @@ -1958,6 +1963,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; } else { @@ -2024,6 +2031,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck($"push {isadOffset} {a} {last} {-3} {trlink}"); stack.Push(isadOffset, a, last, -3, trlink); isadOffset += incr; + //JZ: ISAd update point ISAd = ISAd[incr..]; last = a; limit = next; @@ -2040,6 +2048,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA else { isadOffset += incr; + //JZ: ISAd update point ISAd = ISAd[incr..]; last = a; limit = next; @@ -2067,6 
+2076,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("1<(last-a) not post"); crosscheck($"were popped: ISAd={isadOffset} first={first} last={last} limit={limit} trlink={trlink}"); } @@ -2079,6 +2090,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("times pop-post"); crosscheck($"were popped: ISAd={isadOffset} first={first} last={last} limit={limit} trlink={trlink}"); } // end if first < last @@ -2089,7 +2102,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) { crosscheck($"insertionsort last-first={last - first}"); + SA_dump(SA, "tr_insertionsort(A)"); tr_insertionsort(SA, ISAd, first, last); + SA_dump(SA, "tr_insertionsort(B)"); limit = -3; continue; } @@ -2194,6 +2209,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { crosscheck("aaac"); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; first = a; last = b; limit = next; @@ -2217,6 +2234,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; first = a; last = b; limit = next; @@ -2230,6 +2249,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; first = a; last = b; limit = next; @@ -2261,6 +2282,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { crosscheck("abac"); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; 
first = a; last = b; limit = next; @@ -2284,6 +2307,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck($"push {isadOffset} {first} {a} {limit} {trlink}"); stack.Push(isadOffset, first, a, limit, trlink); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; first = a; last = b; limit = next; @@ -2297,6 +2322,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck($"push {isadOffset} {b} {last} {limit} {trlink}"); stack.Push(isadOffset, b, last, limit, trlink); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; first = a; last = b; limit = next; @@ -2333,6 +2360,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; } } else @@ -2357,6 +2386,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("bcc post"); } } @@ -2370,6 +2401,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA crosscheck("ca"); limit = tr_ilg(last - first); isadOffset += incr; + //JZ: ISAd update point + ISAd = ISAd[incr..]; } else { @@ -2383,6 +2416,8 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA { return; } + //JZ: ISAd update point + ISAd = SA[isadOffset..]; crosscheck("cb post"); } } From ce112c783106d05060aa979b8eb3f0d809966319 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:18:20 -0500 Subject: [PATCH 130/325] Add raw rust in ss_partition --- .../RsDivSufSort.cs | 66 ++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 26d014d..12683ec 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1,6 +1,7 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; +using System.Runtime.CompilerServices; using Idx = System.Int32; using SAPtr = System.Int32; @@ -1409,9 +1410,70 @@ private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP } } - private static int ss_partition(Span sA, int pA, int first, int a, int depth) + /// Binary partition for substrings. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr last, Idx depth) { - throw new NotImplementedException(); + macro_rules! PA { + ($x: expr) => { + SA[PA + $x] + }; + } + // JIMMY + let mut a = first - 1; + let mut b = last; + macro_rules! a { + () => { + SA[a] + }; + } + macro_rules! b { + () => { + SA[b] + }; + } + + loop { + // JANINE + loop { + a += 1; + if !(a < b) { + break; + } + if !((PA!(a!()) + depth) >= (PA!(a!() + 1) + 1)) { + break; + } + + // loop body + a!() = !a!(); + } + + // GEORGIO + loop { + b -= 1; + if !(a < b) { + break; + } + if !((PA!(b!()) + depth) < (PA!(b!() + 1) + 1)) { + break; + } + + // loop body is empty + } + + if b <= a { + break; + } + + let t = !b!(); + b!() = a!(); + a!() = t; + } + + if (first < a) { + SA[first] = !SA[first]; + } + a } private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) From df6ba5ba9d5e7c8721b8359b656a6b5a967500c8 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:21:55 -0500 Subject: [PATCH 131/325] Port some syntax in ss_partition --- .../RsDivSufSort.cs | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 12683ec..fc80f2a 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1414,33 +1414,37 @@ private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr last, Idx depth) { - macro_rules! PA { - ($x: expr) => { - SA[PA + $x] - }; - } + //macro_rules! PA { + // ($x: expr) => { + // SA[PA + $x] + // }; + //} // JIMMY - let mut a = first - 1; - let mut b = last; - macro_rules! a { - () => { - SA[a] - }; - } - macro_rules! b { - () => { - SA[b] - }; - } + var a = first - 1; + var b = last; + //macro_rules! a { + // () => { + // SA[a] + // }; + //} + //macro_rules! 
b { + // () => { + // SA[b] + // }; + //} - loop { + while (true) + { // JANINE - loop { + while (true) + { a += 1; - if !(a < b) { + if (!(a < b)) + { break; } - if !((PA!(a!()) + depth) >= (PA!(a!() + 1) + 1)) { + if (!((PA!(a!()) + depth) >= (PA!(a!() + 1) + 1))) + { break; } @@ -1449,31 +1453,36 @@ private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr las } // GEORGIO - loop { + while (true) + { b -= 1; - if !(a < b) { + if (!(a < b)) + { break; } - if !((PA!(b!()) + depth) < (PA!(b!() + 1) + 1)) { + if (!((PA!(b!()) + depth) < (PA!(b!() + 1) + 1))) + { break; } // loop body is empty } - if b <= a { + if (b <= a) + { break; } - let t = !b!(); + var t = !b!(); b!() = a!(); a!() = t; } - if (first < a) { - SA[first] = !SA[first]; + if (first < a) + { + SA[first] = ~SA[first]; } - a + return a; } private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) From d323627ab9dec42ef97aec705e456e2eb128e759 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 12:27:22 -0500 Subject: [PATCH 132/325] Implement ss_partition --- .../RsDivSufSort.cs | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index fc80f2a..7593add 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1057,7 +1057,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, partitionOffset, first, a, depth); + first = ss_partition(SA, SA[partitionOffset..], first, a, depth); } if ((a - first) <= (last - a)) { @@ -1265,7 +1265,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf a = first + (b - a); c = last - (d - c); - b = v <= Td[PA[SA[a]] - 1] ? 
a : ss_partition(SA, partitionOffset, a, c, depth); + b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, SA[partitionOffset..], a, c, depth); if ((a - first) <= (last - c)) { @@ -1321,7 +1321,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf limit += 1; if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, partitionOffset, first, last, depth); + first = ss_partition(SA, SA[partitionOffset..], first, last, depth); limit = ss_ilg(last - first); } depth += 1; @@ -1412,26 +1412,11 @@ private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP /// Binary partition for substrings. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr last, Idx depth) + private static SAPtr ss_partition(Span SA, Span PA, SAPtr first, SAPtr last, Idx depth) { - //macro_rules! PA { - // ($x: expr) => { - // SA[PA + $x] - // }; - //} // JIMMY var a = first - 1; var b = last; - //macro_rules! a { - // () => { - // SA[a] - // }; - //} - //macro_rules! b { - // () => { - // SA[b] - // }; - //} while (true) { @@ -1443,13 +1428,13 @@ private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr las { break; } - if (!((PA!(a!()) + depth) >= (PA!(a!() + 1) + 1))) + if (!((PA[SA[a]] + depth) >= (PA[SA[a] + 1] + 1))) { break; } // loop body - a!() = !a!(); + SA[a] = ~SA[a]; } // GEORGIO @@ -1460,7 +1445,7 @@ private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr las { break; } - if (!((PA!(b!()) + depth) < (PA!(b!() + 1) + 1))) + if (!((PA[SA[b]] + depth) < (PA[SA[b] + 1] + 1))) { break; } @@ -1473,9 +1458,9 @@ private static SAPtr ss_partition(Span SA, SAPtr PA, SAPtr first, SAPtr las break; } - var t = !b!(); - b!() = a!(); - a!() = t; + var t = ~SA[b]; + SA[b] = SA[a]; + SA[a] = t; } if (first < a) From 8fe524840ef8b369dce37bb59f7cbc056852fa4a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:28:23 -0500 Subject: [PATCH 133/325] Go back to passing just paOffset to ss_partition --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 7593add..ee3bf94 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1057,7 +1057,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, SA[partitionOffset..], first, a, depth); + first = ss_partition(SA, partitionOffset, first, a, depth); } if ((a - first) <= (last - a)) { @@ -1265,7 +1265,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf a = first + (b - a); c = last - (d - c); - b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, SA[partitionOffset..], a, c, depth); + b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, partitionOffset, a, c, depth); if ((a - first) <= (last - c)) { @@ -1321,7 +1321,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf limit += 1; if (Td[PA[SA[first]] - 1] < v) { - first = ss_partition(SA, SA[partitionOffset..], first, last, depth); + first = ss_partition(SA, partitionOffset, first, last, depth); limit = ss_ilg(last - first); } depth += 1; @@ -1412,8 +1412,10 @@ private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP /// Binary partition for substrings. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr ss_partition(Span SA, Span PA, SAPtr first, SAPtr last, Idx depth) + private static SAPtr ss_partition(Span SA, SAPtr paOffset, SAPtr first, SAPtr last, Idx depth) { + Span PA = SA[paOffset..]; + // JIMMY var a = first - 1; var b = last; From 0915420b1f189724deb7a883f291ad3f9fb5516f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 12:34:02 -0500 Subject: [PATCH 134/325] Mark IntAccessor methods as readonly --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index ee3bf94..110480f 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -192,8 +192,8 @@ public ref struct IntAccessor public readonly ReadOnlySpan span; public IntAccessor(ReadOnlySpan span) => this.span = span; - public int this[Idx index] => span[index]; - public int Length => span.Length; + public readonly int this[Idx index] => span[index]; + public readonly int Length => span.Length; } public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) From 8a774ce9f66ac438a19b4274d85f6ab3efe143c1 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:34:23 -0500 Subject: [PATCH 135/325] Add rust stub to ss_median5 --- .../RsDivSufSort.cs | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 110480f..1a931ce 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1367,9 +1367,38 @@ private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP return ss_median3(T, Td, SA, PA, first, middle, last); } - private static int ss_median5(IntAccessor t, int td, Span sA, int pA, int first, int v1, int middle, int v2, int v3) + /// Returns the median of five elements + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - throw new NotImplementedException(); + let mut t: SAPtr; + macro_rules! get { + ($x: expr) => { + T[Td + SA[PA + SA[$x]]] + }; + } + if get!(v2) > get!(v3) { + mem::swap(&mut v2, &mut v3); + } + if get!(v4) > get!(v5) { + mem::swap(&mut v4, &mut v5); + } + if get!(v2) > get!(v4) { + mem::swap(&mut v2, &mut v4); + mem::swap(&mut v3, &mut v5); + } + if get!(v1) > get!(v3) { + mem::swap(&mut v1, &mut v3); + } + if get!(v1) > get!(v4) { + mem::swap(&mut v1, &mut v4); + mem::swap(&mut v3, &mut v5); + } + if get!(v3) > get!(v4) { + v4 + } else { + v3 + } } static void Swap(ref T lhs, ref T rhs) From 3c196819aac30256f2e63892639d54c69b6cdb93 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:41:29 -0500 Subject: [PATCH 136/325] Mark everything in TdPAStarAccessor as readonly --- .../RsDivSufSort.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 1a931ce..309798e 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -958,18 +958,18 @@ public SpanOffsetAccessor(Span span, int offset) private ref struct TdPAStarAccessor { - private readonly Span _SA; - private readonly Span _PA; + private readonly ReadOnlySpan _SA; + private readonly ReadOnlySpan _PA; private readonly IntAccessor _TD; - public TdPAStarAccessor(ReadOnlySpan T, Span SA, int partitionOffset, int tdOffset) + public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) { _SA = SA; _PA = SA[partitionOffset..]; _TD = new(T[tdOffset..]); } - public int this[int index] => _TD[_PA[_SA[index]]]; + public readonly int this[int index] => _TD[_PA[_SA[index]]]; } /// @@ -1396,7 +1396,9 @@ private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAP } if get!(v3) > get!(v4) { v4 - } else { + } + else + { v3 } } From 5ec19c79c64eda58a53247bcdcf656c031b5e7bb Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:43:24 -0500 Subject: [PATCH 137/325] Implement ss_median5 --- .../RsDivSufSort.cs | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 309798e..4a6aed0 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1371,35 +1371,36 @@ private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - let mut t: SAPtr; - macro_rules! get { - ($x: expr) => { - T[Td + SA[PA + SA[$x]]] - }; - } - if get!(v2) > get!(v3) { - mem::swap(&mut v2, &mut v3); + var get = new TdPAStarAccessor(T.span, SA, PA, Td); + if (get[v2] > get[v3]) + { + Swap(ref v2, ref v3); } - if get!(v4) > get!(v5) { - mem::swap(&mut v4, &mut v5); + if (get[v4] > get[v5]) + { + Swap(ref v4, ref v5); } - if get!(v2) > get!(v4) { - mem::swap(&mut v2, &mut v4); - mem::swap(&mut v3, &mut v5); + if (get[v2] > get[v4]) + { + Swap(ref v2, ref v4); + Swap(ref v3, ref v5); } - if get!(v1) > get!(v3) { - mem::swap(&mut v1, &mut v3); + if (get[v1] > get[v3]) + { + Swap(ref v1, ref v3); } - if get!(v1) > get!(v4) { - mem::swap(&mut v1, &mut v4); - mem::swap(&mut v3, &mut v5); + if (get[v1] > get[v4]) + { + Swap(ref v1, ref v4); + Swap(ref v3, ref v5); } - if get!(v3) > get!(v4) { - v4 + if (get[v3] > get[v4]) + { + return v4; } else { - v3 + return v3; } } From 4448bcbf3f6a10749ca8faba33e6c1cb1ba19960 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:47:33 -0500 Subject: [PATCH 138/325] Remove old trsort items --- .../RsTrsort.cs | 12 - .../trsort.c | 586 ------------------ 2 files changed, 598 deletions(-) delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs deleted file mode 100644 index cd19069..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsTrsort.cs +++ /dev/null @@ -1,12 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace DeltaQ.SuffixSorting.LibDivSufSort -{ - class RsTrsort - { - } -} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c b/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c deleted file mode 100644 index 6fe3e67..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/trsort.c +++ /dev/null @@ -1,586 +0,0 @@ -/* - * trsort.c for libdivsufsort - * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "divsufsort_private.h" - - -/*- Private Functions -*/ - -static const saint_t lg_table[256]= { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -}; - -static INLINE -saint_t -tr_ilg(saidx_t n) { -#if defined(BUILD_DIVSUFSORT64) - return (n >> 32) ? - ((n >> 48) ? - ((n >> 56) ? - 56 + lg_table[(n >> 56) & 0xff] : - 48 + lg_table[(n >> 48) & 0xff]) : - ((n >> 40) ? - 40 + lg_table[(n >> 40) & 0xff] : - 32 + lg_table[(n >> 32) & 0xff])) : - ((n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff])); -#else - return (n & 0xffff0000) ? - ((n & 0xff000000) ? - 24 + lg_table[(n >> 24) & 0xff] : - 16 + lg_table[(n >> 16) & 0xff]) : - ((n & 0x0000ff00) ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]); -#endif -} - - -/*---------------------------------------------------------------------------*/ - -/* Simple insertionsort for small size groups. 
*/ -static -void -tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { - saidx_t *a, *b; - saidx_t t, r; - - for(a = first + 1; a < last; ++a) { - for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { - do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); - if(b < first) { break; } - } - if(r == 0) { *b = ~*b; } - *(b + 1) = t; - } -} - - -/*---------------------------------------------------------------------------*/ - -static INLINE -void -tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { - saidx_t j, k; - saidx_t v; - saidx_t c, d, e; - - for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { - d = ISAd[SA[k = j++]]; - if(d < (e = ISAd[SA[j]])) { k = j; d = e; } - if(d <= c) { break; } - } - SA[i] = v; -} - -/* Simple top-down heapsort. */ -static -void -tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { - saidx_t i, m; - saidx_t t; - - m = size; - if((size % 2) == 0) { - m--; - if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } - } - - for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } - if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } - for(i = m - 1; 0 < i; --i) { - t = SA[0], SA[0] = SA[i]; - tr_fixdown(ISAd, SA, 0, i); - SA[i] = t; - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Returns the median of three elements. */ -static INLINE -saidx_t * -tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { - saidx_t *t; - if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } - if(ISAd[*v2] > ISAd[*v3]) { - if(ISAd[*v1] > ISAd[*v3]) { return v1; } - else { return v3; } - } - return v2; -} - -/* Returns the median of five elements. 
*/ -static INLINE -saidx_t * -tr_median5(const saidx_t *ISAd, - saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { - saidx_t *t; - if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } - if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } - if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } - if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } - if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } - if(ISAd[*v3] > ISAd[*v4]) { return v4; } - return v3; -} - -/* Returns the pivot element. */ -static INLINE -saidx_t * -tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { - saidx_t *middle; - saidx_t t; - - t = last - first; - middle = first + t / 2; - - if(t <= 512) { - if(t <= 32) { - return tr_median3(ISAd, first, middle, last - 1); - } else { - t >>= 2; - return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); - } - } - t >>= 3; - first = tr_median3(ISAd, first, first + t, first + (t << 1)); - middle = tr_median3(ISAd, middle - t, middle, middle + t); - last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); - return tr_median3(ISAd, first, middle, last); -} - - -/*---------------------------------------------------------------------------*/ - -typedef struct _trbudget_t trbudget_t; -struct _trbudget_t { - saidx_t chance; - saidx_t remain; - saidx_t incval; - saidx_t count; -}; - -static INLINE -void -trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) { - budget->chance = chance; - budget->remain = budget->incval = incval; -} - -static INLINE -saint_t -trbudget_check(trbudget_t *budget, saidx_t size) { - if(size <= budget->remain) { budget->remain -= size; return 1; } - if(budget->chance == 0) { budget->count += size; return 0; } - budget->remain += budget->incval - size; - budget->chance -= 1; - return 1; -} - - -/*---------------------------------------------------------------------------*/ - -static INLINE -void -tr_partition(const saidx_t *ISAd, - saidx_t *first, saidx_t *middle, saidx_t *last, - 
saidx_t **pa, saidx_t **pb, saidx_t v) { - saidx_t *a, *b, *c, *d, *e, *f; - saidx_t t, s; - saidx_t x = 0; - - for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } - if(((a = b) < last) && (x < v)) { - for(; (++b < last) && ((x = ISAd[*b]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - } - for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } - if((b < (d = c)) && (x > v)) { - for(; (b < --c) && ((x = ISAd[*c]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - for(; b < c;) { - SWAP(*b, *c); - for(; (++b < c) && ((x = ISAd[*b]) <= v);) { - if(x == v) { SWAP(*b, *a); ++a; } - } - for(; (b < --c) && ((x = ISAd[*c]) >= v);) { - if(x == v) { SWAP(*c, *d); --d; } - } - } - - if(a <= d) { - c = b - 1; - if((s = a - first) > (t = b - a)) { s = t; } - for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if((s = d - c) > (t = last - d - 1)) { s = t; } - for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - first += (b - a), last -= (d - c); - } - *pa = first, *pb = last; -} - -static -void -tr_copy(saidx_t *ISA, const saidx_t *SA, - saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, - saidx_t depth) { - /* sort suffixes of middle partition - by using sorted order of suffixes of left and right partition. 
*/ - saidx_t *c, *d, *e; - saidx_t s, v; - - v = b - SA - 1; - for(c = first, d = a - 1; c <= d; ++c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *++d = s; - ISA[s] = d - SA; - } - } - for(c = last - 1, e = d + 1, d = b; e < d; --c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *--d = s; - ISA[s] = d - SA; - } - } -} - -static -void -tr_partialcopy(saidx_t *ISA, const saidx_t *SA, - saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, - saidx_t depth) { - saidx_t *c, *d, *e; - saidx_t s, v; - saidx_t rank, lastrank, newrank = -1; - - v = b - SA - 1; - lastrank = -1; - for(c = first, d = a - 1; c <= d; ++c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *++d = s; - rank = ISA[s + depth]; - if(lastrank != rank) { lastrank = rank; newrank = d - SA; } - ISA[s] = newrank; - } - } - - lastrank = -1; - for(e = d; first <= e; --e) { - rank = ISA[*e]; - if(lastrank != rank) { lastrank = rank; newrank = e - SA; } - if(newrank != rank) { ISA[*e] = newrank; } - } - - lastrank = -1; - for(c = last - 1, e = d + 1, d = b; e < d; --c) { - if((0 <= (s = *c - depth)) && (ISA[s] == v)) { - *--d = s; - rank = ISA[s + depth]; - if(lastrank != rank) { lastrank = rank; newrank = d - SA; } - ISA[s] = newrank; - } - } -} - -static -void -tr_introsort(saidx_t *ISA, const saidx_t *ISAd, - saidx_t *SA, saidx_t *first, saidx_t *last, - trbudget_t *budget) { -#define STACK_SIZE TR_STACKSIZE - struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; - saidx_t *a, *b, *c; - saidx_t t; - saidx_t v, x = 0; - saidx_t incr = ISAd - ISA; - saint_t limit, next; - saint_t ssize, trlink = -1; - - for(ssize = 0, limit = tr_ilg(last - first);;) { - - if(limit < 0) { - if(limit == -1) { - /* tandem repeat partition */ - tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); - - /* update ranks */ - if(a < last) { - for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } - } - if(b < last) { - for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; 
} - } - - /* push */ - if(1 < (b - a)) { - STACK_PUSH5(NULL, a, b, 0, 0); - STACK_PUSH5(ISAd - incr, first, last, -2, trlink); - trlink = ssize - 2; - } - if((a - first) <= (last - b)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); - last = a, limit = tr_ilg(a - first); - } else if(1 < (last - b)) { - first = b, limit = tr_ilg(last - b); - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } else { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); - first = b, limit = tr_ilg(last - b); - } else if(1 < (a - first)) { - last = a, limit = tr_ilg(a - first); - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } else if(limit == -2) { - /* tandem repeat copy */ - a = stack[--ssize].b, b = stack[ssize].c; - if(stack[ssize].d == 0) { - tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); - } - STACK_POP5(ISAd, first, last, limit, trlink); - } else { - /* sorted partition */ - if(0 <= *first) { - a = first; - do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); - first = a; - } - if(first < last) { - a = first; do { *a = ~*a; } while(*++a < 0); - next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; - if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } - - /* push */ - if(trbudget_check(budget, a - first)) { - if((a - first) <= (last - a)) { - STACK_PUSH5(ISAd, a, last, -3, trlink); - ISAd += incr, last = a, limit = next; - } else { - if(1 < (last - a)) { - STACK_PUSH5(ISAd + incr, first, a, next, trlink); - first = a, limit = -3; - } else { - ISAd += incr, last = a, limit = next; - } - } - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - if(1 < (last - a)) { - first = a, limit = -3; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - continue; - } - - if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { - tr_insertionsort(ISAd, first, last); - limit = -3; - continue; - } - - if(limit-- == 0) { - tr_heapsort(ISAd, first, last - first); - for(a = last - 1; first < a; a = b) { - for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } - } - limit = -3; - continue; - } - - /* choose pivot */ - a = tr_pivot(ISAd, first, last); - SWAP(*first, *a); - v = ISAd[*first]; - - /* partition */ - tr_partition(ISAd, first, first + 1, last, &a, &b, v); - if((last - first) != (b - a)) { - next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; - - /* update ranks */ - for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } - if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } - - /* push */ - if((1 < (b - a)) && (trbudget_check(budget, b - a))) { - if((a - first) <= (last - b)) { - if((last - b) <= (b - a)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - STACK_PUSH5(ISAd, b, last, limit, trlink); - last = a; - } else if(1 < (last - b)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - first = b; - } else { - ISAd += incr, first = a, last = b, limit = next; - } - } else if((a - first) <= (b - a)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, limit, trlink); - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - last = a; - } else { - STACK_PUSH5(ISAd, b, last, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - STACK_PUSH5(ISAd, b, last, limit, trlink); - STACK_PUSH5(ISAd, first, a, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - if((a - first) <= (b - a)) { - if(1 < (last - b)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - STACK_PUSH5(ISAd, first, a, limit, trlink); - first = b; - } else if(1 < (a - first)) { - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - last = a; - } else { - ISAd += incr, first = a, last = b, limit = next; - } - } else if((last - b) <= (b - a)) { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, limit, trlink); - STACK_PUSH5(ISAd + incr, a, b, next, trlink); - first = b; - } else { - STACK_PUSH5(ISAd, first, a, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } else { - STACK_PUSH5(ISAd, first, a, limit, trlink); - STACK_PUSH5(ISAd, b, last, limit, trlink); - ISAd += incr, first = a, last = b, limit = next; - } - } - } else { - if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } - if((a - first) <= (last - b)) { - if(1 < (a - first)) { - STACK_PUSH5(ISAd, b, last, limit, trlink); - last = a; - } 
else if(1 < (last - b)) { - first = b; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } else { - if(1 < (last - b)) { - STACK_PUSH5(ISAd, first, a, limit, trlink); - first = b; - } else if(1 < (a - first)) { - last = a; - } else { - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } - } else { - if(trbudget_check(budget, last - first)) { - limit = tr_ilg(last - first), ISAd += incr; - } else { - if(0 <= trlink) { stack[trlink].d = -1; } - STACK_POP5(ISAd, first, last, limit, trlink); - } - } - } -#undef STACK_SIZE -} - - - -/*---------------------------------------------------------------------------*/ - -/*- Function -*/ - -/* Tandem repeat sort */ -void -trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { - saidx_t *ISAd; - saidx_t *first, *last; - trbudget_t budget; - saidx_t t, skip, unsorted; - - trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); -/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ - for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { - first = SA; - skip = 0; - unsorted = 0; - do { - if((t = *first) < 0) { first -= t; skip += t; } - else { - if(skip != 0) { *(first + skip) = skip; skip = 0; } - last = SA + ISA[t] + 1; - if(1 < (last - first)) { - budget.count = 0; - tr_introsort(ISA, ISAd, SA, first, last, &budget); - if(budget.count != 0) { unsorted += budget.count; } - else { skip = first - last; } - } else if((last - first) == 1) { - skip = -1; - } - first = last; - } - } while(first < (SA + n)); - if(skip != 0) { *(first + skip) = skip; } - if(unsorted == 0) { break; } - } -} From 74d903e2896f3e89340925235c864be789e75de7 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:50:13 -0500 Subject: [PATCH 139/325] Bump WCT HighPerformance package version --- src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index d093e70..93fa53f 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -11,7 +11,7 @@ - + From 5ee267fd5e339775d12dadeee662f7d90d1519bd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 12:50:33 -0500 Subject: [PATCH 140/325] Use RandomNumberGenerator in BsPatchTests --- test/DeltaQ.BsDiff.Tests/BsPatchTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs b/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs index b375844..09ebc1b 100644 --- a/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsPatchTests.cs @@ -7,7 +7,7 @@ namespace DeltaQ.Tests { public class BsPatchTests { - private static RNGCryptoServiceProvider _cryptoRNG = new RNGCryptoServiceProvider(); + private static RandomNumberGenerator _cryptoRNG = RandomNumberGenerator.Create(); private static byte[] GetRandomFilledBuffer(int count) { var buffer = new byte[count]; From b40367bb9b98ac8b0853168b10bf44d1fda983db Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 12:54:02 -0500 Subject: [PATCH 141/325] Add raw rust for ss_swapmerge --- .../RsDivSufSort.cs | 191 +++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 4a6aed0..d905692 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -891,9 +891,196 @@ private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) } } - private static void ss_swapmerge(IntAccessor t, Span sA, int pA, int v1, int b, int v2, int curbuf, int curbufsize, int depth) + /// D&C based merge + private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) { - throw new NotImplementedException(); + macro_rules! get_idx { + ($a: expr) => { + if 0 <= $a { + $a + } else { + !$a + } + }; + } + macro_rules! 
merge_check { + ($a: expr, $b: expr, $c: expr) => { + crosscheck!("mc c={}", $c); + if ($c & 1 > 0) + || (($c & 2 > 0) + && (ss_compare(T, SA, PA + get_idx!(SA[$a - 1]), SA, PA + SA[$a], depth) == 0)) + { + crosscheck!("swapping a-first={}", $a - first); + SA[$a] = !SA[$a]; + } + if ($c & 4 > 0) + && (ss_compare(T, SA, PA + get_idx!(SA[$b - 1]), SA, PA + SA[$b], depth) == 0) + { + crosscheck!("swapping b-first={}", $b - first); + SA[$b] = !SA[$b]; + } + }; + } + + let mut stack = MergeStack::new(); + let mut l: SAPtr; + let mut r: SAPtr; + let mut lm: SAPtr; + let mut rm: SAPtr; + let mut m: Idx; + let mut len: Idx; + let mut half: Idx; + let mut check: Idx; + let mut next: Idx; + + // BARBARIAN + check = 0; + loop { + crosscheck!("barbarian check={}", check); + SA_dump!(&SA.range(first..last), "ss_swapmerge barbarian"); + SA_dump!(&SA.range(buf..buf + bufsize), "ss_swapmerge barbarian buf"); + if (last - middle) <= bufsize { + crosscheck!("<=bufsize"); + if (first < middle) && (middle < last) { + crosscheck!("f> 1; + while 0 < len { + crosscheck!("in-olanna len={} half={}", len, half); + if ss_compare( + T, + SA, + PA + get_idx!(SA[middle + m + half]), + SA, + PA + get_idx!(SA[middle - m - half - 1]), + depth, + ) < 0 + { + m += half + 1; + half -= (len & 1) ^ 1; + } + + // iter + len = half; + half >>= 1; + } + + if 0 < m { + crosscheck!("0 < m, m={}", m); + lm = middle - m; + rm = middle + m; + ss_blockswap(SA, lm, middle, m); + r = middle; + l = middle; + next = 0; + if rm < last { + if SA[rm] < 0 { + SA[rm] = !SA[rm]; + if first < lm { + // KOOPA + l -= 1; + while SA[l] < 0 { + l -= 1; + } + crosscheck!("post-koopa l-first={}", l - first); + next |= 4; + crosscheck!("post-koopa next={}", next); + } + next |= 1; + } else if first < lm { + // MUNCHER + while SA[r] < 0 { + r += 1; + } + crosscheck!("post-muncher r-first={}", r - first); + next |= 2; + } + } + + if (l - first) <= (last - r) { + crosscheck!("post-muncher l-f 0) && (r == middle) { + 
crosscheck!("post-muncher next ^= 6 old={}", next); + next ^= 6; + crosscheck!("post-muncher next ^= 6 new={}", next); + } + stack.push(first, lm, l, (check & 3) | (next & 4)); + first = r; + middle = rm; + crosscheck!("post-muncher not, check was={} next was={}", check, next); + check = (next & 3) | (check & 4); + crosscheck!("post-muncher not, check is={} next is={}", check, next); + } + } else { + if ss_compare( + T, + SA, + PA + get_idx!(SA[middle - 1]), + SA, + PA + SA[middle], + depth, + ) == 0 + { + SA[middle] = !SA[middle]; + } + merge_check!(first, last, check); + SA_dump!(&SA.range(first..last), "ss_swapmerge pop 3"); + if !stack + .pop(&mut first, &mut middle, &mut last, &mut check) + .is_ok() + { + return; + } + } + } } From 77293940979e9ce2dc6972582da767d22d3c555f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 12:59:21 -0500 Subject: [PATCH 142/325] Begin converting ss_swapmerge --- .../RsDivSufSort.cs | 112 +++++++++--------- 1 file changed, 59 insertions(+), 53 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index d905692..a6c2f5f 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -894,61 +894,63 @@ private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) /// D&C based merge private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) { - macro_rules! get_idx { - ($a: expr) => { - if 0 <= $a { - $a - } else { - !$a - } - }; - } - macro_rules! 
merge_check { - ($a: expr, $b: expr, $c: expr) => { - crosscheck!("mc c={}", $c); - if ($c & 1 > 0) - || (($c & 2 > 0) - && (ss_compare(T, SA, PA + get_idx!(SA[$a - 1]), SA, PA + SA[$a], depth) == 0)) - { - crosscheck!("swapping a-first={}", $a - first); - SA[$a] = !SA[$a]; - } - if ($c & 4 > 0) - && (ss_compare(T, SA, PA + get_idx!(SA[$b - 1]), SA, PA + SA[$b], depth) == 0) - { - crosscheck!("swapping b-first={}", $b - first); - SA[$b] = !SA[$b]; - } - }; - } + //macro_rules! get_idx { + // ($a: expr) => { + // if 0 <= $a { + // $a + // } else { + // !$a + // } + // }; + //} + + //macro_rules! merge_check { + // ($a: expr, $b: expr, $c: expr) => { + // crosscheck!("mc c={}", $c); + // if ($c & 1 > 0) + // || (($c & 2 > 0) + // && (ss_compare(T, SA, PA + get_idx!(SA[$a - 1]), SA, PA + SA[$a], depth) == 0)) + // { + // crosscheck!("swapping a-first={}", $a - first); + // SA[$a] = !SA[$a]; + // } + // if ($c & 4 > 0) + // && (ss_compare(T, SA, PA + get_idx!(SA[$b - 1]), SA, PA + SA[$b], depth) == 0) + // { + // crosscheck!("swapping b-first={}", $b - first); + // SA[$b] = !SA[$b]; + // } + // }; + //} - let mut stack = MergeStack::new(); - let mut l: SAPtr; - let mut r: SAPtr; - let mut lm: SAPtr; - let mut rm: SAPtr; - let mut m: Idx; - let mut len: Idx; - let mut half: Idx; - let mut check: Idx; - let mut next: Idx; + var stack = MergeStack::new(); + SAPtr l; + SAPtr r; + SAPtr lm; + SAPtr rm; + + Idx m; + Idx len; + Idx half; + Idx check; + Idx next; // BARBARIAN check = 0; - loop { - crosscheck!("barbarian check={}", check); - SA_dump!(&SA.range(first..last), "ss_swapmerge barbarian"); - SA_dump!(&SA.range(buf..buf + bufsize), "ss_swapmerge barbarian buf"); - if (last - middle) <= bufsize { - crosscheck!("<=bufsize"); - if (first < middle) && (middle < last) { - crosscheck!("f SA, SAPtr PA, SAPtr fi crosscheck!("post-koopa next={}", next); } next |= 1; - } else if first < lm { + } + else if first < lm { // MUNCHER while SA[r] < 0 { r += 1; @@ -1045,7 +1048,8 @@ 
private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi crosscheck!("post-muncher check was={} next was={}", check, next); check = (check & 3) | (next & 4); crosscheck!("post-muncher check is={} next is={}", check, next); - } else { + } else + { crosscheck!("post-muncher not l-f 0) && (r == middle) { crosscheck!("post-muncher next ^= 6 old={}", next); @@ -1059,7 +1063,9 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi check = (next & 3) | (check & 4); crosscheck!("post-muncher not, check is={} next is={}", check, next); } - } else { + } + else + { if ss_compare( T, SA, From 17012a0f5e208e55982bb2320c7ad5d18874711e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:05:31 -0500 Subject: [PATCH 143/325] Begin implementing MergeStack --- .../RsDivSufSort.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index a6c2f5f..b50e905 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -923,7 +923,10 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi // }; //} - var stack = MergeStack::new(); + //MergeStack is the same as SsStack + using var stackOwner = SpanOwner.Allocate(MERGE_STACK_SIZE, AllocationMode.Clear); + var stack = new SsStack(stackOwner.Span); + SAPtr l; SAPtr r; SAPtr lm; @@ -954,14 +957,12 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi } merge_check!(first, last, check); - SA_dump!(&SA.range(first..last), "ss_swapmerge pop 1"); - if !stack - .pop(&mut first, &mut middle, &mut last, &mut check) - .is_ok() + SA_dump(SA[first..last], "ss_swapmerge pop 1"); + if (!stack.Pop(ref first, ref middle, ref last, ref check)) { return; } - SA_dump!(&SA.range(first..last), "ss_swapmerge pop 1 survived"); + 
SA_dump(SA[first..last], "ss_swapmerge pop 1 survived"); continue; } @@ -1099,6 +1100,7 @@ private struct SsStackItem } private const int SS_STACK_SIZE = 16; + private const int MERGE_STACK_SIZE = 32; private ref struct SsStack { public readonly Span Items; From 69b3af196aba0d9f5168a9ca690b7c224d09059b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:07:05 -0500 Subject: [PATCH 144/325] Port more of ss_swapmerge --- .../RsDivSufSort.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index b50e905..02bf301 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -966,18 +966,18 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi continue; } - if (middle - first) <= bufsize { - crosscheck!("m-f<=bufsize"); - if first < middle { - crosscheck!("f Date: Tue, 14 Dec 2021 13:13:13 -0500 Subject: [PATCH 145/325] Continue porting ss_swapmerge --- .../RsDivSufSort.cs | 85 ++++++++++--------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 02bf301..421617c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -986,18 +986,18 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi // OLANNA m = 0; - len = cmp::min((middle - first).0, (last - middle).0); + len = Math.Min(middle - first, last - middle); half = len >> 1; - while 0 < len { - crosscheck!("in-olanna len={} half={}", len, half); - if ss_compare( + while (0 < len) + { + crosscheck($"in-olanna len={len} half={half}"); + if (ss_compare( T, SA, PA + get_idx!(SA[middle + m + half]), SA, PA + get_idx!(SA[middle - m - half - 
1]), - depth, - ) < 0 + depth) < 0) { m += half + 1; half -= (len & 1) ^ 1; @@ -1008,81 +1008,88 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi half >>= 1; } - if 0 < m { - crosscheck!("0 < m, m={}", m); + if (0 < m) + { + crosscheck($"0 < m, m={m}"); lm = middle - m; rm = middle + m; ss_blockswap(SA, lm, middle, m); r = middle; l = middle; next = 0; - if rm < last { - if SA[rm] < 0 { - SA[rm] = !SA[rm]; - if first < lm { + if (rm < last) + { + if (SA[rm] < 0) + { + SA[rm] = ~SA[rm]; + if (first < lm) + { // KOOPA l -= 1; - while SA[l] < 0 { + while (SA[l] < 0) + { l -= 1; } - crosscheck!("post-koopa l-first={}", l - first); + crosscheck($"post-koopa l-first={l - first}"); next |= 4; - crosscheck!("post-koopa next={}", next); + crosscheck($"post-koopa next={next}"); } next |= 1; } - else if first < lm { + else if (first < lm) + { // MUNCHER - while SA[r] < 0 { + while (SA[r] < 0) + { r += 1; } - crosscheck!("post-muncher r-first={}", r - first); + crosscheck($"post-muncher r-first={r - first}"); next |= 2; } } - if (l - first) <= (last - r) { - crosscheck!("post-muncher l-f 0) && (r == middle) { - crosscheck!("post-muncher next ^= 6 old={}", next); + crosscheck("post-muncher not l-f 0) && (r == middle)) + { + crosscheck($"post-muncher next ^= 6 old={next}"); next ^= 6; - crosscheck!("post-muncher next ^= 6 new={}", next); + crosscheck($"post-muncher next ^= 6 new={next}"); } - stack.push(first, lm, l, (check & 3) | (next & 4)); + stack.Push(first, lm, l, (check & 3) | (next & 4)); first = r; middle = rm; - crosscheck!("post-muncher not, check was={} next was={}", check, next); + crosscheck($"post-muncher not, check was={check} next was={next}"); check = (next & 3) | (check & 4); - crosscheck!("post-muncher not, check is={} next is={}", check, next); + crosscheck($"post-muncher not, check is={check} next is={next}"); } } else { - if ss_compare( + if (ss_compare( T, SA, PA + get_idx!(SA[middle - 1]), SA, PA + SA[middle], - depth, - ) 
== 0 + depth) == 0) { - SA[middle] = !SA[middle]; + SA[middle] = ~SA[middle]; } merge_check!(first, last, check); - SA_dump!(&SA.range(first..last), "ss_swapmerge pop 3"); - if !stack - .pop(&mut first, &mut middle, &mut last, &mut check) - .is_ok() + SA_dump(SA[first..last], "ss_swapmerge pop 3"); + if (!stack.Pop(ref first, ref middle, ref last, ref check)) { return; } From 10437d29e4614ce355355b61bcaf99abdfb3b5e2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:15:06 -0500 Subject: [PATCH 146/325] Port get_idx --- .../RsDivSufSort.cs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 421617c..0577685 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -894,15 +894,7 @@ private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) /// D&C based merge private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) { - //macro_rules! get_idx { - // ($a: expr) => { - // if 0 <= $a { - // $a - // } else { - // !$a - // } - // }; - //} + static Idx get_idx(Idx a) => 0 <= a ? a : ~a; //macro_rules! 
merge_check { // ($a: expr, $b: expr, $c: expr) => { @@ -994,9 +986,9 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi if (ss_compare( T, SA, - PA + get_idx!(SA[middle + m + half]), + PA + get_idx(SA[middle + m + half]), SA, - PA + get_idx!(SA[middle - m - half - 1]), + PA + get_idx(SA[middle - m - half - 1]), depth) < 0) { m += half + 1; @@ -1080,7 +1072,7 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi if (ss_compare( T, SA, - PA + get_idx!(SA[middle - 1]), + PA + get_idx(SA[middle - 1]), SA, PA + SA[middle], depth) == 0) From 4794be2ed3414d9aeb3e41ce5ce5a0600c3ad42a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:18:51 -0500 Subject: [PATCH 147/325] Test port of merge_check --- .../RsDivSufSort.cs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 0577685..fba8a51 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -896,6 +896,21 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi { static Idx get_idx(Idx a) => 0 <= a ? a : ~a; + void merge_check(int a, int b, int c) + { + crosscheck($"mc c={c}"); + if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) + { + crosscheck($"swapping a-first={a - first}"); + SA[a] = ~SA[a]; + } + if (((c & 4) > 0) && (ss_compare(T, SA, PA + get_idx(SA[b - 1]), SA, PA + SA[b], depth) == 0)) + { + crosscheck($"swapping b-first={b - first}"); + SA[b] = ~SA[b]; + } + } + //macro_rules! merge_check { // ($a: expr, $b: expr, $c: expr) => { // crosscheck!("mc c={}", $c); From d84e41f994afa16274d84a567ad3e59d4ba776a6 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:23:36 -0500 Subject: [PATCH 148/325] Port merge_check calls and add forward/backward merge stubs --- .../RsDivSufSort.cs | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index fba8a51..919aab9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -896,7 +896,7 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi { static Idx get_idx(Idx a) => 0 <= a ? a : ~a; - void merge_check(int a, int b, int c) + void merge_check(Idx a, Idx b, Idx c) { crosscheck($"mc c={c}"); if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) @@ -911,25 +911,6 @@ void merge_check(int a, int b, int c) } } - //macro_rules! merge_check { - // ($a: expr, $b: expr, $c: expr) => { - // crosscheck!("mc c={}", $c); - // if ($c & 1 > 0) - // || (($c & 2 > 0) - // && (ss_compare(T, SA, PA + get_idx!(SA[$a - 1]), SA, PA + SA[$a], depth) == 0)) - // { - // crosscheck!("swapping a-first={}", $a - first); - // SA[$a] = !SA[$a]; - // } - // if ($c & 4 > 0) - // && (ss_compare(T, SA, PA + get_idx!(SA[$b - 1]), SA, PA + SA[$b], depth) == 0) - // { - // crosscheck!("swapping b-first={}", $b - first); - // SA[$b] = !SA[$b]; - // } - // }; - //} - //MergeStack is the same as SsStack using var stackOwner = SpanOwner.Allocate(MERGE_STACK_SIZE, AllocationMode.Clear); var stack = new SsStack(stackOwner.Span); @@ -962,7 +943,7 @@ void merge_check(int a, int b, int c) SA_dump(SA[first..last], "ss_swapmerge post-mergebackward"); SA_dump(SA[buf..(buf + bufsize)], "ss_swapmerge post-mergebackward buf"); } - merge_check!(first, last, check); + merge_check(first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 1"); if (!stack.Pop(ref first, ref middle, ref last, ref 
check)) @@ -982,7 +963,7 @@ void merge_check(int a, int b, int c) ss_mergeforward(T, SA, PA, first, middle, last, buf, depth); SA_dump(SA[first..last], "after mergeforward"); } - merge_check!(first, last, check); + merge_check(first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 2"); if (!stack.Pop(ref first, ref middle, ref last, ref check)) { @@ -1094,7 +1075,7 @@ void merge_check(int a, int b, int c) { SA[middle] = ~SA[middle]; } - merge_check!(first, last, check); + merge_check(first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 3"); if (!stack.Pop(ref first, ref middle, ref last, ref check)) { @@ -1104,6 +1085,15 @@ void merge_check(int a, int b, int c) } } + private static void ss_mergebackward(IntAccessor t, Span sA, int pA, int first, int middle, int last, int buf, int depth) + { + throw new NotImplementedException(); + } + + private static void ss_mergeforward(IntAccessor t, Span sA, int pA, int first, int middle, int last, int buf, int depth) + { + throw new NotImplementedException(); + } private struct SsStackItem { From 8c3e1f0a76e8e820c150a868be429b6e5d0c2ed2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:25:37 -0500 Subject: [PATCH 149/325] Finish porting ss_swapmerge --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 919aab9..c3e2504 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -896,7 +896,7 @@ private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr fi { static Idx get_idx(Idx a) => 0 <= a ? 
a : ~a; - void merge_check(Idx a, Idx b, Idx c) + void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) { crosscheck($"mc c={c}"); if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) @@ -943,7 +943,7 @@ void merge_check(Idx a, Idx b, Idx c) SA_dump(SA[first..last], "ss_swapmerge post-mergebackward"); SA_dump(SA[buf..(buf + bufsize)], "ss_swapmerge post-mergebackward buf"); } - merge_check(first, last, check); + merge_check(T, SA, first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 1"); if (!stack.Pop(ref first, ref middle, ref last, ref check)) @@ -963,7 +963,7 @@ void merge_check(Idx a, Idx b, Idx c) ss_mergeforward(T, SA, PA, first, middle, last, buf, depth); SA_dump(SA[first..last], "after mergeforward"); } - merge_check(first, last, check); + merge_check(T, SA, first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 2"); if (!stack.Pop(ref first, ref middle, ref last, ref check)) { @@ -1075,7 +1075,7 @@ void merge_check(Idx a, Idx b, Idx c) { SA[middle] = ~SA[middle]; } - merge_check(first, last, check); + merge_check(T, SA, first, last, check); SA_dump(SA[first..last], "ss_swapmerge pop 3"); if (!stack.Pop(ref first, ref middle, ref last, ref check)) { From b4ab5a8598890a2aac72499fc5956866c4af98df Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:27:49 -0500 Subject: [PATCH 150/325] Add raw rust to ss_mergeforward --- .../RsDivSufSort.cs | 110 +++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index c3e2504..73cf983 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1090,9 +1090,115 @@ private static void ss_mergebackward(IntAccessor t, Span sA, int pA, int fi throw new NotImplementedException(); } - private static void ss_mergeforward(IntAccessor t, Span sA, int pA, int first, int middle, int last, int buf, int depth) + /// Merge-forward with internal buffer + private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) { - throw new NotImplementedException(); + let mut a: SAPtr; + let mut b: SAPtr; + let mut c: SAPtr; + let mut bufend: SAPtr; + let mut t: Idx; + let mut r: Idx; + + SA_dump!(&SA.range(first..last), "ss_mergeforward start"); + + bufend = buf + (middle - first) - 1; + ss_blockswap(SA, buf, first, (middle - first).0); + + // IGNACE + a = first; + t = SA[a]; + b = buf; + c = middle; + loop { + r = ss_compare(T, SA, PA + SA[b], SA, PA + SA[c], depth); + if r < 0 { + // RONALD + loop { + SA[a] = SA[b]; + a += 1; + if bufend <= b { + SA[bufend] = t; + return; + } + SA[b] = SA[a]; + b += 1; + + // cond + if !(SA[b] < 0) { + break; + } + } + } else if r > 0 { + // JEREMY + loop { + SA[a] = SA[c]; + a += 1; + SA[c] = SA[a]; + c += 1; + if last <= c { + // TONY + while b < bufend { + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + } + SA[a] = SA[b]; + SA[b] = t; + return; + } + + // cond (JEMERY) + if !(SA[c] < 0) { + break; + } + } + } else { + SA[c] = !SA[c]; + // JENS + loop { + SA[a] = SA[b]; + a += 1; + if bufend <= b { + SA[bufend] = t; + return; + } + 
SA[b] = SA[a]; + b += 1; + + // cond (JENS) + if !(SA[b] < 0) { + break; + } + } + + // DIMITER + loop { + SA[a] = SA[c]; + a += 1; + SA[c] = SA[a]; + c += 1; + if last <= c { + // MIDORI + while b < bufend { + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + } + SA[a] = SA[b]; + SA[b] = t; + return; + } + + // cond (DIMITER) + if !(SA[c] < 0) { + break; + } + } + } + } } private struct SsStackItem From e44c2d836516066f23dc79e99847c5752eab6d34 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:31:22 -0500 Subject: [PATCH 151/325] Port ss_mergeforward --- .../RsDivSufSort.cs | 76 ++++++++++++------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 73cf983..04725b3 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1091,33 +1091,37 @@ private static void ss_mergebackward(IntAccessor t, Span sA, int pA, int fi } /// Merge-forward with internal buffer - private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) + private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { - let mut a: SAPtr; - let mut b: SAPtr; - let mut c: SAPtr; - let mut bufend: SAPtr; - let mut t: Idx; - let mut r: Idx; + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr bufend; + Idx t; + Idx r; - SA_dump!(&SA.range(first..last), "ss_mergeforward start"); + SA_dump(SA[first..last], "ss_mergeforward start"); bufend = buf + (middle - first) - 1; - ss_blockswap(SA, buf, first, (middle - first).0); + ss_blockswap(SA, buf, first, middle - first); // IGNACE a = first; t = SA[a]; b = buf; c = middle; - loop { + while (true) + { r = ss_compare(T, SA, PA + SA[b], SA, PA + SA[c], depth); - if r < 0 { + if (r < 0) + { // RONALD - 
loop { + while (true) + { SA[a] = SA[b]; a += 1; - if bufend <= b { + if (bufend <= b) + { SA[bufend] = t; return; } @@ -1125,20 +1129,26 @@ private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr b += 1; // cond - if !(SA[b] < 0) { + if (!(SA[b] < 0)) + { break; } } - } else if r > 0 { + } + else if (r > 0) + { // JEREMY - loop { + while (true) + { SA[a] = SA[c]; a += 1; SA[c] = SA[a]; c += 1; - if last <= c { + if (last <= c) + { // TONY - while b < bufend { + while (b < bufend) + { SA[a] = SA[b]; a += 1; SA[b] = SA[a]; @@ -1150,17 +1160,22 @@ private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr } // cond (JEMERY) - if !(SA[c] < 0) { + if (!(SA[c] < 0)) + { break; } } - } else { - SA[c] = !SA[c]; + } + else + { + SA[c] = ~SA[c]; // JENS - loop { + while (true) + { SA[a] = SA[b]; a += 1; - if bufend <= b { + if (bufend <= b) + { SA[bufend] = t; return; } @@ -1168,20 +1183,24 @@ private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr b += 1; // cond (JENS) - if !(SA[b] < 0) { + if (!(SA[b] < 0)) + { break; } } // DIMITER - loop { + while (true) + { SA[a] = SA[c]; a += 1; SA[c] = SA[a]; c += 1; - if last <= c { + if (last <= c) + { // MIDORI - while b < bufend { + while (b < bufend) + { SA[a] = SA[b]; a += 1; SA[b] = SA[a]; @@ -1193,7 +1212,8 @@ private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr } // cond (DIMITER) - if !(SA[c] < 0) { + if (!(SA[c] < 0)) + { break; } } From 58ad2a79f29927236ac22398045a8f193b048be3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:37:38 -0500 Subject: [PATCH 152/325] Port ss_mergebackward --- .../RsDivSufSort.cs | 216 +++++++++++++++++- 1 file changed, 214 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 04725b3..53388e0 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1085,9 +1085,221 @@ void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) } } - private static void ss_mergebackward(IntAccessor t, Span sA, int pA, int first, int middle, int last, int buf, int depth) + /// Merge-backward with internal buffer + private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { - throw new NotImplementedException(); + SAPtr p1; + SAPtr p2; + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr bufend; + + Idx t; + Idx r; + Idx x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(SA, buf, middle, (last - middle)); + + x = 0; + if (SA[bufend] < 0) + { + p1 = PA + ~SA[bufend]; + x |= 1; + } + else + { + p1 = PA + SA[bufend]; + } + if (SA[middle - 1] < 0) + { + p2 = PA + ~SA[middle - 1]; + x |= 2; + } + else + { + p2 = PA + SA[middle - 1]; + } + + // MARTIN + a = last - 1; + t = SA[a]; + b = bufend; + c = middle - 1; + while (true) + { + r = ss_compare(T, SA, p1, SA, p2, depth); + if (0 < r) + { + if ((x & 1) > 0) + { + // BAPTIST + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + + // cond + if (!(SA[b] < 0)) + { + break; + } + } + x ^= 1; + } + SA[a] = SA[b]; + a -= 1; + if (b <= buf) + { + SA[buf] = t; + break; + } + SA[b] = SA[a]; + b -= 1; + if (SA[b] < 0) + { + p1 = PA + ~SA[b]; + x |= 1; + } + else + { + p1 = PA + SA[b]; + } + } + else if (r < 0) + { + if ((x & 2) > 0) + { + // JULES + while (true) + { + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + + // cond + if 
(~SA[c] < 0) + { + break; + } + } + x ^= 2; + } + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + if (c < first) + { + // GARAMOND + while (buf < b) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + } + SA[a] = SA[b]; + SA[b] = t; + break; + } + if (SA[c] < 0) + { + p2 = PA + ~SA[c]; + x |= 2; + } + else + { + p2 = PA + SA[c]; + } + } + else + { + if ((x & 1) > 0) + { + // XAVIER + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + if (!(SA[b] < 0)) + { + break; + } + } + x ^= 1; + } + SA[a] = ~SA[b]; + a -= 1; + if (b <= buf) + { + SA[buf] = t; + break; + } + SA[b] = SA[a]; + b -= 1; + if ((x & 2) > 0) + { + // WALTER + while (true) + { + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + + // cond + if (!(SA[c] < 0)) + { + break; + } + } + x ^= 2; + } + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + if (c < first) + { + // ZENITH + while (buf < b) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + } + SA[a] = SA[b]; + SA[b] = t; + break; + } + if (SA[b] < 0) + { + p1 = PA + ~SA[b]; + x |= 1; + } + else + { + p1 = PA + SA[b]; + } + if (SA[c] < 0) + { + p2 = PA + ~SA[c]; + x |= 2; + } + else + { + p2 = PA + SA[c]; + } + } + } } /// Merge-forward with internal buffer From 4ef6a4b86117bd7b31a911b24ae512ac61606022 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:46:46 -0500 Subject: [PATCH 153/325] Remove unused variable --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 53388e0..99129bd 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2422,7 +2422,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA SAPtr a = 0; SAPtr b = 0; SAPtr c; - Idx t, v, x; + Idx v, x; Idx incr = isadOffset - isaOffset; Idx next; Idx trlink = -1; From 9413d4a7e48d35cfe82676843c40725db71a47b5 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:50:43 -0500 Subject: [PATCH 154/325] Add raw rust for tr_pivot --- .../RsDivSufSort.cs | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 99129bd..8cf2ea5 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3051,9 +3051,26 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA } // end PASCAL } - private static int tr_pivot(Span sA, int iSAd, int first, int last) + /// Returns the pivot element + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) { - throw new NotImplementedException(); + let mut t: Idx = (last - first).0; + let mut middle: SAPtr = first + t / 2; + + if t <= 512 { + if t <= 32 { + return tr_median3(SA, ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(SA, ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(SA, ISAd, first, first + t, first + (t 
<< 1)); + middle = tr_median3(SA, ISAd, middle - t, middle, middle + t); + last = tr_median3(SA, ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + tr_median3(SA, ISAd, first, middle, last) } private static void tr_heapsort(int iSAd, Span sA, int first, int v) From 19bb0889b0bdc2dff02458059981cb6389ee9b39 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:52:14 -0500 Subject: [PATCH 155/325] Port tr_pivot and add stubs for tr_median3/5 --- .../RsDivSufSort.cs | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 8cf2ea5..5f415d1 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3055,13 +3055,17 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) { - let mut t: Idx = (last - first).0; - let mut middle: SAPtr = first + t / 2; + Idx t = last - first; + SAPtr middle = first + t / 2; - if t <= 512 { - if t <= 32 { + if (t <= 512) + { + if (t <= 32) + { return tr_median3(SA, ISAd, first, middle, last - 1); - } else { + } + else + { t >>= 2; return tr_median5(SA, ISAd, first, first + t, middle, last - 1 - t, last - 1); } @@ -3070,7 +3074,17 @@ private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) first = tr_median3(SA, ISAd, first, first + t, first + (t << 1)); middle = tr_median3(SA, ISAd, middle - t, middle, middle + t); last = tr_median3(SA, ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); - tr_median3(SA, ISAd, first, middle, last) + return tr_median3(SA, ISAd, first, middle, last); + } + + private static int tr_median5(Span sA, int iSAd, int first, int v1, int middle, int v2, int v3) + { + throw new NotImplementedException(); + } + + 
private static int tr_median3(Span sA, int iSAd, int first, int middle, int v) + { + throw new NotImplementedException(); } private static void tr_heapsort(int iSAd, Span sA, int first, int v) From 9f04ffaef429ce0e1fce49bf33cfca854a80de60 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 13:53:41 -0500 Subject: [PATCH 156/325] Add raw rust to tr_median3 --- .../RsDivSufSort.cs | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 5f415d1..7a9de87 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3082,9 +3082,28 @@ private static int tr_median5(Span sA, int iSAd, int first, int v1, int mid throw new NotImplementedException(); } - private static int tr_median3(Span sA, int iSAd, int first, int middle, int v) + /// Returns the median of three elements + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr tr_median3(Span SA, SAPtr ISAd, SAPtr v1, SAPtr v2, SAPtr v3) { - throw new NotImplementedException(); + macro_rules! get { + ($x: expr) => { + SA[ISAd + SA[$x]] + }; + } + + if get!(v1) > get!(v2) { + mem::swap(&mut v1, &mut v2); + } + if get!(v2) > get!(v3) { + if get!(v1) > get!(v3) { + v1 + } else { + v3 + } + } else { + v2 + } } private static void tr_heapsort(int iSAd, Span sA, int first, int v) From a92d63ce820e1806be4dbf9e39fcbb300d751493 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:57:30 -0500 Subject: [PATCH 157/325] Port tr_median3 --- .../RsDivSufSort.cs | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 7a9de87..2ed20ba 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3084,25 +3084,30 @@ private static int tr_median5(Span sA, int iSAd, int first, int v1, int mid /// Returns the median of three elements [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr tr_median3(Span SA, SAPtr ISAd, SAPtr v1, SAPtr v2, SAPtr v3) + private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3) { - macro_rules! get { - ($x: expr) => { - SA[ISAd + SA[$x]] - }; - } + Span ISAd = SA[isadOffset..]; - if get!(v1) > get!(v2) { - mem::swap(&mut v1, &mut v2); + //get(x) => ISAd[SA[x]] + + if (ISAd[SA[v1]] > ISAd[SA[v2]]) + { + Swap(ref v1, ref v2); } - if get!(v2) > get!(v3) { - if get!(v1) > get!(v3) { - v1 - } else { - v3 + if (ISAd[SA[v2]] > ISAd[SA[v3]]) + { + if (ISAd[SA[v1]] > ISAd[SA[v3]]) + { + return v1; } - } else { - v2 + else + { + return v3; + } + } + else + { + return v2; } } From bf7e8afe549f04d6a9d86303452a3e4318b0dd4d Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 13:59:24 -0500 Subject: [PATCH 158/325] Add raw rust to tr_median5 --- .../RsDivSufSort.cs | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2ed20ba..70a5118 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3077,9 +3077,37 @@ private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) return tr_median3(SA, ISAd, first, middle, last); } - private static int tr_median5(Span sA, int iSAd, int first, int v1, int middle, int v2, int v3) + /// Returns the median of five elements + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - throw new NotImplementedException(); + macro_rules! get { + ($x: expr) => { + SA[ISAd + SA[$x]] + }; + } + if get!(v2) > get!(v3) { + mem::swap(&mut v2, &mut v3); + } + if get!(v4) > get!(v5) { + mem::swap(&mut v4, &mut v5); + } + if get!(v2) > get!(v4) { + mem::swap(&mut v2, &mut v4); + mem::swap(&mut v3, &mut v5); + } + if get!(v1) > get!(v3) { + mem::swap(&mut v1, &mut v3); + } + if get!(v1) > get!(v4) { + mem::swap(&mut v1, &mut v4); + mem::swap(&mut v3, &mut v5); + } + if get!(v3) > get!(v4) { + v4 + } else { + v3 + } } /// Returns the median of three elements From c579a054ce777a5d7ed0bd0788b34412a5fb49ff Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 14:00:53 -0500 Subject: [PATCH 159/325] Port tr_median5 --- .../RsDivSufSort.cs | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 70a5118..bdf4018 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3081,32 +3081,39 @@ private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - macro_rules! get { - ($x: expr) => { - SA[ISAd + SA[$x]] - }; + Span ISAd = SA[isadOffset..]; + + //get(x) => ISAd[SA[x]] + + if (ISAd[SA[v2]] > ISAd[SA[v3]]) + { + Swap(ref v2, ref v3); } - if get!(v2) > get!(v3) { - mem::swap(&mut v2, &mut v3); + if (ISAd[SA[v4]] > ISAd[SA[v5]]) + { + Swap(ref v4, ref v5); } - if get!(v4) > get!(v5) { - mem::swap(&mut v4, &mut v5); + if (ISAd[SA[v2]] > ISAd[SA[v4]]) + { + Swap(ref v2, ref v4); + Swap(ref v3, ref v5); } - if get!(v2) > get!(v4) { - mem::swap(&mut v2, &mut v4); - mem::swap(&mut v3, &mut v5); + if (ISAd[SA[v1]] > ISAd[SA[v3]]) + { + Swap(ref v1, ref v3); } - if get!(v1) > get!(v3) { - mem::swap(&mut v1, &mut v3); + if (ISAd[SA[v1]] > ISAd[SA[v4]]) + { + Swap(ref v1, ref v4); + Swap(ref v3, ref v5); } - if get!(v1) > get!(v4) { - mem::swap(&mut v1, &mut v4); - mem::swap(&mut v3, &mut v5); + if (ISAd[SA[v3]] > ISAd[SA[v4]]) + { + return v4; } - if get!(v3) > get!(v4) { - v4 - } else { - v3 + else + { + return v3; } } From ecac534f3f2960c6f9d7f65c05e90affb65cac8e Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 14:20:42 -0500 Subject: [PATCH 160/325] Inline ref swap --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index bdf4018..4d88720 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1941,6 +1941,7 @@ private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAP } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] static void Swap(ref T lhs, ref T rhs) { T temp; From b1d517e59ea83add4774151031cda72b4e304ab5 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 14:22:46 -0500 Subject: [PATCH 161/325] Add raw rust to tr_copy --- .../RsDivSufSort.cs | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 4d88720..2512854 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3214,9 +3214,57 @@ private static void tr_partialcopy(int iSA, Span sA, int first, int a, int throw new NotImplementedException(); } - private static void tr_copy(int iSA, Span sA, int first, int a, int b, int last, int v) + /// Tandem repeat copy + private static void tr_copy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { - throw new NotImplementedException(); + // sort suffixes of middle partition + // by using sorted order of suffixes of left and right partition. + let mut c: SAPtr; + let mut d: SAPtr; + let mut e: SAPtr; + let mut s: Idx; + let mut v: Idx; + + crosscheck!("tr_copy first={} a={} b={} last={}", first, a, b, last); + + v = (b - 1).0; + + macro_rules! 
ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + + // JACK + c = first; + d = a - 1; + while c <= d { + s = SA[c] - depth; + if (0 <= s) && (ISA!(s) == v) { + d += 1; + SA[d] = s; + ISA!(s) = d.0; + } + + // iter (JACK) + c += 1; + } + + // JILL + c = last - 1; + e = d + 1; + d = b; + while e < d { + s = SA[c] - depth; + if (0 <= s) && (ISA!(s) == v) { + d -= 1; + SA[d] = s; + ISA!(s) = d.0; + } + + // iter (JILL) + c -= 1; + } } /// From e33030bc7d345028ecaafef4be3c13f66d502d5f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 14:26:04 -0500 Subject: [PATCH 162/325] Port tr_copy --- .../RsDivSufSort.cs | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2512854..edaaef8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3215,35 +3215,33 @@ private static void tr_partialcopy(int iSA, Span sA, int first, int a, int } /// Tandem repeat copy - private static void tr_copy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) + private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { // sort suffixes of middle partition // by using sorted order of suffixes of left and right partition. - let mut c: SAPtr; - let mut d: SAPtr; - let mut e: SAPtr; - let mut s: Idx; - let mut v: Idx; + SAPtr c; + SAPtr d; + SAPtr e; + Idx s; + Idx v; - crosscheck!("tr_copy first={} a={} b={} last={}", first, a, b, last); + crosscheck($"tr_copy first={first} a={a} b={b} last={last}"); - v = (b - 1).0; + v = (b - 1); - macro_rules! 
ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } + Span ISA = SA[isaOffset..]; // JACK c = first; d = a - 1; - while c <= d { + while (c <= d) + { s = SA[c] - depth; - if (0 <= s) && (ISA!(s) == v) { + if ((0 <= s) && (ISA[s] == v)) + { d += 1; SA[d] = s; - ISA!(s) = d.0; + ISA[s] = d; } // iter (JACK) @@ -3254,12 +3252,14 @@ private static void tr_copy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr c = last - 1; e = d + 1; d = b; - while e < d { + while (e < d) + { s = SA[c] - depth; - if (0 <= s) && (ISA!(s) == v) { + if ((0 <= s) && (ISA[s] == v)) + { d -= 1; SA[d] = s; - ISA!(s) = d.0; + ISA[s] = d; } // iter (JILL) From 38e8af74cc0aeb65ada3d97891aa91c40678dab3 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 18:44:33 -0500 Subject: [PATCH 163/325] Add raw rust to tr_heapsort --- .../RsDivSufSort.cs | 49 ++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index edaaef8..dc3afb6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3147,9 +3147,54 @@ private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr } } - private static void tr_heapsort(int iSAd, Span sA, int first, int v) + /// Simple top-down heapsort + private static void tr_heapsort(SAPtr ISAd, Span SA_top, SAPtr first, Idx size) { - throw new NotImplementedException(); + let mut i: Idx; + let mut m: Idx; + let mut t: Idx; + + macro_rules! ISAd { + ($x: expr) => { + SA_top[ISAd + $x] + }; + } + macro_rules! SA { + ($x: expr) => { + SA_top[first + $x] + }; + } + macro_rules! 
SA_swap { + ($a: expr, $b: expr) => { + SA_top.swap(first + $a, first + $b); + }; + } + + m = size; + if (size % 2) == 0 { + m -= 1; + if ISAd!(SA!(m / 2)) < ISAd!(SA!(m)) { + SA_swap!(m, (m / 2)); + } + } + + // LISA + for i in (0..(m / 2)).rev() { + crosscheck!("LISA i={}", i); + tr_fixdown(ISAd, SA_top, first, i, m); + } + if (size % 2) == 0 { + SA_swap!(0, m); + tr_fixdown(ISAd, SA_top, first, 0, m); + } + // MARK + for i in (1..m).rev() { + crosscheck!("MARK i={}", i); + t = SA!(0); + SA!(0) = SA!(i); + tr_fixdown(ISAd, SA_top, first, 0, i); + SA!(i) = t; + } } /// From b5f18c2ae7ad5ec99351ced9af1d78145ca26d8d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 14 Dec 2021 18:56:28 -0500 Subject: [PATCH 164/325] Port tr_heapsort and add tr_fixdown stub --- .../RsDivSufSort.cs | 81 ++++++++++++------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index dc3afb6..d4a9438 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3148,55 +3148,74 @@ private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr } /// Simple top-down heapsort - private static void tr_heapsort(SAPtr ISAd, Span SA_top, SAPtr first, Idx size) + private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, Idx size) { - let mut i: Idx; - let mut m: Idx; - let mut t: Idx; + Idx i; + Idx m; + Idx t; - macro_rules! ISAd { - ($x: expr) => { - SA_top[ISAd + $x] - }; - } - macro_rules! SA { - ($x: expr) => { - SA_top[first + $x] - }; - } - macro_rules! SA_swap { - ($a: expr, $b: expr) => { - SA_top.swap(first + $a, first + $b); - }; - } + Span ISAd = SA_top[isadOffset..]; + //macro_rules! ISAd { + // ($x: expr) => { + // SA_top[ISAd + $x] + // }; + //} + + Span SA = SA_top[first..]; + //macro_rules! 
SA { + // ($x: expr) => { + // SA_top[first + $x] + // }; + //} + + //void SA_swap(int a, int b) => SA_top.Swap(first + a, first + b); + //macro_rules! SA_swap { + // ($a: expr, $b: expr) => { + // SA_top.swap(first + $a, first + $b); + // }; + //} m = size; - if (size % 2) == 0 { + if ((size % 2) == 0) + { m -= 1; - if ISAd!(SA!(m / 2)) < ISAd!(SA!(m)) { - SA_swap!(m, (m / 2)); + if (ISAd[SA[m / 2]] < ISAd[SA[m]]) + { + SA_top.Swap(first + m, first + (m / 2)); } } // LISA - for i in (0..(m / 2)).rev() { - crosscheck!("LISA i={}", i); + //TODO: checkme + //for i in (0..(m / 2)).rev() { + for (i = (m / 2) - 1; i >= 0; i--) + { + crosscheck($"LISA i={i}"); tr_fixdown(ISAd, SA_top, first, i, m); } - if (size % 2) == 0 { - SA_swap!(0, m); + if ((size % 2) == 0) + { + SA_top.Swap(first + 0, first + m); tr_fixdown(ISAd, SA_top, first, 0, m); } // MARK - for i in (1..m).rev() { - crosscheck!("MARK i={}", i); - t = SA!(0); - SA!(0) = SA!(i); + //TODO: checkme + //for i in (1..m).rev() { + for (i = m - 1; i > 0; i--) + { + crosscheck($"MARK i={i}"); + t = SA[0]; + SA[0] = SA[i]; tr_fixdown(ISAd, SA_top, first, 0, i); - SA!(i) = t; + SA[i] = t; } } + private static void tr_fixdown(Span iSAd, Span sA_top, int first, int i, int m) + { + throw new NotImplementedException(); + } + /// /// Simple insertionsort for small size groups /// From 05bac173a0e7a8c197beff17af3577e83797bf29 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 19:02:21 -0500 Subject: [PATCH 165/325] Add raw rust to tr_fixdown --- .../RsDivSufSort.cs | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index d4a9438..873b0c8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3211,9 +3211,55 @@ private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, } } - private static void tr_fixdown(Span iSAd, Span sA_top, int first, int i, int m) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void tr_fixdown(Span ISAd, Span SA_top, SAPtr first, Idx i, Idx size) { - throw new NotImplementedException(); + let mut j: Idx; + let mut k: Idx; + let mut d: Idx; + let mut e: Idx; + + crosscheck!("fixdown i={} size={}", i, size); + + macro_rules! ISAd { + ($x: expr) => { + SA_top[ISAd + $x] + }; + } + macro_rules! SA { + ($x: expr) => { + SA_top[first + $x] + }; + }; + + // WILMOT + let v = SA!(i); + let c = ISAd!(v); + loop { + // cond + j = 2 * i + 1; + if !(j < size) { + break; + } + + // body + k = j; + d = ISAd!(SA!(k)); + j += 1; + e = ISAd!(SA!(j)); + if d < e { + k = j; + d = e; + } + if d <= c { + break; + } + + // iter (WILMOT) + SA!(i) = SA!(k); + i = k; + } + SA!(i) = v; } /// From 3151297a438f3559a8733e3cc56e8d6259f3660f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 14 Dec 2021 19:07:16 -0500 Subject: [PATCH 166/325] Port tr_fixdown and Spanify --- .../RsDivSufSort.cs | 55 ++++++++----------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 873b0c8..39229e2 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3191,12 +3191,12 @@ private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, for (i = (m / 2) - 1; i >= 0; i--) { crosscheck($"LISA i={i}"); - tr_fixdown(ISAd, SA_top, first, i, m); + tr_fixdown(ISAd, SA, i, m); } if ((size % 2) == 0) { SA_top.Swap(first + 0, first + m); - tr_fixdown(ISAd, SA_top, first, 0, m); + tr_fixdown(ISAd, SA, 0, m); } // MARK //TODO: checkme @@ -3206,60 +3206,53 @@ private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, crosscheck($"MARK i={i}"); t = SA[0]; SA[0] = SA[i]; - tr_fixdown(ISAd, SA_top, first, 0, i); + tr_fixdown(ISAd, SA, 0, i); SA[i] = t; } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void tr_fixdown(Span ISAd, Span SA_top, SAPtr first, Idx i, Idx size) + private static void tr_fixdown(Span ISAd, Span SA, Idx i, Idx size) { - let mut j: Idx; - let mut k: Idx; - let mut d: Idx; - let mut e: Idx; - - crosscheck!("fixdown i={} size={}", i, size); - - macro_rules! ISAd { - ($x: expr) => { - SA_top[ISAd + $x] - }; - } - macro_rules! 
SA { - ($x: expr) => { - SA_top[first + $x] - }; - }; + Idx j; + Idx k; + Idx d; + Idx e; + + crosscheck($"fixdown i={i} size={size}"); // WILMOT - let v = SA!(i); - let c = ISAd!(v); - loop { + var v = SA[i]; + var c = ISAd[v]; + while (true) + { // cond j = 2 * i + 1; - if !(j < size) { + if (!(j < size)) + { break; } // body k = j; - d = ISAd!(SA!(k)); + d = ISAd[SA[k]]; j += 1; - e = ISAd!(SA!(j)); - if d < e { + e = ISAd[SA[j]]; + if (d < e) + { k = j; d = e; } - if d <= c { + if (d <= c) + { break; } // iter (WILMOT) - SA!(i) = SA!(k); + SA[i] = SA[k]; i = k; } - SA!(i) = v; + SA[i] = v; } /// From 35c081f8a864234e2ae9840f71f0f72553cc341c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 02:45:10 -0500 Subject: [PATCH 167/325] Add LibDivSufSortTests --- ...Q.SuffixSorting.LivDivSufSort.Tests.csproj | 12 +- .../LibDivSufSortTests.cs | 213 ++++++++++++++++++ .../SAISChecker.cs | 107 --------- 3 files changed, 224 insertions(+), 108 deletions(-) create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs delete mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj index 5f88768..19d2438 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj @@ -1,4 +1,4 @@ - + net6.0 @@ -7,6 +7,12 @@ false + + + PreserveNewest + + + @@ -20,4 +26,8 @@ + + + + diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs new file mode 100644 index 0000000..ce1ba5d --- /dev/null +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -0,0 +1,213 @@ +using 
DeltaQ.SuffixSorting.LibDivSufSort; +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Text; +using Xunit; + +namespace DeltaQ.Tests +{ + public class LibDivSufSortTests + { + private const string FuzzFilesBasePath = "assets/"; + + [Conditional("DEBUG")] + private void SetupCrosscheckListeners() + { + const string crosscheckFilename = "crosscheck/csharp"; + try + { + Directory.CreateDirectory(crosscheckFilename); + File.Create(crosscheckFilename).Dispose(); + } + catch (IOException) { } + //var dtl = Trace.Listeners[0] as DefaultTraceListener; + //dtl!.LogFileName = "crosscheck/csharp"; + var lflistener = new TextWriterTraceListener(crosscheckFilename); + Trace.Listeners.Clear(); + Trace.Listeners.Add(lflistener); + } + + [Conditional("DEBUG")] + private void FinalizeCrosscheck() + { + Trace.Flush(); + } + +#if NET461 + private static void RandomFillBuffer(byte[] buffer) + { + var rand = new Random(63 * 13 * 63 * 13); + rand.NextBytes(buffer); + } +#else + private static SpanOwner GetOwnedRandomBuffer(int size) + { + var rand = new Random(63 * 13 * 63 * 13); + + var owner = SpanOwner.Allocate(size); + rand.NextBytes(owner.Span); + + return owner; + } +#endif + + private static void Verify(ReadOnlySpan input, ReadOnlySpan sa) + { + //ref byte suff(int index) => ref input[sa[index]]; + for (int i = 0; i < input.Length - 1; i++) + { + //if(!(suff(i) < suff(i + 1))) + var cur = input[sa[i]..]; + var next = input[sa[i + 1]..]; + var cmp = cur.SequenceCompareTo(next); + if (!(cmp < 0)) + //if (!(cur < next)) + { + var ex = new InvalidOperationException("Input was unsorted"); + ex.Data["i"] = i; + ex.Data["j"] = i + 1; + throw ex; + } + } + } + + [Fact] + public void CheckShruggy() + { + const string shruggy = @"¯\_(ツ)_/¯"; + + var size = Encoding.UTF8.GetByteCount(shruggy); + using var ownedT = 
SpanOwner.Allocate(size); + + var actualSize = Encoding.UTF8.GetBytes(shruggy, ownedT.Span); + Assert.Equal(size, actualSize); + + ReadOnlySpan T = ownedT.Span; + //int result; + using (var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear)) + { + var SA = ownedSA.Span; + + DivSufSort.divsufsort(T, SA); + Verify(T, SA); + //result = SAISChecker.Check(T, SA, T.Length, false); + } + //Assert.Equal(expected: 0, result); + } + + public static IEnumerable FuzzFiles => FuzzFilesInner.Select(fuzzFile => new object[] { Path.Join(FuzzFilesBasePath, fuzzFile) }); + private static IEnumerable FuzzFilesInner + { + get + { + //Crosscheck passed: + yield return "fuzz1"; + yield return "fuzz2"; + yield return "fuzz3"; + yield return "crash-cf8673530fdca659e0ddf070b4718b9c0bb504ec"; + yield return "crash-ce407adf7cf638d3fa89b5637a94355d7d658872"; + yield return "crash-c792e788de61771b6cd65c1aa5670c62e57a33c4"; + yield return "crash-90b42d1c55ee90a8b004fb9db1853429ceb4c4ba"; + yield return "crash-8765ef2258178ca027876eab83e01d6d58db9ca0"; + yield return "crash-4f8c31dec8c3678a07e0fbacc6bd69e7cc9037fb"; + yield return "crash-16356e91966a827f79e49167170194fc3088a7ab"; + //Crosscheck untested: + //yield return prefix + ""; + } + } + + [Theory] + [MemberData(nameof(FuzzFiles))] + public void CheckFile(string path) + { + SetupCrosscheckListeners(); + + var bytes = File.ReadAllBytes(path); + ReadOnlySpan T = bytes; + + using var ownedSA = SpanOwner.Allocate(T.Length, AllocationMode.Clear); + var SA = ownedSA.Span; + + DivSufSort.divsufsort(T, SA); + Verify(T, SA); + + Trace.Flush(); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(4)] + [InlineData(8)] + [InlineData(16)] + [InlineData(32)] + [InlineData(51)] + [InlineData(0x1000)] + [InlineData(0x8000)] + //[InlineData(0x80000)] + //[InlineData(0x800000)] + public void CheckRandomBuffer(int size) + { +#if NET461 + var ownedT = ArrayPool.Shared.Rent(size); + try +#else + using (var ownedT = 
GetOwnedRandomBuffer(size)) +#endif + { +#if NET461 + RandomFillBuffer(ownedT); + ReadOnlySpan T = ownedT; +#else + ReadOnlySpan T = ownedT.Span; +#endif + //int result; + using (var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear)) + { + var SA = ownedSA.Span; + + DivSufSort.divsufsort(T, SA); + Verify(T, SA); + //result = SAISChecker.Check(T, SA, T.Length, false); + } + //Assert.Equal(expected: 0, result); + } +#if NET461 + finally + { + ArrayPool.Shared.Return(ownedT); + } +#endif + } + + //[Theory] + //[InlineData(0)] + //[InlineData(1)] + //[InlineData(2)] + //[InlineData(4)] + //[InlineData(8)] + //[InlineData(16)] + //[InlineData(32)] + //[InlineData(51)] + //[InlineData(0x1000)] + //public void CheckRandomBufferContinuous(int size) + //{ + // const int repetitions = 2_000; + // for (int i = 0; i < repetitions; i++) + // { + // CheckRandomBuffer(size); + + // if (i % 100 == 0) + // { + // System.Diagnostics.Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); + // } + // } + //} + } +} diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs deleted file mode 100644 index 718df25..0000000 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/SAISChecker.cs +++ /dev/null @@ -1,107 +0,0 @@ -using Microsoft.Toolkit.HighPerformance.Buffers; -using System; - -namespace DeltaQ.Tests -{ - internal class SAISChecker - { - internal static int Check(ReadOnlySpan T, ReadOnlySpan SA, int n, bool verbose = false) - { - using var owned_C = SpanOwner.Allocate(256); - Span C = owned_C.Span; - int i, p, q, t; - int c; - - if (verbose) { Console.Write(@"sufcheck: "); } - if (n == 0) - { - if (verbose) { Console.WriteLine("Done."); } - return 0; - } - - /* Check arguments. 
*/ - if ((T == null) || (SA == null) || (n < 0)) - { - if (verbose) { Console.WriteLine("Invalid arguments."); } - return -1; - } - - /* check range: [0..n-1] */ - for (i = 0; i < n; ++i) - { - if ((SA[i] < 0) || (n <= SA[i])) - { - if (verbose) - { - Console.WriteLine("Out of the range [0," + (n - 1) + "]."); - Console.WriteLine(" SA[" + i + "]=" + SA[i]); - } - return -2; - } - } - - /* check first characters. */ - for (i = 1; i < n; ++i) - { - if (T[SA[i - 1]] > T[SA[i]]) - { - if (verbose) - { - Console.WriteLine("Suffixes in wrong order."); - Console.Write(" T[SA[" + (i - 1) + "]=" + SA[i - 1] + "]=" + T[SA[i - 1]]); - Console.WriteLine(" > T[SA[" + i + "]=" + SA[i] + "]=" + T[SA[i]]); - } - return -3; - } - } - - /* check suffixes. */ - - //for (i = 0; i < 256; ++i) { C[i] = 0; } - C.Clear(); - - for (i = 0; i < n; ++i) { ++C[T[i]]; } - for (i = 0, p = 0; i < 256; ++i) - { - t = C[i]; - C[i] = p; - p += t; - } - - q = C[T[n - 1]]; - C[T[n - 1]] += 1; - for (i = 0; i < n; ++i) - { - p = SA[i]; - if (0 < p) - { - c = T[--p]; - t = C[c]; - } - else - { - c = T[p = n - 1]; - t = q; - } - if ((t < 0) || (p != SA[t])) - { - if (verbose) - { - Console.WriteLine("Suffixes in wrong position."); - Console.WriteLine(" SA[" + t + "]=" + ((0 <= t) ? SA[t] : -1) + " or"); - Console.WriteLine(" SA[" + i + "]=" + SA[i]); - } - return -4; - } - if (t != q) - { - ++C[c]; - if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } - } - } - - if (verbose) { Console.WriteLine("Done."); } - return 0; - } - } -} From 6a44c592282f3b960868ec24d17aed6d5b35b680 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 02:45:27 -0500 Subject: [PATCH 168/325] Update LibDivSufSortTests --- .../LibDivSufSortTests.cs | 49 +++---------------- 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index ce1ba5d..17ad8a6 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -81,23 +81,13 @@ public void CheckShruggy() { const string shruggy = @"¯\_(ツ)_/¯"; - var size = Encoding.UTF8.GetByteCount(shruggy); - using var ownedT = SpanOwner.Allocate(size); + ReadOnlySpan T = Encoding.UTF8.GetBytes(shruggy); - var actualSize = Encoding.UTF8.GetBytes(shruggy, ownedT.Span); - Assert.Equal(size, actualSize); - - ReadOnlySpan T = ownedT.Span; - //int result; - using (var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear)) - { - var SA = ownedSA.Span; + using var ownedSA = SpanOwner.Allocate(T.Length, AllocationMode.Clear); + var SA = ownedSA.Span; - DivSufSort.divsufsort(T, SA); - Verify(T, SA); - //result = SAISChecker.Check(T, SA, T.Length, false); - } - //Assert.Equal(expected: 0, result); + DivSufSort.divsufsort(T, SA); + Verify(T, SA); } public static IEnumerable FuzzFiles => FuzzFilesInner.Select(fuzzFile => new object[] { Path.Join(FuzzFilesBasePath, fuzzFile) }); @@ -136,7 +126,7 @@ public void CheckFile(string path) DivSufSort.divsufsort(T, SA); Verify(T, SA); - Trace.Flush(); + FinalizeCrosscheck(); } [Theory] @@ -167,16 +157,13 @@ public void CheckRandomBuffer(int size) #else ReadOnlySpan T = ownedT.Span; #endif - //int result; using (var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear)) { var SA = ownedSA.Span; DivSufSort.divsufsort(T, SA); Verify(T, SA); - //result = SAISChecker.Check(T, SA, T.Length, false); } - //Assert.Equal(expected: 0, result); } #if NET461 finally @@ -185,29 +172,5 @@ 
public void CheckRandomBuffer(int size) } #endif } - - //[Theory] - //[InlineData(0)] - //[InlineData(1)] - //[InlineData(2)] - //[InlineData(4)] - //[InlineData(8)] - //[InlineData(16)] - //[InlineData(32)] - //[InlineData(51)] - //[InlineData(0x1000)] - //public void CheckRandomBufferContinuous(int size) - //{ - // const int repetitions = 2_000; - // for (int i = 0; i < repetitions; i++) - // { - // CheckRandomBuffer(size); - - // if (i % 100 == 0) - // { - // System.Diagnostics.Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); - // } - // } - //} } } From fc1744faa4ae993355d8fc66d8f17e865e608616 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 02:46:04 -0500 Subject: [PATCH 169/325] Update solution --- deltaq.sln | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deltaq.sln b/deltaq.sln index 01ff79d..8b9f9b3 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -29,7 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Utility.Memory", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LibDivSufSort", "src\DeltaQ.SuffixSorting.LibDivSufSort\DeltaQ.SuffixSorting.LibDivSufSort.csproj", "{E89B007E-0BDE-4642-B40F-CCB7569F88B8}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.SuffixSorting.LivDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LivDivSufSort.Tests\DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LivDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LivDivSufSort.Tests\DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution From 4caa7a0db60875ac71f47e802f0eff8310cd36d0 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 03:34:32 -0500 Subject: [PATCH 170/325] Add back in ISuffixSort impl to LibDivSufSort and remove old port attempt files --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 2 +- .../LibDivSufSort.cs | 22 +- .../Utils.cs | 414 --------- .../divsufsort.cs | 263 ------ .../sssort.cs | 816 ------------------ 5 files changed, 12 insertions(+), 1505 deletions(-) delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index e194dfa..d48b570 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -1,4 +1,4 @@ - + net6.0 diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index cc0627e..177e535 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -1,4 +1,5 @@ -using System; +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; using System.Buffers; namespace DeltaQ.SuffixSorting.LibDivSufSort @@ -7,27 +8,26 @@ public partial class LibDivSufSort : ISuffixSort { public IMemoryOwner Sort(ReadOnlySpan textBuffer) { - throw new NotImplementedException(); + var owner = MemoryOwner.Allocate(textBuffer.Length); + + Sort(textBuffer, suffixBuffer: owner.Span); + + return owner; } public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { if(textBuffer.Length != suffixBuffer.Length) { - throw new ArgumentException($"{nameof(textBuffer)} and {nameof(suffixBuffer)} should have the same length"); + ThrowHelper(); } //TODO: add 0/1/2 fast cases 
- //let T = Text(T); - //let mut SA = SuffixArray(SA); - - //// Suffixsort. - //construct_SA(&T, &mut SA, res.A, res.B, res.m); - var res = sort_typeBstar(textBuffer, SA); - //construct_SA(&T, &mut SA, res.A, res.B, res.m); - construct_SA(textBuffer, suffixBuffer, res.A, res.B, res.m); + DivSufSort.divsufsort(textBuffer, suffixBuffer); + return suffixBuffer.Length; } + private static void ThrowHelper() => throw new ArgumentException("Text and suffix buffers should have the same length"); } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs deleted file mode 100644 index ac0b023..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ /dev/null @@ -1,414 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using sauchar_t = System.Byte; -using saint_t = System.Int32; -using saidx_t = System.Int32; -using System.Diagnostics; - -namespace DeltaQ.SuffixSorting.LibDivSufSort -{ - class Utils - { - private const int ALPHABET_SIZE = sizeof(byte) + 1; - - /* Binary search for inverse bwt. */ - static saidx_t binarysearch_lower(ReadOnlySpan A, saidx_t size, saidx_t value) - { - saidx_t half, i; - for (i = 0, half = size >> 1; - 0 < size; - size = half, half >>= 1) - { - if (A[i + half] < value) - { - i += half + 1; - half -= (size & 1) ^ 1; - } - } - return i; - } - - - /*- Functions -*/ - - /* Burrows-Wheeler transform. */ - saint_t - bw_transform(ReadOnlySpan T, Span U, Span SA, - saidx_t n, ref saidx_t idx) - { - Span A; - saint_t c; - - /* Check arguments. */ - if ((T == null) || (U == null) || (n < 0) || (idx == null)) { return -1; } - if (n <= 1) - { - if (n == 1) { U[0] = T[0]; } - idx = n; - return 0; - } - - if ((A = SA) == null) - { - saidx_t i = divbwt(T, U, null, n); - if (0 <= i) { idx = i; i = 0; } - return (saint_t)i; - } - - /* BW transform. 
*/ - if (T == U) - { - saidx_t i, j, p, t = n; - for (i = 0, j = 0; i < n; ++i) - { - p = t - 1; - t = A[i]; - if (0 <= p) - { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - j++; - } - else - { - idx = i; - } - } - p = t - 1; - if (0 <= p) - { - c = T[j]; - U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; - A[j] = c; - } - else - { - idx = i; - } - } - else - { - saidx_t i; - U[0] = T[n - 1]; - for (i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } - idx = i + 1; - for (++i; i < n; ++i) { U[i] = T[A[i] - 1]; } - } - - return 0; - } - - /* Inverse Burrows-Wheeler transform. */ - saint_t - inverse_bw_transform(ReadOnlySpan T, Span U, Span A, - saidx_t n, saidx_t idx) - { - Span C = new saidx_t[ALPHABET_SIZE]; - Span D = new sauchar_t[ALPHABET_SIZE]; - //saidx_t C[ALPHABET_SIZE]; - //sauchar_t D[ALPHABET_SIZE]; - Span B; - saidx_t i, p; - saint_t c, d; - - /* Check arguments. */ - if ((T == null) || (U == null) || (n < 0) || (idx < 0) || - (n < idx) || ((0 < n) && (idx == 0))) - { - return -1; - } - if (n <= 1) { return 0; } - - /* Inverse BW transform. */ - for (c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } - for (i = 0; i < n; ++i) { ++C[T[i]]; } - for (c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) - { - p = C[c]; - if (0 < p) - { - C[c] = i; - D[d++] = (sauchar_t)c; - i += p; - } - } - for (i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } - for (; i < n; ++i) { B[C[T[i]]++] = i + 1; } - for (c = 0; c < d; ++c) { C[c] = C[D[c]]; } - for (i = 0, p = idx; i < n; ++i) - { - U[i] = D[binarysearch_lower(C, d, p)]; - p = B[p - 1]; - } - - return 0; - } - - /* Checks the suffix array SA of the string T. */ - saint_t - sufcheck(ReadOnlySpan T, ReadOnlySpan SA, - saidx_t n) - { - Span C = new saidx_t[ALPHABET_SIZE]; - //saidx_t C[ALPHABET_SIZE]; - saidx_t i, p, q, t; - saint_t c; - - Debug.Write("sufcheck: "); - - /* Check arguments. 
*/ - if ((T == null) || (SA == null) || (n < 0)) - { - Debug.WriteLine("Invalid arguments."); - return -1; - } - if (n == 0) - { - Debug.WriteLine("Done."); - return 0; - } - - /* check range: [0..n-1] */ - for (i = 0; i < n; ++i) - { - if ((SA[i] < 0) || (n <= SA[i])) - { - Debug.WriteLine("Out of the range [0,{0}]", n - 1); - Debug.WriteLine("SA[{0}]={1}", i, SA[i]); - return -2; - } - } - - /* check first characters. */ - for (i = 1; i < n; ++i) - { - if (T[SA[i - 1]] > T[SA[i]]) - { - Debug.WriteLine("Suffixes in wrong order."); - Debug.WriteLine(" T[SA[{0}]={1}]={2}", i - 1, SA[i - 1], T[SA[i - 1]]); - Debug.WriteLine(" > T[SA[{0}]={1}]={2}", i, SA[i], T[SA[i]]); - return -3; - } - } - - /* check suffixes. */ - for (i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } - for (i = 0; i < n; ++i) { ++C[T[i]]; } - for (i = 0, p = 0; i < ALPHABET_SIZE; ++i) - { - t = C[i]; - C[i] = p; - p += t; - } - - q = C[T[n - 1]]; - C[T[n - 1]] += 1; - for (i = 0; i < n; ++i) - { - p = SA[i]; - if (0 < p) - { - c = T[--p]; - t = C[c]; - } - else - { - c = T[p = n - 1]; - t = q; - } - if ((t < 0) || (p != SA[t])) - { - Debug.WriteLine("Suffix in wrong position."); - Debug.WriteLine(" SA[{0}]={1} or", t, 0 <= t ? SA[t] : -1); - Debug.WriteLine(" SA[{0}]={1}", i, SA[i]); - return -4; - } - if (t != q) - { - ++C[c]; - if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } - } - } - - Debug.WriteLine("Done."); - return 0; - } - - - static - int - _compare(ReadOnlySpan T, saidx_t Tsize, - ReadOnlySpan P, saidx_t Psize, - saidx_t suf, ref saidx_t match) - { - saidx_t i, j; - saint_t r; - for (i = suf + match, j = match, r = 0; - (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } - match = j; - //TODO: checkme - return (r == 0) ? (j != Psize ? -1 : 0) : r; - } - - /* Search for the pattern P in the string T. 
*/ - saidx_t - sa_search(ReadOnlySpan T, saidx_t Tsize, - ReadOnlySpan P, saidx_t Psize, - ReadOnlySpan SA, saidx_t SAsize, - ref saidx_t idx) - { - saidx_t size, lsize, rsize, half; - saidx_t match, lmatch, rmatch; - saidx_t llmatch, lrmatch, rlmatch, rrmatch; - saidx_t i, j, k; - saint_t r; - - if (idx != null) { idx = -1; } - if ((T == null) || (P == null) || (SA == null) || - (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } - if ((Tsize == 0) || (SAsize == 0)) { return 0; } - if (Psize == 0) { if (idx != null) { idx = 0; } return SAsize; } - - for (i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) - { - match = Math.Min(lmatch, rmatch); - r = _compare(T, Tsize, P, Psize, SA[i + half], ref match); - if (r < 0) - { - i += half + 1; - half -= (size & 1) ^ 1; - lmatch = match; - } - else if (r > 0) - { - rmatch = match; - } - else - { - lsize = half; - j = i; - rsize = size - half - 1; - k = i + half + 1; - - /* left part */ - for (llmatch = lmatch, lrmatch = match, half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) - { - lmatch = Math.Min(llmatch, lrmatch); - r = _compare(T, Tsize, P, Psize, SA[j + half], ref lmatch); - if (r < 0) - { - j += half + 1; - half -= (lsize & 1) ^ 1; - llmatch = lmatch; - } - else - { - lrmatch = lmatch; - } - } - - /* right part */ - for (rlmatch = match, rrmatch = rmatch, half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) - { - rmatch = Math.Min(rlmatch, rrmatch); - r = _compare(T, Tsize, P, Psize, SA[k + half], ref rmatch); - if (r <= 0) - { - k += half + 1; - half -= (rsize & 1) ^ 1; - rlmatch = rmatch; - } - else - { - rrmatch = rmatch; - } - } - - break; - } - } - - if (idx != null) { idx = (0 < (k - j)) ? j : i; } - return k - j; - } - - /* Search for the character c in the string T. 
*/ - saidx_t - sa_simplesearch(ReadOnlySpan T, saidx_t Tsize, - ReadOnlySpan SA, saidx_t SAsize, - saint_t c, ref saidx_t idx) - { - saidx_t size, lsize, rsize, half; - saidx_t i, j, k, p; - saint_t r; - - if (idx != null) { idx = -1; } - if ((T == null) || (SA == null) || (Tsize < 0) || (SAsize < 0)) { return -1; } - if ((Tsize == 0) || (SAsize == 0)) { return 0; } - - for (i = j = k = 0, size = SAsize, half = size >> 1; - 0 < size; - size = half, half >>= 1) - { - p = SA[i + half]; - r = (p < Tsize) ? T[p] - c : -1; - if (r < 0) - { - i += half + 1; - half -= (size & 1) ^ 1; - } - else if (r == 0) - { - lsize = half; - j = i; - rsize = size - half - 1; - k = i + half + 1; - - /* left part */ - for (half = lsize >> 1; - 0 < lsize; - lsize = half, half >>= 1) - { - p = SA[j + half]; - r = (p < Tsize) ? T[p] - c : -1; - if (r < 0) - { - j += half + 1; - half -= (lsize & 1) ^ 1; - } - } - - /* right part */ - for (half = rsize >> 1; - 0 < rsize; - rsize = half, half >>= 1) - { - p = SA[k + half]; - r = (p < Tsize) ? T[p] - c : -1; - if (r <= 0) - { - k += half + 1; - half -= (rsize & 1) ^ 1; - } - } - - break; - } - } - - if (idx != null) { idx = (0 < (k - j)) ? 
j : i; } - return k - j; - } - - } -} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs deleted file mode 100644 index c90feb6..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/divsufsort.cs +++ /dev/null @@ -1,263 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using sauchar_t = System.Byte; -using saint_t = System.Int32; -using saidx_t = System.Int32; -using Microsoft.Toolkit.HighPerformance.Buffers; -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -namespace DeltaQ.SuffixSorting.LibDivSufSort -{ - public partial class LibDivSufSort - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_A(Span bucket_A, int c0) => ref bucket_A[c0]; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_B(Span bucket_B, int c0, int c1) => ref bucket_B[((c1) << 8) | (c0)]; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int BUCKET_BSTAR(Span bucket_B, int c0, int c1) => ref bucket_B[((c0) << 8) | (c1)]; - - /* Constructs the suffix array by using the sorted order of type B* suffixes. */ - static - void - construct_SA(ReadOnlySpan T, Span SA, - Span bucket_A, Span bucket_B, - saidx_t n, saidx_t m) - { - saidx_t s; - saint_t c0, c1, c2; - - if (0 < m) - { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) - { - /* Scan the suffix array from right to left. 
*/ - c2 = -1; - for (ref saidx_t i = ref Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_BSTAR(bucket_B, c1, c1 + 1)), - j = ref Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_A(bucket_A, c1 + 1) - 1), - k = ref Unsafe.NullRef(); - i <= j; - --j) - { - if (0 < (s = j)) - { - Debug.Assert(T[s] == c1); - Debug.Assert(((s + 1) < n) && (T[s] <= T[s + 1])); - Debug.Assert(T[s - 1] <= T[s]); - j = ~s; - c0 = T[--s]; - if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if (c0 != c2) - { - if (0 <= c2) { BUCKET_B(bucket_B, c2, c1) = k - MemoryMarshal.GetReference(SA); } - k = Unsafe.Add(ref MemoryMarshal.GetReference(SA), BUCKET_B(bucket_B, c2 = c0, c1)); - } - Debug.Assert(k < j); - k = ref Unsafe.Subtract(ref k, 1); - k = s; - } - else - { - Debug.Assert(((s == 0) && (T[s] == c1)) || (s < 0)); - j = ~s; - } - } - } - } - - /* Construct the suffix array by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); - /* Scan the suffix array from left to right. */ - for (i = SA, j = SA + n; i < j; ++i) - { - if (0 < (s = *i)) - { - Debug.Assert(T[s - 1] >= T[s]); - c0 = T[--s]; - if ((s == 0) || (T[s - 1] < c0)) { s = ~s; } - if (c0 != c2) - { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - Debug.Assert(i < k); - *k++ = s; - } - else - { - Debug.Assert(s < 0); - *i = ~s; - } - } - } - - /* Constructs the burrows-wheeler transformed string directly - by using the sorted order of type B* suffixes. */ - static - saidx_t - construct_BWT(ReadOnlySpan T, Span SA, - Span bucket_A, Span bucket_B, - saidx_t n, saidx_t m) - { - saidx_t i, j, k, orig; - saidx_t s; - saint_t c0, c1, c2; - - if (0 < m) - { - /* Construct the sorted order of type B suffixes by using - the sorted order of type B* suffixes. */ - for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) - { - /* Scan the suffix array from right to left. 
*/ - for (i = SA + BUCKET_BSTAR(c1, c1 + 1), - j = SA + BUCKET_A(c1 + 1) - 1, k = null, c2 = -1; - i <= j; - --j) - { - if (0 < (s = *j)) - { - Debug.Assert(T[s] == c1); - Debug.Assert(((s + 1) < n) && (T[s] <= T[s + 1])); - Debug.Assert(T[s - 1] <= T[s]); - c0 = T[--s]; - *j = ~((saidx_t)c0); - if ((0 < s) && (T[s - 1] > c0)) { s = ~s; } - if (c0 != c2) - { - if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; } - k = SA + BUCKET_B(c2 = c0, c1); - } - Debug.Assert(k < j); - *k-- = s; - } - else if (s != 0) - { - *j = ~s; -#if DEBUG - } - else - { - Debug.Assert(T[s] == c1); -#endif - } - } - } - } - - /* Construct the BWTed string by using - the sorted order of type B suffixes. */ - k = SA + BUCKET_A(c2 = T[n - 1]); - *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); - /* Scan the suffix array from left to right. */ - for (i = SA, j = SA + n, orig = SA; i < j; ++i) - { - if (0 < (s = *i)) - { - Debug.Assert(T[s - 1] >= T[s]); - c0 = T[--s]; - *i = c0; - if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } - if (c0 != c2) - { - BUCKET_A(c2) = k - SA; - k = SA + BUCKET_A(c2 = c0); - } - Debug.Assert(i < k); - *k++ = s; - } - else if (s != 0) - { - *i = ~s; - } - else - { - orig = i; - } - } - - return orig - SA; - } - - - /*---------------------------------------------------------------------------*/ - - /*- Function -*/ - - saint_t - divsufsort(ReadOnlySpan T, Span SA, saidx_t n) - { - saidx_t m; - - /* Check arguments. */ - if ((T == null) || (SA == null) || (n < 0)) { return -1; } - else if (n == 0) { return 0; } - else if (n == 1) { SA[0] = 0; return 0; } - else if (n == 2) { /*TODO: checkme*/m = T[0] < T[1] ? 1 : 0; SA[m ^ 1] = 0; SA[m] = 1; return 0; } - - using var owner_A = SpanOwner.Allocate(BUCKET_A_SIZE); - using var owner_B = SpanOwner.Allocate(BUCKET_B_SIZE); - - Span bucket_A = owner_A.Span; - Span bucket_B = owner_B.Span; - - /* Suffixsort. 
*/ - if (bucket_A == null || bucket_B == null) - { - return -2; - } - - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); - construct_SA(T, SA, bucket_A, bucket_B, n, m); - return 0; - } - - saidx_t - divbwt(ReadOnlySpan T, Span U, Span A, saidx_t n) - { - Span B; - Span bucket_A, bucket_B; - saidx_t m, pidx, i; - - /* Check arguments. */ - if ((T == null) || (U == null) || (n < 0)) { return -1; } - else if (n <= 1) { if (n == 1) { U[0] = T[0]; } return n; } - - if ((B = A) == null) { B = new saidx_t[n + 1]; } - bucket_A = new saidx_t[BUCKET_A_SIZE]; - bucket_B = new saidx_t[BUCKET_B_SIZE]; - - /* Burrows-Wheeler Transform. */ - if ((B != null) && (bucket_A != null) && (bucket_B != null)) - { - m = sort_typeBstar(T, B, bucket_A, bucket_B, n); - pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); - - /* Copy to output string. */ - U[0] = T[n - 1]; - for (i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } - for (i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } - pidx += 1; - } - else - { - pidx = -2; - } - - //free(bucket_B); - //free(bucket_A); - //if (A == null) { free(B); } - - return pidx; - } - } -} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs deleted file mode 100644 index 097bf86..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/sssort.cs +++ /dev/null @@ -1,816 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using sauchar_t = System.Byte; -using saint_t = System.Int32; -using saidx_t = System.Int32; -using System.Runtime.CompilerServices; - -namespace DeltaQ.SuffixSorting.LibDivSufSort -{ - public partial class LibDivSufSort - { - //# define SS_BLOCKSIZE (1024) - private const int SS_BLOCKSIZE = 1024; - //# define SS_INSERTIONSORT_THRESHOLD (8) - private const int SS_INSERTIONSORT_THRESHOLD = 8; - - private static readonly saint_t[] lg_table_array = new[] { - 
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -}; - internal static ReadOnlySpan lg_table => lg_table_array; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saint_t ss_ilg(saidx_t n) - { - return (n & 0xff00) != 0 ? - 8 + lg_table[(n >> 8) & 0xff] : - 0 + lg_table[(n >> 0) & 0xff]; - } - - private static readonly saint_t[] sqq_table_array = new[] { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, - 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, - 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, -110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, -128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, -143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, -156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, -169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, -181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, -192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, -202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, -212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, -221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, -230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, -239, 240, 240, 241, 241, 242, 242, 
243, 243, 244, 244, 245, 245, 246, 246, 247, -247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 -}; - private static ReadOnlySpan sqq_table => sqq_table_array; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saidx_t ss_isqrt(saidx_t x) - { - saidx_t y, e; - - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } - e = (x & 0xffff0000) != 0 ? - ((x & 0xff000000) != 0 ? - 24 + lg_table[(x >> 24) & 0xff] : - 16 + lg_table[(x >> 16) & 0xff]) : - ((x & 0x0000ff00) != 0 ? - 8 + lg_table[(x >> 8) & 0xff] : - 0 + lg_table[(x >> 0) & 0xff]); - - if (e >= 16) - { - y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); - if (e >= 24) { y = (y + 1 + x / y) >> 1; } - y = (y + 1 + x / y) >> 1; - } - else if (e >= 8) - { - y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; - } - else - { - return sqq_table[x] >> 4; - } - - return (x < (y * y)) ? y - 1 : y; - } - - /*---------------------------------------------------------------------------*/ - - /* Compares two suffixes. */ - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saint_t ss_compare(ReadOnlySpan T, - ReadOnlySpan p1, ReadOnlySpan p2, - saidx_t depth) - { - ReadOnlySpan U1, *U2, *U1n, *U2n; - - for (U1 = T + depth + *p1, - U2 = T + depth + *p2, - U1n = T + *(p1 + 1) + 2, - U2n = T + *(p2 + 1) + 2; - (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); - ++U1, ++U2) - { - } - - return U1 < U1n ? - (U2 < U2n ? *U1 - *U2 : 1) : - (U2 < U2n ? 
-1 : 0); - } - - - /*---------------------------------------------------------------------------*/ - - /* Insertionsort for small size groups */ - static void ss_insertionsort(ReadOnlySpan T, ReadOnlySpan PA, - saidx_t* first, saidx_t* last, saidx_t depth) - { - saidx_t* i, *j; - saidx_t t; - saint_t r; - - for (i = last - 2; first <= i; --i) - { - for (t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) - { - do { *(j - 1) = *j; } while ((++j < last) && (*j < 0)); - if (last <= j) { break; } - } - if (r == 0) { *j = ~*j; } - *(j - 1) = t; - } - } - - /*---------------------------------------------------------------------------*/ - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ void ss_fixdown(ReadOnlySpan Td, ReadOnlySpan PA, - saidx_t* SA, saidx_t i, saidx_t size) - { - saidx_t j, k; - saidx_t v; - saint_t c, d, e; - - for (v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) - { - d = Td[PA[SA[k = j++]]]; - if (d < (e = Td[PA[SA[j]]])) { k = j; d = e; } - if (d <= c) { break; } - } - SA[i] = v; - } - - /* Simple top-down heapsort. */ - static void ss_heapsort(ReadOnlySpan Td, ReadOnlySpan PA, saidx_t* SA, saidx_t size) - { - saidx_t i, m; - saidx_t t; - - m = size; - if ((size % 2) == 0) - { - m--; - if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } - } - - for (i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } - if ((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } - for (i = m - 1; 0 < i; --i) - { - t = SA[0], SA[0] = SA[i]; - ss_fixdown(Td, PA, SA, 0, i); - SA[i] = t; - } - } - - - /*---------------------------------------------------------------------------*/ - - /* Returns the median of three elements. 
*/ - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saidx_t* ss_median3(ReadOnlySpan Td, ReadOnlySpan PA, - saidx_t* v1, saidx_t* v2, saidx_t* v3) - { - saidx_t* t; - if (Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } - if (Td[PA[*v2]] > Td[PA[*v3]]) - { - if (Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } - else { return v3; } - } - return v2; - } - - /* Returns the median of five elements. */ - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saidx_t* ss_median5(ReadOnlySpan Td, ReadOnlySpan PA, - saidx_t* v1, saidx_t* v2, saidx_t* v3, saidx_t* v4, saidx_t* v5) - { - saidx_t* t; - if (Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } - if (Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } - if (Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } - if (Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } - if (Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } - if (Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } - return v3; - } - - /* Returns the pivot element. */ - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saidx_t* ss_pivot(ReadOnlySpan Td, ReadOnlySpan PA, saidx_t* first, saidx_t* last) - { - saidx_t* middle; - saidx_t t; - - t = last - first; - middle = first + t / 2; - - if (t <= 512) - { - if (t <= 32) - { - return ss_median3(Td, PA, first, middle, last - 1); - } - else - { - t >>= 2; - return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); - } - } - t >>= 3; - first = ss_median3(Td, PA, first, first + t, first + (t << 1)); - middle = ss_median3(Td, PA, middle - t, middle, middle + t); - last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - return ss_median3(Td, PA, first, middle, last); - } - - - /*---------------------------------------------------------------------------*/ - - /* Binary partition for substrings. 
*/ - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static /*INLINE*/ saidx_t* ss_partition(ReadOnlySpan PA, - saidx_t* first, saidx_t* last, saidx_t depth) - { - saidx_t* a, *b; - saidx_t t; - for (a = first - 1, b = last; ;) - { - for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } - for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } - if (b <= a) { break; } - t = ~*b; - *b = *a; - *a = t; - } - if (first < a) { *first = ~*first; } - return a; - } - - //#define STACK_SIZE SS_MISORT_STACKSIZE - //#define SS_MISORT_STACKSIZE (16) - private const int STACK_SIZE = 16; - - private struct stack - { - ref saidx_t a; - ref saidx_t b; - saidx_t c; - saint_t d; - } - - /* Multikey introsort for medium size groups. */ - static void ss_mintrosort(ReadOnlySpan T, ReadOnlySpan PA, - ref saidx_t first, ref saidx_t last, - saidx_t depth) - { - //struct { saidx_t* a, * b, c; saint_t d; } stack[STACK_SIZE]; - Span stack = stackalloc stack[STACK_SIZE]; - - ReadOnlySpan Td; - ref saidx_t a, b, c, d, e, f; - saidx_t s, t; - saint_t ssize; - saint_t limit; - saint_t v, x = 0; - - for (ssize = 0, limit = ss_ilg(last - first); ;) - { - - if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) - { - if (1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } - STACK_POP(first, last, depth, limit); - continue; - } - - Td = T + depth; - if (limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } - if (limit < 0) - { - for (a = first + 1, v = Td[PA[ref first]]; a < last; ++a) - { - if ((x = Td[PA[ref a]]) != v) - { - if (1 < (a - first)) { break; } - v = x; - first = a; - } - } - if (Td[PA[ref first] - 1] < v) - { - first = ss_partition(PA, first, a, depth); - } - if ((a - first) <= (last - a)) - { - if (1 < (a - first)) - { - STACK_PUSH(a, last, depth, -1); - last = a, depth += 1, limit = ss_ilg(a - first); - } - else - { - first = a, limit = -1; - } - } - else - { - if (1 < (last - a)) - { - STACK_PUSH(first, a, depth + 1, ss_ilg(a - 
first)); - first = a, limit = -1; - } - else - { - last = a, depth += 1, limit = ss_ilg(a - first); - } - } - continue; - } - - /* choose pivot */ - a = ss_pivot(Td, PA, first, last); - v = Td[PA[ref a]]; - SWAP(ref first, ref a); - - /* partition */ - for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } - if (((a = b) < last) && (x < v)) - { - for (; (++b < last) && ((x = Td[PA[*b]]) <= v);) - { - if (x == v) { SWAP(*b, *a); ++a; } - } - } - for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } - if ((b < (d = c)) && (x > v)) - { - for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) - { - if (x == v) { SWAP(*c, *d); --d; } - } - } - for (; b < c;) - { - SWAP(*b, *c); - for (; (++b < c) && ((x = Td[PA[*b]]) <= v);) - { - if (x == v) { SWAP(*b, *a); ++a; } - } - for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) - { - if (x == v) { SWAP(*c, *d); --d; } - } - } - - if (a <= d) - { - c = b - 1; - - if ((s = a - first) > (t = b - a)) { s = t; } - for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - if ((s = d - c) > (t = last - d - 1)) { s = t; } - for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } - - a = first + (b - a), c = last - (d - c); - b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); - - if ((a - first) <= (last - c)) - { - if ((last - c) <= (c - b)) - { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(c, last, depth, limit); - last = a; - } - else if ((a - first) <= (c - b)) - { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } - else - { - STACK_PUSH(c, last, depth, limit); - STACK_PUSH(first, a, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); - } - } - else - { - if ((a - first) <= (c - b)) - { - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - STACK_PUSH(first, a, depth, limit); - first = c; - } - else if ((last - c) <= (c - b)) - { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } - else - { - STACK_PUSH(first, a, depth, limit); - STACK_PUSH(c, last, depth, limit); - first = b, last = c, depth += 1, limit = ss_ilg(c - b); - } - } - } - else - { - limit += 1; - if (Td[PA[*first] - 1] < v) - { - first = ss_partition(PA, first, last, depth); - limit = ss_ilg(last - first); - } - depth += 1; - } - } - } - - /*---------------------------------------------------------------------------*/ - -#if SS_BLOCKSIZE != 0 - -static INLINE -void -ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { - saidx_t t; - for(; 0 < n; --n, ++a, ++b) { - t = *a, *a = *b, *b = t; - } -} - -static INLINE -void -ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { - saidx_t *a, *b, t; - saidx_t l, r; - l = middle - first, r = last - middle; - for(; (0 < l) && (0 < r);) { - if(l == r) { ss_blockswap(first, middle, l); break; } - if(l < r) { - a = last - 1, b = middle - 1; - t = *a; - do { - *a-- = *b, *b-- = *a; - if(b < first) { - *a = t; - last = a; - if((r -= l + 1) <= l) { break; } - a -= 1, b = middle - 1; - t = *a; - } - } while(1); - } else { - a = first, b = middle; - t = *a; - do { - *a++ = *b, *b++ = *a; - if(last <= b) { - *a = t; - first = a + 1; - if((l -= r + 1) <= r) { break; } - a += 1, 
b = middle; - t = *a; - } - } while(1); - } - } -} - - -/*---------------------------------------------------------------------------*/ - -static -void -ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t depth) { - const saidx_t *p; - saidx_t *a, *b; - saidx_t len, half; - saint_t q, r; - saint_t x; - - for(;;) { - if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } - else { x = 0; p = PA + *(last - 1); } - for(a = first, len = middle - first, half = len >> 1, r = -1; - 0 < len; - len = half, half >>= 1) { - b = a + half; - q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); - if(q < 0) { - a = b + 1; - half -= (len & 1) ^ 1; - } else { - r = q; - } - } - if(a < middle) { - if(r == 0) { *a = ~*a; } - ss_rotate(a, middle, last); - last -= middle - a; - middle = a; - if(first == middle) { break; } - } - --last; - if(x != 0) { while(*--last < 0) { } } - if(middle == last) { break; } - } -} - - -/*---------------------------------------------------------------------------*/ - -/* Merge-forward with internal buffer. 
*/ -static -void -ss_mergeforward(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t depth) { - saidx_t *a, *b, *c, *bufend; - saidx_t t; - saint_t r; - - bufend = buf + (middle - first) - 1; - ss_blockswap(buf, first, middle - first); - - for(t = *(a = first), b = buf, c = middle;;) { - r = ss_compare(T, PA + *b, PA + *c, depth); - if(r < 0) { - do { - *a++ = *b; - if(bufend <= b) { *bufend = t; return; } - *b++ = *a; - } while(*b < 0); - } else if(r > 0) { - do { - *a++ = *c, *c++ = *a; - if(last <= c) { - while(b < bufend) { *a++ = *b, *b++ = *a; } - *a = *b, *b = t; - return; - } - } while(*c < 0); - } else { - *c = ~*c; - do { - *a++ = *b; - if(bufend <= b) { *bufend = t; return; } - *b++ = *a; - } while(*b < 0); - - do { - *a++ = *c, *c++ = *a; - if(last <= c) { - while(b < bufend) { *a++ = *b, *b++ = *a; } - *a = *b, *b = t; - return; - } - } while(*c < 0); - } - } -} - -/* Merge-backward with internal buffer. */ -static -void -ss_mergebackward(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t depth) { - const saidx_t *p1, *p2; - saidx_t *a, *b, *c, *bufend; - saidx_t t; - saint_t r; - saint_t x; - - bufend = buf + (last - middle) - 1; - ss_blockswap(buf, middle, last - middle); - - x = 0; - if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } - else { p1 = PA + *bufend; } - if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } - else { p2 = PA + *(middle - 1); } - for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { - r = ss_compare(T, p1, p2, depth); - if(0 < r) { - if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } - *a-- = *b; - if(b <= buf) { *buf = t; break; } - *b-- = *a; - if(*b < 0) { p1 = PA + ~*b; x |= 1; } - else { p1 = PA + *b; } - } else if(r < 0) { - if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } - *a-- = *c, *c-- = *a; - if(c < first) { - while(buf < b) { *a-- = *b, *b-- = *a; } - 
*a = *b, *b = t; - break; - } - if(*c < 0) { p2 = PA + ~*c; x |= 2; } - else { p2 = PA + *c; } - } else { - if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } - *a-- = ~*b; - if(b <= buf) { *buf = t; break; } - *b-- = *a; - if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } - *a-- = *c, *c-- = *a; - if(c < first) { - while(buf < b) { *a-- = *b, *b-- = *a; } - *a = *b, *b = t; - break; - } - if(*b < 0) { p1 = PA + ~*b; x |= 1; } - else { p1 = PA + *b; } - if(*c < 0) { p2 = PA + ~*c; x |= 2; } - else { p2 = PA + *c; } - } - } -} - -/* D&C based merge. */ -static -void -ss_swapmerge(const sauchar_t *T, const saidx_t *PA, - saidx_t *first, saidx_t *middle, saidx_t *last, - saidx_t *buf, saidx_t bufsize, saidx_t depth) { -#define STACK_SIZE SS_SMERGE_STACKSIZE -#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) -#define MERGE_CHECK(a, b, c)\ - do {\ - if(((c) & 1) ||\ - (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ - *(a) = ~*(a);\ - }\ - if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ - *(b) = ~*(b);\ - }\ - } while(0) - struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; - saidx_t *l, *r, *lm, *rm; - saidx_t m, len, half; - saint_t ssize; - saint_t check, next; - - for(check = 0, ssize = 0;;) { - if((last - middle) <= bufsize) { - if((first < middle) && (middle < last)) { - ss_mergebackward(T, PA, first, middle, last, buf, depth); - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - continue; - } - - if((middle - first) <= bufsize) { - if(first < middle) { - ss_mergeforward(T, PA, first, middle, last, buf, depth); - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - continue; - } - - for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; - 0 < len; - len = half, half >>= 1) { - if(ss_compare(T, PA + GETIDX(*(middle + m + half)), - PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { - m += 
half + 1; - half -= (len & 1) ^ 1; - } - } - - if(0 < m) { - lm = middle - m, rm = middle + m; - ss_blockswap(lm, middle, m); - l = r = middle, next = 0; - if(rm < last) { - if(*rm < 0) { - *rm = ~*rm; - if(first < lm) { for(; *--l < 0;) { } next |= 4; } - next |= 1; - } else if(first < lm) { - for(; *r < 0; ++r) { } - next |= 2; - } - } - - if((l - first) <= (last - r)) { - STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); - middle = lm, last = l, check = (check & 3) | (next & 4); - } else { - if((next & 2) && (r == middle)) { next ^= 6; } - STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); - first = r, middle = rm, check = (next & 3) | (check & 4); - } - } else { - if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { - *middle = ~*middle; - } - MERGE_CHECK(first, last, check); - STACK_POP(first, middle, last, check); - } - } -#undef STACK_SIZE -} - -#endif /* SS_BLOCKSIZE != 0 */ - - - /*---------------------------------------------------------------------------*/ - - /*- Function -*/ - - /* Substring sort */ - void sssort(ReadOnlySpan T, ReadOnlySpan PA, - saidx_t first, saidx_t last, - Span buf, saidx_t bufsize, - saidx_t depth, saidx_t n, bool lastsuffix) - { - ref saidx_t a; - ref saidx_t b, middle, curbuf; - saidx_t j, k, curbufsize, limit; - saidx_t i; - - if (lastsuffix) { ++first; } - - if ((bufsize < SS_BLOCKSIZE) && - (bufsize < (last - first)) && - (bufsize < (limit = ss_isqrt(last - first)))) - { - if (SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } - buf = middle = last - limit, bufsize = limit; - } - else - { - middle = last, limit = 0; - } - for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) - { - ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); - curbufsize = last - (a + SS_BLOCKSIZE); - curbuf = a + SS_BLOCKSIZE; - if (curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } - for (b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) - { - ss_swapmerge(T, PA, b - k, b, b + k, 
curbuf, curbufsize, depth); - } - } - ss_mintrosort(T, PA, a, middle, depth); - for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) - { - if (i & 1) - { - ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); - a -= k; - } - } - if (limit != 0) - { - ss_mintrosort(T, PA, middle, last, depth); - ss_inplacemerge(T, PA, first, middle, last, depth); - } - - if (lastsuffix) - { - /* Insert last type B* suffix. */ - Span PAi = stackalloc saidx_t[2]; - PAi[0] = PA[*(first - 1)]; - PAi[1] = n - 2; - for (a = first, i = *(first - 1); - (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); - ++a) - { - *(a - 1) = *a; - } - *(a - 1) = i; - } - } - - } -} From 0bce40671b2f3d582705721c3e01bcd67d81d151 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 03:40:28 -0500 Subject: [PATCH 171/325] Use public API in LibDivSufSortTests --- .../LibDivSufSortTests.cs | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index 17ad8a6..fe10a95 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -83,10 +83,9 @@ public void CheckShruggy() ReadOnlySpan T = Encoding.UTF8.GetBytes(shruggy); - using var ownedSA = SpanOwner.Allocate(T.Length, AllocationMode.Clear); - var SA = ownedSA.Span; - - DivSufSort.divsufsort(T, SA); + var ldss = new LibDivSufSort(); + using var owner = ldss.Sort(T); + var SA = owner.Memory.Span; Verify(T, SA); } @@ -120,10 +119,9 @@ public void CheckFile(string path) var bytes = File.ReadAllBytes(path); ReadOnlySpan T = bytes; - using var ownedSA = SpanOwner.Allocate(T.Length, AllocationMode.Clear); - var SA = ownedSA.Span; - - DivSufSort.divsufsort(T, SA); + var ldss = new LibDivSufSort(); + using var owner = ldss.Sort(T); + var SA = owner.Memory.Span; 
Verify(T, SA); FinalizeCrosscheck(); @@ -140,10 +138,11 @@ public void CheckFile(string path) [InlineData(51)] [InlineData(0x1000)] [InlineData(0x8000)] - //[InlineData(0x80000)] - //[InlineData(0x800000)] + [InlineData(0x8000 - 1)] public void CheckRandomBuffer(int size) { + var ldss = new LibDivSufSort(); + #if NET461 var ownedT = ArrayPool.Shared.Rent(size); try @@ -160,8 +159,7 @@ public void CheckRandomBuffer(int size) using (var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear)) { var SA = ownedSA.Span; - - DivSufSort.divsufsort(T, SA); + ldss.Sort(T, SA); Verify(T, SA); } } From 3c19284cd912264de64be4272c9b0173db342368 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 03:40:44 -0500 Subject: [PATCH 172/325] Make DivSufSort internal in LibDivSufSort --- src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 39229e2..76b5302 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -7,7 +7,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort; -public static class DivSufSort +internal static class DivSufSort { private const int ALPHABET_SIZE = byte.MaxValue + 1; private const int BUCKET_A_SIZE = ALPHABET_SIZE; From 3c574d020e3c7c2dc01574d27ddd8861fc56afb0 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 03:41:39 -0500 Subject: [PATCH 173/325] Bump LibDivSufSort package version to 0.3.0 --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index d48b570..e1e903d 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -5,7 +5,7 @@ DeltaQ jzebedee true - 0.2.0 + 0.3.0 From 8fb8acba104da69e70f5711fd0480c3758444907 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 04:32:24 -0500 Subject: [PATCH 174/325] Update CLI stub --- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 6 +-- src/DeltaQ.CLI/Program.cs | 68 +++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index c602d2b..75c0a60 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -2,16 +2,16 @@ Exe - net6.0;netstandard2.0 + net6.0 DeltaQ jzebedee - true dq + false - + diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CLI/Program.cs index b9e0bf6..b6248a8 100644 --- a/src/DeltaQ.CLI/Program.cs +++ b/src/DeltaQ.CLI/Program.cs @@ -1,24 +1,54 @@ using System; -using System.IO; +using Microsoft.Extensions.CommandLineUtils; -namespace DeltaQ.CLI +const string HelpOptions = "-?|-h|--help"; + +// Description of the application +var app = new CommandLineApplication() +{ + Name = "dq", + FullName = "DeltaQ", + Description = "DeltaQ binary diff and patch tool" +}; + +app.HelpOption(HelpOptions); +app.VersionOption("--version", "0.1.0"); + +//No args +app.OnExecute(() => +{ + app.ShowRootCommandFullNameAndVersion(); + app.ShowHint(); + return 0; +}); + +app.Command("diff", command => { - class 
Program + command.Description = "Diff two files"; + command.HelpOption(HelpOptions); + + var oldFileArg = command.Argument("[oldfile]", ""); + var newFileArg = command.Argument("[newfile]", ""); + var deltaFileArg = command.Argument("[deltafile]", ""); + + command.OnExecute(() => { - static void Main(string[] args) - { - var f1 = args[1]; - var f2 = args[2]; - var o = args[3]; - switch(args[0]) - { - case "diff": - BsDiff.BsDiff.Create(File.ReadAllBytes(f1), File.ReadAllBytes(f2), File.OpenWrite(o)); - break; - case "patch": - BsDiff.BsPatch.Apply(File.ReadAllBytes(f1), File.ReadAllBytes(f2), File.OpenWrite(o)); - break; - } - } - } + var oldFile = oldFileArg.Value; + var newFile = newFileArg.Value; + var deltaFile = deltaFileArg.Value; + Console.WriteLine($"Diff: old:{oldFile} new:{newFile} delta:{deltaFile}"); + return 0; + }); +}); + +try +{ + return app.Execute(args); } +catch (CommandParsingException ex) +{ + Console.Error.WriteLine(ex.Message); + Console.Error.WriteLine(); + app.ShowHelp(); + return -1; +} \ No newline at end of file From 612ba08044a7ae58723b693faa1ddaff01bdf8ed Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 05:29:45 -0500 Subject: [PATCH 175/325] Fuzzing --- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 7 +++++ src/DeltaQ.CLI/Program.cs | 54 +++++++++++++++++++++++++++++++- src/DeltaQ.CLI/fuzz.sh | 1 + 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 src/DeltaQ.CLI/fuzz.sh diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index 75c0a60..8ece8d4 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -12,6 +12,13 @@ + + + + + + ..\..\pkg\DeltaQ.SuffixSorting.LibDivSufSort.0.3.0\lib\net6.0\DeltaQ.SuffixSorting.LibDivSufSort.dll + diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CLI/Program.cs index b6248a8..81aadbe 100644 --- a/src/DeltaQ.CLI/Program.cs +++ b/src/DeltaQ.CLI/Program.cs @@ -1,5 +1,29 @@ using System; +using System.IO; +using System.Text; +using DeltaQ.SuffixSorting.LibDivSufSort; using Microsoft.Extensions.CommandLineUtils; +using SharpFuzz; + +static void Verify(ReadOnlySpan input, ReadOnlySpan sa) +{ + //ref byte suff(int index) => ref input[sa[index]]; + for (int i = 0; i < input.Length - 1; i++) + { + //if(!(suff(i) < suff(i + 1))) + var cur = input[sa[i]..]; + var next = input[sa[i + 1]..]; + var cmp = cur.SequenceCompareTo(next); + if (!(cmp < 0)) + //if (!(cur < next)) + { + var ex = new InvalidOperationException("Input was unsorted"); + ex.Data["i"] = i; + ex.Data["j"] = i + 1; + throw ex; + } + } +} const string HelpOptions = "-?|-h|--help"; @@ -22,6 +46,31 @@ return 0; }); +app.Command("fuzz", command => +{ + command.Description = "Fuzzit"; + command.HelpOption(HelpOptions); + + command.OnExecute(() => + { + Fuzzer.Run((Stream s) => + { + using var ms = new MemoryStream(); + s.CopyTo(ms); + + if(!ms.TryGetBuffer(out var T)) + { + throw new InvalidOperationException(); + } + + var ldss = new LibDivSufSort(); + using var ownedSA = ldss.Sort(T); + Verify(T, ownedSA.Memory.Span); + }); + return 0; + }); +}); + app.Command("diff", command => { 
command.Description = "Diff two files"; @@ -30,13 +79,16 @@ var oldFileArg = command.Argument("[oldfile]", ""); var newFileArg = command.Argument("[newfile]", ""); var deltaFileArg = command.Argument("[deltafile]", ""); + var algoArg = command.Option("-a|--algorithm ", "", CommandOptionType.SingleValue); command.OnExecute(() => { var oldFile = oldFileArg.Value; var newFile = newFileArg.Value; var deltaFile = deltaFileArg.Value; - Console.WriteLine($"Diff: old:{oldFile} new:{newFile} delta:{deltaFile}"); + var algo = algoArg.Value(); + DeltaQ.BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), new LibDivSufSort()); + Console.WriteLine($"Diff [algo:{algo}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); return 0; }); }); diff --git a/src/DeltaQ.CLI/fuzz.sh b/src/DeltaQ.CLI/fuzz.sh new file mode 100644 index 0000000..54270da --- /dev/null +++ b/src/DeltaQ.CLI/fuzz.sh @@ -0,0 +1 @@ +afl-fuzz -i ../../test/assets -o findings -t 5000 -m 10000 -- dotnet bin/Release/net6.0/dq.dll fuzz From 5a20478dc806723529304c134be34882ad789228 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 06:05:31 -0500 Subject: [PATCH 176/325] Add initial test assets --- ...rash-04dc74e45e66386a3312a5a5825b020bcadc175c | Bin 0 -> 4765 bytes ...rash-16356e91966a827f79e49167170194fc3088a7ab | Bin 0 -> 4707 bytes ...rash-4f8c31dec8c3678a07e0fbacc6bd69e7cc9037fb | 1 + ...rash-8765ef2258178ca027876eab83e01d6d58db9ca0 | 1 + ...rash-90b42d1c55ee90a8b004fb9db1853429ceb4c4ba | 1 + ...rash-c792e788de61771b6cd65c1aa5670c62e57a33c4 | 1 + ...rash-ce407adf7cf638d3fa89b5637a94355d7d658872 | 1 + ...rash-cf8673530fdca659e0ddf070b4718b9c0bb504ec | Bin 0 -> 2798 bytes test/assets/fuzz1 | 1 + test/assets/fuzz2 | 1 + test/assets/fuzz3 | Bin 0 -> 2421 bytes 11 files changed, 7 insertions(+) create mode 100644 test/assets/crash-04dc74e45e66386a3312a5a5825b020bcadc175c create mode 100644 test/assets/crash-16356e91966a827f79e49167170194fc3088a7ab create mode 100644 test/assets/crash-4f8c31dec8c3678a07e0fbacc6bd69e7cc9037fb create mode 100644 test/assets/crash-8765ef2258178ca027876eab83e01d6d58db9ca0 create mode 100644 test/assets/crash-90b42d1c55ee90a8b004fb9db1853429ceb4c4ba create mode 100644 test/assets/crash-c792e788de61771b6cd65c1aa5670c62e57a33c4 create mode 100644 test/assets/crash-ce407adf7cf638d3fa89b5637a94355d7d658872 create mode 100644 test/assets/crash-cf8673530fdca659e0ddf070b4718b9c0bb504ec create mode 100644 test/assets/fuzz1 create mode 100644 test/assets/fuzz2 create mode 100644 test/assets/fuzz3 diff --git a/test/assets/crash-04dc74e45e66386a3312a5a5825b020bcadc175c b/test/assets/crash-04dc74e45e66386a3312a5a5825b020bcadc175c new file mode 100644 index 0000000000000000000000000000000000000000..81210828e5eebc4adce67966cedc3afd7509ec76 GIT binary patch literal 4765 zcmeHJTW$g|3`FV|Qg6~5a0gD)yY(hLM^&X#e-LS$tn--eUI@dkL0 zAMq4UF^1z;JfA}h@iV>qHIy)Y*3E*zI9+}vsRVtB@r`C_0&8e@c)i7fxVZHS@9$Mn zvBd7Sklt>u?k;tgR1@AXH-q+J7q5&ti2E~-5KgPaMv*o?q_<4n-81Y;CXG1C0+Ta% zM!OKiHlALkq|wlK!pwO3qPJnK`*$2XGu$v^M5yB**W_%m5Q*(}Z3LRZ89bvJg4pIj 
zWkc+S9&w=Tsy0Fm(-j#dCR0~ZTakV|Jm*(KOc~*HUf1ygJ)Dj5ZfFm@3dx2cm5Cc*`SvAR@5x<-Os`PvpHBd Z1h)o)WB84=5y;^To>2`!Jmx$degKF^`5FKK literal 0 HcmV?d00001 diff --git a/test/assets/crash-16356e91966a827f79e49167170194fc3088a7ab b/test/assets/crash-16356e91966a827f79e49167170194fc3088a7ab new file mode 100644 index 0000000000000000000000000000000000000000..66a9b15a2f759d29d6c0cd45415f48045529e411 GIT binary patch literal 4707 zcmeHIL2|<|2<(5_-{o)pRSy|mJ)=k_Lh&L5OdaLo_+TN}WkJZvKVS4qxjpx?+SsDq z{0kc`IOXEFsm|h_B0J4&RTa&jk>)uf6=kT@2}5_(V*T) zd(2Ygh#YzCjCMrVPb(dvdm2ZLh+Ix&B->HACAA3(HC(+DCkl#os1|}<;^J6Wloc7t zW;&VRxP$PccehR+^vY>xH6sQ>p=gr=;^)F_wi>bY#zYg&3v3OIvSMS%9`v>)IKB~n z^zN2_02%CuT>qcLqrzkPZ^oj}BsVPTkWuYQ)m7K})Q@-*6{>oOl@mG^#7PJEGeODjhX|7QSU~vl7y)r+is;t)`*4xYIw}gsVa+(0TW4J!2f6lR8V)()HK|=Uc9(R@8CvN)^FCu|1>qEAL!IH8 F`UXNqF!}%h literal 0 HcmV?d00001 diff --git a/test/assets/fuzz1 b/test/assets/fuzz1 new file mode 100644 index 0000000..ff41057 --- /dev/null +++ b/test/assets/fuzz1 @@ -0,0 +1 @@ +ýþýÿýÿýÿýÿ&ÿýÿýÿÿ \ No newline at end of file diff --git a/test/assets/fuzz2 b/test/assets/fuzz2 new file mode 100644 index 0000000..3a016ab --- /dev/null +++ b/test/assets/fuzz2 @@ -0,0 +1 @@ +ýÿ}ÿýÿùÿýÿýÿýÿýÿ# \ No newline at end of file diff --git a/test/assets/fuzz3 b/test/assets/fuzz3 new file mode 100644 index 0000000000000000000000000000000000000000..26bd3e06ff501036c5415d038438e182f653a1be GIT binary patch literal 2421 zcmd5)(GkKh2-N``!(}|3!Z|a5BY9^UOH-OcpdpwK+q(uHkH@2zct7PQJyV?DH+(Nk z2x(kCyL94UsTrf2#9PO(X_;BoDuD{%IzeI}DdB^_3)q;H05!JC3?TOVxmUHvAC;;W zWUJ?TPnpP2NIOR{1+iNM+l|gPDts&wC>b)=5U2mY;>0%dPZz4!)sZZu)vGZYal Date: Wed, 15 Dec 2021 06:10:32 -0500 Subject: [PATCH 177/325] Add fuzz: crash-aoob-ss_mintrosort The `Td[PA[SA[$x]] - 1]` access can go below 0 when Td is a span slice --- test/assets/crash-aoob-ss_mintrosort | Bin 0 -> 68 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/assets/crash-aoob-ss_mintrosort diff --git 
a/test/assets/crash-aoob-ss_mintrosort b/test/assets/crash-aoob-ss_mintrosort new file mode 100644 index 0000000000000000000000000000000000000000..5dc18b9642bad4a30accc2247a1e270889efd363 GIT binary patch literal 68 zcmezSzxMyt|34Xj{r~&-f9?I>|9?LG`~UBM28RFtArwgXJ5U(Ng9szBAp*((iYhg< literal 0 HcmV?d00001 From 7dc535cc6f08c749de00081ff817033f4171ab8a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 06:11:06 -0500 Subject: [PATCH 178/325] Add crash-aoob-ss_mintrosort test --- .../LibDivSufSortTests.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index fe10a95..aa97187 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -106,12 +106,14 @@ private static IEnumerable FuzzFilesInner yield return "crash-4f8c31dec8c3678a07e0fbacc6bd69e7cc9037fb"; yield return "crash-16356e91966a827f79e49167170194fc3088a7ab"; //Crosscheck untested: - //yield return prefix + ""; + yield return "crash-aoob-ss_mintrosort"; + //yield return ""; } } [Theory] [MemberData(nameof(FuzzFiles))] + //[InlineData(@"")] public void CheckFile(string path) { SetupCrosscheckListeners(); From 28943f9b63e78d0f1e3d80fd6a92068ab0f64cac Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 06:13:14 -0500 Subject: [PATCH 179/325] Add first fix for crash-aoob-ss_mintrosort --- .../RsDivSufSort.cs | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 76b5302..dbf7c4e 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1493,31 +1493,50 @@ public SpanOffsetAccessor(Span span, int offset) public ref T this[int index] => ref _span[_offset + index]; } + private ref struct ReadOnlySpanOffsetAccessor + { + private readonly ReadOnlySpan _span; + private readonly int _offset; + + public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) + { + _span = span; + _offset = offset; + } + + public ref readonly T this[int index] => ref _span[_offset + index]; + } + private ref struct TdPAStarAccessor { + private readonly ReadOnlySpanOffsetAccessor _TO; private readonly ReadOnlySpan _SA; private readonly ReadOnlySpan _PA; private readonly IntAccessor _TD; public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) { + _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); + _SA = SA; _PA = SA[partitionOffset..]; _TD = new(T[tdOffset..]); } public readonly int this[int index] => _TD[_PA[_SA[index]]]; + + public readonly int AsOffset(int index) => _TO[index]; } /// /// Multikey introsort for medium size groups /// - private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, /*ref*/ SAPtr first, /*ref*/ SAPtr last, /*ref*/ Idx depth) + private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, SAPtr first, SAPtr last, Idx depth) { - //PA($x) => - var PA = SA[partitionOffset..];//new SpanOffsetAccessor(SA, PA); + var PA = SA[partitionOffset..]; - var stack = new SsStack(stackalloc SsStackItem[SS_STACK_SIZE]); + using var stackOwner = 
SpanOwner.Allocate(SS_STACK_SIZE); + var stack = new SsStack(stackOwner.Span); SAPtr a; SAPtr b; @@ -1550,13 +1569,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf continue; } - //Td!($x) => T[Td + $x] var tdOffset = depth; - var Td = T.span[tdOffset..]; - - //TdPAStar!($x) => Td!(PA!(SA[$x])) - //TdPAStar!($x) => T[Td + SA[PA + SA[$x]]] - //var TdPAStar = Td[PA[SA[$x]]]; var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); /*readonly*/ @@ -1592,7 +1605,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf a += 1; } - if (Td[PA[SA[first]] - 1] < v) + if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) { first = ss_partition(SA, partitionOffset, first, a, depth); } @@ -1802,7 +1815,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf a = first + (b - a); c = last - (d - c); - b = v <= Td[PA[SA[a]] - 1] ? a : ss_partition(SA, partitionOffset, a, c, depth); + b = v <= TdPAStar.AsOffset(PA[SA[a]] - 1) ? a : ss_partition(SA, partitionOffset, a, c, depth); if ((a - first) <= (last - c)) { @@ -1856,7 +1869,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf else { limit += 1; - if (Td[PA[SA[first]] - 1] < v) + if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) { first = ss_partition(SA, partitionOffset, first, last, depth); limit = ss_ilg(last - first); From e78cea84167e1a3cbf6f49e48134cf1e3aac8627 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 07:21:27 -0500 Subject: [PATCH 180/325] Add raw rust to ss_heapsort --- .../RsDivSufSort.cs | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index dbf7c4e..8a6c9c9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2127,9 +2127,58 @@ private static int ss_ilg(int n) } } - private static void ss_heapsort(IntAccessor t, int td, Span sA, int pA, int first, object p) + /// Simple top-down heapsort. + private static void ss_heapsort(IntAccessor T, Idx Td, Span SA_top, SAPtr PA, SAPtr first, Idx size) { - throw new NotImplementedException(); + let mut i: Idx; + let mut m = size; + let mut t: Idx; + + macro_rules! Td { + ($x: expr) => { + T[Td + $x] + }; + }; + macro_rules! PA { + ($x: expr) => { + SA_top[PA + $x] + }; + }; + macro_rules! SA { + ($x: expr) => { + SA_top[first + $x] + }; + } + macro_rules! SA_swap { + ($x: expr, $y: expr) => { + SA_top.swap($x + first, $y + first) + }; + } + + if (size % 2) == 0 { + m -= 1; + if Td!(PA!(SA!(m / 2))) < Td!(PA!(SA!(m))) { + SA_swap!(SAPtr(m), SAPtr(m / 2)); + } + } + + // LADY + for i in (0..(m / 2)).rev() { + ss_fixdown(T, Td, PA, SA_top, first, i, m); + } + + if (size % 2) == 0 { + SA_swap!(SAPtr(0), SAPtr(m)); + ss_fixdown(T, Td, PA, SA_top, first, 0, m); + } + + // TRUMPET + for i in (1..m).rev() { + t = SA!(0); + SA!(0) = SA!(i); + ss_fixdown(T, Td, PA, SA_top, first, 0, i); + SA!(i) = t; + } } private static readonly Idx[] sqq_table_array = new[] From c222df25f5e31b18693545e5bd8673f03aef593a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 07:31:43 -0500 Subject: [PATCH 181/325] Port ss_heapsort and add ss_fixdown stub --- .../RsDivSufSort.cs | 95 +++++++++++-------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 8a6c9c9..2b62b57 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2128,59 +2128,78 @@ private static int ss_ilg(int n) } /// Simple top-down heapsort. - private static void ss_heapsort(IntAccessor T, Idx Td, Span SA_top, SAPtr PA, SAPtr first, Idx size) + private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) { - let mut i: Idx; - let mut m = size; - let mut t: Idx; - - macro_rules! Td { - ($x: expr) => { - T[Td + $x] - }; - }; - macro_rules! PA { - ($x: expr) => { - SA_top[PA + $x] - }; - }; - macro_rules! SA { - ($x: expr) => { - SA_top[first + $x] - }; - } - macro_rules! SA_swap { - ($x: expr, $y: expr) => { - SA_top.swap($x + first, $y + first) - }; - } + Idx i; + var m = size; + Idx t; - if (size % 2) == 0 { + var Td = new IntAccessor(T.span[tdOffset..]); + //macro_rules! Td { + // ($x: expr) => { + // T[Td + $x] + // }; + //}; + + var PA = SA_top[paOffset..]; + //macro_rules! PA { + // ($x: expr) => { + // SA_top[PA + $x] + // }; + //}; + + var SA = SA_top[first..]; + //macro_rules! SA { + // ($x: expr) => { + // SA_top[first + $x] + // }; + //} + + + //macro_rules! 
SA_swap { + // ($x: expr, $y: expr) => { + // SA_top.swap($x + first, $y + first) + // }; + //} + + if ((size % 2) == 0) + { m -= 1; - if Td!(PA!(SA!(m / 2))) < Td!(PA!(SA!(m))) { - SA_swap!(SAPtr(m), SAPtr(m / 2)); + if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) + { + SA.Swap(m, m / 2); } } // LADY - for i in (0..(m / 2)).rev() { - ss_fixdown(T, Td, PA, SA_top, first, i, m); + //TODO: checkme + for (i = (m / 2) - 1; i >= 0; i--) + { + ss_fixdown(T, tdOffset, paOffset, SA_top, first, i, m); } - if (size % 2) == 0 { - SA_swap!(SAPtr(0), SAPtr(m)); - ss_fixdown(T, Td, PA, SA_top, first, 0, m); + if ((size % 2) == 0) + { + SA.Swap(0, m); + ss_fixdown(T, tdOffset, paOffset, SA_top, first, 0, m); } // TRUMPET - for i in (1..m).rev() { - t = SA!(0); - SA!(0) = SA!(i); - ss_fixdown(T, Td, PA, SA_top, first, 0, i); - SA!(i) = t; + //TODO: checkme + for (i = m - 1; i > 0; i--) + { + t = SA[0]; + SA[0] = SA[i]; + ss_fixdown(T, tdOffset, paOffset, SA_top, first, 0, i); + SA[i] = t; } } + private static void ss_fixdown(IntAccessor t, int td, int pA, Span sA_top, int first, int v, int m) + { + throw new NotImplementedException(); + } + private static readonly Idx[] sqq_table_array = new[] { 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, From 691e72d95d9e7e496d27ca966e8be9710b6311ec Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 07:33:52 -0500 Subject: [PATCH 182/325] Add raw rust to ss_fixdown --- .../RsDivSufSort.cs | 80 +++++++++++++------ 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 2b62b57..537cba0 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2135,32 +2135,8 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S Idx t; var Td = new IntAccessor(T.span[tdOffset..]); - //macro_rules! 
Td { - // ($x: expr) => { - // T[Td + $x] - // }; - //}; - var PA = SA_top[paOffset..]; - //macro_rules! PA { - // ($x: expr) => { - // SA_top[PA + $x] - // }; - //}; - var SA = SA_top[first..]; - //macro_rules! SA { - // ($x: expr) => { - // SA_top[first + $x] - // }; - //} - - - //macro_rules! SA_swap { - // ($x: expr, $y: expr) => { - // SA_top.swap($x + first, $y + first) - // }; - //} if ((size % 2) == 0) { @@ -2195,9 +2171,61 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S } } - private static void ss_fixdown(IntAccessor t, int td, int pA, Span sA_top, int first, int v, int m) + private static void ss_fixdown(IntAccessor T, Idx Td, SAPtr PA, Span SA_top, SAPtr first, Idx i, Idx size) { - throw new NotImplementedException(); + let mut j: Idx; + let mut v: Idx; + let mut c: Idx; + let mut d: Idx; + let mut e: Idx; + let mut k: Idx; + + macro_rules! Td { + ($x: expr) => { + T.get(Td + $x) + }; + } + macro_rules! PA { + ($x: expr) => { + SA_top[PA + $x] + }; + } + macro_rules! SA { + ($x: expr) => { + SA_top[first + $x] + }; + } + + v = SA!(i); + c = Td!(PA!(v)); + + // BEAST + loop { + // cond + j = 2 * i + 1; + if !(j < size) { + break; + } + + // body + k = j; + j += 1; + + d = Td!(PA!(SA!(k))); + e = Td!(PA!(SA!(j))); + if (d < e) { + k = j; + d = e; + } + if (d <= c) { + break; + } + + // iter + SA!(i) = SA!(k); + i = k; + } + SA!(i) = v; } private static readonly Idx[] sqq_table_array = new[] From 8fa5fe7e71ab7f520c11116056ca0ad76081c462 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 07:38:04 -0500 Subject: [PATCH 183/325] Port and Spanify ss_fixdown --- .../RsDivSufSort.cs | 55 +++++++------------ 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 537cba0..8df4067 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -2151,13 +2151,13 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S //TODO: checkme for (i = (m / 2) - 1; i >= 0; i--) { - ss_fixdown(T, tdOffset, paOffset, SA_top, first, i, m); + ss_fixdown(Td, PA, SA, i, m); } if ((size % 2) == 0) { SA.Swap(0, m); - ss_fixdown(T, tdOffset, paOffset, SA_top, first, 0, m); + ss_fixdown(Td, PA, SA, 0, m); } // TRUMPET @@ -2166,44 +2166,25 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S { t = SA[0]; SA[0] = SA[i]; - ss_fixdown(T, tdOffset, paOffset, SA_top, first, 0, i); + ss_fixdown(Td, PA, SA, 0, i); SA[i] = t; } } - private static void ss_fixdown(IntAccessor T, Idx Td, SAPtr PA, Span SA_top, SAPtr first, Idx i, Idx size) + private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i, Idx size) { - let mut j: Idx; - let mut v: Idx; - let mut c: Idx; - let mut d: Idx; - let mut e: Idx; - let mut k: Idx; + Idx j, v, c, d, e, k; - macro_rules! Td { - ($x: expr) => { - T.get(Td + $x) - }; - } - macro_rules! PA { - ($x: expr) => { - SA_top[PA + $x] - }; - } - macro_rules! 
SA { - ($x: expr) => { - SA_top[first + $x] - }; - } - - v = SA!(i); - c = Td!(PA!(v)); + v = SA[i]; + c = Td[PA[v]]; // BEAST - loop { + while (true) + { // cond j = 2 * i + 1; - if !(j < size) { + if (!(j < size)) + { break; } @@ -2211,21 +2192,23 @@ private static void ss_fixdown(IntAccessor T, Idx Td, SAPtr PA, Span SA_top k = j; j += 1; - d = Td!(PA!(SA!(k))); - e = Td!(PA!(SA!(j))); - if (d < e) { + d = Td[PA[SA[k]]]; + e = Td[PA[SA[j]]]; + if (d < e) + { k = j; d = e; } - if (d <= c) { + if (d <= c) + { break; } // iter - SA!(i) = SA!(k); + SA[i] = SA[k]; i = k; } - SA!(i) = v; + SA[i] = v; } private static readonly Idx[] sqq_table_array = new[] From 8bc7f910d173b50e3d9edcc89511fb83f50cd680 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 07:40:55 -0500 Subject: [PATCH 184/325] Add raw rust to tr_partialcopy --- .../RsDivSufSort.cs | 81 ++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 8df4067..9312f6f 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3404,9 +3404,86 @@ private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr } } - private static void tr_partialcopy(int iSA, Span sA, int first, int a, int b, int last, int v) + private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { - throw new NotImplementedException(); + let mut c: SAPtr; + let mut d: SAPtr; + let mut e: SAPtr; + let mut s: Idx; + let mut v: Idx; + let mut rank: Idx; + let mut lastrank: Idx; + let mut newrank: Idx = -1; + + macro_rules! 
ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + + v = (b - 1).0; + lastrank = -1; + // JETHRO + c = first; + d = a - 1; + while c <= d { + s = SA[c] - depth; + if (0 <= s) && (ISA!(s) == v) { + d += 1; + SA[d] = s; + rank = ISA!(s + depth); + if lastrank != rank { + lastrank = rank; + newrank = d.0; + } + ISA!(s) = newrank; + } + + // iter (JETHRO) + c += 1; + } + + lastrank = -1; + // SCROOGE + e = d; + while first <= e { + rank = ISA![SA[e]]; + if lastrank != rank { + lastrank = rank; + newrank = e.0; + } + if newrank != rank { + { + let SA_e = SA[e]; + ISA!(SA_e) = newrank; + } + } + + // iter (SCROOGE) + e -= 1; + } + + lastrank = -1; + // DEWEY + c = last - 1; + e = d + 1; + d = b; + while e < d { + s = SA[c] - depth; + if (0 <= s) && (ISA!(s) == v) { + d -= 1; + SA[d] = s; + rank = ISA!(s + depth); + if lastrank != rank { + lastrank = rank; + newrank = d.0; + } + ISA!(s) = newrank; + } + + // iter (DEWEY) + c -= 1; + } } /// Tandem repeat copy From d02160852b0904999ffc3ccc0c17a191ccf37420 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 07:43:36 -0500 Subject: [PATCH 185/325] Begin porting tr_partialcopy --- .../RsDivSufSort.cs | 59 ++++++++++--------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 9312f6f..ea06a79 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3406,33 +3406,30 @@ private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { - let mut c: SAPtr; - let mut d: SAPtr; - let mut e: SAPtr; - let mut s: Idx; - let mut v: Idx; - let mut rank: Idx; - let mut lastrank: Idx; - let mut newrank: Idx = -1; - - macro_rules! 
ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } - - v = (b - 1).0; + SAPtr c, d, e; + Idx s, v, rank, lastrank, newrank = -1; + + //macro_rules! ISA { + // ($x: expr) => { + // SA[ISA + $x] + // }; + //} + + v = (b - 1); lastrank = -1; // JETHRO c = first; d = a - 1; - while c <= d { + while (c <= d) + { s = SA[c] - depth; - if (0 <= s) && (ISA!(s) == v) { + if ((0 <= s) && (ISA!(s) == v)) + { d += 1; SA[d] = s; rank = ISA!(s + depth); - if lastrank != rank { + if (lastrank != rank) + { lastrank = rank; newrank = d.0; } @@ -3446,15 +3443,18 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a lastrank = -1; // SCROOGE e = d; - while first <= e { + while (first <= e) + { rank = ISA![SA[e]]; - if lastrank != rank { + if (lastrank != rank) + { lastrank = rank; - newrank = e.0; + newrank = e; } - if newrank != rank { + if (newrank != rank) + { { - let SA_e = SA[e]; + var SA_e = SA[e]; ISA!(SA_e) = newrank; } } @@ -3468,15 +3468,18 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a c = last - 1; e = d + 1; d = b; - while e < d { + while (e < d) + { s = SA[c] - depth; - if (0 <= s) && (ISA!(s) == v) { + if ((0 <= s) && (ISA!(s) == v)) + { d -= 1; SA[d] = s; rank = ISA!(s + depth); - if lastrank != rank { + if (lastrank != rank) + { lastrank = rank; - newrank = d.0; + newrank = d; } ISA!(s) = newrank; } From c92cf9bca56c9e9cb5a00f7fba4f79f8be84e6f2 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 07:46:25 -0500 Subject: [PATCH 186/325] Port tr_partialcopy --- .../RsDivSufSort.cs | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index ea06a79..f5a46ea 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -3404,16 +3404,12 @@ private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr } } - private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) + private static void tr_partialcopy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { SAPtr c, d, e; Idx s, v, rank, lastrank, newrank = -1; - //macro_rules! ISA { - // ($x: expr) => { - // SA[ISA + $x] - // }; - //} + Span ISA = SA[isaOffset..]; v = (b - 1); lastrank = -1; @@ -3423,17 +3419,17 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a while (c <= d) { s = SA[c] - depth; - if ((0 <= s) && (ISA!(s) == v)) + if ((0 <= s) && (ISA[s] == v)) { d += 1; SA[d] = s; - rank = ISA!(s + depth); + rank = ISA[s + depth]; if (lastrank != rank) { lastrank = rank; - newrank = d.0; + newrank = d; } - ISA!(s) = newrank; + ISA[s] = newrank; } // iter (JETHRO) @@ -3445,7 +3441,7 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a e = d; while (first <= e) { - rank = ISA![SA[e]]; + rank = ISA[SA[e]]; if (lastrank != rank) { lastrank = rank; @@ -3455,7 +3451,7 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a { { var SA_e = SA[e]; - ISA!(SA_e) = newrank; + ISA[SA_e] = newrank; } } @@ -3471,17 +3467,17 @@ private static void tr_partialcopy(SAPtr ISA, Span SA, SAPtr first, SAPtr a while (e < d) { s = SA[c] - depth; - if ((0 <= s) && (ISA!(s) == v)) + if ((0 <= s) && (ISA[s] == v)) { d -= 1; SA[d] = s; 
- rank = ISA!(s + depth); + rank = ISA[s + depth]; if (lastrank != rank) { lastrank = rank; newrank = d; } - ISA!(s) = newrank; + ISA[s] = newrank; } // iter (DEWEY) From c541223a7f81d95393d5c4c55bfac01f7e968dd8 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 17:43:22 -0500 Subject: [PATCH 187/325] Add length:2 fast path in LibDivSufSort --- .../RsDivSufSort.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index f5a46ea..6481428 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -25,12 +25,15 @@ public static void divsufsort(ReadOnlySpan T, Span SA) case 1: SA[0] = 0; return; - //case 2: - // if(T[0] < T[1]) - // { - // SA.copy - // } - // break; + case 2: + if(T[0] < T[1]) + { + (stackalloc[] { 0, 1 }).CopyTo(SA); + } else + { + (stackalloc[] { 1, 0 }).CopyTo(SA); + } + return; } var result = sort_typeBstar(new IntAccessor(T), SA); From 257559f28db48c575370839c463848bca3c796ff Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 18:12:12 -0500 Subject: [PATCH 188/325] Remove some old comments and space-wasters --- .../RsDivSufSort.cs | 76 ++++++------------- 1 file changed, 23 insertions(+), 53 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 6481428..6d61eb7 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -26,10 +26,11 @@ public static void divsufsort(ReadOnlySpan T, Span SA) SA[0] = 0; return; case 2: - if(T[0] < T[1]) + if (T[0] < T[1]) { (stackalloc[] { 0, 1 }).CopyTo(SA); - } else + } + else { (stackalloc[] { 1, 0 }).CopyTo(SA); } @@ -2216,23 +2217,23 @@ private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i private static readonly Idx[] sqq_table_array = new[] { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, - 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, - 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, - 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, - 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, - 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, - 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, - 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, - 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, - 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, - 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, - 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, - 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, - 239, 240, 
240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, - 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 - }; + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, + 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, + 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, + 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, + 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, + 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, + 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, + 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, + 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, + 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, + 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 + }; private static ReadOnlySpan sqq_table => sqq_table_array; /// @@ -2308,7 +2309,7 @@ private static int ss_isqrt(int x) 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 - }; + }; private static ReadOnlySpan lg_table => lg_table_array; private static int tr_ilg(int n) @@ -3250,25 +3251,8 @@ private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, Idx t; Span ISAd = SA_top[isadOffset..]; - //macro_rules! 
ISAd { - // ($x: expr) => { - // SA_top[ISAd + $x] - // }; - //} Span SA = SA_top[first..]; - //macro_rules! SA { - // ($x: expr) => { - // SA_top[first + $x] - // }; - //} - - //void SA_swap(int a, int b) => SA_top.Swap(first + a, first + b); - //macro_rules! SA_swap { - // ($a: expr, $b: expr) => { - // SA_top.swap(first + $a, first + $b); - // }; - //} m = size; if ((size % 2) == 0) @@ -3546,25 +3530,11 @@ private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, /// private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr d; - SAPtr e; - SAPtr f; - Idx t; - Idx s; - Idx x = 0; + SAPtr a, b, c, d, e, f; + Idx t, s, x = 0; - //ref int get(int x) => ref SA[ISAd + SA[x]]; Span ISAd = SA[isadOffset..]; - //macro_rules! get { - // ($x: expr) => { - // SA[ISAd + SA[$x]] - // }; - //} - // JOSEPH b = middle - 1; while (true) From 5935338a1f09ad1eb413c94587d93809a71dedef Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 18:14:13 -0500 Subject: [PATCH 189/325] Break out Crosscheck into its own class --- .../Crosscheck.cs | 23 +++++++++++++++++++ .../RsDivSufSort.cs | 16 +------------ 2 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs new file mode 100644 index 0000000..8630900 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs @@ -0,0 +1,23 @@ +using System; +using System.Diagnostics; + +namespace DeltaQ.SuffixSorting.LibDivSufSort +{ + internal static class Crosscheck + { + [Conditional("DEBUG")] + internal static void SA_dump(ReadOnlySpan span, string v) + { + Debug.WriteLine($":: {v}"); + for (int i = 0; i < span.Length; i++) + { + Debug.Write($"{span[i]} "); + Debug.WriteLineIf((i + 1) % 25 == 0, ""); + } + Debug.WriteLine(""); + } + + [Conditional("DEBUG")] + internal static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); + } +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 6d61eb7..8553c06 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -6,6 +6,7 @@ using SAPtr = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort; +using static Crosscheck; internal static class DivSufSort { @@ -3705,19 +3706,4 @@ private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SA pa = first; pb = last; } - - [Conditional("DEBUG")] - private static void SA_dump(ReadOnlySpan span, string v) - { - Debug.WriteLine($":: {v}"); - for (int i = 0; i < span.Length; i++) - { - Debug.Write($"{span[i]} "); - Debug.WriteLineIf((i + 1) % 25 == 0, ""); - } - Debug.WriteLine(""); - } - - [Conditional("DEBUG")] - private static void crosscheck(string v, 
params object[] args) => Debug.WriteLine(v, args); } From 2fae0b2e7e7ee344b91b40c74191e41f1370877a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 18:31:43 -0500 Subject: [PATCH 190/325] Use crosscheck conditional --- .../Crosscheck.cs | 4 +- .../Crosscheck.cs | 37 +++++++++++++++++++ .../LibDivSufSortTests.cs | 28 ++++---------- 3 files changed, 46 insertions(+), 23 deletions(-) create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs index 8630900..3859e72 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs @@ -5,7 +5,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { internal static class Crosscheck { - [Conditional("DEBUG")] + [Conditional("CROSSCHECK")] internal static void SA_dump(ReadOnlySpan span, string v) { Debug.WriteLine($":: {v}"); @@ -17,7 +17,7 @@ internal static void SA_dump(ReadOnlySpan span, string v) Debug.WriteLine(""); } - [Conditional("DEBUG")] + [Conditional("CROSSCHECK")] internal static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); } } diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs new file mode 100644 index 0000000..c51e412 --- /dev/null +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs @@ -0,0 +1,37 @@ +using System.Diagnostics; +using System.IO; + +namespace DeltaQ.Tests +{ + internal static class Crosscheck + { + [Conditional("CROSSCHECK")] + internal static void SetupCrosscheckListeners() + { + const string crosscheckDir = "crosscheck/"; + const string crosscheckFilename = crosscheckDir + "csharp"; + try + { + Directory.CreateDirectory(crosscheckDir); + File.Create(crosscheckFilename).Dispose(); + } + catch (IOException) { } + + if (Trace.Listeners[0] is 
DefaultTraceListener dtl) + { + dtl!.LogFileName = "crosscheck/csharp"; + } + else + { + var lflistener = new TextWriterTraceListener(crosscheckFilename); + Trace.Listeners.Add(lflistener); + } + } + + [Conditional("CROSSCHECK")] + internal static void FinalizeCrosscheck() + { + Trace.Flush(); + } + } +} diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index aa97187..e793002 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -1,9 +1,7 @@ using DeltaQ.SuffixSorting.LibDivSufSort; using Microsoft.Toolkit.HighPerformance.Buffers; using System; -using System.Collections; using System.Collections.Generic; -using System.Diagnostics; using System.IO; using System.Linq; using System.Text; @@ -11,31 +9,19 @@ namespace DeltaQ.Tests { - public class LibDivSufSortTests + using static Crosscheck; + + public class LibDivSufSortTests : IDisposable { private const string FuzzFilesBasePath = "assets/"; - [Conditional("DEBUG")] - private void SetupCrosscheckListeners() + public LibDivSufSortTests() { - const string crosscheckFilename = "crosscheck/csharp"; - try - { - Directory.CreateDirectory(crosscheckFilename); - File.Create(crosscheckFilename).Dispose(); - } - catch (IOException) { } - //var dtl = Trace.Listeners[0] as DefaultTraceListener; - //dtl!.LogFileName = "crosscheck/csharp"; - var lflistener = new TextWriterTraceListener(crosscheckFilename); - Trace.Listeners.Clear(); - Trace.Listeners.Add(lflistener); + SetupCrosscheckListeners(); } - - [Conditional("DEBUG")] - private void FinalizeCrosscheck() + public void Dispose() { - Trace.Flush(); + FinalizeCrosscheck(); } #if NET461 From e17d10ad26e68274d989998f08215cd64d6e4e1b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 20:29:17 -0500 Subject: [PATCH 191/325] Break out TrSort and Utils from RsDivSufSort --- .../RsDivSufSort.cs | 1419 +---------------- .../TrSort.cs | 1411 ++++++++++++++++ .../Utils.cs | 29 + 3 files changed, 1442 insertions(+), 1417 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 8553c06..0647e1c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -7,6 +7,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort; using static Crosscheck; +using static Utils; internal static class DivSufSort { @@ -407,7 +408,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) // Construct the inverse suffix array of type B* suffixes using trsort. 
SA_dump(SA, "trsort(A)"); crosscheck($"enter trsort: ISAb={ISAb} m={m} depth={1}"); - trsort(ISAb, SA, m, 1); + TrSort.trsort(ISAb, SA, m, 1); SA_dump(SA, "trsort(B)"); // Set the sorted order of type B* suffixes @@ -1959,15 +1960,6 @@ private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAP } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void Swap(ref T lhs, ref T rhs) - { - T temp; - temp = lhs; - lhs = rhs; - rhs = temp; - } - /// /// Returns the median of three elements /// @@ -2299,1411 +2291,4 @@ private static int ss_isqrt(int x) return y; } } - - private static readonly int[] lg_table_array = new[] - { - -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 - }; - private static ReadOnlySpan lg_table => lg_table_array; - - private static int tr_ilg(int n) - { - if ((n & 0xffff_0000) > 0) - { - if ((n & 0xff00_0000) > 0) - { - return 24 + lg_table[((n >> 24) & 0xff)]; - } - else - { - return 16 + lg_table[((n >> 16) & 0xff)]; - } - } - else - { - if ((n & 0x0000_ff00) > 0) - { - return 8 + lg_table[((n >> 8) & 0xff)]; - } - else - { - return 0 + lg_table[((n >> 0) & 0xff)]; - } - } - } - - private ref struct Budget - { - public int Chance; - public int Remain; - public int IncVal; - public int Count; - - public Budget(int chance, int incVal) - { - Chance = chance; - Remain = incVal; - IncVal = incVal; - Count = 0; - } - - public bool Check(int size) - { - if (size <= Remain) - { - Remain -= size; - return true; - } - - if (Chance == 0) - { - Count += 
size; - return false; - } - - Remain += IncVal - size; - Chance -= 1; - return true; - } - } - - /// Tandem repeat sort - private static void trsort(SAPtr ISA, Span SA, int n, int depth) - { - SAPtr ISAd; - SAPtr first; - SAPtr last; - /*Index*/ - int t; - /*Index*/ - int skip; - /*Index*/ - int unsorted; - Budget budget = new(tr_ilg(n) * 2 / 3, n); - - //macro_rules! ISA { - // ($x: expr) => { - // SA[ISA + $x] - // }; - //} - - //ref int getISA(int x) => ref SA[ISA + x]; - - // JERRY - ISAd = ISA + depth; - while (-n < SA[0]) - { - first = 0; - skip = 0; - unsorted = 0; - - // PETER - while (true) - { - t = SA[first]; - if (t < 0) - { - first -= t; - skip += t; - } - else - { - if (skip != 0) - { - SA[first + skip] = skip; - skip = 0; - } - last = SA[ISA + (t)] + 1; - if (1 < (last - first)) - { - budget.Count = 0; - crosscheck($"enter tr_introsort: ISA={ISA} ISAd={ISAd} first={first} last={last}"); - crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); - SA_dump(SA, "tr_introsort(A)"); - tr_introsort(ISA, ISAd, SA, first, last, ref budget); - SA_dump(SA, "tr_introsort(B)"); - crosscheck($"exit tr_introsort"); - crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); - if (budget.Count != 0) - { - unsorted += budget.Count; - } - else - { - skip = first - last; - } - } - else if ((last - first) == 1) - { - skip = -1; - } - first = last; - } - - // cond (PETER) - if (!(first < n)) - { - break; - } - } - - if (skip != 0) - { - SA[first + skip] = skip; - } - if (unsorted == 0) - { - break; - } - - // iter - ISAd += ISAd - ISA; - } - } - - private struct TrStackItem - { - public SAPtr a; - public SAPtr b; - public SAPtr c; - public Idx d; - public Idx e; - } - - private const int TR_STACK_SIZE = 64; - private ref struct TrStack - { - public readonly Span Items; - public int Size; - - public TrStack(Span items) - { - Items = items; - Size = 0; - } - 
- public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) - { - Debug.Assert(Size < Items.Length); - ref TrStackItem item = ref Items[Size++]; - item.a = a; - item.b = b; - item.c = c; - item.d = d; - item.e = e; - } - public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) - { - //Debug.Assert(Size > 0); - if (Size == 0) return false; - - ref TrStackItem item = ref Items[--Size]; - a = item.a; - b = item.b; - c = item.c; - d = item.d; - e = item.e; - return true; - } - } - - private const Idx TR_INSERTIONSORT_THRESHOLD = 8; - private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA, SAPtr first, SAPtr last, ref Budget budget) - { - SAPtr a = 0; - SAPtr b = 0; - SAPtr c; - Idx v, x; - Idx incr = isadOffset - isaOffset; - Idx next; - Idx trlink = -1; - - using var stackOwner = SpanOwner.Allocate(TR_STACK_SIZE, AllocationMode.Clear); - TrStack stack = new(stackOwner.Span); - - /* - macro_rules! ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } - macro_rules! 
ISAd { - ($x: expr) => { - SA[ISAd + $x] - }; - } - */ - var ISA = SA[isaOffset..]; - var ISAd = SA[isadOffset..]; - - var limit = tr_ilg(last - first); - - // PASCAL - while (true) - { - crosscheck($"pascal limit={limit} first={first} last={last}"); - if (limit < 0) - { - if (limit == -1) - { - // tandem repeat partition - tr_partition(SA, isadOffset - incr, first, first, last, ref a, ref b, last - 1); - - // update ranks - if (a < last) - { - //TODO: crosscheck - crosscheck("ranks a SA, SAPtr ISAd, SAPtr first, SAPtr last) - { - Idx t = last - first; - SAPtr middle = first + t / 2; - - if (t <= 512) - { - if (t <= 32) - { - return tr_median3(SA, ISAd, first, middle, last - 1); - } - else - { - t >>= 2; - return tr_median5(SA, ISAd, first, first + t, middle, last - 1 - t, last - 1); - } - } - t >>= 3; - first = tr_median3(SA, ISAd, first, first + t, first + (t << 1)); - middle = tr_median3(SA, ISAd, middle - t, middle, middle + t); - last = tr_median3(SA, ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); - return tr_median3(SA, ISAd, first, middle, last); - } - - /// Returns the median of five elements - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) - { - Span ISAd = SA[isadOffset..]; - - //get(x) => ISAd[SA[x]] - - if (ISAd[SA[v2]] > ISAd[SA[v3]]) - { - Swap(ref v2, ref v3); - } - if (ISAd[SA[v4]] > ISAd[SA[v5]]) - { - Swap(ref v4, ref v5); - } - if (ISAd[SA[v2]] > ISAd[SA[v4]]) - { - Swap(ref v2, ref v4); - Swap(ref v3, ref v5); - } - if (ISAd[SA[v1]] > ISAd[SA[v3]]) - { - Swap(ref v1, ref v3); - } - if (ISAd[SA[v1]] > ISAd[SA[v4]]) - { - Swap(ref v1, ref v4); - Swap(ref v3, ref v5); - } - if (ISAd[SA[v3]] > ISAd[SA[v4]]) - { - return v4; - } - else - { - return v3; - } - } - - /// Returns the median of three elements - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr 
v2, SAPtr v3) - { - Span ISAd = SA[isadOffset..]; - - //get(x) => ISAd[SA[x]] - - if (ISAd[SA[v1]] > ISAd[SA[v2]]) - { - Swap(ref v1, ref v2); - } - if (ISAd[SA[v2]] > ISAd[SA[v3]]) - { - if (ISAd[SA[v1]] > ISAd[SA[v3]]) - { - return v1; - } - else - { - return v3; - } - } - else - { - return v2; - } - } - - /// Simple top-down heapsort - private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, Idx size) - { - Idx i; - Idx m; - Idx t; - - Span ISAd = SA_top[isadOffset..]; - - Span SA = SA_top[first..]; - - m = size; - if ((size % 2) == 0) - { - m -= 1; - if (ISAd[SA[m / 2]] < ISAd[SA[m]]) - { - SA_top.Swap(first + m, first + (m / 2)); - } - } - - // LISA - //TODO: checkme - //for i in (0..(m / 2)).rev() { - for (i = (m / 2) - 1; i >= 0; i--) - { - crosscheck($"LISA i={i}"); - tr_fixdown(ISAd, SA, i, m); - } - if ((size % 2) == 0) - { - SA_top.Swap(first + 0, first + m); - tr_fixdown(ISAd, SA, 0, m); - } - // MARK - //TODO: checkme - //for i in (1..m).rev() { - for (i = m - 1; i > 0; i--) - { - crosscheck($"MARK i={i}"); - t = SA[0]; - SA[0] = SA[i]; - tr_fixdown(ISAd, SA, 0, i); - SA[i] = t; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void tr_fixdown(Span ISAd, Span SA, Idx i, Idx size) - { - Idx j; - Idx k; - Idx d; - Idx e; - - crosscheck($"fixdown i={i} size={size}"); - - // WILMOT - var v = SA[i]; - var c = ISAd[v]; - while (true) - { - // cond - j = 2 * i + 1; - if (!(j < size)) - { - break; - } - - // body - k = j; - d = ISAd[SA[k]]; - j += 1; - e = ISAd[SA[j]]; - if (d < e) - { - k = j; - d = e; - } - if (d <= c) - { - break; - } - - // iter (WILMOT) - SA[i] = SA[k]; - i = k; - } - SA[i] = v; - } - - /// - /// Simple insertionsort for small size groups - /// - private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr first, SAPtr last) - { - SAPtr a; - SAPtr b; - Idx t; - Idx r; - - a = first + 1; - // KAREN - while (a < last) - { - // JEZEBEL - t = SA[a]; - b = a - 1; - while (true) - { - 
// cond (JEZEBEL) - r = ISAd[t] - ISAd[SA[b]]; - if (!(0 > r)) - { - break; - } - - // LILITH - while (true) - { - SA[b + 1] = SA[b]; - - // cond (LILITH) - b -= 1; - if (!((first <= b) && (SA[b] < 0))) - { - break; - } - } - - // body (JEZEBEL) - if (b < first) - { - break; - } - } - - if (r == 0) - { - SA[b] = ~SA[b]; - } - SA[b + 1] = t; - - // iter - a += 1; - } - } - - private static void tr_partialcopy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) - { - SAPtr c, d, e; - Idx s, v, rank, lastrank, newrank = -1; - - Span ISA = SA[isaOffset..]; - - v = (b - 1); - lastrank = -1; - // JETHRO - c = first; - d = a - 1; - while (c <= d) - { - s = SA[c] - depth; - if ((0 <= s) && (ISA[s] == v)) - { - d += 1; - SA[d] = s; - rank = ISA[s + depth]; - if (lastrank != rank) - { - lastrank = rank; - newrank = d; - } - ISA[s] = newrank; - } - - // iter (JETHRO) - c += 1; - } - - lastrank = -1; - // SCROOGE - e = d; - while (first <= e) - { - rank = ISA[SA[e]]; - if (lastrank != rank) - { - lastrank = rank; - newrank = e; - } - if (newrank != rank) - { - { - var SA_e = SA[e]; - ISA[SA_e] = newrank; - } - } - - // iter (SCROOGE) - e -= 1; - } - - lastrank = -1; - // DEWEY - c = last - 1; - e = d + 1; - d = b; - while (e < d) - { - s = SA[c] - depth; - if ((0 <= s) && (ISA[s] == v)) - { - d -= 1; - SA[d] = s; - rank = ISA[s + depth]; - if (lastrank != rank) - { - lastrank = rank; - newrank = d; - } - ISA[s] = newrank; - } - - // iter (DEWEY) - c -= 1; - } - } - - /// Tandem repeat copy - private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) - { - // sort suffixes of middle partition - // by using sorted order of suffixes of left and right partition. 
- SAPtr c; - SAPtr d; - SAPtr e; - Idx s; - Idx v; - - crosscheck($"tr_copy first={first} a={a} b={b} last={last}"); - - v = (b - 1); - - Span ISA = SA[isaOffset..]; - - // JACK - c = first; - d = a - 1; - while (c <= d) - { - s = SA[c] - depth; - if ((0 <= s) && (ISA[s] == v)) - { - d += 1; - SA[d] = s; - ISA[s] = d; - } - - // iter (JACK) - c += 1; - } - - // JILL - c = last - 1; - e = d + 1; - d = b; - while (e < d) - { - s = SA[c] - depth; - if ((0 <= s) && (ISA[s] == v)) - { - d -= 1; - SA[d] = s; - ISA[s] = d; - } - - // iter (JILL) - c -= 1; - } - } - - /// - /// Tandem repeat partition - /// - private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) - { - SAPtr a, b, c, d, e, f; - Idx t, s, x = 0; - - Span ISAd = SA[isadOffset..]; - - // JOSEPH - b = middle - 1; - while (true) - { - // cond - b += 1; - if (!(b < last)) - { - break; - } - x = ISAd[SA[b]]; - if (!(x == v)) - { - break; - } - } - a = b; - if ((a < last) && (x < v)) - { - // MARY - while (true) - { - b += 1; - if (!(b < last)) - { - break; - } - x = ISAd[SA[b]]; - if (!(x <= v)) - { - break; - } - - // body - if (x == v) - { - SA.Swap(b, a); - a += 1; - } - } - } - - // JEREMIAH - c = last; - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = ISAd[SA[c]]; - if (!(x == v)) - { - break; - } - } - d = c; - if ((b < d) && (x > v)) - { - // BEDELIA - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = ISAd[SA[c]]; - if (!(x >= v)) - { - break; - } - if (x == v) - { - SA.Swap(c, d); - d -= 1; - } - } - } - - // ALEX - while (b < c) - { - SA.Swap(b, c); - // SIMON - while (true) - { - b += 1; - if (!(b < c)) - { - break; - } - x = ISAd[SA[b]]; - if (!(x <= v)) - { - break; - } - if (x == v) - { - SA.Swap(b, a); - a += 1; - } - } - - // GREGORY - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = ISAd[SA[c]]; - if (!(x >= v)) - { - break; - } - if (x == v) - { - SA.Swap(c, d); - d -= 
1; - } - } - } // end ALEX - - if (a <= d) - { - c = b - 1; - - s = (a - first); - t = (b - a); - if (s > t) - { - s = t; - } - - // GENEVIEVE - e = first; - f = b - s; - while (0 < s) - { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; - } - s = (d - c); - t = (last - d - 1); - if (s > t) - { - s = t; - } - - // MARISSA - e = b; - f = last - s; - while (0 < s) - { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; - } - first += (b - a); - last -= (d - c); - } - pa = first; - pb = last; - } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs new file mode 100644 index 0000000..ce9a1a8 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -0,0 +1,1411 @@ +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +using Idx = System.Int32; +using SAPtr = System.Int32; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; +using static Crosscheck; +using static Utils; + +internal static class TrSort +{ + private static int tr_ilg(int n) + { + if ((n & 0xffff_0000) > 0) + { + if ((n & 0xff00_0000) > 0) + { + return 24 + lg_table[((n >> 24) & 0xff)]; + } + else + { + return 16 + lg_table[((n >> 16) & 0xff)]; + } + } + else + { + if ((n & 0x0000_ff00) > 0) + { + return 8 + lg_table[((n >> 8) & 0xff)]; + } + else + { + return 0 + lg_table[((n >> 0) & 0xff)]; + } + } + } + + private ref struct Budget + { + public int Chance; + public int Remain; + public int IncVal; + public int Count; + + public Budget(int chance, int incVal) + { + Chance = chance; + Remain = incVal; + IncVal = incVal; + Count = 0; + } + + public bool Check(int size) + { + if (size <= Remain) + { + Remain -= size; + return true; + } + + if (Chance == 0) + { + Count += size; + return false; + } + + Remain += IncVal - size; + Chance -= 1; + return true; + } + 
} + + /// Tandem repeat sort + internal static void trsort(SAPtr ISA, Span SA, int n, int depth) + { + SAPtr ISAd; + SAPtr first; + SAPtr last; + /*Index*/ + int t; + /*Index*/ + int skip; + /*Index*/ + int unsorted; + Budget budget = new(tr_ilg(n) * 2 / 3, n); + + //macro_rules! ISA { + // ($x: expr) => { + // SA[ISA + $x] + // }; + //} + + //ref int getISA(int x) => ref SA[ISA + x]; + + // JERRY + ISAd = ISA + depth; + while (-n < SA[0]) + { + first = 0; + skip = 0; + unsorted = 0; + + // PETER + while (true) + { + t = SA[first]; + if (t < 0) + { + first -= t; + skip += t; + } + else + { + if (skip != 0) + { + SA[first + skip] = skip; + skip = 0; + } + last = SA[ISA + (t)] + 1; + if (1 < (last - first)) + { + budget.Count = 0; + crosscheck($"enter tr_introsort: ISA={ISA} ISAd={ISAd} first={first} last={last}"); + crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); + SA_dump(SA, "tr_introsort(A)"); + tr_introsort(ISA, ISAd, SA, first, last, ref budget); + SA_dump(SA, "tr_introsort(B)"); + crosscheck($"exit tr_introsort"); + crosscheck($" budget: count={budget.Count} chance={budget.Chance} incval={budget.IncVal} remain={budget.Remain}"); + if (budget.Count != 0) + { + unsorted += budget.Count; + } + else + { + skip = first - last; + } + } + else if ((last - first) == 1) + { + skip = -1; + } + first = last; + } + + // cond (PETER) + if (!(first < n)) + { + break; + } + } + + if (skip != 0) + { + SA[first + skip] = skip; + } + if (unsorted == 0) + { + break; + } + + // iter + ISAd += ISAd - ISA; + } + } + + private struct TrStackItem + { + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + public Idx e; + } + + private const int TR_STACK_SIZE = 64; + private ref struct TrStack + { + public readonly Span Items; + public int Size; + + public TrStack(Span items) + { + Items = items; + Size = 0; + } + + public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) + { + Trace.Assert(Size < 
Items.Length); + ref TrStackItem item = ref Items[Size++]; + item.a = a; + item.b = b; + item.c = c; + item.d = d; + item.e = e; + } + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) + { + if (Size == 0) return false; + + ref TrStackItem item = ref Items[--Size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + e = item.e; + return true; + } + } + + private const Idx TR_INSERTIONSORT_THRESHOLD = 8; + private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA, SAPtr first, SAPtr last, ref Budget budget) + { + SAPtr a = 0; + SAPtr b = 0; + SAPtr c; + Idx v, x; + Idx incr = isadOffset - isaOffset; + Idx next; + Idx trlink = -1; + + using var stackOwner = SpanOwner.Allocate(TR_STACK_SIZE, AllocationMode.Clear); + TrStack stack = new(stackOwner.Span); + + /* + macro_rules! ISA { + ($x: expr) => { + SA[ISA + $x] + }; + } + macro_rules! ISAd { + ($x: expr) => { + SA[ISAd + $x] + }; + } + */ + var ISA = SA[isaOffset..]; + var ISAd = SA[isadOffset..]; + + var limit = tr_ilg(last - first); + + // PASCAL + while (true) + { + crosscheck($"pascal limit={limit} first={first} last={last}"); + if (limit < 0) + { + if (limit == -1) + { + // tandem repeat partition + tr_partition(SA, isadOffset - incr, first, first, last, ref a, ref b, last - 1); + + // update ranks + if (a < last) + { + //TODO: crosscheck + crosscheck("ranks a SA, SAPtr ISAd, SAPtr first, SAPtr last) + { + Idx t = last - first; + SAPtr middle = first + t / 2; + + if (t <= 512) + { + if (t <= 32) + { + return tr_median3(SA, ISAd, first, middle, last - 1); + } + else + { + t >>= 2; + return tr_median5(SA, ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(SA, ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(SA, ISAd, middle - t, middle, middle + t); + last = tr_median3(SA, ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(SA, ISAd, first, middle, last); + } + + /// Returns the 
median of five elements + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) + { + Span ISAd = SA[isadOffset..]; + + //get(x) => ISAd[SA[x]] + + if (ISAd[SA[v2]] > ISAd[SA[v3]]) + { + Swap(ref v2, ref v3); + } + if (ISAd[SA[v4]] > ISAd[SA[v5]]) + { + Swap(ref v4, ref v5); + } + if (ISAd[SA[v2]] > ISAd[SA[v4]]) + { + Swap(ref v2, ref v4); + Swap(ref v3, ref v5); + } + if (ISAd[SA[v1]] > ISAd[SA[v3]]) + { + Swap(ref v1, ref v3); + } + if (ISAd[SA[v1]] > ISAd[SA[v4]]) + { + Swap(ref v1, ref v4); + Swap(ref v3, ref v5); + } + if (ISAd[SA[v3]] > ISAd[SA[v4]]) + { + return v4; + } + else + { + return v3; + } + } + + /// Returns the median of three elements + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3) + { + Span ISAd = SA[isadOffset..]; + + //get(x) => ISAd[SA[x]] + + if (ISAd[SA[v1]] > ISAd[SA[v2]]) + { + Swap(ref v1, ref v2); + } + if (ISAd[SA[v2]] > ISAd[SA[v3]]) + { + if (ISAd[SA[v1]] > ISAd[SA[v3]]) + { + return v1; + } + else + { + return v3; + } + } + else + { + return v2; + } + } + + /// Simple top-down heapsort + private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, Idx size) + { + Idx i; + Idx m; + Idx t; + + Span ISAd = SA_top[isadOffset..]; + + Span SA = SA_top[first..]; + + m = size; + if ((size % 2) == 0) + { + m -= 1; + if (ISAd[SA[m / 2]] < ISAd[SA[m]]) + { + SA_top.Swap(first + m, first + (m / 2)); + } + } + + // LISA + //TODO: checkme + //for i in (0..(m / 2)).rev() { + for (i = (m / 2) - 1; i >= 0; i--) + { + crosscheck($"LISA i={i}"); + tr_fixdown(ISAd, SA, i, m); + } + if ((size % 2) == 0) + { + SA_top.Swap(first + 0, first + m); + tr_fixdown(ISAd, SA, 0, m); + } + // MARK + //TODO: checkme + //for i in (1..m).rev() { + for (i = m - 1; i > 0; i--) + { + crosscheck($"MARK i={i}"); + t = SA[0]; + SA[0] = SA[i]; + 
tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void tr_fixdown(Span ISAd, Span SA, Idx i, Idx size) + { + Idx j; + Idx k; + Idx d; + Idx e; + + crosscheck($"fixdown i={i} size={size}"); + + // WILMOT + var v = SA[i]; + var c = ISAd[v]; + while (true) + { + // cond + j = 2 * i + 1; + if (!(j < size)) + { + break; + } + + // body + k = j; + d = ISAd[SA[k]]; + j += 1; + e = ISAd[SA[j]]; + if (d < e) + { + k = j; + d = e; + } + if (d <= c) + { + break; + } + + // iter (WILMOT) + SA[i] = SA[k]; + i = k; + } + SA[i] = v; + } + + /// + /// Simple insertionsort for small size groups + /// + private static void tr_insertionsort(Span SA, ReadOnlySpan ISAd, SAPtr first, SAPtr last) + { + SAPtr a; + SAPtr b; + Idx t; + Idx r; + + a = first + 1; + // KAREN + while (a < last) + { + // JEZEBEL + t = SA[a]; + b = a - 1; + while (true) + { + // cond (JEZEBEL) + r = ISAd[t] - ISAd[SA[b]]; + if (!(0 > r)) + { + break; + } + + // LILITH + while (true) + { + SA[b + 1] = SA[b]; + + // cond (LILITH) + b -= 1; + if (!((first <= b) && (SA[b] < 0))) + { + break; + } + } + + // body (JEZEBEL) + if (b < first) + { + break; + } + } + + if (r == 0) + { + SA[b] = ~SA[b]; + } + SA[b + 1] = t; + + // iter + a += 1; + } + } + + private static void tr_partialcopy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) + { + SAPtr c, d, e; + Idx s, v, rank, lastrank, newrank = -1; + + Span ISA = SA[isaOffset..]; + + v = (b - 1); + lastrank = -1; + // JETHRO + c = first; + d = a - 1; + while (c <= d) + { + s = SA[c] - depth; + if ((0 <= s) && (ISA[s] == v)) + { + d += 1; + SA[d] = s; + rank = ISA[s + depth]; + if (lastrank != rank) + { + lastrank = rank; + newrank = d; + } + ISA[s] = newrank; + } + + // iter (JETHRO) + c += 1; + } + + lastrank = -1; + // SCROOGE + e = d; + while (first <= e) + { + rank = ISA[SA[e]]; + if (lastrank != rank) + { + lastrank = rank; + newrank = e; + } + if (newrank != rank) + 
{ + { + var SA_e = SA[e]; + ISA[SA_e] = newrank; + } + } + + // iter (SCROOGE) + e -= 1; + } + + lastrank = -1; + // DEWEY + c = last - 1; + e = d + 1; + d = b; + while (e < d) + { + s = SA[c] - depth; + if ((0 <= s) && (ISA[s] == v)) + { + d -= 1; + SA[d] = s; + rank = ISA[s + depth]; + if (lastrank != rank) + { + lastrank = rank; + newrank = d; + } + ISA[s] = newrank; + } + + // iter (DEWEY) + c -= 1; + } + } + + /// Tandem repeat copy + private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) + { + // sort suffixes of middle partition + // by using sorted order of suffixes of left and right partition. + SAPtr c; + SAPtr d; + SAPtr e; + Idx s; + Idx v; + + crosscheck($"tr_copy first={first} a={a} b={b} last={last}"); + + v = (b - 1); + + Span ISA = SA[isaOffset..]; + + // JACK + c = first; + d = a - 1; + while (c <= d) + { + s = SA[c] - depth; + if ((0 <= s) && (ISA[s] == v)) + { + d += 1; + SA[d] = s; + ISA[s] = d; + } + + // iter (JACK) + c += 1; + } + + // JILL + c = last - 1; + e = d + 1; + d = b; + while (e < d) + { + s = SA[c] - depth; + if ((0 <= s) && (ISA[s] == v)) + { + d -= 1; + SA[d] = s; + ISA[s] = d; + } + + // iter (JILL) + c -= 1; + } + } + + /// + /// Tandem repeat partition + /// + private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) + { + SAPtr a, b, c, d, e, f; + Idx t, s, x = 0; + + Span ISAd = SA[isadOffset..]; + + // JOSEPH + b = middle - 1; + while (true) + { + // cond + b += 1; + if (!(b < last)) + { + break; + } + x = ISAd[SA[b]]; + if (!(x == v)) + { + break; + } + } + a = b; + if ((a < last) && (x < v)) + { + // MARY + while (true) + { + b += 1; + if (!(b < last)) + { + break; + } + x = ISAd[SA[b]]; + if (!(x <= v)) + { + break; + } + + // body + if (x == v) + { + SA.Swap(b, a); + a += 1; + } + } + } + + // JEREMIAH + c = last; + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = 
ISAd[SA[c]]; + if (!(x == v)) + { + break; + } + } + d = c; + if ((b < d) && (x > v)) + { + // BEDELIA + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = ISAd[SA[c]]; + if (!(x >= v)) + { + break; + } + if (x == v) + { + SA.Swap(c, d); + d -= 1; + } + } + } + + // ALEX + while (b < c) + { + SA.Swap(b, c); + // SIMON + while (true) + { + b += 1; + if (!(b < c)) + { + break; + } + x = ISAd[SA[b]]; + if (!(x <= v)) + { + break; + } + if (x == v) + { + SA.Swap(b, a); + a += 1; + } + } + + // GREGORY + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = ISAd[SA[c]]; + if (!(x >= v)) + { + break; + } + if (x == v) + { + SA.Swap(c, d); + d -= 1; + } + } + } // end ALEX + + if (a <= d) + { + c = b - 1; + + s = (a - first); + t = (b - a); + if (s > t) + { + s = t; + } + + // GENEVIEVE + e = first; + f = b - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } + s = (d - c); + t = (last - d - 1); + if (s > t) + { + s = t; + } + + // MARISSA + e = b; + f = last - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } + first += (b - a); + last -= (d - c); + } + pa = first; + pb = last; + } +} \ No newline at end of file diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs new file mode 100644 index 0000000..4de0f0a --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -0,0 +1,29 @@ +using System; +using System.Runtime.CompilerServices; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal static class Utils +{ + private static readonly int[] lg_table_array = new[] + { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + }; + internal static ReadOnlySpan lg_table => lg_table_array; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void Swap(ref T lhs, ref T rhs) + { + T temp; + temp = lhs; + lhs = rhs; + rhs = temp; + } +} \ No newline at end of file From ae2cb444b270dae8812f53656f77da4b3cedc41c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 20:36:33 -0500 Subject: [PATCH 192/325] Break out SsSort --- .../IntAccessor.cs | 13 + .../RsDivSufSort.cs | 1782 +---------------- .../SsSort.cs | 1779 ++++++++++++++++ .../Utils.cs | 2 +- 4 files changed, 1794 insertions(+), 1782 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs new file mode 100644 index 0000000..07761b5 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs @@ -0,0 +1,13 @@ +using System; +using Idx = System.Int32; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal ref struct IntAccessor +{ + public readonly ReadOnlySpan span; + public IntAccessor(ReadOnlySpan span) => this.span = span; + + public readonly int this[Idx index] => span[index]; + public readonly int Length => span.Length; +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 0647e1c..0e82cc9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -183,25 +183,6 @@ public ref struct BBucket public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; } - //[DebuggerDisplay("")] - //public ref 
struct SAPtr - //{ - // public readonly Index Index; - // public SAPtr(Index idx) - // { - // this.Index = idx; - // } - //} - - public ref struct IntAccessor - { - public readonly ReadOnlySpan span; - public IntAccessor(ReadOnlySpan span) => this.span = span; - - public readonly int this[Idx index] => span[index]; - public readonly int Length => span.Length; - } - public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) { var n = T.Length; @@ -348,7 +329,7 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) if (1 < (j - i)) { SA_dump(SA[i..j], "sssort(A)"); - sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); + SsSort.sssort(T, SA, PAb, i, j, buf, bufsize, 2, n, SA[i] == (m - 1)); SA_dump(SA[i..j], "sssort(B)"); } @@ -530,1765 +511,4 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) return new SortTypeBstarResult { A = A, B = B, m = m }; } - - private const Idx SS_BLOCKSIZE = 1024; - - /// - /// Substring sort - /// - private static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) - { - // Note: in most of this file "PA" seems to mean "Partition Array" - we're - // working on a slice of SA. This is also why SA (or a mutable reference to it) - // is passed around, so we don't run into lifetime issues. 
- - SAPtr a; - SAPtr b; - SAPtr middle; - SAPtr curbuf; - Idx j; - Idx k; - Idx curbufsize; - Idx limit; - Idx i; - - if (lastsuffix) - { - first += 1; - } - - limit = ss_isqrt(last - first); - if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < limit)) - { - if (SS_BLOCKSIZE < limit) - { - limit = SS_BLOCKSIZE; - } - middle = last - limit; - buf = middle; - bufsize = limit; - } - else - { - middle = last; - limit = 0; - } - - // ESPRESSO - a = first; - i = 0; - while (SS_BLOCKSIZE < (middle - a)) - { - crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); - ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); - - curbufsize = (last - (a + SS_BLOCKSIZE)); - curbuf = a + SS_BLOCKSIZE; - if (curbufsize <= bufsize) - { - curbufsize = bufsize; - curbuf = buf; - } - - // FRESCO - b = a; - k = SS_BLOCKSIZE; - j = i; - while ((j & 1) > 0) - { - crosscheck($"ss_swapmerge {k}"); - ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); - - // iter - b -= k; - k <<= 1; - j >>= 1; - } - - // iter - a += SS_BLOCKSIZE; - i += 1; - } - - crosscheck($"ss_mintrosort (pre-mariachi) a={a - PA} depth={depth}"); - ss_mintrosort(T, SA, PA, a, middle, depth); - - SA_dump(SA[first..last], "pre-mariachi"); - - // MARIACHI - k = SS_BLOCKSIZE; - while (i != 0) - { - if ((i & 1) > 0) - { - SA_dump(SA[first..last], "in-mariachi pre-swap"); - crosscheck($"a={a - first} middle={middle - first} bufsize={bufsize} depth={depth}"); - ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); - SA_dump(SA[first..last], "in-mariachi post-swap"); - a -= k; - } - - // iter - k <<= 1; - i >>= 1; - } - SA_dump(SA[first..last], "post-mariachi"); - - if (limit != 0) - { - crosscheck("ss_mintrosort limit!=0"); - ss_mintrosort(T, SA, PA, middle, last, depth); - SA_dump(SA[first..last], "post-mintrosort limit!=0"); - ss_inplacemerge(T, SA, PA, first, middle, last, depth); - SA_dump(SA[first..last], "post-inplacemerge limit!=0"); - } - 
SA_dump(SA[first..last], "post-limit!=0"); - - if (lastsuffix) - { - crosscheck("lastsuffix!"); - - // Insert last type B* suffix - Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; - //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; - //let SAI = SuffixArray(&mut PAi); - - a = first; - i = SA[first - 1]; - - // CELINE - while ((a < last) && ((SA[a] < 0) || (0 < ss_compare(T, PAi, (SAPtr)0, SA, PA + SA[a], depth)))) - { - // body - SA[a - 1] = SA[a]; - - // iter - a += 1; - } - SA[a - 1] = i; - } - } - - /// - /// Compare two suffixes - /// - private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) - { - //TODO: possible perf improvement - JZ - - var U1 = depth + SAp1[p1]; - var U2 = depth + SAp2[p2]; - var U1n = SAp1[p1 + 1] + 2; - var U2n = SAp2[p2 + 1] + 2; - - while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) - { - U1 += 1; - U2 += 1; - } - - if (U1 < U1n) - { - if (U2 < U2n) - { - return T[U1] - T[U2]; - } - else - { - return 1; - } - } - else - { - if (U2 < U2n) - { - return -1; - } - else - { - return 0; - } - } - } - - private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) - { - SAPtr p; - SAPtr a; - SAPtr b; - Idx len; - Idx half; - Idx q; - Idx r; - Idx x; - - var original_first = first; - var original_last = last; - - SA_dump(SA[original_first..original_last], "inplacemerge start"); - - // FERRIS - while (true) - { - if (SA[last - 1] < 0) - { - x = 1; - p = PA + ~SA[last - 1]; - } - else - { - x = 0; - p = PA + SA[last - 1]; - } - - // LOIS - a = first; - len = (middle - first)/*.0*/; - half = len >> 1; - r = -1; - while (0 < len) - { - b = a + half; - q = ss_compare(T, SA, PA + (0 <= SA[b] ? 
SA[b] : ~SA[b]), SA, p, depth); - if (q < 0) - { - a = b + 1; - half -= (len & 1) ^ 1; - } - else - { - r = q; - } - - // iter - len = half; - half >>= 1; - } - SA_dump(SA[original_first..original_last], "post-lois"); - - if (a < middle) - { - if (r == 0) - { - SA[a] = ~SA[a]; - } - ss_rotate(SA, a, middle, last); - SA_dump(SA[original_first..original_last], "post-rotate"); - last -= middle - a; - middle = a; - if (first == middle) - { - break; - } - } - - last -= 1; - if (x != 0) - { - // TIMMY - last -= 1; - while (SA[last] < 0) - { - last -= 1; - } - SA_dump(SA[original_first..original_last], "post-timmy"); - } - if (middle == last) - { - break; - } - - SA_dump(SA[original_first..original_last], "ferris-wrap"); - } - } - - private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) - { - SAPtr a; - SAPtr b; - Idx t; - Idx l; - Idx r; - - var original_first = first; - var original_last = last; - - l = (middle - first)/*.0*/; - r = (last - middle)/*.0*/; - - SA_dump(SA[original_first..original_last], "pre-brendan"); - - // BRENDAN - while ((0 < l) && (0 < r)) - { - if (l == r) - { - ss_blockswap(SA, first, middle, l); - SA_dump(SA[original_first..original_last], "post-blockswap"); - break; - } - - if (l < r) - { - a = last - 1; - b = middle - 1; - t = SA[a]; - - // ALICE - while (true) - { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - if (b < first) - { - SA[a] = t; - last = a; - r -= l + 1; - if (r <= l) - { - break; - } - a -= 1; - b = middle - 1; - t = SA[a]; - } - } - SA_dump(SA[original_first..original_last], "post-alice"); - } - else - { - a = first; - b = middle; - t = SA[a]; - // ROBERT - while (true) - { - SA[a] = SA[b]; - a += 1; - SA[b] = SA[a]; - b += 1; - if (last <= b) - { - SA[a] = t; - first = a + 1; - - l -= r + 1; - if (l <= r) - { - break; - } - a += 1; - b = middle; - t = SA[a]; - } - } - SA_dump(SA[original_first..original_last], "post-robert"); - } - } - } - - private static void ss_blockswap(Span SA, SAPtr a, 
SAPtr b, Idx n) - { - for (int i = 0; i < n; i++) - { - SA.Swap(a + i, b + i); - } - } - - /// D&C based merge - private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) - { - static Idx get_idx(Idx a) => 0 <= a ? a : ~a; - - void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) - { - crosscheck($"mc c={c}"); - if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) - { - crosscheck($"swapping a-first={a - first}"); - SA[a] = ~SA[a]; - } - if (((c & 4) > 0) && (ss_compare(T, SA, PA + get_idx(SA[b - 1]), SA, PA + SA[b], depth) == 0)) - { - crosscheck($"swapping b-first={b - first}"); - SA[b] = ~SA[b]; - } - } - - //MergeStack is the same as SsStack - using var stackOwner = SpanOwner.Allocate(MERGE_STACK_SIZE, AllocationMode.Clear); - var stack = new SsStack(stackOwner.Span); - - SAPtr l; - SAPtr r; - SAPtr lm; - SAPtr rm; - - Idx m; - Idx len; - Idx half; - Idx check; - Idx next; - - // BARBARIAN - check = 0; - while (true) - { - crosscheck($"barbarian check={check}"); - SA_dump(SA[first..last], "ss_swapmerge barbarian"); - SA_dump(SA[buf..(buf + bufsize)], "ss_swapmerge barbarian buf"); - if ((last - middle) <= bufsize) - { - crosscheck("<=bufsize"); - if ((first < middle) && (middle < last)) - { - crosscheck("f> 1; - while (0 < len) - { - crosscheck($"in-olanna len={len} half={half}"); - if (ss_compare( - T, - SA, - PA + get_idx(SA[middle + m + half]), - SA, - PA + get_idx(SA[middle - m - half - 1]), - depth) < 0) - { - m += half + 1; - half -= (len & 1) ^ 1; - } - - // iter - len = half; - half >>= 1; - } - - if (0 < m) - { - crosscheck($"0 < m, m={m}"); - lm = middle - m; - rm = middle + m; - ss_blockswap(SA, lm, middle, m); - r = middle; - l = middle; - next = 0; - if (rm < last) - { - if (SA[rm] < 0) - { - SA[rm] = ~SA[rm]; - if (first < lm) - { - // KOOPA - l -= 1; - while (SA[l] < 0) - { - l -= 1; - } - 
crosscheck($"post-koopa l-first={l - first}"); - next |= 4; - crosscheck($"post-koopa next={next}"); - } - next |= 1; - } - else if (first < lm) - { - // MUNCHER - while (SA[r] < 0) - { - r += 1; - } - crosscheck($"post-muncher r-first={r - first}"); - next |= 2; - } - } - - if ((l - first) <= (last - r)) - { - crosscheck("post-muncher l-f 0) && (r == middle)) - { - crosscheck($"post-muncher next ^= 6 old={next}"); - next ^= 6; - crosscheck($"post-muncher next ^= 6 new={next}"); - } - stack.Push(first, lm, l, (check & 3) | (next & 4)); - first = r; - middle = rm; - crosscheck($"post-muncher not, check was={check} next was={next}"); - check = (next & 3) | (check & 4); - crosscheck($"post-muncher not, check is={check} next is={next}"); - } - } - else - { - if (ss_compare( - T, - SA, - PA + get_idx(SA[middle - 1]), - SA, - PA + SA[middle], - depth) == 0) - { - SA[middle] = ~SA[middle]; - } - merge_check(T, SA, first, last, check); - SA_dump(SA[first..last], "ss_swapmerge pop 3"); - if (!stack.Pop(ref first, ref middle, ref last, ref check)) - { - return; - } - } - } - } - - /// Merge-backward with internal buffer - private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) - { - SAPtr p1; - SAPtr p2; - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr bufend; - - Idx t; - Idx r; - Idx x; - - bufend = buf + (last - middle) - 1; - ss_blockswap(SA, buf, middle, (last - middle)); - - x = 0; - if (SA[bufend] < 0) - { - p1 = PA + ~SA[bufend]; - x |= 1; - } - else - { - p1 = PA + SA[bufend]; - } - if (SA[middle - 1] < 0) - { - p2 = PA + ~SA[middle - 1]; - x |= 2; - } - else - { - p2 = PA + SA[middle - 1]; - } - - // MARTIN - a = last - 1; - t = SA[a]; - b = bufend; - c = middle - 1; - while (true) - { - r = ss_compare(T, SA, p1, SA, p2, depth); - if (0 < r) - { - if ((x & 1) > 0) - { - // BAPTIST - while (true) - { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - - // cond - if (!(SA[b] < 0)) - { - break; - 
} - } - x ^= 1; - } - SA[a] = SA[b]; - a -= 1; - if (b <= buf) - { - SA[buf] = t; - break; - } - SA[b] = SA[a]; - b -= 1; - if (SA[b] < 0) - { - p1 = PA + ~SA[b]; - x |= 1; - } - else - { - p1 = PA + SA[b]; - } - } - else if (r < 0) - { - if ((x & 2) > 0) - { - // JULES - while (true) - { - SA[a] = SA[c]; - a -= 1; - SA[c] = SA[a]; - c -= 1; - - // cond - if (~SA[c] < 0) - { - break; - } - } - x ^= 2; - } - SA[a] = SA[c]; - a -= 1; - SA[c] = SA[a]; - c -= 1; - if (c < first) - { - // GARAMOND - while (buf < b) - { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - } - SA[a] = SA[b]; - SA[b] = t; - break; - } - if (SA[c] < 0) - { - p2 = PA + ~SA[c]; - x |= 2; - } - else - { - p2 = PA + SA[c]; - } - } - else - { - if ((x & 1) > 0) - { - // XAVIER - while (true) - { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - if (!(SA[b] < 0)) - { - break; - } - } - x ^= 1; - } - SA[a] = ~SA[b]; - a -= 1; - if (b <= buf) - { - SA[buf] = t; - break; - } - SA[b] = SA[a]; - b -= 1; - if ((x & 2) > 0) - { - // WALTER - while (true) - { - SA[a] = SA[c]; - a -= 1; - SA[c] = SA[a]; - c -= 1; - - // cond - if (!(SA[c] < 0)) - { - break; - } - } - x ^= 2; - } - SA[a] = SA[c]; - a -= 1; - SA[c] = SA[a]; - c -= 1; - if (c < first) - { - // ZENITH - while (buf < b) - { - SA[a] = SA[b]; - a -= 1; - SA[b] = SA[a]; - b -= 1; - } - SA[a] = SA[b]; - SA[b] = t; - break; - } - if (SA[b] < 0) - { - p1 = PA + ~SA[b]; - x |= 1; - } - else - { - p1 = PA + SA[b]; - } - if (SA[c] < 0) - { - p2 = PA + ~SA[c]; - x |= 2; - } - else - { - p2 = PA + SA[c]; - } - } - } - } - - /// Merge-forward with internal buffer - private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) - { - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr bufend; - Idx t; - Idx r; - - SA_dump(SA[first..last], "ss_mergeforward start"); - - bufend = buf + (middle - first) - 1; - ss_blockswap(SA, buf, first, middle - first); - - // IGNACE - a = first; - t = SA[a]; - 
b = buf; - c = middle; - while (true) - { - r = ss_compare(T, SA, PA + SA[b], SA, PA + SA[c], depth); - if (r < 0) - { - // RONALD - while (true) - { - SA[a] = SA[b]; - a += 1; - if (bufend <= b) - { - SA[bufend] = t; - return; - } - SA[b] = SA[a]; - b += 1; - - // cond - if (!(SA[b] < 0)) - { - break; - } - } - } - else if (r > 0) - { - // JEREMY - while (true) - { - SA[a] = SA[c]; - a += 1; - SA[c] = SA[a]; - c += 1; - if (last <= c) - { - // TONY - while (b < bufend) - { - SA[a] = SA[b]; - a += 1; - SA[b] = SA[a]; - b += 1; - } - SA[a] = SA[b]; - SA[b] = t; - return; - } - - // cond (JEMERY) - if (!(SA[c] < 0)) - { - break; - } - } - } - else - { - SA[c] = ~SA[c]; - // JENS - while (true) - { - SA[a] = SA[b]; - a += 1; - if (bufend <= b) - { - SA[bufend] = t; - return; - } - SA[b] = SA[a]; - b += 1; - - // cond (JENS) - if (!(SA[b] < 0)) - { - break; - } - } - - // DIMITER - while (true) - { - SA[a] = SA[c]; - a += 1; - SA[c] = SA[a]; - c += 1; - if (last <= c) - { - // MIDORI - while (b < bufend) - { - SA[a] = SA[b]; - a += 1; - SA[b] = SA[a]; - b += 1; - } - SA[a] = SA[b]; - SA[b] = t; - return; - } - - // cond (DIMITER) - if (!(SA[c] < 0)) - { - break; - } - } - } - } - } - - private struct SsStackItem - { - public SAPtr a; - public SAPtr b; - public SAPtr c; - public Idx d; - } - - private const int SS_STACK_SIZE = 16; - private const int MERGE_STACK_SIZE = 32; - private ref struct SsStack - { - public readonly Span Items; - public int Size; - - public SsStack(Span items) - { - Items = items; - Size = 0; - } - - public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) - { - Debug.Assert(Size < Items.Length); - ref SsStackItem item = ref Items[Size++]; - item.a = a; - item.b = b; - item.c = c; - item.d = d; - } - public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) - { - //Debug.Assert(Size > 0); - if (Size == 0) return false; - - ref SsStackItem item = ref Items[--Size]; - a = item.a; - b = item.b; - c = item.c; - d = item.d; - return true; - } - } - 
- private const Idx SS_INSERTIONSORT_THRESHOLD = 8; - - private ref struct SpanOffsetAccessor - { - private readonly Span _span; - private readonly int _offset; - - public SpanOffsetAccessor(Span span, int offset) - { - _span = span; - _offset = offset; - } - - public ref T this[int index] => ref _span[_offset + index]; - } - - private ref struct ReadOnlySpanOffsetAccessor - { - private readonly ReadOnlySpan _span; - private readonly int _offset; - - public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) - { - _span = span; - _offset = offset; - } - - public ref readonly T this[int index] => ref _span[_offset + index]; - } - - private ref struct TdPAStarAccessor - { - private readonly ReadOnlySpanOffsetAccessor _TO; - private readonly ReadOnlySpan _SA; - private readonly ReadOnlySpan _PA; - private readonly IntAccessor _TD; - - public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) - { - _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); - - _SA = SA; - _PA = SA[partitionOffset..]; - _TD = new(T[tdOffset..]); - } - - public readonly int this[int index] => _TD[_PA[_SA[index]]]; - - public readonly int AsOffset(int index) => _TO[index]; - } - - /// - /// Multikey introsort for medium size groups - /// - private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, SAPtr first, SAPtr last, Idx depth) - { - var PA = SA[partitionOffset..]; - - using var stackOwner = SpanOwner.Allocate(SS_STACK_SIZE); - var stack = new SsStack(stackOwner.Span); - - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr d; - SAPtr e; - SAPtr f; - - Idx s; - Idx t; - - Idx limit; - Idx v; - Idx x = 0; - - // RENEE - limit = ss_ilg(last - first); - while (true) - { - if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) - { - if (1 < (last - first)) - { - ss_insertionsort(T, SA, partitionOffset, first, last, depth); - } - if (!stack.Pop(ref first, ref last, ref depth, ref limit)) - { - return; - } - continue; - } - - var tdOffset = depth; - var 
TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); - - /*readonly*/ - var old_limit = limit; - limit -= 1; - if (old_limit == 0) - { - SA_dump(SA[first..last], "before heapsort"); - ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); - SA_dump(SA[first..last], "after heapsort"); - } - - if (limit < 0) - { - a = first + 1; - v = TdPAStar[first]; - - // DAVE - while (a < last) - { - x = TdPAStar[a]; - if (x != v) - { - if (1 < (a - first)) - { - break; - } - v = x; - first = a; - } - - // loop iter - a += 1; - } - - if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) - { - first = ss_partition(SA, partitionOffset, first, a, depth); - } - if ((a - first) <= (last - a)) - { - if (1 < (a - first)) - { - stack.Push(a, last, depth, -1); - last = a; - depth += 1; - limit = ss_ilg(a - first); - } - else - { - first = a; - limit = -1; - } - } - else - { - if (1 < (last - a)) - { - stack.Push(first, a, depth + 1, ss_ilg(a - first)); - first = a; - limit = -1; - } - else - { - last = a; - depth += 1; - limit = ss_ilg(a - first); - } - } - continue; - } - - // choose pivot - a = ss_pivot(T, tdOffset, SA, partitionOffset, first, last); - v = TdPAStar[a]; - SA.Swap(first, a); - - // partition - // NORA - b = first; - while (true) - { - b += 1; - if (!(b < last)) - { - break; - } - x = TdPAStar[b]; - if (!(x == v)) - { - break; - } - // body - } - a = b; - if ((a < last) && (x < v)) - { - // STAN - while (true) - { - b += 1; - if (!(b < last)) - { - break; - } - x = TdPAStar[b]; - if (!(x <= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(b, a); - a += 1; - } - } - } - - // NATHAN - c = last; - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[c]; - if (!(x == v)) - { - break; - } - // body - } - d = c; - if ((b < d) && (x > v)) - { - // JACOB - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[c]; - if (!(x >= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(c, d); - d -= 1; - 
} - } - } - - // RITA - while (b < c) - { - SA.Swap(b, c); - // ROMEO - while (true) - { - b += 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[b]; - if (!(x <= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(b, a); - a += 1; - } - } - // JULIET - while (true) - { - c -= 1; - if (!(b < c)) - { - break; - } - x = TdPAStar[c]; - if (!(x >= v)) - { - break; - } - // body - if (x == v) - { - SA.Swap(c, d); - d -= 1; - } - } - } - - if (a <= d) - { - c = b - 1; - s = (a - first)/*.0*/; - t = (b - a)/*.0*/; - if (s > t) - { - s = t; - } - - // JOSHUA - e = first; - f = b - s; - while (0 < s) - { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; - } - s = (d - c)/*.0*/; - t = (last - d - 1)/*.0*/; - if (s > t) - { - s = t; - } - // BERENICE - e = b; - f = last - s; - while (0 < s) - { - SA.Swap(e, f); - s -= 1; - e += 1; - f += 1; - } - - a = first + (b - a); - c = last - (d - c); - b = v <= TdPAStar.AsOffset(PA[SA[a]] - 1) ? a : ss_partition(SA, partitionOffset, a, c, depth); - - if ((a - first) <= (last - c)) - { - if ((last - c) <= (c - b)) - { - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - stack.Push(c, last, depth, limit); - last = a; - } - else if ((a - first) <= (c - b)) - { - stack.Push(c, last, depth, limit); - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - last = a; - } - else - { - stack.Push(c, last, depth, limit); - stack.Push(first, a, depth, limit); - first = b; - last = c; - depth += 1; - limit = ss_ilg(c - b); - } - } - else - { - if ((a - first) <= (c - b)) - { - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - stack.Push(first, a, depth, limit); - first = c; - } - else if ((last - c) <= (c - b)) - { - stack.Push(first, a, depth, limit); - stack.Push(b, c, depth + 1, ss_ilg(c - b)); - first = c; - } - else - { - stack.Push(first, a, depth, limit); - stack.Push(c, last, depth, limit); - first = b; - last = c; - depth += 1; - limit = ss_ilg(c - b); - } - } - } - else - { - limit += 1; - if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) - { - first = 
ss_partition(SA, partitionOffset, first, last, depth); - limit = ss_ilg(last - first); - } - depth += 1; - } - } - } - - /// - /// Returns the pivot element - /// - private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) - { - Idx t = (last - first)/*.0*/; - SAPtr middle = first + (t / 2); - - if (t <= 512) - { - if (t <= 32) - { - return ss_median3(T, Td, SA, PA, first, middle, last - 1); - } - else - { - t >>= 2; - return ss_median5( - T, - Td, - SA, - PA, - first, - first + t, - middle, - last - 1 - t, - last - 1); - } - } - - t >>= 3; - first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); - middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); - last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); - - return ss_median3(T, Td, SA, PA, first, middle, last); - } - - /// Returns the median of five elements - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) - { - var get = new TdPAStarAccessor(T.span, SA, PA, Td); - if (get[v2] > get[v3]) - { - Swap(ref v2, ref v3); - } - if (get[v4] > get[v5]) - { - Swap(ref v4, ref v5); - } - if (get[v2] > get[v4]) - { - Swap(ref v2, ref v4); - Swap(ref v3, ref v5); - } - if (get[v1] > get[v3]) - { - Swap(ref v1, ref v3); - } - if (get[v1] > get[v4]) - { - Swap(ref v1, ref v4); - Swap(ref v3, ref v5); - } - if (get[v3] > get[v4]) - { - return v4; - } - else - { - return v3; - } - } - - /// - /// Returns the median of three elements - /// - private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) - { - //int get(int x) => T[Td + SA[PA + SA[x]]] - var get = new TdPAStarAccessor(T.span, SA, PA, Td); - - if (get[v1] > get[v2]) - { - Swap(ref v1, ref v2); - } - - if (get[v2] > get[v3]) - { - if (get[v1] > get[v3]) - { - return v1; - } - else - { - return v3; - 
} - } - else - { - return v2; - } - } - - /// Binary partition for substrings. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr ss_partition(Span SA, SAPtr paOffset, SAPtr first, SAPtr last, Idx depth) - { - Span PA = SA[paOffset..]; - - // JIMMY - var a = first - 1; - var b = last; - - while (true) - { - // JANINE - while (true) - { - a += 1; - if (!(a < b)) - { - break; - } - if (!((PA[SA[a]] + depth) >= (PA[SA[a] + 1] + 1))) - { - break; - } - - // loop body - SA[a] = ~SA[a]; - } - - // GEORGIO - while (true) - { - b -= 1; - if (!(a < b)) - { - break; - } - if (!((PA[SA[b]] + depth) < (PA[SA[b] + 1] + 1))) - { - break; - } - - // loop body is empty - } - - if (b <= a) - { - break; - } - - var t = ~SA[b]; - SA[b] = SA[a]; - SA[a] = t; - } - - if (first < a) - { - SA[first] = ~SA[first]; - } - return a; - } - - private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) - { - SAPtr i; - SAPtr j; - Idx t; - Idx r; - - i = last - 2; - // for 1 - while (first <= i) - { - t = SA[i]; - j = i + 1; - - // for 2 - while (true) - { - // cond for 2 - r = ss_compare(T, SA, PA + t, SA, PA + SA[j], depth); - if (!(0 < r)) - { - break; - } - - // body for 2 - - // do while - while (true) - { - SA[j - 1] = SA[j]; - - j += 1; - if (!((j < last) && SA[j] < 0)) - { - break; - } - } - - if (last <= j) - { - break; - } - - // iter for 2 (empty) - } - - if (r == 0) - { - SA[j] = ~SA[j]; - } - SA[j - 1] = t; - - // iter - i -= 1; - } - } - - /// - /// Fast log2, using lookup tables - /// - private static int ss_ilg(int n) - { - if ((n & 0xff00) > 0) - { - return 8 + lg_table[((n >> 8) & 0xff)]; - } - else - { - return 0 + lg_table[((n >> 0) & 0xff)]; - } - } - - /// Simple top-down heapsort. 
- private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) - { - Idx i; - var m = size; - Idx t; - - var Td = new IntAccessor(T.span[tdOffset..]); - var PA = SA_top[paOffset..]; - var SA = SA_top[first..]; - - if ((size % 2) == 0) - { - m -= 1; - if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) - { - SA.Swap(m, m / 2); - } - } - - // LADY - //TODO: checkme - for (i = (m / 2) - 1; i >= 0; i--) - { - ss_fixdown(Td, PA, SA, i, m); - } - - if ((size % 2) == 0) - { - SA.Swap(0, m); - ss_fixdown(Td, PA, SA, 0, m); - } - - // TRUMPET - //TODO: checkme - for (i = m - 1; i > 0; i--) - { - t = SA[0]; - SA[0] = SA[i]; - ss_fixdown(Td, PA, SA, 0, i); - SA[i] = t; - } - } - - private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i, Idx size) - { - Idx j, v, c, d, e, k; - - v = SA[i]; - c = Td[PA[v]]; - - // BEAST - while (true) - { - // cond - j = 2 * i + 1; - if (!(j < size)) - { - break; - } - - // body - k = j; - j += 1; - - d = Td[PA[SA[k]]]; - e = Td[PA[SA[j]]]; - if (d < e) - { - k = j; - d = e; - } - if (d <= c) - { - break; - } - - // iter - SA[i] = SA[k]; - i = k; - } - SA[i] = v; - } - - private static readonly Idx[] sqq_table_array = new[] - { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, - 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, - 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, - 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, - 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, - 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, - 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, - 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, - 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 
- 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, - 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, - 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, - 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, - 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, - 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 - }; - private static ReadOnlySpan sqq_table => sqq_table_array; - - /// - /// Fast sqrt, using lookup tables - /// - private static int ss_isqrt(int x) - { - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { - return SS_BLOCKSIZE; - } - - Idx e; - if ((x & 0xffff_0000) > 0) - { - if ((x & 0xff00_0000) > 0) - { - e = 24 + lg_table[((x >> 24) & 0xff)]; - } - else - { - e = 16 + lg_table[((x >> 16) & 0xff)]; - } - } - else - { - if ((x & 0x0000_ff00) > 0) - { - e = 8 + lg_table[(((x >> 8) & 0xff))]; - } - else - { - e = 0 + lg_table[(((x >> 0) & 0xff))]; - } - }; - - Idx y; - if (e >= 16) - { - y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); - if (e >= 24) - { - y = (y + 1 + x / y) >> 1; - } - y = (y + 1 + x / y) >> 1; - } - else if (e >= 8) - { - y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; - } - else - { - return sqq_table[x] >> 4; - } - - if (x < (y * y)) - { - return y - 1; - } - else - { - return y; - } - } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs new file mode 100644 index 0000000..6d82ed7 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -0,0 +1,1779 @@ +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; +using Idx = System.Int32; +using SAPtr = System.Int32; + +namespace 
DeltaQ.SuffixSorting.LibDivSufSort; +using static Crosscheck; +using static Utils; + +internal static class SsSort +{ + private const Idx SS_BLOCKSIZE = 1024; + + /// + /// Substring sort + /// + public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) + { + // Note: in most of this file "PA" seems to mean "Partition Array" - we're + // working on a slice of SA. This is also why SA (or a mutable reference to it) + // is passed around, so we don't run into lifetime issues. + + SAPtr a; + SAPtr b; + SAPtr middle; + SAPtr curbuf; + Idx j; + Idx k; + Idx curbufsize; + Idx limit; + Idx i; + + if (lastsuffix) + { + first += 1; + } + + limit = ss_isqrt(last - first); + if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < limit)) + { + if (SS_BLOCKSIZE < limit) + { + limit = SS_BLOCKSIZE; + } + middle = last - limit; + buf = middle; + bufsize = limit; + } + else + { + middle = last; + limit = 0; + } + + // ESPRESSO + a = first; + i = 0; + while (SS_BLOCKSIZE < (middle - a)) + { + crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); + ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); + + curbufsize = (last - (a + SS_BLOCKSIZE)); + curbuf = a + SS_BLOCKSIZE; + if (curbufsize <= bufsize) + { + curbufsize = bufsize; + curbuf = buf; + } + + // FRESCO + b = a; + k = SS_BLOCKSIZE; + j = i; + while ((j & 1) > 0) + { + crosscheck($"ss_swapmerge {k}"); + ss_swapmerge(T, SA, PA, b - k, b, b + k, curbuf, curbufsize, depth); + + // iter + b -= k; + k <<= 1; + j >>= 1; + } + + // iter + a += SS_BLOCKSIZE; + i += 1; + } + + crosscheck($"ss_mintrosort (pre-mariachi) a={a - PA} depth={depth}"); + ss_mintrosort(T, SA, PA, a, middle, depth); + + SA_dump(SA[first..last], "pre-mariachi"); + + // MARIACHI + k = SS_BLOCKSIZE; + while (i != 0) + { + if ((i & 1) > 0) + { + SA_dump(SA[first..last], "in-mariachi pre-swap"); + crosscheck($"a={a - first} middle={middle - first} 
bufsize={bufsize} depth={depth}"); + ss_swapmerge(T, SA, PA, a - k, a, middle, buf, bufsize, depth); + SA_dump(SA[first..last], "in-mariachi post-swap"); + a -= k; + } + + // iter + k <<= 1; + i >>= 1; + } + SA_dump(SA[first..last], "post-mariachi"); + + if (limit != 0) + { + crosscheck("ss_mintrosort limit!=0"); + ss_mintrosort(T, SA, PA, middle, last, depth); + SA_dump(SA[first..last], "post-mintrosort limit!=0"); + ss_inplacemerge(T, SA, PA, first, middle, last, depth); + SA_dump(SA[first..last], "post-inplacemerge limit!=0"); + } + SA_dump(SA[first..last], "post-limit!=0"); + + if (lastsuffix) + { + crosscheck("lastsuffix!"); + + // Insert last type B* suffix + Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; + //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; + //let SAI = SuffixArray(&mut PAi); + + a = first; + i = SA[first - 1]; + + // CELINE + while ((a < last) && ((SA[a] < 0) || (0 < ss_compare(T, PAi, (SAPtr)0, SA, PA + SA[a], depth)))) + { + // body + SA[a - 1] = SA[a]; + + // iter + a += 1; + } + SA[a - 1] = i; + } + } + + /// + /// Compare two suffixes + /// + private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + { + //TODO: possible perf improvement - JZ + + var U1 = depth + SAp1[p1]; + var U2 = depth + SAp2[p2]; + var U1n = SAp1[p1 + 1] + 2; + var U2n = SAp2[p2 + 1] + 2; + + while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) + { + U1 += 1; + U2 += 1; + } + + if (U1 < U1n) + { + if (U2 < U2n) + { + return T[U1] - T[U2]; + } + else + { + return 1; + } + } + else + { + if (U2 < U2n) + { + return -1; + } + else + { + return 0; + } + } + } + + private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) + { + SAPtr p; + SAPtr a; + SAPtr b; + Idx len; + Idx half; + Idx q; + Idx r; + Idx x; + + var original_first = first; + var original_last = last; + + SA_dump(SA[original_first..original_last], "inplacemerge start"); + + // 
FERRIS + while (true) + { + if (SA[last - 1] < 0) + { + x = 1; + p = PA + ~SA[last - 1]; + } + else + { + x = 0; + p = PA + SA[last - 1]; + } + + // LOIS + a = first; + len = (middle - first)/*.0*/; + half = len >> 1; + r = -1; + while (0 < len) + { + b = a + half; + q = ss_compare(T, SA, PA + (0 <= SA[b] ? SA[b] : ~SA[b]), SA, p, depth); + if (q < 0) + { + a = b + 1; + half -= (len & 1) ^ 1; + } + else + { + r = q; + } + + // iter + len = half; + half >>= 1; + } + SA_dump(SA[original_first..original_last], "post-lois"); + + if (a < middle) + { + if (r == 0) + { + SA[a] = ~SA[a]; + } + ss_rotate(SA, a, middle, last); + SA_dump(SA[original_first..original_last], "post-rotate"); + last -= middle - a; + middle = a; + if (first == middle) + { + break; + } + } + + last -= 1; + if (x != 0) + { + // TIMMY + last -= 1; + while (SA[last] < 0) + { + last -= 1; + } + SA_dump(SA[original_first..original_last], "post-timmy"); + } + if (middle == last) + { + break; + } + + SA_dump(SA[original_first..original_last], "ferris-wrap"); + } + } + + private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) + { + SAPtr a; + SAPtr b; + Idx t; + Idx l; + Idx r; + + var original_first = first; + var original_last = last; + + l = (middle - first)/*.0*/; + r = (last - middle)/*.0*/; + + SA_dump(SA[original_first..original_last], "pre-brendan"); + + // BRENDAN + while ((0 < l) && (0 < r)) + { + if (l == r) + { + ss_blockswap(SA, first, middle, l); + SA_dump(SA[original_first..original_last], "post-blockswap"); + break; + } + + if (l < r) + { + a = last - 1; + b = middle - 1; + t = SA[a]; + + // ALICE + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + if (b < first) + { + SA[a] = t; + last = a; + r -= l + 1; + if (r <= l) + { + break; + } + a -= 1; + b = middle - 1; + t = SA[a]; + } + } + SA_dump(SA[original_first..original_last], "post-alice"); + } + else + { + a = first; + b = middle; + t = SA[a]; + // ROBERT + while (true) + { + SA[a] = SA[b]; + a 
+= 1; + SA[b] = SA[a]; + b += 1; + if (last <= b) + { + SA[a] = t; + first = a + 1; + + l -= r + 1; + if (l <= r) + { + break; + } + a += 1; + b = middle; + t = SA[a]; + } + } + SA_dump(SA[original_first..original_last], "post-robert"); + } + } + } + + private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) + { + for (int i = 0; i < n; i++) + { + SA.Swap(a + i, b + i); + } + } + + /// D&C based merge + private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) + { + static Idx get_idx(Idx a) => 0 <= a ? a : ~a; + + void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) + { + crosscheck($"mc c={c}"); + if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) + { + crosscheck($"swapping a-first={a - first}"); + SA[a] = ~SA[a]; + } + if (((c & 4) > 0) && (ss_compare(T, SA, PA + get_idx(SA[b - 1]), SA, PA + SA[b], depth) == 0)) + { + crosscheck($"swapping b-first={b - first}"); + SA[b] = ~SA[b]; + } + } + + //MergeStack is the same as SsStack + using var stackOwner = SpanOwner.Allocate(MERGE_STACK_SIZE, AllocationMode.Clear); + var stack = new SsStack(stackOwner.Span); + + SAPtr l; + SAPtr r; + SAPtr lm; + SAPtr rm; + + Idx m; + Idx len; + Idx half; + Idx check; + Idx next; + + // BARBARIAN + check = 0; + while (true) + { + crosscheck($"barbarian check={check}"); + SA_dump(SA[first..last], "ss_swapmerge barbarian"); + SA_dump(SA[buf..(buf + bufsize)], "ss_swapmerge barbarian buf"); + if ((last - middle) <= bufsize) + { + crosscheck("<=bufsize"); + if ((first < middle) && (middle < last)) + { + crosscheck("f> 1; + while (0 < len) + { + crosscheck($"in-olanna len={len} half={half}"); + if (ss_compare( + T, + SA, + PA + get_idx(SA[middle + m + half]), + SA, + PA + get_idx(SA[middle - m - half - 1]), + depth) < 0) + { + m += half + 1; + half -= (len & 1) ^ 1; + } + + // iter + len = half; + half >>= 1; + } + + if (0 < 
m) + { + crosscheck($"0 < m, m={m}"); + lm = middle - m; + rm = middle + m; + ss_blockswap(SA, lm, middle, m); + r = middle; + l = middle; + next = 0; + if (rm < last) + { + if (SA[rm] < 0) + { + SA[rm] = ~SA[rm]; + if (first < lm) + { + // KOOPA + l -= 1; + while (SA[l] < 0) + { + l -= 1; + } + crosscheck($"post-koopa l-first={l - first}"); + next |= 4; + crosscheck($"post-koopa next={next}"); + } + next |= 1; + } + else if (first < lm) + { + // MUNCHER + while (SA[r] < 0) + { + r += 1; + } + crosscheck($"post-muncher r-first={r - first}"); + next |= 2; + } + } + + if ((l - first) <= (last - r)) + { + crosscheck("post-muncher l-f 0) && (r == middle)) + { + crosscheck($"post-muncher next ^= 6 old={next}"); + next ^= 6; + crosscheck($"post-muncher next ^= 6 new={next}"); + } + stack.Push(first, lm, l, (check & 3) | (next & 4)); + first = r; + middle = rm; + crosscheck($"post-muncher not, check was={check} next was={next}"); + check = (next & 3) | (check & 4); + crosscheck($"post-muncher not, check is={check} next is={next}"); + } + } + else + { + if (ss_compare( + T, + SA, + PA + get_idx(SA[middle - 1]), + SA, + PA + SA[middle], + depth) == 0) + { + SA[middle] = ~SA[middle]; + } + merge_check(T, SA, first, last, check); + SA_dump(SA[first..last], "ss_swapmerge pop 3"); + if (!stack.Pop(ref first, ref middle, ref last, ref check)) + { + return; + } + } + } + } + + /// Merge-backward with internal buffer + private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) + { + SAPtr p1; + SAPtr p2; + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr bufend; + + Idx t; + Idx r; + Idx x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(SA, buf, middle, (last - middle)); + + x = 0; + if (SA[bufend] < 0) + { + p1 = PA + ~SA[bufend]; + x |= 1; + } + else + { + p1 = PA + SA[bufend]; + } + if (SA[middle - 1] < 0) + { + p2 = PA + ~SA[middle - 1]; + x |= 2; + } + else + { + p2 = PA + SA[middle - 1]; + } + + // MARTIN 
+ a = last - 1; + t = SA[a]; + b = bufend; + c = middle - 1; + while (true) + { + r = ss_compare(T, SA, p1, SA, p2, depth); + if (0 < r) + { + if ((x & 1) > 0) + { + // BAPTIST + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + + // cond + if (!(SA[b] < 0)) + { + break; + } + } + x ^= 1; + } + SA[a] = SA[b]; + a -= 1; + if (b <= buf) + { + SA[buf] = t; + break; + } + SA[b] = SA[a]; + b -= 1; + if (SA[b] < 0) + { + p1 = PA + ~SA[b]; + x |= 1; + } + else + { + p1 = PA + SA[b]; + } + } + else if (r < 0) + { + if ((x & 2) > 0) + { + // JULES + while (true) + { + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + + // cond + if (~SA[c] < 0) + { + break; + } + } + x ^= 2; + } + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + if (c < first) + { + // GARAMOND + while (buf < b) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + } + SA[a] = SA[b]; + SA[b] = t; + break; + } + if (SA[c] < 0) + { + p2 = PA + ~SA[c]; + x |= 2; + } + else + { + p2 = PA + SA[c]; + } + } + else + { + if ((x & 1) > 0) + { + // XAVIER + while (true) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + if (!(SA[b] < 0)) + { + break; + } + } + x ^= 1; + } + SA[a] = ~SA[b]; + a -= 1; + if (b <= buf) + { + SA[buf] = t; + break; + } + SA[b] = SA[a]; + b -= 1; + if ((x & 2) > 0) + { + // WALTER + while (true) + { + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + + // cond + if (!(SA[c] < 0)) + { + break; + } + } + x ^= 2; + } + SA[a] = SA[c]; + a -= 1; + SA[c] = SA[a]; + c -= 1; + if (c < first) + { + // ZENITH + while (buf < b) + { + SA[a] = SA[b]; + a -= 1; + SA[b] = SA[a]; + b -= 1; + } + SA[a] = SA[b]; + SA[b] = t; + break; + } + if (SA[b] < 0) + { + p1 = PA + ~SA[b]; + x |= 1; + } + else + { + p1 = PA + SA[b]; + } + if (SA[c] < 0) + { + p2 = PA + ~SA[c]; + x |= 2; + } + else + { + p2 = PA + SA[c]; + } + } + } + } + + /// Merge-forward with internal buffer + private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr 
middle, SAPtr last, SAPtr buf, Idx depth) + { + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr bufend; + Idx t; + Idx r; + + SA_dump(SA[first..last], "ss_mergeforward start"); + + bufend = buf + (middle - first) - 1; + ss_blockswap(SA, buf, first, middle - first); + + // IGNACE + a = first; + t = SA[a]; + b = buf; + c = middle; + while (true) + { + r = ss_compare(T, SA, PA + SA[b], SA, PA + SA[c], depth); + if (r < 0) + { + // RONALD + while (true) + { + SA[a] = SA[b]; + a += 1; + if (bufend <= b) + { + SA[bufend] = t; + return; + } + SA[b] = SA[a]; + b += 1; + + // cond + if (!(SA[b] < 0)) + { + break; + } + } + } + else if (r > 0) + { + // JEREMY + while (true) + { + SA[a] = SA[c]; + a += 1; + SA[c] = SA[a]; + c += 1; + if (last <= c) + { + // TONY + while (b < bufend) + { + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + } + SA[a] = SA[b]; + SA[b] = t; + return; + } + + // cond (JEMERY) + if (!(SA[c] < 0)) + { + break; + } + } + } + else + { + SA[c] = ~SA[c]; + // JENS + while (true) + { + SA[a] = SA[b]; + a += 1; + if (bufend <= b) + { + SA[bufend] = t; + return; + } + SA[b] = SA[a]; + b += 1; + + // cond (JENS) + if (!(SA[b] < 0)) + { + break; + } + } + + // DIMITER + while (true) + { + SA[a] = SA[c]; + a += 1; + SA[c] = SA[a]; + c += 1; + if (last <= c) + { + // MIDORI + while (b < bufend) + { + SA[a] = SA[b]; + a += 1; + SA[b] = SA[a]; + b += 1; + } + SA[a] = SA[b]; + SA[b] = t; + return; + } + + // cond (DIMITER) + if (!(SA[c] < 0)) + { + break; + } + } + } + } + } + + private struct SsStackItem + { + public SAPtr a; + public SAPtr b; + public SAPtr c; + public Idx d; + } + + private const int SS_STACK_SIZE = 16; + private const int MERGE_STACK_SIZE = 32; + private ref struct SsStack + { + public readonly Span Items; + public int Size; + + public SsStack(Span items) + { + Items = items; + Size = 0; + } + + public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) + { + Trace.Assert(Size < Items.Length); + ref SsStackItem item = ref Items[Size++]; + item.a = a; + 
item.b = b; + item.c = c; + item.d = d; + } + public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) + { + //Debug.Assert(Size > 0); + if (Size == 0) return false; + + ref SsStackItem item = ref Items[--Size]; + a = item.a; + b = item.b; + c = item.c; + d = item.d; + return true; + } + } + + private const Idx SS_INSERTIONSORT_THRESHOLD = 8; + + private ref struct SpanOffsetAccessor + { + private readonly Span _span; + private readonly int _offset; + + public SpanOffsetAccessor(Span span, int offset) + { + _span = span; + _offset = offset; + } + + public ref T this[int index] => ref _span[_offset + index]; + } + + private ref struct ReadOnlySpanOffsetAccessor + { + private readonly ReadOnlySpan _span; + private readonly int _offset; + + public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) + { + _span = span; + _offset = offset; + } + + public ref readonly T this[int index] => ref _span[_offset + index]; + } + + private ref struct TdPAStarAccessor + { + private readonly ReadOnlySpanOffsetAccessor _TO; + private readonly ReadOnlySpan _SA; + private readonly ReadOnlySpan _PA; + private readonly IntAccessor _TD; + + public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) + { + _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); + + _SA = SA; + _PA = SA[partitionOffset..]; + _TD = new(T[tdOffset..]); + } + + public readonly int this[int index] => _TD[_PA[_SA[index]]]; + + public readonly int AsOffset(int index) => _TO[index]; + } + + /// + /// Multikey introsort for medium size groups + /// + private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, SAPtr first, SAPtr last, Idx depth) + { + var PA = SA[partitionOffset..]; + + using var stackOwner = SpanOwner.Allocate(SS_STACK_SIZE); + var stack = new SsStack(stackOwner.Span); + + SAPtr a; + SAPtr b; + SAPtr c; + SAPtr d; + SAPtr e; + SAPtr f; + + Idx s; + Idx t; + + Idx limit; + Idx v; + Idx x = 0; + + // RENEE + limit = ss_ilg(last - 
first); + while (true) + { + if ((last - first) <= SS_INSERTIONSORT_THRESHOLD) + { + if (1 < (last - first)) + { + ss_insertionsort(T, SA, partitionOffset, first, last, depth); + } + if (!stack.Pop(ref first, ref last, ref depth, ref limit)) + { + return; + } + continue; + } + + var tdOffset = depth; + var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); + + /*readonly*/ + var old_limit = limit; + limit -= 1; + if (old_limit == 0) + { + SA_dump(SA[first..last], "before heapsort"); + ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); + SA_dump(SA[first..last], "after heapsort"); + } + + if (limit < 0) + { + a = first + 1; + v = TdPAStar[first]; + + // DAVE + while (a < last) + { + x = TdPAStar[a]; + if (x != v) + { + if (1 < (a - first)) + { + break; + } + v = x; + first = a; + } + + // loop iter + a += 1; + } + + if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) + { + first = ss_partition(SA, partitionOffset, first, a, depth); + } + if ((a - first) <= (last - a)) + { + if (1 < (a - first)) + { + stack.Push(a, last, depth, -1); + last = a; + depth += 1; + limit = ss_ilg(a - first); + } + else + { + first = a; + limit = -1; + } + } + else + { + if (1 < (last - a)) + { + stack.Push(first, a, depth + 1, ss_ilg(a - first)); + first = a; + limit = -1; + } + else + { + last = a; + depth += 1; + limit = ss_ilg(a - first); + } + } + continue; + } + + // choose pivot + a = ss_pivot(T, tdOffset, SA, partitionOffset, first, last); + v = TdPAStar[a]; + SA.Swap(first, a); + + // partition + // NORA + b = first; + while (true) + { + b += 1; + if (!(b < last)) + { + break; + } + x = TdPAStar[b]; + if (!(x == v)) + { + break; + } + // body + } + a = b; + if ((a < last) && (x < v)) + { + // STAN + while (true) + { + b += 1; + if (!(b < last)) + { + break; + } + x = TdPAStar[b]; + if (!(x <= v)) + { + break; + } + // body + if (x == v) + { + SA.Swap(b, a); + a += 1; + } + } + } + + // NATHAN + c = last; + while (true) + { + c -= 1; + if (!(b < 
c)) + { + break; + } + x = TdPAStar[c]; + if (!(x == v)) + { + break; + } + // body + } + d = c; + if ((b < d) && (x > v)) + { + // JACOB + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = TdPAStar[c]; + if (!(x >= v)) + { + break; + } + // body + if (x == v) + { + SA.Swap(c, d); + d -= 1; + } + } + } + + // RITA + while (b < c) + { + SA.Swap(b, c); + // ROMEO + while (true) + { + b += 1; + if (!(b < c)) + { + break; + } + x = TdPAStar[b]; + if (!(x <= v)) + { + break; + } + // body + if (x == v) + { + SA.Swap(b, a); + a += 1; + } + } + // JULIET + while (true) + { + c -= 1; + if (!(b < c)) + { + break; + } + x = TdPAStar[c]; + if (!(x >= v)) + { + break; + } + // body + if (x == v) + { + SA.Swap(c, d); + d -= 1; + } + } + } + + if (a <= d) + { + c = b - 1; + s = (a - first)/*.0*/; + t = (b - a)/*.0*/; + if (s > t) + { + s = t; + } + + // JOSHUA + e = first; + f = b - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } + s = (d - c)/*.0*/; + t = (last - d - 1)/*.0*/; + if (s > t) + { + s = t; + } + // BERENICE + e = b; + f = last - s; + while (0 < s) + { + SA.Swap(e, f); + s -= 1; + e += 1; + f += 1; + } + + a = first + (b - a); + c = last - (d - c); + b = v <= TdPAStar.AsOffset(PA[SA[a]] - 1) ? 
a : ss_partition(SA, partitionOffset, a, c, depth); + + if ((a - first) <= (last - c)) + { + if ((last - c) <= (c - b)) + { + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(c, last, depth, limit); + last = a; + } + else if ((a - first) <= (c - b)) + { + stack.Push(c, last, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } + else + { + stack.Push(c, last, depth, limit); + stack.Push(first, a, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); + } + } + else + { + if ((a - first) <= (c - b)) + { + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + stack.Push(first, a, depth, limit); + first = c; + } + else if ((last - c) <= (c - b)) + { + stack.Push(first, a, depth, limit); + stack.Push(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } + else + { + stack.Push(first, a, depth, limit); + stack.Push(c, last, depth, limit); + first = b; + last = c; + depth += 1; + limit = ss_ilg(c - b); + } + } + } + else + { + limit += 1; + if (TdPAStar.AsOffset(PA[SA[first]] - 1) < v) + { + first = ss_partition(SA, partitionOffset, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } + } + + /// + /// Returns the pivot element + /// + private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) + { + Idx t = (last - first)/*.0*/; + SAPtr middle = first + (t / 2); + + if (t <= 512) + { + if (t <= 32) + { + return ss_median3(T, Td, SA, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5( + T, + Td, + SA, + PA, + first, + first + t, + middle, + last - 1 - t, + last - 1); + } + } + + t >>= 3; + first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); + middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); + last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + + return ss_median3(T, Td, SA, PA, first, middle, last); + } + + /// Returns the median of five elements + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) + { + var get = new TdPAStarAccessor(T.span, SA, PA, Td); + if (get[v2] > get[v3]) + { + Swap(ref v2, ref v3); + } + if (get[v4] > get[v5]) + { + Swap(ref v4, ref v5); + } + if (get[v2] > get[v4]) + { + Swap(ref v2, ref v4); + Swap(ref v3, ref v5); + } + if (get[v1] > get[v3]) + { + Swap(ref v1, ref v3); + } + if (get[v1] > get[v4]) + { + Swap(ref v1, ref v4); + Swap(ref v3, ref v5); + } + if (get[v3] > get[v4]) + { + return v4; + } + else + { + return v3; + } + } + + /// + /// Returns the median of three elements + /// + private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) + { + //int get(int x) => T[Td + SA[PA + SA[x]]] + var get = new TdPAStarAccessor(T.span, SA, PA, Td); + + if (get[v1] > get[v2]) + { + Swap(ref v1, ref v2); + } + + if (get[v2] > get[v3]) + { + if (get[v1] > get[v3]) + { + return v1; + } + else + { + return v3; + } + } + else + { + return v2; + } + } + + /// Binary partition for substrings. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr ss_partition(Span SA, SAPtr paOffset, SAPtr first, SAPtr last, Idx depth) + { + Span PA = SA[paOffset..]; + + // JIMMY + var a = first - 1; + var b = last; + + while (true) + { + // JANINE + while (true) + { + a += 1; + if (!(a < b)) + { + break; + } + if (!((PA[SA[a]] + depth) >= (PA[SA[a] + 1] + 1))) + { + break; + } + + // loop body + SA[a] = ~SA[a]; + } + + // GEORGIO + while (true) + { + b -= 1; + if (!(a < b)) + { + break; + } + if (!((PA[SA[b]] + depth) < (PA[SA[b] + 1] + 1))) + { + break; + } + + // loop body is empty + } + + if (b <= a) + { + break; + } + + var t = ~SA[b]; + SA[b] = SA[a]; + SA[a] = t; + } + + if (first < a) + { + SA[first] = ~SA[first]; + } + return a; + } + + private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) + { + SAPtr i; + SAPtr j; + Idx t; + Idx r; + + i = last - 2; + // for 1 + while (first <= i) + { + t = SA[i]; + j = i + 1; + + // for 2 + while (true) + { + // cond for 2 + r = ss_compare(T, SA, PA + t, SA, PA + SA[j], depth); + if (!(0 < r)) + { + break; + } + + // body for 2 + + // do while + while (true) + { + SA[j - 1] = SA[j]; + + j += 1; + if (!((j < last) && SA[j] < 0)) + { + break; + } + } + + if (last <= j) + { + break; + } + + // iter for 2 (empty) + } + + if (r == 0) + { + SA[j] = ~SA[j]; + } + SA[j - 1] = t; + + // iter + i -= 1; + } + } + + /// + /// Fast log2, using lookup tables + /// + private static int ss_ilg(int n) + { + if ((n & 0xff00) > 0) + { + return 8 + lg_table[((n >> 8) & 0xff)]; + } + else + { + return 0 + lg_table[((n >> 0) & 0xff)]; + } + } + + /// Simple top-down heapsort. 
+ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) + { + Idx i; + var m = size; + Idx t; + + var Td = new IntAccessor(T.span[tdOffset..]); + var PA = SA_top[paOffset..]; + var SA = SA_top[first..]; + + if ((size % 2) == 0) + { + m -= 1; + if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) + { + SA.Swap(m, m / 2); + } + } + + // LADY + //TODO: checkme + for (i = (m / 2) - 1; i >= 0; i--) + { + ss_fixdown(Td, PA, SA, i, m); + } + + if ((size % 2) == 0) + { + SA.Swap(0, m); + ss_fixdown(Td, PA, SA, 0, m); + } + + // TRUMPET + //TODO: checkme + for (i = m - 1; i > 0; i--) + { + t = SA[0]; + SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } + } + + private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i, Idx size) + { + Idx j, v, c, d, e, k; + + v = SA[i]; + c = Td[PA[v]]; + + // BEAST + while (true) + { + // cond + j = 2 * i + 1; + if (!(j < size)) + { + break; + } + + // body + k = j; + j += 1; + + d = Td[PA[SA[k]]]; + e = Td[PA[SA[j]]]; + if (d < e) + { + k = j; + d = e; + } + if (d <= c) + { + break; + } + + // iter + SA[i] = SA[k]; + i = k; + } + SA[i] = v; + } + + private static readonly Idx[] sqq_table_array = new[] + { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, + 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, + 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, + 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, + 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, + 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 
+ 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, + 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, + 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, + 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, + 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 + }; + private static ReadOnlySpan sqq_table => sqq_table_array; + + /// + /// Fast sqrt, using lookup tables + /// + private static int ss_isqrt(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + + Idx e; + if ((x & 0xffff_0000) > 0) + { + if ((x & 0xff00_0000) > 0) + { + e = 24 + lg_table[((x >> 24) & 0xff)]; + } + else + { + e = 16 + lg_table[((x >> 16) & 0xff)]; + } + } + else + { + if ((x & 0x0000_ff00) > 0) + { + e = 8 + lg_table[(((x >> 8) & 0xff))]; + } + else + { + e = 0 + lg_table[(((x >> 0) & 0xff))]; + } + }; + + Idx y; + if (e >= 16) + { + y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); + if (e >= 24) + { + y = (y + 1 + x / y) >> 1; + } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + + if (x < (y * y)) + { + return y - 1; + } + else + { + return y; + } + } +} + diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 4de0f0a..8272559 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -15,7 +15,7 @@ internal static class Utils 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - }; + }; internal static ReadOnlySpan lg_table => 
lg_table_array; [MethodImpl(MethodImplOptions.AggressiveInlining)] From 454a6bdd323273f0fcb4b93606cb5f754f5d4740 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 20:44:12 -0500 Subject: [PATCH 193/325] Break out more types and remove space-wasters --- .../ReadOnlySpanOffsetAccessor.cs | 17 +++ .../RsDivSufSort.cs | 2 - .../SpanOffsetAccessor.cs | 17 +++ .../SsSort.cs | 105 ++++-------------- .../TdPAStarAccessor.cs | 24 ++++ .../TrSort.cs | 4 - 6 files changed, 79 insertions(+), 90 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs new file mode 100644 index 0000000..3b437f8 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs @@ -0,0 +1,17 @@ +using System; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal ref struct ReadOnlySpanOffsetAccessor +{ + private readonly ReadOnlySpan _span; + private readonly int _offset; + + public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) + { + _span = span; + _offset = offset; + } + + public ref readonly T this[int index] => ref _span[_offset + index]; +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs index 0e82cc9..48c62dc 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs @@ -1,13 +1,11 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; -using System.Runtime.CompilerServices; using Idx = System.Int32; using SAPtr = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort; using static Crosscheck; -using 
static Utils; internal static class DivSufSort { diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs new file mode 100644 index 0000000..e16fed9 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs @@ -0,0 +1,17 @@ +using System; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal ref struct SpanOffsetAccessor +{ + private readonly Span _span; + private readonly int _offset; + + public SpanOffsetAccessor(Span span, int offset) + { + _span = span; + _offset = offset; + } + + public ref T this[int index] => ref _span[_offset + index]; +} \ No newline at end of file diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 6d82ed7..56a3be8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1,11 +1,7 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Runtime.CompilerServices; -using System.Text; -using System.Threading.Tasks; using Idx = System.Int32; using SAPtr = System.Int32; @@ -66,7 +62,7 @@ public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA crosscheck($"ss_mintrosort (espresso) a={a - PA} depth={depth}"); ss_mintrosort(T, SA, PA, a, a + SS_BLOCKSIZE, depth); - curbufsize = (last - (a + SS_BLOCKSIZE)); + curbufsize = last - (a + SS_BLOCKSIZE); curbuf = a + SS_BLOCKSIZE; if (curbufsize <= bufsize) { @@ -227,7 +223,7 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr // LOIS a = first; - len = (middle - first)/*.0*/; + len = middle - first; half = len >> 1; r = -1; while (0 < len) @@ -288,17 +284,14 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) { - 
SAPtr a; - SAPtr b; - Idx t; - Idx l; - Idx r; + SAPtr a, b; + Idx t, l, r; var original_first = first; var original_last = last; - l = (middle - first)/*.0*/; - r = (last - middle)/*.0*/; + l = middle - first; + r = last - middle; SA_dump(SA[original_first..original_last], "pre-brendan"); @@ -578,19 +571,12 @@ void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) /// Merge-backward with internal buffer private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { - SAPtr p1; - SAPtr p2; - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr bufend; + SAPtr p1, p2, a, b, c, bufend; - Idx t; - Idx r; - Idx x; + Idx t, r, x; bufend = buf + (last - middle) - 1; - ss_blockswap(SA, buf, middle, (last - middle)); + ss_blockswap(SA, buf, middle, last - middle); x = 0; if (SA[bufend] < 0) @@ -969,55 +955,6 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) private const Idx SS_INSERTIONSORT_THRESHOLD = 8; - private ref struct SpanOffsetAccessor - { - private readonly Span _span; - private readonly int _offset; - - public SpanOffsetAccessor(Span span, int offset) - { - _span = span; - _offset = offset; - } - - public ref T this[int index] => ref _span[_offset + index]; - } - - private ref struct ReadOnlySpanOffsetAccessor - { - private readonly ReadOnlySpan _span; - private readonly int _offset; - - public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) - { - _span = span; - _offset = offset; - } - - public ref readonly T this[int index] => ref _span[_offset + index]; - } - - private ref struct TdPAStarAccessor - { - private readonly ReadOnlySpanOffsetAccessor _TO; - private readonly ReadOnlySpan _SA; - private readonly ReadOnlySpan _PA; - private readonly IntAccessor _TD; - - public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) - { - _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); - - _SA = SA; - _PA = SA[partitionOffset..]; - _TD = 
new(T[tdOffset..]); - } - - public readonly int this[int index] => _TD[_PA[_SA[index]]]; - - public readonly int AsOffset(int index) => _TO[index]; - } - /// /// Multikey introsort for medium size groups /// @@ -1068,7 +1005,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf if (old_limit == 0) { SA_dump(SA[first..last], "before heapsort"); - ss_heapsort(T, tdOffset, SA, partitionOffset, first, (last - first)); + ss_heapsort(T, tdOffset, SA, partitionOffset, first, last - first); SA_dump(SA[first..last], "after heapsort"); } @@ -1269,8 +1206,8 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf if (a <= d) { c = b - 1; - s = (a - first)/*.0*/; - t = (b - a)/*.0*/; + s = a - first; + t = b - a; if (s > t) { s = t; @@ -1286,8 +1223,8 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf e += 1; f += 1; } - s = (d - c)/*.0*/; - t = (last - d - 1)/*.0*/; + s = d - c; + t = last - d - 1; if (s > t) { s = t; @@ -1374,7 +1311,7 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf /// private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) { - Idx t = (last - first)/*.0*/; + Idx t = last - first; SAPtr middle = first + (t / 2); if (t <= 512) @@ -1600,11 +1537,11 @@ private static int ss_ilg(int n) { if ((n & 0xff00) > 0) { - return 8 + lg_table[((n >> 8) & 0xff)]; + return 8 + lg_table[(n >> 8) & 0xff]; } else { - return 0 + lg_table[((n >> 0) & 0xff)]; + return 0 + lg_table[(n >> 0) & 0xff]; } } @@ -1728,22 +1665,22 @@ private static int ss_isqrt(int x) { if ((x & 0xff00_0000) > 0) { - e = 24 + lg_table[((x >> 24) & 0xff)]; + e = 24 + lg_table[(x >> 24) & 0xff]; } else { - e = 16 + lg_table[((x >> 16) & 0xff)]; + e = 16 + lg_table[(x >> 16) & 0xff]; } } else { if ((x & 0x0000_ff00) > 0) { - e = 8 + lg_table[(((x >> 8) & 0xff))]; + e = 8 + lg_table[(x >> 8) & 0xff]; } else { - e = 0 + lg_table[(((x >> 0) & 0xff))]; + e = 0 + 
lg_table[(x >> 0) & 0xff]; } }; diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs new file mode 100644 index 0000000..3f6f71f --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs @@ -0,0 +1,24 @@ +using System; + +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal ref struct TdPAStarAccessor +{ + private readonly ReadOnlySpanOffsetAccessor _TO; + private readonly ReadOnlySpan _SA; + private readonly ReadOnlySpan _PA; + private readonly IntAccessor _TD; + + public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) + { + _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); + + _SA = SA; + _PA = SA[partitionOffset..]; + _TD = new(T[tdOffset..]); + } + + public readonly int this[int index] => _TD[_PA[_SA[index]]]; + + public readonly int AsOffset(int index) => _TO[index]; +} diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index ce9a1a8..8d9d7cf 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -1,11 +1,7 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Runtime.CompilerServices; -using System.Text; -using System.Threading.Tasks; using Idx = System.Int32; using SAPtr = System.Int32; From ee1a800b9d0369c10d33188f0ac761c23319152c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 20:48:10 -0500 Subject: [PATCH 194/325] Remove space-wasters --- .../SsSort.cs | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 56a3be8..9181a4a 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -18,9 +18,7 @@ internal static class SsSort /// public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) { - // Note: in most of this file "PA" seems to mean "Partition Array" - we're - // working on a slice of SA. This is also why SA (or a mutable reference to it) - // is passed around, so we don't run into lifetime issues. + // "PA" = "Partition Array", slice of SA SAPtr a; SAPtr b; @@ -130,9 +128,6 @@ public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA // Insert last type B* suffix Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; - //let mut PAi:[Idx; 2] = [SA[PA + SA[first - 1]], n - 2]; - //let SAI = SuffixArray(&mut PAi); - a = first; i = SA[first - 1]; @@ -193,14 +188,8 @@ private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) { - SAPtr p; - SAPtr a; - SAPtr b; - Idx len; - Idx half; - Idx q; - Idx r; - Idx x; + SAPtr p, a, b; + Idx len, half, q, r, x; var original_first = first; var original_last = last; From ac3fe946d613f64589fe03217d754e374b5639d3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 15 Dec 2021 20:49:38 -0500 Subject: [PATCH 195/325] Break out Budget --- .../Budget.cs | 35 +++++++++++++++++++ .../TrSort.cs | 35 ------------------- 2 files changed, 35 insertions(+), 35 deletions(-) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/Budget.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Budget.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Budget.cs new file mode 100644 index 0000000..8751015 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Budget.cs @@ -0,0 +1,35 @@ +namespace DeltaQ.SuffixSorting.LibDivSufSort; +internal ref struct Budget +{ + public int Chance; + public int Remain; + public int IncVal; + public int Count; + + public Budget(int chance, int incVal) + { + Chance = chance; + Remain = incVal; + IncVal = incVal; + Count = 0; + } + + public bool Check(int size) + { + if (size <= Remain) + { + Remain -= size; + return true; + } + + if (Chance == 0) + { + Count += size; + return false; + } + + Remain += IncVal - size; + Chance -= 1; + return true; + } +} \ No newline at end of file diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index 8d9d7cf..6e37fdc 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -38,41 +38,6 @@ private static int tr_ilg(int n) } } - private ref struct Budget - { - public int Chance; - public int Remain; - public int IncVal; - public int Count; - - public Budget(int chance, int incVal) - { - Chance = chance; - Remain = incVal; - IncVal = incVal; - Count = 0; - } - - public bool Check(int size) - { - if (size <= Remain) - { - Remain -= size; - return true; - } - - if (Chance == 0) - { - Count += size; - return false; - } - - Remain += IncVal - size; - Chance -= 1; - return true; - } - } - /// Tandem repeat sort internal static void trsort(SAPtr ISA, Span SA, int n, int depth) { From b8c854d61b3be0a3358bf3879b744b3d326a649c Mon Sep 17 00:00:00 
2001 From: "J. Zebedee" Date: Wed, 15 Dec 2021 20:51:13 -0500 Subject: [PATCH 196/325] Rename DivSufSort --- .../{RsDivSufSort.cs => DivSufSort.cs} | 0 src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs | 2 -- 2 files changed, 2 deletions(-) rename src/DeltaQ.SuffixSorting.LibDivSufSort/{RsDivSufSort.cs => DivSufSort.cs} (100%) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs similarity index 100% rename from src/DeltaQ.SuffixSorting.LibDivSufSort/RsDivSufSort.cs rename to src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index 177e535..f1c57ba 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -22,8 +22,6 @@ public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) ThrowHelper(); } - //TODO: add 0/1/2 fast cases - DivSufSort.divsufsort(textBuffer, suffixBuffer); return suffixBuffer.Length; } From c7a5c2713ce7b88fbcddf27da4d52b8fd9dcc50d Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 16 Dec 2021 06:31:14 -0500 Subject: [PATCH 197/325] Make SpanExtensions internal --- src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs index 6cab4b1..2326c50 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs @@ -3,7 +3,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { - public static class SpanExtensions + internal static class SpanExtensions { [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Swap(this Span span, int i, int j) From 39445a898f2728c040929911980620fb85e5a154 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 06:31:34 -0500 Subject: [PATCH 198/325] Remove old comments --- src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs index 48c62dc..d39b129 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs @@ -114,8 +114,6 @@ private static void construct_SA(ReadOnlySpan T, Span SA, Span A // Construct the suffix array by using the sorted order of type B suffixes c2 = T[n - 1]; k = A[c2]; - //TODO: check this - //SA[k] = T[n - 2] < c2 ? !(n - 1) : n - 1; SA[k] = T[n - 2] < c2 ? ~(n - 1) : n - 1; k += 1; // Scan the suffix array from left to right @@ -293,7 +291,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) SAPtr PAb = n - m; SAPtr ISAb = m; - //for i in (0.. 
= (m - 2)).rev() { for (i = m - 2; i >= 0; i--) { t = SA[PAb + i]; @@ -453,7 +450,6 @@ public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) j -= 1; { var pos = SA[ISAb + j]; - //TODO: check complement SA[pos] = (t == 0 || (1 < (t - i))) ? t : ~t; } } From 253c92302a61ff35226460931601a0569ef0e3a6 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 06:38:42 -0500 Subject: [PATCH 199/325] Remove old comments --- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 2 -- src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs | 6 ------ 2 files changed, 8 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 9181a4a..2b25606 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1555,7 +1555,6 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S } // LADY - //TODO: checkme for (i = (m / 2) - 1; i >= 0; i--) { ss_fixdown(Td, PA, SA, i, m); @@ -1568,7 +1567,6 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S } // TRUMPET - //TODO: checkme for (i = m - 1; i > 0; i--) { t = SA[0]; diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index 6e37fdc..218d7da 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -222,7 +222,6 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // update ranks if (a < last) { - //TODO: crosscheck crosscheck("ranks a SA } if (b < last) { - //TODO: crosscheck crosscheck("ranks b SA_top, SAPtr first, } // LISA - //TODO: checkme - //for i in (0..(m / 2)).rev() { for (i = (m / 2) - 1; i >= 0; i--) { crosscheck($"LISA i={i}"); @@ -942,8 +938,6 @@ private static void tr_heapsort(SAPtr isadOffset, Span SA_top, SAPtr first, tr_fixdown(ISAd, SA, 0, m); } // MARK - //TODO: checkme - 
//for i in (1..m).rev() { for (i = m - 1; i > 0; i--) { crosscheck($"MARK i={i}"); From 865b3ccd815daaaea5f6be569af22e8f1d137fa4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 20:09:45 -0500 Subject: [PATCH 200/325] Update .gitignore --- .gitignore | 287 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 245 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 2676c3e..9491a2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,34 +1,85 @@ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore # User-specific files +*.rsuser *.suo *.user +*.userosscache *.sln.docstates -# Build results +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* +# Build results [Dd]ebug/ +[Dd]ebugPublic/ [Rr]elease/ +[Rr]eleases/ x64/ -build/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ [Bb]in/ [Oo]bj/ +[Oo]ut/ +[Ll]og/ +[Ll]ogs/ -# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets -!packages/*/build/ +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ # MSTest test Results [Tt]est[Rr]esult*/ [Bb]uild[Ll]og.* +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio *_i.c *_p.c +*_h.h *.ilk *.meta *.obj +*.iobj *.pch *.pdb +*.ipdb *.pgc *.pgd *.rsp @@ -38,26 +89,40 @@ build/ *.tlh *.tmp 
*.tmp_proj +*_wpftmp.csproj *.log *.vspscc *.vssscc .builds *.pidb -*.log +*.svclog *.scc +# Chutzpah Test files +_Chutzpah* + # Visual C++ cache files ipch/ *.aps *.ncb +*.opendb *.opensdf *.sdf *.cachefile +*.VC.db +*.VC.VC.opendb # Visual Studio profiler *.psess *.vsp *.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ # Guidance Automation Toolkit *.gpState @@ -65,6 +130,7 @@ ipch/ # ReSharper is a .NET coding add-in _ReSharper*/ *.[Rr]e[Ss]harper +*.DotSettings.user # TeamCity is a build add-in _TeamCity* @@ -72,9 +138,30 @@ _TeamCity* # DotCover is a Code Coverage Tool *.dotCover +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + # NCrunch -*.ncrunch* +_NCrunch_* .*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ # Installshield output folder [Ee]xpress/ @@ -93,68 +180,184 @@ DocProject/Help/html publish/ # Publish Web Output -*.Publish.xml - -# NuGet Packages Directory -## TODO: If you have NuGet Package Restore enabled, uncomment the next line -packages/ - -# Windows Azure Build Output -csx +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ *.build.csdef -# Windows Store app package directory +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ # Others -sql/ -*.Cache ClientBin/ -[Ss]tyle[Cc]op.* ~$* *~ *.dbmdl -*.[Pp]ublish.xml +*.dbproj.schemaview +*.jfm *.pfx *.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ # RIA/Silverlight projects Generated_Code/ -# Backup & report files from converting an old project file to a newer -# Visual Studio version. Backup files are not needed, because we have git ;-) +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) _UpgradeReport_Files/ Backup*/ UpgradeLog*.XML UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak # SQL Server files -App_Data/*.mdf -App_Data/*.ldf +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ +# MSBuild Binary and Structured Log +*.binlog -#LightSwitch generated files -GeneratedArtifacts/ -_Pvt_Extensions/ -ModelManifest.xml +# NVidia Nsight GPU debugger configuration file +*.nvuser -# ========================= -# Windows detritus -# ========================= +# MFractors (Xamarin productivity tool) working folder +.mfractor/ -# Windows image file caches -Thumbs.db -ehthumbs.db +# Local History for Visual Studio +.localhistory/ -# Folder config file -Desktop.ini +# 
BeatPulse healthcheck temp database +healthchecksdb -# Recycle Bin used on file shares -$RECYCLE.BIN/ +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ -# Mac desktop service store files -.DS_Store -*.snk -/.vs +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ -*.lock.json \ No newline at end of file +# Fody - auto-generated XML schema +FodyWeavers.xsd \ No newline at end of file From 8769e71429dbd201819a39f9790672f505834506 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 20:42:09 -0500 Subject: [PATCH 201/325] Use SequenceCompareTo-based ss_compare --- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 2b25606..cb0c964 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -144,13 +144,18 @@ public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA } } + public static bool new_ss_compare_feature_flag = false; /// /// Compare two suffixes /// - private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) - { - //TODO: possible perf improvement - JZ + internal static int ss_compare(IntAccessor TA, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + => new_ss_compare_feature_flag ? 
ss_compare_new(TA.span, SAp1, p1, SAp2, p2, depth) : ss_compare_old(TA, SAp1, p1, SAp2, p2, depth); + + private static int ss_compare_new(ReadOnlySpan T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + => T[(depth + SAp1[p1])..(SAp1[p1 + 1] + 2)].SequenceCompareTo(T[(depth + SAp2[p2])..(SAp2[p2 + 1] + 2)]); + private static int ss_compare_old(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + { var U1 = depth + SAp1[p1]; var U2 = depth + SAp2[p2]; var U1n = SAp1[p1 + 1] + 2; From 54b8444dea6bb7013fa0e70c07899240e6cc8b3b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 20:43:00 -0500 Subject: [PATCH 202/325] Add benchmark project --- .../DeltaQ.Benchmarks.csproj | 24 +++++++++++++++++++ deltaq.sln | 9 +++++++ 2 files changed, 33 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj diff --git a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj new file mode 100644 index 0000000..f3f3d58 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj @@ -0,0 +1,24 @@ + + + + Exe + net6.0 + enable + enable + + + + + PreserveNewest + + + + + + + + + + + + diff --git a/deltaq.sln b/deltaq.sln index 8b9f9b3..5dad45e 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -31,6 +31,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LibDiv EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LivDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LivDivSufSort.Tests\DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "bench", "bench", "{BF7CD739-6B0C-424D-A5CD-7970D80915E4}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Benchmarks", "bench\DeltaQ.Benchmarks\DeltaQ.Benchmarks.csproj", "{A5FCA064-2EED-4CDB-93D7-FF16E3314885}" +EndProject Global 
GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -85,6 +89,10 @@ Global {5486E391-BFF9-4ED9-8383-032AE249C588}.Debug|Any CPU.Build.0 = Debug|Any CPU {5486E391-BFF9-4ED9-8383-032AE249C588}.Release|Any CPU.ActiveCfg = Release|Any CPU {5486E391-BFF9-4ED9-8383-032AE249C588}.Release|Any CPU.Build.0 = Release|Any CPU + {A5FCA064-2EED-4CDB-93D7-FF16E3314885}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A5FCA064-2EED-4CDB-93D7-FF16E3314885}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A5FCA064-2EED-4CDB-93D7-FF16E3314885}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A5FCA064-2EED-4CDB-93D7-FF16E3314885}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -102,6 +110,7 @@ Global {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {E89B007E-0BDE-4642-B40F-CCB7569F88B8} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {5486E391-BFF9-4ED9-8383-032AE249C588} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} + {A5FCA064-2EED-4CDB-93D7-FF16E3314885} = {BF7CD739-6B0C-424D-A5CD-7970D80915E4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {595D8046-0D57-4408-A80A-777358A7E831} From f93e3812fafbe0f98ee07e0b02de648648d66bc1 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 16 Dec 2021 20:44:10 -0500 Subject: [PATCH 203/325] Add LibDivSufSort benchmark --- .../LibDivSufSortBenchmarks.cs | 34 +++++++++++++++++++ bench/DeltaQ.Benchmarks/Program.cs | 4 +++ 2 files changed, 38 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs create mode 100644 bench/DeltaQ.Benchmarks/Program.cs diff --git a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs new file mode 100644 index 0000000..b2720d2 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs @@ -0,0 +1,34 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Engines; +using DeltaQ.SuffixSorting.LibDivSufSort; + +namespace DeltaQ.Benchmarks +{ + [SimpleJob(RunStrategy.Throughput)] + public class LibDivSufSortBenchmarks + { + private LibDivSufSort ldss = new LibDivSufSort(); + + private static readonly byte[][] _assets = Directory.EnumerateFiles("./assets/").Select(File.ReadAllBytes).ToArray(); + + [Benchmark(Baseline = true)] + public void ss_compare_old() + { + SsSort.new_ss_compare_feature_flag = false; + foreach (var asset in _assets) + { + ldss.Sort(asset).Dispose(); + } + } + + [Benchmark] + public void ss_compare_new() + { + SsSort.new_ss_compare_feature_flag = true; + foreach (var asset in _assets) + { + ldss.Sort(asset).Dispose(); + } + } + } +} diff --git a/bench/DeltaQ.Benchmarks/Program.cs b/bench/DeltaQ.Benchmarks/Program.cs new file mode 100644 index 0000000..3b9be9a --- /dev/null +++ b/bench/DeltaQ.Benchmarks/Program.cs @@ -0,0 +1,4 @@ +using BenchmarkDotNet.Running; +using DeltaQ.Benchmarks; + +BenchmarkRunner.Run(); \ No newline at end of file From d2aebca2ab51d93346913d36cdf42974c221c862 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 16 Dec 2021 20:45:48 -0500 Subject: [PATCH 204/325] Make LibDivSufSort internals visible to benchmark project --- src/DeltaQ.SuffixSorting.LibDivSufSort/Assembly.cs | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/Assembly.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Assembly.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Assembly.cs new file mode 100644 index 0000000..613687c --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Assembly.cs @@ -0,0 +1,3 @@ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("DeltaQ.Benchmarks")] \ No newline at end of file From 7f7e37c13eb2bb2695c52e83efe565acbc68dadd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 16 Dec 2021 20:47:10 -0500 Subject: [PATCH 205/325] Remove new ss_compare implementation --- .../LibDivSufSortBenchmarks.cs | 19 ------------------- .../SsSort.cs | 12 ++++-------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs index b2720d2..feb35f2 100644 --- a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs @@ -11,24 +11,5 @@ public class LibDivSufSortBenchmarks private static readonly byte[][] _assets = Directory.EnumerateFiles("./assets/").Select(File.ReadAllBytes).ToArray(); - [Benchmark(Baseline = true)] - public void ss_compare_old() - { - SsSort.new_ss_compare_feature_flag = false; - foreach (var asset in _assets) - { - ldss.Sort(asset).Dispose(); - } - } - - [Benchmark] - public void ss_compare_new() - { - SsSort.new_ss_compare_feature_flag = true; - foreach (var asset in _assets) - { - ldss.Sort(asset).Dispose(); - } - } } } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index cb0c964..6b4bd78 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ 
b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -144,18 +144,14 @@ public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA } } - public static bool new_ss_compare_feature_flag = false; /// /// Compare two suffixes /// - internal static int ss_compare(IntAccessor TA, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) - => new_ss_compare_feature_flag ? ss_compare_new(TA.span, SAp1, p1, SAp2, p2, depth) : ss_compare_old(TA, SAp1, p1, SAp2, p2, depth); - - private static int ss_compare_new(ReadOnlySpan T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) - => T[(depth + SAp1[p1])..(SAp1[p1 + 1] + 2)].SequenceCompareTo(T[(depth + SAp2[p2])..(SAp2[p2 + 1] + 2)]); - - private static int ss_compare_old(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) { + //Slower in bench: + //T[(depth + SAp1[p1])..(SAp1[p1 + 1] + 2)].SequenceCompareTo(T[(depth + SAp2[p2])..(SAp2[p2 + 1] + 2)]) + var U1 = depth + SAp1[p1]; var U2 = depth + SAp2[p2]; var U1n = SAp1[p1 + 1] + 2; From 6c60666f19fa1288beb4fd9e1816f79568725cff Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 16 Dec 2021 20:50:28 -0500 Subject: [PATCH 206/325] Remove space-wasters and make bucket types private --- .../DivSufSort.cs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs index d39b129..3138909 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs @@ -17,9 +17,7 @@ public static void divsufsort(ReadOnlySpan T, Span SA) { Trace.Assert(T.Length == SA.Length); - var n = T.Length; - - switch (n) + switch (T.Length) { case 0: return; case 1: @@ -48,17 +46,13 @@ private static void construct_SA(ReadOnlySpan T, Span SA, Span A BBucket Bb = new(B); BStarBucket Bstar = new(B); - SAPtr i; - SAPtr j; - Idx k; - Idx s; - Idx c0; - Idx c2; + SAPtr i, j; + Idx k, s, c0, c1, c2; if (0 < m) { // Construct the sorted order of type B suffixes by using the // sorted order of type B* suffixes - Idx c1 = ALPHABET_SIZE - 2; + c1 = ALPHABET_SIZE - 2; while (0 <= c1) { // Scan the suffix array from right to left @@ -163,7 +157,7 @@ public ref struct SortTypeBstarResult public int m; } - public ref struct BStarBucket + private ref struct BStarBucket { public readonly Span B; public BStarBucket(Span B) => this.B = B; @@ -171,7 +165,7 @@ public ref struct BStarBucket public ref int this[(int c0, int c1) index] => ref B[(index.c0 << 8) | index.c1]; } - public ref struct BBucket + private ref struct BBucket { public readonly Span B; public BBucket(Span B) => this.B = B; From 2dcb004bcaa610bb7f20b1cbdb5ff39c3dcf4006 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 16 Dec 2021 21:08:37 -0500 Subject: [PATCH 207/325] Remove space-wasters --- .../SsSort.cs | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 6b4bd78..bcf7b26 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -388,16 +388,9 @@ void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) using var stackOwner = SpanOwner.Allocate(MERGE_STACK_SIZE, AllocationMode.Clear); var stack = new SsStack(stackOwner.Span); - SAPtr l; - SAPtr r; - SAPtr lm; - SAPtr rm; + SAPtr l, r, lm, rm; - Idx m; - Idx len; - Idx half; - Idx check; - Idx next; + Idx m, len, half, check, next; // BARBARIAN check = 0; @@ -771,12 +764,8 @@ private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPt /// Merge-forward with internal buffer private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr bufend; - Idx t; - Idx r; + SAPtr a, b, c, bufend; + Idx t, r; SA_dump(SA[first..last], "ss_mergeforward start"); @@ -955,19 +944,9 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf using var stackOwner = SpanOwner.Allocate(SS_STACK_SIZE); var stack = new SsStack(stackOwner.Span); - SAPtr a; - SAPtr b; - SAPtr c; - SAPtr d; - SAPtr e; - SAPtr f; - - Idx s; - Idx t; + SAPtr a, b, c, d, e, f; - Idx limit; - Idx v; - Idx x = 0; + Idx s, t, limit, v, x = 0; // RENEE limit = ss_ilg(last - first); From 5ea316556d2709d23937499c6e9e54ec3d5d175c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 02:48:39 -0500 Subject: [PATCH 208/325] Apply inlining to ReadOnlySpanOffsetAccessor --- .../ReadOnlySpanOffsetAccessor.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs index 3b437f8..e869ba6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/ReadOnlySpanOffsetAccessor.cs @@ -1,4 +1,5 @@ using System; +using System.Runtime.CompilerServices; namespace DeltaQ.SuffixSorting.LibDivSufSort; @@ -7,11 +8,16 @@ internal ref struct ReadOnlySpanOffsetAccessor private readonly ReadOnlySpan _span; private readonly int _offset; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public ReadOnlySpanOffsetAccessor(ReadOnlySpan span, int offset) { _span = span; _offset = offset; } - public ref readonly T this[int index] => ref _span[_offset + index]; + public ref readonly T this[int index] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => ref _span[_offset + index]; + } } From 7337fdc458113236f42b476b5b36ff9720b26530 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 03:43:57 -0500 Subject: [PATCH 209/325] Remove IntAccessor --- .../DivSufSort.cs | 9 +++++---- .../IntAccessor.cs | 13 ------------- 2 files changed, 5 insertions(+), 17 deletions(-) delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs index 3138909..b1aa66a 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs @@ -1,6 +1,7 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; +using Text = System.ReadOnlySpan; using Idx = System.Int32; using SAPtr = System.Int32; @@ -13,7 +14,7 @@ internal static class DivSufSort private const int BUCKET_A_SIZE = ALPHABET_SIZE; private const int BUCKET_B_SIZE = ALPHABET_SIZE * ALPHABET_SIZE; - public static void divsufsort(ReadOnlySpan T, Span SA) + public static void divsufsort(Text T, Span SA) { Trace.Assert(T.Length == SA.Length); @@ -35,11 +36,11 @@ public static void divsufsort(ReadOnlySpan T, Span SA) return; } - var result = sort_typeBstar(new IntAccessor(T), SA); + var result = sort_typeBstar(T, SA); construct_SA(T, SA, result.A, result.B, result.m); } - private static void construct_SA(ReadOnlySpan T, Span SA, Span A, Span B, int m) + private static void construct_SA(Text T, Span SA, Span A, Span B, int m) { Idx n = T.Length; @@ -173,7 +174,7 @@ private ref struct BBucket public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; } - public static SortTypeBstarResult sort_typeBstar(in IntAccessor T, Span SA) + public static SortTypeBstarResult sort_typeBstar(Text T, Span SA) { var n = T.Length; diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs deleted file mode 100644 index 07761b5..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/IntAccessor.cs 
+++ /dev/null @@ -1,13 +0,0 @@ -using System; -using Idx = System.Int32; - -namespace DeltaQ.SuffixSorting.LibDivSufSort; - -internal ref struct IntAccessor -{ - public readonly ReadOnlySpan span; - public IntAccessor(ReadOnlySpan span) => this.span = span; - - public readonly int this[Idx index] => span[index]; - public readonly int Length => span.Length; -} From 00d3045832d072dee7f4d5270e0e65c86462ff3c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 17 Dec 2021 03:51:40 -0500 Subject: [PATCH 210/325] SsSort rework Remove IntAccessor Decorate all inline-eligible methods in SsSort Use ROS in place of Span where possible Use TdPAStarAccessor directly in ss_pivot and ss_median3/5 --- .../SsSort.cs | 69 ++++++++++--------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index bcf7b26..55888dd 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -2,6 +2,7 @@ using System; using System.Diagnostics; using System.Runtime.CompilerServices; +using Text = System.ReadOnlySpan; using Idx = System.Int32; using SAPtr = System.Int32; @@ -16,7 +17,7 @@ internal static class SsSort /// /// Substring sort /// - public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) + public static void sssort(Text T, Span SA, SAPtr PA, SAPtr first, SAPtr last, SAPtr buf, Idx bufsize, Idx depth, Idx n, bool lastsuffix) { // "PA" = "Partition Array", slice of SA @@ -127,7 +128,7 @@ public static void sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA crosscheck("lastsuffix!"); // Insert last type B* suffix - Span PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; + ReadOnlySpan PAi = stackalloc Idx[2] { SA[PA + SA[first - 1]], n - 2 }; a = first; i = SA[first - 1]; @@ -147,7 +148,8 @@ public static void 
sssort(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SA /// /// Compare two suffixes /// - private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span SAp2, SAPtr p2, Idx depth) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_compare(Text T, ReadOnlySpan SAp1, SAPtr p1, ReadOnlySpan SAp2, SAPtr p2, Idx depth) { //Slower in bench: //T[(depth + SAp1[p1])..(SAp1[p1 + 1] + 2)].SequenceCompareTo(T[(depth + SAp2[p2])..(SAp2[p2 + 1] + 2)]) @@ -187,7 +189,7 @@ private static int ss_compare(IntAccessor T, Span SAp1, SAPtr p1, Span } } - private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) + private static void ss_inplacemerge(Text T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, Idx depth) { SAPtr p, a, b; Idx len, half, q, r, x; @@ -272,6 +274,7 @@ private static void ss_inplacemerge(IntAccessor T, Span SA, SAPtr PA, SAPtr } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr last) { SAPtr a, b; @@ -356,6 +359,7 @@ private static void ss_rotate(Span SA, SAPtr first, SAPtr middle, SAPtr las } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) { for (int i = 0; i < n; i++) @@ -365,11 +369,11 @@ private static void ss_blockswap(Span SA, SAPtr a, SAPtr b, Idx n) } /// D&C based merge - private static void ss_swapmerge(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) + private static void ss_swapmerge(Text T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx bufsize, Idx depth) { static Idx get_idx(Idx a) => 0 <= a ? 
a : ~a; - void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) + void merge_check(Text T, Span SA, Idx a, Idx b, Idx c) { crosscheck($"mc c={c}"); if (((c & 1) > 0) || (((c & 2) > 0) && (ss_compare(T, SA, PA + get_idx(SA[a - 1]), SA, PA + SA[a], depth) == 0))) @@ -552,7 +556,7 @@ void merge_check(IntAccessor T, Span SA, Idx a, Idx b, Idx c) } /// Merge-backward with internal buffer - private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) + private static void ss_mergebackward(Text T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { SAPtr p1, p2, a, b, c, bufend; @@ -762,7 +766,7 @@ private static void ss_mergebackward(IntAccessor T, Span SA, SAPtr PA, SAPt } /// Merge-forward with internal buffer - private static void ss_mergeforward(IntAccessor T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) + private static void ss_mergeforward(Text T, Span SA, SAPtr PA, SAPtr first, SAPtr middle, SAPtr last, SAPtr buf, Idx depth) { SAPtr a, b, c, bufend; Idx t, r; @@ -909,6 +913,7 @@ public SsStack(Span items) Size = 0; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) { Trace.Assert(Size < Items.Length); @@ -918,6 +923,8 @@ public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) item.c = c; item.d = d; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) { //Debug.Assert(Size > 0); @@ -937,7 +944,7 @@ public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) /// /// Multikey introsort for medium size groups /// - private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOffset, SAPtr first, SAPtr last, Idx depth) + private static void ss_mintrosort(Text T, Span SA, SAPtr partitionOffset, SAPtr first, SAPtr last, Idx depth) { var PA = SA[partitionOffset..]; @@ -966,7 +973,7 @@ private 
static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf } var tdOffset = depth; - var TdPAStar = new TdPAStarAccessor(T.span, SA, partitionOffset, tdOffset); + var TdPAStar = new TdPAStarAccessor(T, SA, partitionOffset, tdOffset); /*readonly*/ var old_limit = limit; @@ -1278,25 +1285,24 @@ private static void ss_mintrosort(IntAccessor T, Span SA, SAPtr partitionOf /// /// Returns the pivot element /// - private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr first, SAPtr last) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static SAPtr ss_pivot(Text T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr first, SAPtr last) { Idx t = last - first; SAPtr middle = first + (t / 2); + var get = new TdPAStarAccessor(T, SA, PA, Td); if (t <= 512) { if (t <= 32) { - return ss_median3(T, Td, SA, PA, first, middle, last - 1); + return ss_median3(get, first, middle, last - 1); } else { t >>= 2; return ss_median5( - T, - Td, - SA, - PA, + get, first, first + t, middle, @@ -1306,18 +1312,17 @@ private static SAPtr ss_pivot(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP } t >>= 3; - first = ss_median3(T, Td, SA, PA, first, first + t, first + (t << 1)); - middle = ss_median3(T, Td, SA, PA, middle - t, middle, middle + t); - last = ss_median3(T, Td, SA, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + first = ss_median3(get, first, first + t, first + (t << 1)); + middle = ss_median3(get, middle - t, middle, middle + t); + last = ss_median3(get, last - 1 - (t << 1), last - 1 - t, last - 1); - return ss_median3(T, Td, SA, PA, first, middle, last); + return ss_median3(get, first, middle, last); } /// Returns the median of five elements [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) + private static SAPtr ss_median5(TdPAStarAccessor get, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - var get = new 
TdPAStarAccessor(T.span, SA, PA, Td); if (get[v2] > get[v3]) { Swap(ref v2, ref v3); @@ -1353,11 +1358,9 @@ private static SAPtr ss_median5(IntAccessor T, Idx Td, ReadOnlySpan SA, SAP /// /// Returns the median of three elements /// - private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAPtr v1, SAPtr v2, SAPtr v3) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_median3(TdPAStarAccessor get, SAPtr v1, SAPtr v2, SAPtr v3) { - //int get(int x) => T[Td + SA[PA + SA[x]]] - var get = new TdPAStarAccessor(T.span, SA, PA, Td); - if (get[v1] > get[v2]) { Swap(ref v1, ref v2); @@ -1384,7 +1387,7 @@ private static int ss_median3(IntAccessor T, Idx Td, Span SA, SAPtr PA, SAP [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr ss_partition(Span SA, SAPtr paOffset, SAPtr first, SAPtr last, Idx depth) { - Span PA = SA[paOffset..]; + ReadOnlySpan PA = SA[paOffset..]; // JIMMY var a = first - 1; @@ -1442,7 +1445,7 @@ private static SAPtr ss_partition(Span SA, SAPtr paOffset, SAPtr first, SAP return a; } - private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int first, int last, int depth) + private static void ss_insertionsort(Text T, Span SA, int PA, int first, int last, int depth) { SAPtr i; SAPtr j; @@ -1502,6 +1505,7 @@ private static void ss_insertionsort(IntAccessor T, Span SA, int PA, int fi /// /// Fast log2, using lookup tables /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ss_ilg(int n) { if ((n & 0xff00) > 0) @@ -1515,14 +1519,14 @@ private static int ss_ilg(int n) } /// Simple top-down heapsort. 
- private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) + private static void ss_heapsort(Text T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) { Idx i; var m = size; Idx t; - var Td = new IntAccessor(T.span[tdOffset..]); - var PA = SA_top[paOffset..]; + Text Td = T[tdOffset..]; + ReadOnlySpan PA = SA_top[paOffset..]; var SA = SA_top[first..]; if ((size % 2) == 0) @@ -1556,7 +1560,7 @@ private static void ss_heapsort(IntAccessor T, Idx tdOffset, Span SA_top, S } } - private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i, Idx size) + private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx i, Idx size) { Idx j, v, c, d, e, k; @@ -1620,6 +1624,7 @@ private static void ss_fixdown(IntAccessor Td, Span PA, Span SA, Idx i /// /// Fast sqrt, using lookup tables /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ss_isqrt(int x) { if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) From b9dccb2c4d3cfca870f8859709959c9a1a5e8fd9 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 03:51:51 -0500 Subject: [PATCH 211/325] Remove SpanOffsetAccessor --- .../SpanOffsetAccessor.cs | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs deleted file mode 100644 index e16fed9..0000000 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanOffsetAccessor.cs +++ /dev/null @@ -1,17 +0,0 @@ -using System; - -namespace DeltaQ.SuffixSorting.LibDivSufSort; - -internal ref struct SpanOffsetAccessor -{ - private readonly Span _span; - private readonly int _offset; - - public SpanOffsetAccessor(Span span, int offset) - { - _span = span; - _offset = offset; - } - - public ref T this[int index] => ref _span[_offset + index]; -} \ No newline at end of file From efdebabaeb4d5626426986e7cebe6b2bcafdffe3 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 17 Dec 2021 03:52:08 -0500 Subject: [PATCH 212/325] Flatten namespace in Crosscheck --- .../Crosscheck.cs | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs index 3859e72..c308fcf 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Crosscheck.cs @@ -1,23 +1,22 @@ using System; using System.Diagnostics; -namespace DeltaQ.SuffixSorting.LibDivSufSort +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal static class Crosscheck { - internal static class Crosscheck + [Conditional("CROSSCHECK")] + internal static void SA_dump(ReadOnlySpan span, string v) { - [Conditional("CROSSCHECK")] - internal static void SA_dump(ReadOnlySpan span, string v) + Debug.WriteLine($":: {v}"); + for (int i = 0; i < span.Length; i++) { - Debug.WriteLine($":: {v}"); - for (int i = 0; i < 
span.Length; i++) - { - Debug.Write($"{span[i]} "); - Debug.WriteLineIf((i + 1) % 25 == 0, ""); - } - Debug.WriteLine(""); + Debug.Write($"{span[i]} "); + Debug.WriteLineIf((i + 1) % 25 == 0, ""); } - - [Conditional("CROSSCHECK")] - internal static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); + Debug.WriteLine(""); } + + [Conditional("CROSSCHECK")] + internal static void crosscheck(string v, params object[] args) => Debug.WriteLine(v, args); } From f7dc13485b04156fcf07e942bffa8e4eba14a4ad Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 17 Dec 2021 03:52:26 -0500 Subject: [PATCH 213/325] Flatten namespace in SpanExtensions --- .../SpanExtensions.cs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs index 2326c50..f859076 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SpanExtensions.cs @@ -1,12 +1,11 @@ using System; using System.Runtime.CompilerServices; -namespace DeltaQ.SuffixSorting.LibDivSufSort +namespace DeltaQ.SuffixSorting.LibDivSufSort; + +internal static class SpanExtensions { - internal static class SpanExtensions - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Swap(this Span span, int i, int j) - => (span[j], span[i]) = (span[i], span[j]); - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void Swap(this Span span, int i, int j) + => (span[j], span[i]) = (span[i], span[j]); } From 1c25c6d78985f2ff89279210e7cdbe721ad32d52 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 03:52:53 -0500 Subject: [PATCH 214/325] Add inlining and remove IntAccessor in TdPAStarAccessor --- .../TdPAStarAccessor.cs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs index 3f6f71f..7d0dee3 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TdPAStarAccessor.cs @@ -1,4 +1,6 @@ using System; +using System.Runtime.CompilerServices; +using Text = System.ReadOnlySpan; namespace DeltaQ.SuffixSorting.LibDivSufSort; @@ -7,18 +9,21 @@ internal ref struct TdPAStarAccessor private readonly ReadOnlySpanOffsetAccessor _TO; private readonly ReadOnlySpan _SA; private readonly ReadOnlySpan _PA; - private readonly IntAccessor _TD; - public TdPAStarAccessor(ReadOnlySpan T, ReadOnlySpan SA, int partitionOffset, int tdOffset) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TdPAStarAccessor(Text T, ReadOnlySpan SA, int partitionOffset, int tdOffset) { _TO = new ReadOnlySpanOffsetAccessor(T, tdOffset); - _SA = SA; _PA = SA[partitionOffset..]; - _TD = new(T[tdOffset..]); } - public readonly int this[int index] => _TD[_PA[_SA[index]]]; + public readonly int this[int index] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => _TO[_PA[_SA[index]]]; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly int AsOffset(int index) => _TO[index]; } From b34fcb049d19ce31e95ceec4e3869b8ea63ced3e Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 03:54:05 -0500 Subject: [PATCH 215/325] Move sqq_table into Utils --- .../SsSort.cs | 21 ------------------ .../Utils.cs | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 55888dd..27afdd2 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1600,27 +1600,6 @@ private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx SA[i] = v; } - private static readonly Idx[] sqq_table_array = new[] - { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, - 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, - 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, - 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, - 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, - 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, - 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, - 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, - 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, - 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, - 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, - 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, - 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, - 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, - 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 - }; - private static ReadOnlySpan sqq_table => sqq_table_array; - /// /// Fast 
sqrt, using lookup tables /// diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 8272559..0e49e51 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -1,5 +1,6 @@ using System; using System.Runtime.CompilerServices; +using Idx = System.Int32; namespace DeltaQ.SuffixSorting.LibDivSufSort; @@ -18,6 +19,27 @@ internal static class Utils }; internal static ReadOnlySpan lg_table => lg_table_array; + private static readonly Idx[] sqq_table_array = new[] + { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, + 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, + 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, + 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, + 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, + 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, + 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, + 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, + 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, + 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, + 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 + }; + internal static ReadOnlySpan sqq_table => sqq_table_array; + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void 
Swap(ref T lhs, ref T rhs) { From cfecef75c6f65561771cbc1978f527a80968ed7a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 17 Dec 2021 03:55:36 -0500 Subject: [PATCH 216/325] Add inlining to buckets --- .../DivSufSort.cs | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs index b1aa66a..ee7e4f8 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DivSufSort.cs @@ -1,9 +1,10 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; -using Text = System.ReadOnlySpan; +using System.Runtime.CompilerServices; using Idx = System.Int32; using SAPtr = System.Int32; +using Text = System.ReadOnlySpan; namespace DeltaQ.SuffixSorting.LibDivSufSort; using static Crosscheck; @@ -160,18 +161,30 @@ public ref struct SortTypeBstarResult private ref struct BStarBucket { - public readonly Span B; + private readonly Span B; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public BStarBucket(Span B) => this.B = B; - public ref int this[(int c0, int c1) index] => ref B[(index.c0 << 8) | index.c1]; + public ref int this[(int c0, int c1) index] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => ref B[(index.c0 << 8) | index.c1]; + } } private ref struct BBucket { - public readonly Span B; + private readonly Span B; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public BBucket(Span B) => this.B = B; - public ref int this[(int c0, int c1) index] => ref B[(index.c1 << 8) | index.c0]; + public ref int this[(int c0, int c1) index] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => ref B[(index.c1 << 8) | index.c0]; + } } public static SortTypeBstarResult sort_typeBstar(Text T, Span SA) From aed56f50a9b9e815a29e3f00f0dde91f3122b5ea Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 13:06:58 -0500 Subject: [PATCH 217/325] Add SqrtBenchmarks --- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 153 ++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs new file mode 100644 index 0000000..9e79005 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs @@ -0,0 +1,153 @@ +using BenchmarkDotNet.Attributes; +using System.Runtime.CompilerServices; +using Idx = System.Int32; +using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; +using BenchmarkDotNet.Diagnosers; + +namespace DeltaQ.Benchmarks +{ + [RyuJitX64Job] + //[RyuJitX86Job] + [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + public class SqrtBenchmarks + { + private const int Step = 1; + public SqrtBenchmarks() + { + //sanity check range + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i++) + { + var sqrtFast = ss_isqrt(i); + var sqrtD = (int)Math.Sqrt(i); + var sqrtF = (int)MathF.Sqrt(i); + if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); + } + } + + [Benchmark] + public void SqrtsFast() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt(i); + } + GC.KeepAlive(y); + } + + [Benchmark(Baseline = true)] + public void Sqrts() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt_math(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void SqrtsF() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt_mathf(i); + } + GC.KeepAlive(y); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_isqrt_math(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + return (int)Math.Sqrt(x); + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_isqrt_mathf(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + return (int)MathF.Sqrt(x); + } + + //=> x switch + //{ + // >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, + // _ => (int)MathF.Sqrt(x) + //}; + + + private const Idx SS_BLOCKSIZE = 1024; + /// + /// Fast sqrt, using lookup tables + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + /*unchecked*/ + private static int ss_isqrt(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + + Idx e; + if ((x & 0xffff_0000) > 0) + { + if ((x & 0xff00_0000) > 0) + { + e = 24 + lg_table[(x >> 24) & 0xff]; + } + else + { + e = 16 + lg_table[(x >> 16) & 0xff]; + } + } + else + { + if ((x & 0x0000_ff00) > 0) + { + e = 8 + lg_table[(x >> 8) & 0xff]; + } + else + { + e = 0 + lg_table[(x >> 0) & 0xff]; + } + }; + + Idx y; + if (e >= 16) + { + y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); + if (e >= 24) + { + y = (y + 1 + x / y) >> 1; + } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + + if (x < (y * y)) + { + return y - 1; + } + else + { + return y; + } + } + } +} From d80c292d01c1310e48da2779701f9c9200ff1b5a Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 13:17:11 -0500 Subject: [PATCH 218/325] Put lookup-based ss_isqrt behind SS_ISQRT_LOOKUP flag and default to MathF.Sqrt --- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 11 ++++++++++- src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs | 5 ++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 27afdd2..a3248ab 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1,4 +1,5 @@ -using Microsoft.Toolkit.HighPerformance.Buffers; +//#define SS_ISQRT_LOOKUP +using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -1605,6 +1606,7 @@ private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ss_isqrt(int x) +#if SS_ISQRT_LOOKUP { if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { @@ -1663,5 +1665,12 @@ private static int ss_isqrt(int x) return y; } } +#else + => x switch + { + >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, + _ => (int)MathF.Sqrt(x) + }; +#endif } diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs index 0e49e51..854a16c 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/Utils.cs @@ -1,4 +1,5 @@ -using System; +//#define SS_ISQRT_LOOKUP +using System; using System.Runtime.CompilerServices; using Idx = System.Int32; @@ -19,6 +20,7 @@ internal static class Utils }; internal static ReadOnlySpan lg_table => lg_table_array; +#if SS_ISQRT_LOOKUP private static readonly Idx[] sqq_table_array = new[] { 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, @@ -39,6 +41,7 @@ internal static class Utils 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 
}; internal static ReadOnlySpan sqq_table => sqq_table_array; +#endif [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void Swap(ref T lhs, ref T rhs) From 5587b104e340cb5ab4b3a368c8567d242389eae7 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Fri, 17 Dec 2021 13:17:20 -0500 Subject: [PATCH 219/325] Delete SqrtBenchmarks --- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 153 ---------------------- 1 file changed, 153 deletions(-) delete mode 100644 bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs deleted file mode 100644 index 9e79005..0000000 --- a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs +++ /dev/null @@ -1,153 +0,0 @@ -using BenchmarkDotNet.Attributes; -using System.Runtime.CompilerServices; -using Idx = System.Int32; -using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; -using BenchmarkDotNet.Diagnosers; - -namespace DeltaQ.Benchmarks -{ - [RyuJitX64Job] - //[RyuJitX86Job] - [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] - public class SqrtBenchmarks - { - private const int Step = 1; - public SqrtBenchmarks() - { - //sanity check range - for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i++) - { - var sqrtFast = ss_isqrt(i); - var sqrtD = (int)Math.Sqrt(i); - var sqrtF = (int)MathF.Sqrt(i); - if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); - } - } - - [Benchmark] - public void SqrtsFast() - { - Idx y = -1; - for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) - { - y = ss_isqrt(i); - } - GC.KeepAlive(y); - } - - [Benchmark(Baseline = true)] - public void Sqrts() - { - Idx y = -1; - for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) - { - y = ss_isqrt_math(i); - } - GC.KeepAlive(y); - } - - [Benchmark] - public void SqrtsF() - { - Idx y = -1; - for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) - { - y = ss_isqrt_mathf(i); - } - 
GC.KeepAlive(y); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ss_isqrt_math(int x) - { - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { - return SS_BLOCKSIZE; - } - return (int)Math.Sqrt(x); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ss_isqrt_mathf(int x) - { - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { - return SS_BLOCKSIZE; - } - return (int)MathF.Sqrt(x); - } - - //=> x switch - //{ - // >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, - // _ => (int)MathF.Sqrt(x) - //}; - - - private const Idx SS_BLOCKSIZE = 1024; - /// - /// Fast sqrt, using lookup tables - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - /*unchecked*/ - private static int ss_isqrt(int x) - { - if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) - { - return SS_BLOCKSIZE; - } - - Idx e; - if ((x & 0xffff_0000) > 0) - { - if ((x & 0xff00_0000) > 0) - { - e = 24 + lg_table[(x >> 24) & 0xff]; - } - else - { - e = 16 + lg_table[(x >> 16) & 0xff]; - } - } - else - { - if ((x & 0x0000_ff00) > 0) - { - e = 8 + lg_table[(x >> 8) & 0xff]; - } - else - { - e = 0 + lg_table[(x >> 0) & 0xff]; - } - }; - - Idx y; - if (e >= 16) - { - y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); - if (e >= 24) - { - y = (y + 1 + x / y) >> 1; - } - y = (y + 1 + x / y) >> 1; - } - else if (e >= 8) - { - y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; - } - else - { - return sqq_table[x] >> 4; - } - - if (x < (y * y)) - { - return y - 1; - } - else - { - return y; - } - } - } -} From 8cfea5a5f96be84d693a77a71c48c636b0abd5b5 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 13:22:54 -0500 Subject: [PATCH 220/325] Use ROS in tr_median3 --- src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index 218d7da..cc41f22 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -878,11 +878,9 @@ private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr /// Returns the median of three elements [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr tr_median3(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3) + private static SAPtr tr_median3(ReadOnlySpan SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3) { - Span ISAd = SA[isadOffset..]; - - //get(x) => ISAd[SA[x]] + ReadOnlySpan ISAd = SA[isadOffset..]; if (ISAd[SA[v1]] > ISAd[SA[v2]]) { From 69fadd92844447e78384cb3d81ac209ac49c0e4b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Fri, 17 Dec 2021 13:23:24 -0500 Subject: [PATCH 221/325] Use ROS in tr_median5 --- src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index cc41f22..5974ce7 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -838,11 +838,9 @@ private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) /// Returns the median of five elements [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static SAPtr tr_median5(Span SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) + private static SAPtr tr_median5(ReadOnlySpan SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { - Span ISAd = SA[isadOffset..]; - - //get(x) => ISAd[SA[x]] + ReadOnlySpan ISAd = SA[isadOffset..]; if (ISAd[SA[v2]] > ISAd[SA[v3]]) { From cfc7bf7d5d2dd8de1eb43238b054779c3a2b9b2c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sat, 18 Dec 2021 10:39:43 -0500 Subject: [PATCH 222/325] Fix passing in mismatched SA span in Diff --- src/DeltaQ.BsDiff/Diff.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index 2c74f33..443db25 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -31,6 +31,7 @@ using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.Buffers; +using System.Diagnostics; using System.IO; namespace DeltaQ.BsDiff @@ -115,7 +116,12 @@ 0 32 Header using (var extraStream = GetEncodingStream(msExtra, true)) { Span I = saOwner.Span; - suffixSort.Sort(oldData, I); +#if NETSTANDARD2_0 + var sortLen = suffixSort.Sort(oldData, I.Slice(0, oldData.Length)); +#else + var sortLen = suffixSort.Sort(oldData, I[..^1]); +#endif + Trace.Assert(sortLen == oldData.Length); var scan = 0; var pos = 0; From 24538255fd06209969b2a8faf4def4dea168931c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sat, 18 Dec 2021 10:41:06 -0500 Subject: [PATCH 223/325] add SwapBenchmarks --- bench/DeltaQ.Benchmarks/SwapBenchmarks.cs | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/SwapBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs b/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs new file mode 100644 index 0000000..8eff492 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs @@ -0,0 +1,51 @@ +using BenchmarkDotNet.Attributes; +using System.Runtime.CompilerServices; + +namespace DeltaQ.Benchmarks +{ + [SimpleJob] + public class SwapBenchmarks + { + [Benchmark] + public void SwapTupleByte() + { + Span bytes = stackalloc byte[] { 0, 10, 15, 9, 12 }; + Swap(bytes, 0, 4); + } + + [Benchmark(Baseline = true)] + public void SwapTempByte() + { + Span bytes = stackalloc byte[] { 0, 10, 15, 9, 12 }; + Swap(ref bytes[0], ref bytes[4]); + } + + [Benchmark] + public void SwapTupleInt() + { + Span ints = stackalloc 
int[] { 0, 10, 15, 9, 12 }; + Swap(ints, 0, 4); + } + + [Benchmark] + public void SwapTempInt() + { + Span ints = stackalloc int[] { 0, 10, 15, 9, 12 }; + Swap(ref ints[0], ref ints[4]); + } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void Swap(Span span, int i, int j) + => (span[j], span[i]) = (span[i], span[j]); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void Swap(ref T lhs, ref T rhs) + { + T temp; + temp = lhs; + lhs = rhs; + rhs = temp; + } + } +} From 2aceb875bcc1cda6f75860b5a49f197049075746 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 19 Dec 2021 09:59:13 -0500 Subject: [PATCH 224/325] Add Log2Benchmarks --- bench/DeltaQ.Benchmarks/Log2Benchmarks.cs | 144 ++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/Log2Benchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs b/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs new file mode 100644 index 0000000..73802f5 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs @@ -0,0 +1,144 @@ +using BenchmarkDotNet.Attributes; +using System.Runtime.CompilerServices; +using Idx = System.Int32; +using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; +using BenchmarkDotNet.Diagnosers; + +namespace DeltaQ.Benchmarks +{ + [RyuJitX64Job] + //[RyuJitX86Job] + [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + public class Log2Benchmarks + { + private const int Step = 1; + //public Log2Benchmarks() + //{ + // //sanity check range + // for (int i = 1; i < int.MaxValue; i++) + // { + // var x = tr_ilg(i); + // var y = Log2(i); + // var z = Math.ILogB(i); + // //var a = MathF.ILogB(i); + // var a = (int)Math.Log2(i); + // //var a = (int)MathF.Log2(i); + // if (x != y || y != z || z != a) + // { + // throw new InvalidOperationException($"{i} did not match"); + // } + // } + //} + + [Benchmark(Baseline = true)] + public void tr_ilg() + { + Idx y = -1; + for (int 
i = 0; i < int.MaxValue; i += Step) + { + y = tr_ilg(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void Log2() + { + Idx y = -1; + for (int i = 0; i < int.MaxValue; i += Step) + { + y = Log2(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void MathLog2() + { + Idx y = -1; + for (int i = 0; i < int.MaxValue; i += Step) + { + y = (int)Math.Log2(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void MathFLog2() + { + Idx y = -1; + for (int i = 0; i < int.MaxValue; i += Step) + { + y = (int)MathF.Log2(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void MathILogB() + { + Idx y = -1; + for (int i = 0; i < int.MaxValue; i += Step) + { + y = Math.ILogB(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void MathFILogB() + { + Idx y = -1; + for (int i = 0; i < int.MaxValue; i += Step) + { + y = MathF.ILogB(i); + } + GC.KeepAlive(y); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int tr_ilg(int n) + { + if ((n & 0xffff_0000) > 0) + { + if ((n & 0xff00_0000) > 0) + { + return 24 + lg_table[((n >> 24) & 0xff)]; + } + else + { + return 16 + lg_table[((n >> 16) & 0xff)]; + } + } + else + { + if ((n & 0x0000_ff00) > 0) + { + return 8 + lg_table[((n >> 8) & 0xff)]; + } + else + { + return 0 + lg_table[((n >> 0) & 0xff)]; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Log2(int v) + { + int r = 0xFFFF - v >> 31 & 0x10; + v >>= r; + int shift = 0xFF - v >> 31 & 0x8; + v >>= shift; + r |= shift; + shift = 0xF - v >> 31 & 0x4; + v >>= shift; + r |= shift; + shift = 0x3 - v >> 31 & 0x2; + v >>= shift; + r |= shift; + r |= (v >> 1); + return r; + } + } +} From 8873976b4dd32107525001a71f502c9db8a2b8a0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 19 Dec 2021 09:59:56 -0500 Subject: [PATCH 225/325] Revert "Delete SqrtBenchmarks" This reverts commit 5587b104e340cb5ab4b3a368c8567d242389eae7. 
--- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 153 ++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs new file mode 100644 index 0000000..9e79005 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs @@ -0,0 +1,153 @@ +using BenchmarkDotNet.Attributes; +using System.Runtime.CompilerServices; +using Idx = System.Int32; +using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; +using BenchmarkDotNet.Diagnosers; + +namespace DeltaQ.Benchmarks +{ + [RyuJitX64Job] + //[RyuJitX86Job] + [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + public class SqrtBenchmarks + { + private const int Step = 1; + public SqrtBenchmarks() + { + //sanity check range + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i++) + { + var sqrtFast = ss_isqrt(i); + var sqrtD = (int)Math.Sqrt(i); + var sqrtF = (int)MathF.Sqrt(i); + if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); + } + } + + [Benchmark] + public void SqrtsFast() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt(i); + } + GC.KeepAlive(y); + } + + [Benchmark(Baseline = true)] + public void Sqrts() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt_math(i); + } + GC.KeepAlive(y); + } + + [Benchmark] + public void SqrtsF() + { + Idx y = -1; + for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) + { + y = ss_isqrt_mathf(i); + } + GC.KeepAlive(y); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_isqrt_math(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + return (int)Math.Sqrt(x); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int ss_isqrt_mathf(int x) + { + if (x >= 
(SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + return (int)MathF.Sqrt(x); + } + + //=> x switch + //{ + // >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, + // _ => (int)MathF.Sqrt(x) + //}; + + + private const Idx SS_BLOCKSIZE = 1024; + /// + /// Fast sqrt, using lookup tables + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + /*unchecked*/ + private static int ss_isqrt(int x) + { + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + + Idx e; + if ((x & 0xffff_0000) > 0) + { + if ((x & 0xff00_0000) > 0) + { + e = 24 + lg_table[(x >> 24) & 0xff]; + } + else + { + e = 16 + lg_table[(x >> 16) & 0xff]; + } + } + else + { + if ((x & 0x0000_ff00) > 0) + { + e = 8 + lg_table[(x >> 8) & 0xff]; + } + else + { + e = 0 + lg_table[(x >> 0) & 0xff]; + } + }; + + Idx y; + if (e >= 16) + { + y = sqq_table[(x >> ((e - 6) - (e & 1)))] << ((e >> 1) - 7); + if (e >= 24) + { + y = (y + 1 + x / y) >> 1; + } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[(x >> ((e - 6) - (e & 1)))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + + if (x < (y * y)) + { + return y - 1; + } + else + { + return y; + } + } + } +} From 1be0435ab5605be051e9d03931cc135acc7a8af3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 19 Dec 2021 10:01:40 -0500 Subject: [PATCH 226/325] Update SqrtBenchmarks --- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 39 ++++++++++------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs index 9e79005..f0b8bee 100644 --- a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs @@ -12,20 +12,20 @@ namespace DeltaQ.Benchmarks public class SqrtBenchmarks { private const int Step = 1; - public SqrtBenchmarks() - { - //sanity check range - for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i++) - { - var sqrtFast = ss_isqrt(i); - var sqrtD = (int)Math.Sqrt(i); - var sqrtF = (int)MathF.Sqrt(i); - if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); - } - } + //public SqrtBenchmarks() + //{ + // //sanity check range + // for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i++) + // { + // var sqrtFast = ss_isqrt(i); + // var sqrtD = (int)Math.Sqrt(i); + // var sqrtF = (int)MathF.Sqrt(i); + // if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); + // } + //} - [Benchmark] - public void SqrtsFast() + [Benchmark(Baseline = true)] + public void SqrtsSS() { Idx y = -1; for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) @@ -35,8 +35,8 @@ public void SqrtsFast() GC.KeepAlive(y); } - [Benchmark(Baseline = true)] - public void Sqrts() + [Benchmark] + public void SqrtsMath() { Idx y = -1; for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) @@ -47,7 +47,7 @@ public void Sqrts() } [Benchmark] - public void SqrtsF() + public void SqrtsMathF() { Idx y = -1; for (int i = 0; i < SS_BLOCKSIZE * SS_BLOCKSIZE; i += Step) @@ -77,13 +77,6 @@ private static int ss_isqrt_mathf(int x) return (int)MathF.Sqrt(x); } - //=> x switch - //{ - // >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, - // _ => (int)MathF.Sqrt(x) - //}; - - 
private const Idx SS_BLOCKSIZE = 1024; /// /// Fast sqrt, using lookup tables From 9b05eba06d58248f2bc78cf7578ae7d9985f51d7 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 19 Dec 2021 10:02:56 -0500 Subject: [PATCH 227/325] Update benchmarks and LDSS benchmarks --- .../DeltaQ.Benchmarks.csproj | 11 +++++++- .../LibDivSufSortBenchmarks.cs | 25 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj index f3f3d58..7046611 100644 --- a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj +++ b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj @@ -1,4 +1,4 @@ - + Exe @@ -13,8 +13,17 @@ + + + + + + + + + diff --git a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs index feb35f2..655870d 100644 --- a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs @@ -7,9 +7,30 @@ namespace DeltaQ.Benchmarks [SimpleJob(RunStrategy.Throughput)] public class LibDivSufSortBenchmarks { - private LibDivSufSort ldss = new LibDivSufSort(); - private static readonly byte[][] _assets = Directory.EnumerateFiles("./assets/").Select(File.ReadAllBytes).ToArray(); + [Benchmark(Baseline = true)] + public void ldss() + { + //SsSort.new_ss_pivot_feature_flag = false; + + var ldss = new LibDivSufSort(); + foreach (var asset in _assets) + { + ldss.Sort(asset).Dispose(); + } + } + + //[Benchmark] + //public void ss_pivot_new() + //{ + // SsSort.new_ss_pivot_feature_flag = true; + + // var ldss = new LibDivSufSort(); + // foreach (var asset in _assets) + // { + // ldss.Sort(asset).Dispose(); + // } + //} } } From 26dbf8878b2d1e47415fb59d57cee5bdcfea3441 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 19 Dec 2021 18:21:33 -0500 Subject: [PATCH 228/325] Add inlining and XML comments to TrSort --- .../TrSort.cs | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index 5974ce7..55c8624 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -12,6 +12,11 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort; internal static class TrSort { + /// + /// Fast log2, using lookup tables + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + //allow unchecked private static int tr_ilg(int n) { if ((n & 0xffff_0000) > 0) @@ -38,7 +43,9 @@ private static int tr_ilg(int n) } } + /// /// Tandem repeat sort + /// internal static void trsort(SAPtr ISA, Span SA, int n, int depth) { SAPtr ISAd; @@ -52,14 +59,6 @@ internal static void trsort(SAPtr ISA, Span SA, int n, int depth) int unsorted; Budget budget = new(tr_ilg(n) * 2 / 3, n); - //macro_rules! 
ISA { - // ($x: expr) => { - // SA[ISA + $x] - // }; - //} - - //ref int getISA(int x) => ref SA[ISA + x]; - // JERRY ISAd = ISA + depth; while (-n < SA[0]) @@ -147,12 +146,14 @@ private ref struct TrStack public readonly Span Items; public int Size; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public TrStack(Span items) { Items = items; Size = 0; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) { Trace.Assert(Size < Items.Length); @@ -163,6 +164,8 @@ public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d, Idx e) item.d = d; item.e = e; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d, ref Idx e) { if (Size == 0) return false; @@ -191,18 +194,6 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA using var stackOwner = SpanOwner.Allocate(TR_STACK_SIZE, AllocationMode.Clear); TrStack stack = new(stackOwner.Span); - /* - macro_rules! ISA { - ($x: expr) => { - SA[ISA + $x] - }; - } - macro_rules! 
ISAd { - ($x: expr) => { - SA[ISAd + $x] - }; - } - */ var ISA = SA[isaOffset..]; var ISAd = SA[isadOffset..]; @@ -810,7 +801,9 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA } // end PASCAL } + /// /// Returns the pivot element + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) { @@ -836,7 +829,9 @@ private static SAPtr tr_pivot(Span SA, SAPtr ISAd, SAPtr first, SAPtr last) return tr_median3(SA, ISAd, first, middle, last); } + /// /// Returns the median of five elements + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr tr_median5(ReadOnlySpan SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3, SAPtr v4, SAPtr v5) { @@ -874,7 +869,9 @@ private static SAPtr tr_median5(ReadOnlySpan SA, SAPtr isadOffset, SAPtr v1 } } + /// /// Returns the median of three elements + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static SAPtr tr_median3(ReadOnlySpan SA, SAPtr isadOffset, SAPtr v1, SAPtr v2, SAPtr v3) { @@ -1126,7 +1123,9 @@ private static void tr_partialcopy(SAPtr isaOffset, Span SA, SAPtr first, S } } + /// /// Tandem repeat copy + /// private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, SAPtr b, SAPtr last, Idx depth) { // sort suffixes of middle partition @@ -1182,6 +1181,7 @@ private static void tr_copy(SAPtr isaOffset, Span SA, SAPtr first, SAPtr a, /// /// Tandem repeat partition /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void tr_partition(Span SA, SAPtr isadOffset, SAPtr first, SAPtr middle, SAPtr last, ref SAPtr pa, ref SAPtr pb, Idx v) { SAPtr a, b, c, d, e, f; From 0525c367cab86700f32d158d1b2d23397a17ea6b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 19 Dec 2021 18:21:55 -0500 Subject: [PATCH 229/325] Bump LibDivSufSort version to 0.4.0 --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index e1e903d..cf83dd9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -4,14 +4,11 @@ net6.0 DeltaQ jzebedee - true - 0.3.0 + 0.4.0 - - From 8b7bb54aaa00b84091721443bd87e0e09c23aaea Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 19 Dec 2021 18:39:55 -0500 Subject: [PATCH 230/325] Add and use LDSSChecker The logic is the same as SAISChecker, just a fresher port. --- .../LDSSChecker.cs | 121 ++++++++++++++++++ .../LibDivSufSortTests.cs | 13 +- 2 files changed, 129 insertions(+), 5 deletions(-) create mode 100644 test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs new file mode 100644 index 0000000..d8907ad --- /dev/null +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs @@ -0,0 +1,121 @@ +using Microsoft.Toolkit.HighPerformance.Buffers; +using System; +using System.Diagnostics; + +namespace DeltaQ.Tests +{ + internal static class LDSSChecker + { + private const int ALPHABET_SIZE = byte.MaxValue + 1; + + internal enum ResultCode + { + Done = 0, + BadArguments = -1, + OutOfRange = -2, + WrongOrder = -3, + WrongPosition = -4, + } + + /// + /// Checks the suffix array SA of the string T. + /// + public static ResultCode Check(ReadOnlySpan T, ReadOnlySpan SA, bool verbose) + { + if (verbose) { Trace.TraceInformation("sufcheck: "); } + + // Check arguments. 
+ if (T.Length != SA.Length) + { + if (verbose) { Trace.TraceError("Invalid arguments.\n"); } + return ResultCode.BadArguments; + } + + if (T.IsEmpty) + { + if (verbose) { Trace.TraceInformation("Done.\n"); } + return ResultCode.Done; + } + + int i, p, q, t; + int c; + + // check range: [0..n-1] + int n = T.Length; + for (i = 0; i < n; ++i) + { + if ((SA[i] < 0) || (n <= SA[i])) + { + if (verbose) + { + Trace.TraceError($"Out of the range [0,{n - 1}].\n"); + Trace.TraceError($" SA[{i}]={SA[i]}\n"); + } + return ResultCode.OutOfRange; + } + } + + // check first characters. + for (i = 1; i < n; ++i) + { + if (T[SA[i - 1]] > T[SA[i]]) + { + if (verbose) + { + Trace.TraceError("Suffixes in wrong order.\n"); + Trace.TraceError($" T[SA[{i - 1}]={SA[i - 1]}]={T[SA[i - 1]]}"); + Trace.TraceError($" > T[SA[{i}]={SA[i]}]={T[SA[i]]}\n"); + } + return ResultCode.WrongOrder; + } + } + + // check suffixes. + using var cOwner = SpanOwner.Allocate(ALPHABET_SIZE, AllocationMode.Clear); + Span C = cOwner.Span; + + for (i = 0; i < n; ++i) { ++C[T[i]]; } + for (i = 0, p = 0; i < ALPHABET_SIZE; ++i) + { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for (i = 0; i < n; ++i) + { + p = SA[i]; + if (0 < p) + { + c = T[--p]; + t = C[c]; + } + else + { + c = T[p = n - 1]; + t = q; + } + if ((t < 0) || (p != SA[t])) + { + if (verbose) + { + Trace.TraceError("Suffix in wrong position.\n"); + Trace.TraceError($" SA[{t}]={((0 <= t) ? 
SA[t] : -1)} or\n"); + Trace.TraceError($" SA[{i}]={SA[i]}\n"); + } + return ResultCode.WrongPosition; + } + if (t != q) + { + ++C[c]; + if ((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if (verbose) { Trace.TraceInformation("Done.\n"); } + return ResultCode.Done; + } + } +} diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index e793002..37495d7 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -42,17 +42,16 @@ private static SpanOwner GetOwnedRandomBuffer(int size) } #endif - private static void Verify(ReadOnlySpan input, ReadOnlySpan sa) + private static void Verify(ReadOnlySpan T, ReadOnlySpan SA) { //ref byte suff(int index) => ref input[sa[index]]; - for (int i = 0; i < input.Length - 1; i++) + for (int i = 0; i < T.Length - 1; i++) { //if(!(suff(i) < suff(i + 1))) - var cur = input[sa[i]..]; - var next = input[sa[i + 1]..]; + var cur = T[SA[i]..]; + var next = T[SA[i + 1]..]; var cmp = cur.SequenceCompareTo(next); if (!(cmp < 0)) - //if (!(cur < next)) { var ex = new InvalidOperationException("Input was unsorted"); ex.Data["i"] = i; @@ -60,6 +59,10 @@ private static void Verify(ReadOnlySpan input, ReadOnlySpan sa) throw ex; } } + + const LDSSChecker.ResultCode expected = LDSSChecker.ResultCode.Done; + var actual = LDSSChecker.Check(T, SA, true); + Assert.Equal(expected, actual); } [Fact] From d4c8c74171d68d87a41ceae8f02fb6515215182f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 19 Dec 2021 19:59:30 -0500 Subject: [PATCH 231/325] Add ROS usage and inlining to SAIS --- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 33 ++++++--------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 8acc2ae..12c5e67 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -72,7 +72,7 @@ private static void GetCounts(IntAccessor T, Span c, int n, int k) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void GetBuckets(Span c, Span b, int k, bool end) + private static void GetBuckets(ReadOnlySpan c, Span b, int k, bool end) { for (int i = 0, sum = 0; i < k; ++i) { @@ -521,17 +521,19 @@ public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) private ref struct IntAccessor { - private readonly Span intSpan; + private readonly ReadOnlySpan intSpan; private readonly ReadOnlySpan byteSpan; private readonly bool packedIndex; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public IntAccessor(ReadOnlySpan buffer) { this.byteSpan = buffer; this.intSpan = default; this.packedIndex = true; } - public IntAccessor(Span buffer) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public IntAccessor(ReadOnlySpan buffer) { this.byteSpan = default; this.intSpan = buffer; @@ -540,29 +542,8 @@ public IntAccessor(Span buffer) public int this[int index] { - get - { - if (packedIndex) - { - return byteSpan[index]; - } - else - { - return intSpan[index]; - } - } - - set - { - if (packedIndex) - { - throw new InvalidOperationException("Can't use setter while accessing read only span"); - } - else - { - intSpan[index] = (byte)value; - } - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => packedIndex ? byteSpan[index] : intSpan[index]; } } } From 7b10fa916cc1a6ddc8914921f2f4c89299b4f391 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 26 Dec 2021 07:07:28 -0500 Subject: [PATCH 232/325] Add netstandard2.0 fallbacks to DeltaQ.Utility.Memory --- .../DeltaQ.Utility.Memory.csproj | 7 +- src/DeltaQ.Utility.Memory/HashHelpers.cs | 21 ++ src/DeltaQ.Utility.Memory/Index.cs | 162 ++++++++++++++ .../NullableAttributes.cs | 206 ++++++++++++++++++ src/DeltaQ.Utility.Memory/Range.cs | 143 ++++++++++++ src/DeltaQ.Utility.Memory/ThrowHelper.cs | 71 ++++++ 6 files changed, 608 insertions(+), 2 deletions(-) create mode 100644 src/DeltaQ.Utility.Memory/HashHelpers.cs create mode 100644 src/DeltaQ.Utility.Memory/Index.cs create mode 100644 src/DeltaQ.Utility.Memory/NullableAttributes.cs create mode 100644 src/DeltaQ.Utility.Memory/Range.cs create mode 100644 src/DeltaQ.Utility.Memory/ThrowHelper.cs diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 93fa53f..9c6603b 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -4,8 +4,7 @@ net6.0;netstandard2.0 DeltaQ jzebedee - true - 0.2.0 + 0.3.0 latest enable @@ -14,4 +13,8 @@ + + + + diff --git a/src/DeltaQ.Utility.Memory/HashHelpers.cs b/src/DeltaQ.Utility.Memory/HashHelpers.cs new file mode 100644 index 0000000..e28f0b8 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/HashHelpers.cs @@ -0,0 +1,21 @@ +//Generated 2021-12-26 +//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Numerics/Hashing/HashHelpers.cs + +#if NETSTANDARD2_0 +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +namespace System.Numerics.Hashing +{ + internal static class HashHelpers + { + public static int Combine(int h1, int h2) + { + // RyuJIT optimizes this to use the ROL instruction + // Related GitHub pull request: https://github.com/dotnet/coreclr/pull/1830 + uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27); + return ((int)rol5 + h1) ^ h2; + } + } +} +#endif \ No newline at end of file diff --git a/src/DeltaQ.Utility.Memory/Index.cs b/src/DeltaQ.Utility.Memory/Index.cs new file mode 100644 index 0000000..066dc96 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/Index.cs @@ -0,0 +1,162 @@ +//Generated 2021-12-26 +//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Index.cs + +using System.Runtime.CompilerServices; + +#if NETCOREAPP3_0_OR_GREATER +[assembly: TypeForwardedTo(typeof(System.Index))] +#else +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; + +namespace System +{ + /// Represent a type can be used to index a collection either from the start or the end. + /// + /// Index is used by the C# compiler to support the new index syntax + /// + /// int[] someArray = new int[5] { 1, 2, 3, 4, 5 } ; + /// int lastElement = someArray[^1]; // lastElement = 5 + /// + /// + public readonly struct Index : IEquatable + { + private readonly int _value; + + /// Construct an Index using a value and indicating if the index is from the start or from the end. + /// The index value. it has to be zero or positive number. + /// Indicating if the index is from the start or from the end. + /// + /// If the Index constructed from the end, index value 1 means pointing at the last element and index value 0 means pointing at beyond last element. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Index(int value, bool fromEnd = false) + { + if (value < 0) + { + ThrowHelper.ThrowValueArgumentOutOfRange_NeedNonNegNumException(); + } + + if (fromEnd) + _value = ~value; + else + _value = value; + } + + // The following private constructors mainly created for perf reason to avoid the checks + private Index(int value) + { + _value = value; + } + + /// Create an Index pointing at first element. + public static Index Start => new Index(0); + + /// Create an Index pointing at beyond last element. + public static Index End => new Index(~0); + + /// Create an Index from the start at the position indicated by the value. + /// The index value from the start. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Index FromStart(int value) + { + if (value < 0) + { + ThrowHelper.ThrowValueArgumentOutOfRange_NeedNonNegNumException(); + } + + return new Index(value); + } + + /// Create an Index from the end at the position indicated by the value. + /// The index value from the end. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Index FromEnd(int value) + { + if (value < 0) + { + ThrowHelper.ThrowValueArgumentOutOfRange_NeedNonNegNumException(); + } + + return new Index(~value); + } + + /// Returns the index value. + public int Value + { + get + { + if (_value < 0) + return ~_value; + else + return _value; + } + } + + /// Indicates whether the index is from the start or the end. + public bool IsFromEnd => _value < 0; + + /// Calculate the offset from the start using the giving collection length. + /// The length of the collection that the Index will be used with. length has to be a positive value + /// + /// For performance reason, we don't validate the input length parameter and the returned offset value against negative values. + /// we don't validate either the returned offset is greater than the input length. 
+ /// It is expected Index will be used with collections which always have non negative length/count. If the returned offset is negative and + /// then used to index a collection will get out of range exception which will be same affect as the validation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffset(int length) + { + int offset = _value; + if (IsFromEnd) + { + // offset = length - (~value) + // offset = length + (~(~value) + 1) + // offset = length + value + 1 + + offset += length + 1; + } + return offset; + } + + /// Indicates whether the current Index object is equal to another object of the same type. + /// An object to compare with this object + public override bool Equals([NotNullWhen(true)] object? value) => value is Index && _value == ((Index)value)._value; + + /// Indicates whether the current Index object is equal to another Index object. + /// An object to compare with this object + public bool Equals(Index other) => _value == other._value; + + /// Returns the hash code for this instance. + public override int GetHashCode() => _value; + + /// Converts integer number to an Index. + public static implicit operator Index(int value) => FromStart(value); + + /// Converts the value of the current Index object to its equivalent string representation. 
+ public override string ToString() + { + if (IsFromEnd) + return ToStringFromEnd(); + + return ((uint)Value).ToString(); + } + + private string ToStringFromEnd() + { +#if (!NETSTANDARD2_0 && !NETFRAMEWORK) + Span span = stackalloc char[11]; // 1 for ^ and 10 for longest possible uint value + bool formatted = ((uint)Value).TryFormat(span.Slice(1), out int charsWritten); + Debug.Assert(formatted); + span[0] = '^'; + return new string(span.Slice(0, charsWritten + 1)); +#else + return '^' + Value.ToString(); +#endif + } + } +} +#endif \ No newline at end of file diff --git a/src/DeltaQ.Utility.Memory/NullableAttributes.cs b/src/DeltaQ.Utility.Memory/NullableAttributes.cs new file mode 100644 index 0000000..41025ab --- /dev/null +++ b/src/DeltaQ.Utility.Memory/NullableAttributes.cs @@ -0,0 +1,206 @@ +//Generated 2021-12-26 +//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/NullableAttributes.cs + +#if NETSTANDARD2_0 +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace System.Diagnostics.CodeAnalysis +{ +#if !NETSTANDARD2_1 + /// Specifies that null is allowed as an input even if the corresponding type disallows it. + [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Property, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class AllowNullAttribute : Attribute + { } + + /// Specifies that null is disallowed as an input even if the corresponding type allows it. + [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Property, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class DisallowNullAttribute : Attribute + { } + + /// Specifies that an output may be null even if the corresponding type disallows it. 
+ [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Property | AttributeTargets.ReturnValue, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class MaybeNullAttribute : Attribute + { } + + /// Specifies that an output will not be null even if the corresponding type allows it. Specifies that an input argument was not null when the call returns. + [AttributeUsage(AttributeTargets.Field | AttributeTargets.Parameter | AttributeTargets.Property | AttributeTargets.ReturnValue, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class NotNullAttribute : Attribute + { } + + /// Specifies that when a method returns , the parameter may be null even if the corresponding type disallows it. + [AttributeUsage(AttributeTargets.Parameter, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class MaybeNullWhenAttribute : Attribute + { + /// Initializes the attribute with the specified return value condition. + /// + /// The return value condition. If the method returns this value, the associated parameter may be null. + /// + public MaybeNullWhenAttribute(bool returnValue) => ReturnValue = returnValue; + + /// Gets the return value condition. + public bool ReturnValue { get; } + } + + /// Specifies that when a method returns , the parameter will not be null even if the corresponding type allows it. + [AttributeUsage(AttributeTargets.Parameter, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class NotNullWhenAttribute : Attribute + { + /// Initializes the attribute with the specified return value condition. + /// + /// The return value condition. If the method returns this value, the associated parameter will not be null. + /// + public NotNullWhenAttribute(bool returnValue) => ReturnValue = returnValue; + + /// Gets the return value condition. 
+ public bool ReturnValue { get; } + } + + /// Specifies that the output will be non-null if the named parameter is non-null. + [AttributeUsage(AttributeTargets.Parameter | AttributeTargets.Property | AttributeTargets.ReturnValue, AllowMultiple = true, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class NotNullIfNotNullAttribute : Attribute + { + /// Initializes the attribute with the associated parameter name. + /// + /// The associated parameter name. The output will be non-null if the argument to the parameter specified is non-null. + /// + public NotNullIfNotNullAttribute(string parameterName) => ParameterName = parameterName; + + /// Gets the associated parameter name. + public string ParameterName { get; } + } + + /// Applied to a method that will never return under any circumstance. + [AttributeUsage(AttributeTargets.Method, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class DoesNotReturnAttribute : Attribute + { } + + /// Specifies that the method will not return if the associated Boolean parameter is passed the specified value. + [AttributeUsage(AttributeTargets.Parameter, Inherited = false)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class DoesNotReturnIfAttribute : Attribute + { + /// Initializes the attribute with the specified parameter value. + /// + /// The condition parameter value. Code after the method will be considered unreachable by diagnostics if the argument to + /// the associated parameter matches this value. + /// + public DoesNotReturnIfAttribute(bool parameterValue) => ParameterValue = parameterValue; + + /// Gets the condition parameter value. + public bool ParameterValue { get; } + } +#endif + + /// Specifies that the method or property will ensure that the listed field and property members have not-null values. 
+ [AttributeUsage(AttributeTargets.Method | AttributeTargets.Property, Inherited = false, AllowMultiple = true)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class MemberNotNullAttribute : Attribute + { + /// Initializes the attribute with a field or property member. + /// + /// The field or property member that is promised to be not-null. + /// + public MemberNotNullAttribute(string member) => Members = new[] { member }; + + /// Initializes the attribute with the list of field and property members. + /// + /// The list of field and property members that are promised to be not-null. + /// + public MemberNotNullAttribute(params string[] members) => Members = members; + + /// Gets field or property member names. + public string[] Members { get; } + } + + /// Specifies that the method or property will ensure that the listed field and property members have not-null values when returning with the specified return value condition. + [AttributeUsage(AttributeTargets.Method | AttributeTargets.Property, Inherited = false, AllowMultiple = true)] +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + sealed class MemberNotNullWhenAttribute : Attribute + { + /// Initializes the attribute with the specified return value condition and a field or property member. + /// + /// The return value condition. If the method returns this value, the associated parameter will not be null. + /// + /// + /// The field or property member that is promised to be not-null. + /// + public MemberNotNullWhenAttribute(bool returnValue, string member) + { + ReturnValue = returnValue; + Members = new[] { member }; + } + + /// Initializes the attribute with the specified return value condition and list of field and property members. + /// + /// The return value condition. If the method returns this value, the associated parameter will not be null. + /// + /// + /// The list of field and property members that are promised to be not-null. 
+ /// + public MemberNotNullWhenAttribute(bool returnValue, params string[] members) + { + ReturnValue = returnValue; + Members = members; + } + + /// Gets the return value condition. + public bool ReturnValue { get; } + + /// Gets field or property member names. + public string[] Members { get; } + } +} +#endif \ No newline at end of file diff --git a/src/DeltaQ.Utility.Memory/Range.cs b/src/DeltaQ.Utility.Memory/Range.cs new file mode 100644 index 0000000..0f18d22 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/Range.cs @@ -0,0 +1,143 @@ +//Generated 2021-12-26 +//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Range.cs + +using System.Runtime.CompilerServices; + +#if NETCOREAPP3_0_OR_GREATER +[assembly: TypeForwardedTo(typeof(System.Range))] +#else +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; + +#if NETSTANDARD2_0 || NETFRAMEWORK +using System.Numerics.Hashing; +#endif + +namespace System +{ + /// Represent a range has start and end indexes. + /// + /// Range is used by the C# compiler to support the range syntax. + /// + /// int[] someArray = new int[5] { 1, 2, 3, 4, 5 }; + /// int[] subArray1 = someArray[0..2]; // { 1, 2 } + /// int[] subArray2 = someArray[1..^0]; // { 2, 3, 4, 5 } + /// + /// + public readonly struct Range : IEquatable + { + /// Represent the inclusive start index of the Range. + public Index Start { get; } + + /// Represent the exclusive end index of the Range. + public Index End { get; } + + /// Construct a Range object using the start and end indexes. + /// Represent the inclusive start index of the range. + /// Represent the exclusive end index of the range. 
+ public Range(Index start, Index end) + { + Start = start; + End = end; + } + + /// Indicates whether the current Range object is equal to another object of the same type. + /// An object to compare with this object + public override bool Equals([NotNullWhen(true)] object? value) => + value is Range r && + r.Start.Equals(Start) && + r.End.Equals(End); + + /// Indicates whether the current Range object is equal to another Range object. + /// An object to compare with this object + public bool Equals(Range other) => other.Start.Equals(Start) && other.End.Equals(End); + + /// Returns the hash code for this instance. + public override int GetHashCode() + { +#if (!NETSTANDARD2_0 && !NETFRAMEWORK) + return HashCode.Combine(Start.GetHashCode(), End.GetHashCode()); +#else + return HashHelpers.Combine(Start.GetHashCode(), End.GetHashCode()); +#endif + } + + /// Converts the value of the current Range object to its equivalent string representation. + public override string ToString() + { +#if (!NETSTANDARD2_0 && !NETFRAMEWORK) + Span span = stackalloc char[2 + (2 * 11)]; // 2 for "..", then for each index 1 for '^' and 10 for longest possible uint + int pos = 0; + + if (Start.IsFromEnd) + { + span[0] = '^'; + pos = 1; + } + bool formatted = ((uint)Start.Value).TryFormat(span.Slice(pos), out int charsWritten); + Debug.Assert(formatted); + pos += charsWritten; + + span[pos++] = '.'; + span[pos++] = '.'; + + if (End.IsFromEnd) + { + span[pos++] = '^'; + } + formatted = ((uint)End.Value).TryFormat(span.Slice(pos), out charsWritten); + Debug.Assert(formatted); + pos += charsWritten; + + return new string(span.Slice(0, pos)); +#else + return Start.ToString() + ".." + End.ToString(); +#endif + } + + /// Create a Range object starting from start index to the end of the collection. + public static Range StartAt(Index start) => new Range(start, Index.End); + + /// Create a Range object starting from first element in the collection to the end Index. 
+ public static Range EndAt(Index end) => new Range(Index.Start, end); + + /// Create a Range object starting from first element to the end. + public static Range All => new Range(Index.Start, Index.End); + + /// Calculate the start offset and length of range object using a collection length. + /// The length of the collection that the range will be used with. length has to be a positive value. + /// + /// For performance reason, we don't validate the input length parameter against negative values. + /// It is expected Range will be used with collections which always have non negative length/count. + /// We validate the range is inside the length scope though. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (int Offset, int Length) GetOffsetAndLength(int length) + { + int start; + Index startIndex = Start; + if (startIndex.IsFromEnd) + start = length - startIndex.Value; + else + start = startIndex.Value; + + int end; + Index endIndex = End; + if (endIndex.IsFromEnd) + end = length - endIndex.Value; + else + end = endIndex.Value; + + if ((uint)end > (uint)length || (uint)start > (uint)end) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.length); + } + + return (start, end - start); + } + } +} +#endif \ No newline at end of file diff --git a/src/DeltaQ.Utility.Memory/ThrowHelper.cs b/src/DeltaQ.Utility.Memory/ThrowHelper.cs new file mode 100644 index 0000000..ef94640 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/ThrowHelper.cs @@ -0,0 +1,71 @@ +//Generated 2021-12-26 +//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs + +#if NETSTANDARD2_0 + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; + +namespace System +{ + internal static class ThrowHelper + { + [DoesNotReturn] + internal static void ThrowValueArgumentOutOfRange_NeedNonNegNumException() + { + throw GetArgumentOutOfRangeException(ExceptionArgument.value, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum); + } + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument) + { + throw new ArgumentOutOfRangeException(GetArgumentName(argument)); + } + + private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource) + { + return new ArgumentOutOfRangeException(GetArgumentName(argument), GetResourceString(resource)); + } + + private static string GetArgumentName(ExceptionArgument argument) + { + switch (argument) + { + case ExceptionArgument.value: + return "value"; + case ExceptionArgument.length: + return "length"; + default: + Debug.Fail("The enum value is not defined, please check the ExceptionArgument Enum."); + return ""; + } + } + + private static string GetResourceString(ExceptionResource resource) + { + switch (resource) + { + case ExceptionResource.ArgumentOutOfRange_NeedNonNegNum: + return "Non-negative number required."; + default: + Debug.Fail("The enum value is not defined, please check the ExceptionResource Enum."); + return ""; + } + } + } + + internal enum ExceptionArgument + { + value, + length + } + + internal enum ExceptionResource + { + ArgumentOutOfRange_NeedNonNegNum, + } +} +#endif \ No newline at end of file From 47c4c93ee39bfbb0466c6a0162324e444d5d2825 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 26 Dec 2021 07:07:59 -0500 Subject: [PATCH 233/325] Update SAIS to use fallbacks from DeltaQ.Utility.Memory --- .../DeltaQ.SuffixSorting.SAIS.csproj | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 86a73cf..ff22bde 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -4,14 +4,12 @@ net6.0;netstandard2.0 DeltaQ jzebedee - true - 0.2.0 + 0.3.0 - - + From 2c070432eba799df5efdce1036d471e4f359c820 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 26 Dec 2021 07:09:50 -0500 Subject: [PATCH 234/325] Update BsDiff to use fallback types --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 3 +-- src/DeltaQ.BsDiff/Diff.cs | 18 +++++++----------- src/DeltaQ.BsDiff/Patch.cs | 16 ++++++++-------- src/DeltaQ.BsDiff/SpanExtensions.cs | 2 +- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index c098b13..c9e0843 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -5,13 +5,12 @@ DeltaQ jzebedee 0.2.0 - true latest - + diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index 443db25..b45c18b 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -27,8 +27,8 @@ using bz2core; using DeltaQ.SuffixSorting; +using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; -using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.Buffers; using System.Diagnostics; @@ -88,7 +88,7 @@ 0 32 Header ?? ?? 
Bzip2ed extra block */ Span header = stackalloc byte[HeaderSize]; - Span header_signature = header.Slice(0, sizeof(long)); + Span header_signature = header[..sizeof(long)]; header_signature.WritePackedLong(Signature); Span header_compressed_ctrl = header.Slice(sizeof(long), sizeof(long)); @@ -116,12 +116,8 @@ 0 32 Header using (var extraStream = GetEncodingStream(msExtra, true)) { Span I = saOwner.Span; -#if NETSTANDARD2_0 - var sortLen = suffixSort.Sort(oldData, I.Slice(0, oldData.Length)); -#else var sortLen = suffixSort.Sort(oldData, I[..^1]); -#endif - Trace.Assert(sortLen == oldData.Length); + Debug.Assert(sortLen == oldData.Length); var scan = 0; var pos = 0; @@ -137,7 +133,7 @@ 0 32 Header for (var scsc = scan += len; scan < newData.Length; scan++) { - len = Search(I, oldData, newData.Slice(scan), 0, oldData.Length, out pos); + len = Search(I, oldData, newData[scan..], 0, oldData.Length, out pos); for (; scsc < scan + len; scsc++) { @@ -282,8 +278,8 @@ private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadO { if (end - start < 2) { - var x = MatchLength(oldData.Slice(I[start]), newData); - var y = MatchLength(oldData.Slice(I[end]), newData); + var x = MatchLength(oldData[I[start]..], newData); + var y = MatchLength(oldData[I[end]..], newData); if (x > y) { @@ -298,7 +294,7 @@ private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadO } var midPoint = start + (end - start) / 2; - if (CompareBytes(oldData.Slice(I[midPoint]), newData) < 0) + if (CompareBytes(oldData[I[midPoint]..], newData) < 0) { start = midPoint; } diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 9ef7716..89bef4f 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -25,8 +25,8 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; -using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.IO; @@ -96,9 +96,9 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre throw new InvalidOperationException("Corrupt patch"); // read lengths from header - controlLength = header.Slice(sizeof(long)).ReadPackedLong(); - diffLength = header.Slice(sizeof(long) * 2).ReadPackedLong(); - newSize = header.Slice(sizeof(long) * 3).ReadPackedLong(); + controlLength = header[sizeof(long)..].ReadPackedLong(); + diffLength = header[(sizeof(long) * 2)..].ReadPackedLong(); + newSize = header[(sizeof(long) * 3)..].ReadPackedLong(); if (controlLength < 0 || diffLength < 0 || newSize < 0) throw new InvalidOperationException("Corrupt patch"); @@ -147,9 +147,9 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea // add x bytes from oldfile to x bytes from the diff block; var addSize = ctrlBuffer.ReadPackedLong(); // copy y bytes from the extra block; - var copySize = ctrlBuffer.Slice(sizeof(long)).ReadPackedLong(); + var copySize = ctrlBuffer[sizeof(long)..].ReadPackedLong(); // seek forwards in oldfile by z bytes; - var seekAmount = ctrlBuffer.Slice(sizeof(long) * 2).ReadPackedLong(); + var seekAmount = ctrlBuffer[(sizeof(long) * 2)..].ReadPackedLong(); // sanity-check if (output.Position + addSize > newSize) @@ -169,7 +169,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea for (var i = 0; i < diffBytesRead; i++) diffBuffer[i] += inputBuffer[i]; - output.Write(diffBuffer.Slice(0, diffBytesRead)); + output.Write(diffBuffer[..diffBytesRead]); addSize -= diffBytesRead; } @@ -181,7 +181,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea while (copySize > 0) { var bytesRead = extra.Read(diffBuffer.SliceUpTo((int)copySize)); - output.Write(diffBuffer.Slice(0, bytesRead)); + 
output.Write(diffBuffer[..bytesRead]); copySize -= bytesRead; } diff --git a/src/DeltaQ.BsDiff/SpanExtensions.cs b/src/DeltaQ.BsDiff/SpanExtensions.cs index f064496..c784165 100644 --- a/src/DeltaQ.BsDiff/SpanExtensions.cs +++ b/src/DeltaQ.BsDiff/SpanExtensions.cs @@ -73,6 +73,6 @@ public static long ReadPackedLong(this Span span) return (span[7] & 0x80) != 0 ? -y : y; } - public static Span SliceUpTo(this Span span, int max) => span.Slice(0, Math.Min(span.Length, max)); + public static Span SliceUpTo(this Span span, int max) => span[..Math.Min(span.Length, max)]; } } From 0dcd97234fed7462e5b4cbfe50279bc07ac509e0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 26 Dec 2021 07:11:02 -0500 Subject: [PATCH 235/325] Update LibDivSufSort to support netstandard2.0 --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 7 ++++--- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 6 +++++- src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index cf83dd9..10fbbee 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -1,14 +1,15 @@  - net6.0 + net6.0;netstandard2.0 DeltaQ jzebedee - 0.4.0 + 0.5.0 + latest - + diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index a3248ab..298a015 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1669,8 +1669,12 @@ private static int ss_isqrt(int x) => x switch { >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, +#if NETSTANDARD2_0 + _ => (int)Math.Sqrt(x) +#else _ => (int)MathF.Sqrt(x) +#endif }; #endif -} + } diff --git 
a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index 55c8624..d43daa9 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -192,7 +192,7 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA Idx trlink = -1; using var stackOwner = SpanOwner.Allocate(TR_STACK_SIZE, AllocationMode.Clear); - TrStack stack = new(stackOwner.Span); + TrStack stack = new TrStack(stackOwner.Span); var ISA = SA[isaOffset..]; var ISAd = SA[isadOffset..]; From c33ed927c876fc270e5857303f4e6a73bdf60d8a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 26 Dec 2021 07:42:48 -0500 Subject: [PATCH 236/325] Use Microsoft.Bcl.HashCode impl in DeltaQ.Utility.Memory --- src/DeltaQ.Utility.Memory/HashHelpers.cs | 21 --------------------- src/DeltaQ.Utility.Memory/Range.cs | 4 ---- 2 files changed, 25 deletions(-) delete mode 100644 src/DeltaQ.Utility.Memory/HashHelpers.cs diff --git a/src/DeltaQ.Utility.Memory/HashHelpers.cs b/src/DeltaQ.Utility.Memory/HashHelpers.cs deleted file mode 100644 index e28f0b8..0000000 --- a/src/DeltaQ.Utility.Memory/HashHelpers.cs +++ /dev/null @@ -1,21 +0,0 @@ -//Generated 2021-12-26 -//https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Numerics/Hashing/HashHelpers.cs - -#if NETSTANDARD2_0 -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -namespace System.Numerics.Hashing -{ - internal static class HashHelpers - { - public static int Combine(int h1, int h2) - { - // RyuJIT optimizes this to use the ROL instruction - // Related GitHub pull request: https://github.com/dotnet/coreclr/pull/1830 - uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27); - return ((int)rol5 + h1) ^ h2; - } - } -} -#endif \ No newline at end of file diff --git a/src/DeltaQ.Utility.Memory/Range.cs b/src/DeltaQ.Utility.Memory/Range.cs index 0f18d22..5959171 100644 --- a/src/DeltaQ.Utility.Memory/Range.cs +++ b/src/DeltaQ.Utility.Memory/Range.cs @@ -58,11 +58,7 @@ value is Range r && /// Returns the hash code for this instance. public override int GetHashCode() { -#if (!NETSTANDARD2_0 && !NETFRAMEWORK) return HashCode.Combine(Start.GetHashCode(), End.GetHashCode()); -#else - return HashHelpers.Combine(Start.GetHashCode(), End.GetHashCode()); -#endif } /// Converts the value of the current Range object to its equivalent string representation. From 0e0bcd6744a53e17edce6a47f7948b13a756de65 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Sun, 26 Dec 2021 15:20:11 -0500 Subject: [PATCH 237/325] Add logo assets --- assets/dq-icon.png | Bin 0 -> 4971 bytes assets/dq.svg | 1 + 2 files changed, 1 insertion(+) create mode 100644 assets/dq-icon.png create mode 100644 assets/dq.svg diff --git a/assets/dq-icon.png b/assets/dq-icon.png new file mode 100644 index 0000000000000000000000000000000000000000..6e9e5196b2e0cec2ff68aef9a69bc67740f5a95d GIT binary patch literal 4971 zcmV-x6O`org>6q#iq_DW7CBi|($xvUz?cE1v zvrbKt*FOLKgz?0cUp69<}ZPloA z0GMR;J2c>syoY6A!+>>brds_1ptWmjkt&kiKX{SiSj+;q1ZR-^n)M5Sta+)h#W^8| zpbN7=V@Mu}u3n!2n0RYijM}uDNAty2d&NlcZMw2lMT+Ylh5$x&@07jt;xE{-_ z-Dd!R9hiA0f?a(9Ae$d8?9?NXXx5g^qO;MAgm)F0))xRW=5Mg5oe+&a^v9hlp0A$Q= zO#kVKwf7bPut~&i#LB)t0Fbdx!}Nc3ESi(@DAC?{M=Wi{&`5m%z$iS6qu;hM=!=@j zV;0yarnVw)ruG5QW{wv{@n;Q9k)!7iHR#kN#o{P2yX6HnVe%{<~~B&ydJ@I1+}vY2@ijNG`PvfVPF(ieehmzoHb2Tj7g+Nf_p1p!~XuT>W>% z4Rj!WpbkoH1Hdq^z!-dAF%B};iSE3a}n4Mw&-Mszkm zsO&?632GPs8o?e?PQ{u1XZ`fgHVep`aY1x^g2z|cwb}%LVI7NLkDHa!xjAwe7OsqK zo-5;#!y3dHqgN>Vl2C#g1%OFczM~a7y$YK@=M7?}uor?w7PE|rlv&I&9W1=6z@+v7 zpgVXUEn7>>JLq#)opLP$LcWG0A)otM%pj;e02n6u>3iSHOIg5IiKrltk}+FxdDO|G zZz&H$dv2*`Sel56No@gO`l2t1Aw93Em7(d%%{B{&2e90XSNAJ<6tx8aqi_)nIp=z+ zQHnPZ&9k|dsz( ze1h_17G6bhfSBFH<9^1{A(zQ2Lf+w6XS==w!_N&$CYHWl=GlY2g_gg^+JAT?is9P~eu5clSR{R9VwyYv zWEYGS_K=s9@}=$=M&I}Y|NU7mhpAocXBh3+s;>4oav=>=&0Fbc`!}04brA(_U z@)w|YOXErdKh^Q;E-eq_%YRIt_UKV5@+5v@)xTxUPAmgVb_RPImM!({rc?kgL2>|K zXa##*jty0LckUG&LSOV*8mgitqcXziIQ-l1R@LR31ztqozoOBnSV@urfb+~MwOl-8;6tcX6FXzjk;hg#_o1}HW@1~IcUYb!1lM+z+^EP%B$nL~u z&|nxl78cb9fX*h%7DvM2lfF=|H-LhE{bfbc)$P3wTBQi%s{jI?%pyX32r&D2#FAy4 zU71uD02%WXn0>ENidZ`Od9D2fjBS*B)C5k0OiRHi0N_UauPPgvx&@*r0s2lo;%KR= z3jo9XAH>_zQ$$;2pe#xS<8|z#Gd)M1V5iL>b#I0+@-QZ)D9ac}S&Ug=Zw^Lbc~lPo zhP5v~h51SWN?$*jdc9n}5jY|!UtrkcbR;C6r5VOB$+^RisoA|hJ?W8R&e)g3b~tSP z>H$C}*h9$cvGbnKJ}6sq;Oh;6PBG1Qhmt5LnFYy5&YwZDF|Gz0EbUqrX28K%x)oFtqI!bv2a{0}Wt^9SR zJbDE00`z3;ioI|tPTlcr0l^+p73^_4z67TumdX;yC5P8-BgwdWJBIl*{*B2!-98An 
zggj2D>d{z5!KFCwCXM-<;yV2eWxu@N;_&5H3nQE`@EL!Gg6fVQcaJHJg28E3nh0Z1( zUK-8yU(+-z`6AOx@Ys#7U&rOjYmF`$X@VE8>7Ted^Uvki!Df(>P4bkO?+2ov0d#^r zgnbS+Ksw19X%=wv33UEK^@c@t%JB9yHcUF*?4j3`lIkRU4C=%fN;Y}~(E^|i;Z;SA z@G5_(cblricm=sRf|Rw`d;D|KYbJj%LfY=pQ@N7rX#ikHB^47vv;fcv_OQieSoi$C z=;^i;&_hywvvY;}`d94**n9?Y*(i6!W{{4Uk(cNI&}MEAd%0SO?(}-%qOpG0P_hfq zSL^g_j34I^;zQH7+tIJrHC#0mMgjf;GNU5X`nxS|!AB8xQ>)cfbO6u`_P8Ix9zh)~ z-9XSk#?M_9?f8?Ii?+YxrOs(bYgj?US5g>|%a zS`G*V6dTwZ#b*)El zq5**3RCo*oR9_A2tFoLV!YN#w29)9t{)r!@pYvMxgAIPUpN2^@-`e`!_Ml7^Tg0rE z0t+t}Cj0;}Ov;q~Yh|aSTjR)9K*^;E2!36-!wo(OZCKa{+C1+F+F~J=0l3dcSE2xb zMzDvSMVK^?lDu_8>pTTukUMu}^lxQNTJwK^pTB9}seQc}P;XQlit8enl!9U-DZBv4 zSbHPOhdMepy070$NnU2;Aoc_IwhWWr5ImG+#H73kCnf4%B!wFQ>X$vFybc1zoiNF5 z@aeQb1EmJ7AN6PRqlKNK>uZkEgT2xa4n|Hq_yCa0Z;4A0n*O~@cX|sLeqK|NG5+0c z@acqb0?U-|)Y*)=6epogSmcnYqec5Tru4O|_lOSwTEQN25j=i)(ygd9OQ(XH@N-)G zd8d4*z8Ds+b!}V~GZZPGz^q9YPWHkG01aEX6is;5lXC7vUEHjMbYXNde!Nq@n>3u8 z!H81h@&F)fO^3s0Wum1@wY-hBY(&%}5B_B2p)-HwJ%MvBz$iOpm^1?&&(nmBK-p#{ zFXY}H@cgRKPv=klyI>8=N%*NW!b5-groYUa@ zyUFvRG+(4`U6bGxbFHsB?!>PG0Nxo@d^Qvg0EV?QcDm}s49_neB5pgn$7kPk$twrn z&vmeF8{z0UH-mQ1o}zOfb%2d#kHY~VT(E~`7d^=;eFVeX@;Q1{$-^+oW&Ly>*79B) zr4<1-`YiFp9i}bShl}$yPE5ENVU;Mj#rSK~Wro%`!M8im?%dep^EpoD`OYTJ% z8~{4O9yXST&gMuA7!ay&c2ABkpZL*HmJCOBz+@=!iI*qC5H;jw^y=k0ip4d%m3kX0AQ_PYFoWlFaTr=XF(X% zapW^}awtb9U3iqI|1j?0#M&z}n18o0FM)MF8eN6iDSTYxLVsKV0bsrd-?!nOeU)-5 z?AKf=)ts6t10YO_7?n#nohUZc9TO#8#<~R8<v+&O512$f$T$rqHjvu<+v;kYp+Q1O%%#m&XVE$XOikJkJHKquHkh`uj-@Ce(V zOWPb>g&(iwcB^ZWROT2{lP%x7cNggO-V6}TVJc^So(nLhxu&fi+3*6<3UE;ZCp z+yHJ&>La3#jP5jjgF5`20%+R58$Xr5pngz@A32Ny;vgj7=_2Gz>kj}r!5%hV=;~}z z`QQ0bnHO!}FIg1}K!71!^n%UBdOd~P2g_|Y$chBlSI#V>kikz9TVHiIktbkLJF4Fl86rYD2& z^8o-2kDsD_yqMk0l`|)ZJXD8f1X_Z2TL}iI0Mha)hGvPyu?%{pG=_y^tj)AkO1I=% zN*(F}p!&fcNz+qJw)}A9oF37mpg2jbW-AaKd2R(L7ffm zYU)x0AZw0+)vuhj+g;ZI5m>W>$I)@4fISNWKUj*r3^svv{IXI5KaRbnIlHbM$ z>7fD6`vD>MgRj5>KYH~BK*kyY+x44%7!J}_RRg}0O?m@BE7+r|0uMG`sezv5!^NEa zKhRZ-0stCEKi!rF>Eh9VZT}QbI6u;D%pCxF!5+Hp4ARA^0UsN3cL3-Fd&DW!U|T*k 
zAdkal&?`UEDqhY2&<*zR(WODs7&Rb~2B*Dg2b~t30iYA?p~HQUrg|FiPQC9j1Ly{O pR8O?QmUT2B<&AL3qkK)C<^RC91k8ASQs@8x002ovPDHLkV1jfPLAL+^ literal 0 HcmV?d00001 diff --git a/assets/dq.svg b/assets/dq.svg new file mode 100644 index 0000000..0611a59 --- /dev/null +++ b/assets/dq.svg @@ -0,0 +1 @@ +Δδ \ No newline at end of file From 74b9ade1169dcb6c31124509eb90dc59c2c45d25 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Sun, 26 Dec 2021 15:36:07 -0500 Subject: [PATCH 238/325] Target netstandard2.1 in BsDiff --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index c9e0843..9d5827e 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -1,10 +1,10 @@  - net6.0;netstandard2.0 + net6.0;netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.2.0 + 0.3.0 latest From 5889c587f44832a8e4cca2e7bdcc111b4c445acf Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 06:52:07 -0500 Subject: [PATCH 239/325] Change ISuffixSort signature and add XML documentation --- .../ISuffixSort.cs | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs index 387b9cd..2d4fcca 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs +++ b/src/DeltaQ.SuffixSorting.Abstractions/ISuffixSort.cs @@ -3,9 +3,27 @@ namespace DeltaQ.SuffixSorting { + /// + /// Provides functionality to sort inputs into a suffix array representation. + /// public interface ISuffixSort { - IMemoryOwner Sort(ReadOnlySpan textBuffer); - int Sort(ReadOnlySpan textBuffer, Span suffixBuffer); + /// + /// Sort a buffer and return a new + /// IMemoryOwner<int> containing + /// suffixes of in sorted lexicographical order. + /// + /// Buffer containing text to sort. 
+ /// IMemoryOwner<int> containing suffixes of in sorted lexicographical order. + IMemoryOwner Sort(ReadOnlySpan text); + /// + /// Sort a buffer and fill a result buffer + /// with suffixes of + /// in sorted lexicographical order. + /// + /// The buffer and buffer should be the same length. + /// Buffer containing text to sort. + /// Buffer containing suffixes of in sorted lexicographical order. + void Sort(ReadOnlySpan text, Span suffixes); } } \ No newline at end of file From 4885b0ffb3a6b93914ec33c03385cac46e8e773e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 06:53:06 -0500 Subject: [PATCH 240/325] Target netstandard 2.1 and 2.0 and bump package version in SuffixSorting.Abstractions --- .../DeltaQ.SuffixSorting.Abstractions.csproj | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 5f91da1..e991c11 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -1,15 +1,14 @@ - + - net6.0;netstandard2.0 + netstandard2.1;netstandard2.0 DeltaQ jzebedee - true - 0.2.0 + 0.3.0 - - + + From 14188d62bc30cb235da414e16afa4646a5b63700 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 27 Dec 2021 09:45:41 -0500 Subject: [PATCH 241/325] Add icon and license to SuffixSorting.Abstractions Bump version to 0.4.1 --- .../DeltaQ.SuffixSorting.Abstractions.csproj | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index e991c11..968f92d 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -4,11 +4,18 @@ netstandard2.1;netstandard2.0 DeltaQ jzebedee - 0.3.0 + 0.4.1 + dq.png + RPL-1.5 - + + + + + + From 795f90149791e9fe754ad219374dd228b9eee28a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 09:53:29 -0500 Subject: [PATCH 242/325] Add .editorconfig --- .editorconfig | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..9a17955 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,149 @@ +# You can modify the rules from these initially generated values to suit your own policies +# You can learn more about editorconfig here: https://docs.microsoft.com/en-us/visualstudio/ide/editorconfig-code-style-settings-reference + +############################### +# Core EditorConfig Options # +############################### +root = true +# All files +[*] +indent_style = space + +# XML project files +[*.{csproj,vbproj,vcxproj,vcxproj.filters,proj,projitems,shproj}] +indent_size = 2 + +# XML config files +[*.{props,targets,ruleset,config,nuspec,resx,vsixmanifest,vsct}] +indent_size = 2 + +[*.cs] +#Core editorconfig formatting - indentation + +#use soft tabs (spaces) for indentation +indent_style = space + +#Formatting - indentation options + +#indent switch case contents. 
+csharp_indent_case_contents = true +#indent switch labels +csharp_indent_switch_labels = true + +#Formatting - new line options + +#place catch statements on a new line +csharp_new_line_before_catch = true +#place else statements on a new line +csharp_new_line_before_else = true +#require members of object initializers to be on the same line +csharp_new_line_before_members_in_object_initializers = false +#require braces to be on a new line for object_collection_array_initializers, accessors, methods, properties, control_blocks, types, and lambdas (also known as "Allman" style) +csharp_new_line_before_open_brace = object_collection_array_initializers, accessors, methods, properties, control_blocks, types, lambdas + +#Formatting - organize using options + +#do not place System.* using directives before other using directives +dotnet_sort_system_directives_first = false + +#Formatting - spacing options + +#require NO space between a cast and the value +csharp_space_after_cast = false +#require a space before the colon for bases or interfaces in a type declaration +csharp_space_after_colon_in_inheritance_clause = true +#require a space after a keyword in a control flow statement such as a for loop +csharp_space_after_keywords_in_control_flow_statements = true +#require a space before the colon for bases or interfaces in a type declaration +csharp_space_before_colon_in_inheritance_clause = true +#remove space within empty argument list parentheses +csharp_space_between_method_call_empty_parameter_list_parentheses = false +#remove space between method call name and opening parenthesis +csharp_space_between_method_call_name_and_opening_parenthesis = false +#do not place space characters after the opening parenthesis and before the closing parenthesis of a method call +csharp_space_between_method_call_parameter_list_parentheses = false +#remove space within empty parameter list parentheses for a method declaration 
+csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +#place a space character after the opening parenthesis and before the closing parenthesis of a method declaration parameter list. +csharp_space_between_method_declaration_parameter_list_parentheses = false + +#Formatting - wrapping options + +#leave code block on single line +csharp_preserve_single_line_blocks = true +#leave statements and member declarations on the same line +csharp_preserve_single_line_statements = true + +#Style - Code block preferences + +#prefer curly braces even for one line of code +csharp_prefer_braces = true:suggestion + +#Style - expression bodied member options + +#prefer expression-bodied members for accessors +csharp_style_expression_bodied_accessors = true:suggestion +#prefer expression-bodied members for constructors +csharp_style_expression_bodied_constructors = true:suggestion +#prefer expression-bodied members for indexers +csharp_style_expression_bodied_indexers = true:suggestion +#prefer expression-bodied members for methods +csharp_style_expression_bodied_methods = true:suggestion +#prefer expression-bodied members for properties +csharp_style_expression_bodied_properties = true:suggestion + +#Style - expression level options + +#prefer out variables to be declared inline in the argument list of a method call when possible +csharp_style_inlined_variable_declaration = true:suggestion +#prefer tuple names to ItemX properties +dotnet_style_explicit_tuple_names = true:suggestion +#prefer the language keyword for member access expressions, instead of the type name, for types that have a keyword to represent them +dotnet_style_predefined_type_for_member_access = true:suggestion + +#Style - Expression-level preferences + +#prefer default over default(T) +csharp_prefer_simple_default_expression = true:suggestion +#prefer objects to be initialized using object initializers when possible +dotnet_style_object_initializer = true:suggestion +#prefer inferred 
tuple element names +dotnet_style_prefer_inferred_tuple_names = true:suggestion + +#Style - implicit and explicit types + +#prefer var over explicit type in all cases, unless overridden by another code style rule +csharp_style_var_elsewhere = true:suggestion +#prefer var is used to declare variables with built-in system types such as int +csharp_style_var_for_built_in_types = true:suggestion +#prefer var when the type is already mentioned on the right-hand side of a declaration expression +csharp_style_var_when_type_is_apparent = true:suggestion + +#Style - language keyword and framework type options + +#prefer the language keyword for local variables, method parameters, and class members, instead of the type name, for types that have a keyword to represent them +dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion + +#Style - Miscellaneous preferences + +#prefer local functions over anonymous functions +csharp_style_pattern_local_over_anonymous_function = true:suggestion + +#Style - modifier options + +#prefer accessibility modifiers to be declared except for public interface members. This will currently not differ from always and will act as future proofing for if C# adds default interface methods. +dotnet_style_require_accessibility_modifiers = for_non_interface_members:suggestion + +#Style - Modifier preferences + +#when this rule is set to a list of modifiers, prefer the specified ordering. +csharp_preferred_modifier_order = public,private,internal,static,readonly,sealed,override:suggestion + +#Style - qualification options + +#prefer fields to be prefaced with this. in C# or Me. in Visual Basic +dotnet_style_qualification_for_field = true:suggestion +#prefer methods not to be prefaced with this. or Me. in Visual Basic +dotnet_style_qualification_for_method = false:suggestion +#prefer properties not to be prefaced with this. or Me. 
in Visual Basic +dotnet_style_qualification_for_property = false:suggestion From e252100ab357fe60fc98aeee402f6d5559b3caca Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 09:53:51 -0500 Subject: [PATCH 243/325] Add GH actions stubs --- .github/workflows/ci.yml | 34 ++++++++++++++++++++++++++++++++++ .github/workflows/release.yml | 19 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..62da666 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI build-test-pack +on: [push] +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + dotnet-version: [ '6.0.x', '5.0.x', '3.1.x' ] + steps: + - uses: actions/checkout@v2 + - name: Setup .NET SDK ${{ matrix.dotnet }} + uses: actions/setup-dotnet@v1.7.2 + with: + dotnet-version: ${{ matrix.dotnet-version }} + - name: Install dependencies + run: dotnet restore + - name: Build + run: dotnet build --no-restore --configuration Release + - name: Test + run: dotnet test --no-restore --no-build --configuration Release --logger trx --results-directory "TestResults-${{ matrix.dotnet-version }}" + - name: Upload test results + uses: actions/upload-artifact@v2 + with: + name: dotnet-results-${{ matrix.dotnet-version }} + path: TestResults-${{ matrix.dotnet-version }} + # Use always() to always run this step to publish test results when there are test failures + if: ${{ always() }} + - name: Pack + run: dotnet pack --no-restore --no-build --version-suffix CI-$GITHUB_RUN_ID --output pkg + - name: Upload package + uses: actions/upload-artifact@v2 + with: + name: DeltaQ-$GITHUB_RUN_ID + path: pkg/*.* diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..cd82ed3 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,19 @@ +name: Upload packages 
to feeds +on: + release: + types: [created] + workflow_dispatch: +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-dotnet@v1 + with: + dotnet-version: '6.0.x' # SDK Version to use. + - name: Pack + run: dotnet pack -c Release --output pkg + - name: Publish the package to GPR + run: dotnet nuget push pkg/*.nupkg -k ${{ secrets.GITHUB_TOKEN }} -s https://nuget.pkg.github.com/jzebedee/index.json --skip-duplicate + - name: Publish the package to NuGet + run: dotnet nuget push pkg/*.nupkg -k ${{ secrets.DELTAQ_NUGET_TOKEN }} -s https://api.nuget.org/v3/index.json --skip-duplicate From 8d9d05494b1c6c9eaa11a7db4fa4f903abb871b6 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 09:54:04 -0500 Subject: [PATCH 244/325] Update solution items --- deltaq.sln | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/deltaq.sln b/deltaq.sln index 5dad45e..d7bf58a 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -35,6 +35,17 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "bench", "bench", "{BF7CD739 EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Benchmarks", "bench\DeltaQ.Benchmarks\DeltaQ.Benchmarks.csproj", "{A5FCA064-2EED-4CDB-93D7-FF16E3314885}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{09BBE319-2E1C-4878-AA44-FCC730167792}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{8B749DE9-2B73-49DE-912E-DE5E17ADA029}" + ProjectSection(SolutionItems) = preProject + .github\workflows\ci.yml = .github\workflows\ci.yml + .github\workflows\release.yml = .github\workflows\release.yml + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU From bdd3099ac766573004bd55f77778cc56fbb2f27c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 27 Dec 2021 09:55:40 -0500 Subject: [PATCH 245/325] Update test projects to target all supported .NETs --- test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj | 4 ++-- .../DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj | 7 +++---- .../DeltaQ.SuffixSorting.SAIS.Tests.csproj | 6 +++--- test/DeltaQ.Tests/DeltaQ.Tests.csproj | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj index 6a6dbba..942f6d6 100644 --- a/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj +++ b/test/DeltaQ.BsDiff.Tests/DeltaQ.BsDiff.Tests.csproj @@ -1,11 +1,11 @@  - net6.0;net461 + netcoreapp3.1;net5.0;net6.0;net48 false latest - + diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj index 19d2438..4afcf0c 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj @@ -1,10 +1,9 @@  - net6.0 - enable - - false + netcoreapp3.1;net5.0;net6.0;net48 + false + latest diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj index 9b54fd2..21b4ac7 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/DeltaQ.SuffixSorting.SAIS.Tests.csproj @@ -1,9 +1,9 @@  - net6.0;net461 - false - latest + netcoreapp3.1;net5.0;net6.0;net48 + false + latest diff --git a/test/DeltaQ.Tests/DeltaQ.Tests.csproj b/test/DeltaQ.Tests/DeltaQ.Tests.csproj index 8579195..6fc1b4b 100644 --- a/test/DeltaQ.Tests/DeltaQ.Tests.csproj +++ b/test/DeltaQ.Tests/DeltaQ.Tests.csproj @@ -1,9 +1,9 @@  - net6.0;net461 - 
false - latest + netcoreapp3.1;net5.0;net6.0;net48 + false + latest From 6757339b89f9e8f044dc3c74054339075643088a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 09:56:48 -0500 Subject: [PATCH 246/325] Update tests --- test/DeltaQ.BsDiff.Tests/BsDiffTests.cs | 28 +--------- .../LibDivSufSortTests.cs | 55 +++++++------------ .../SAISTests.cs | 10 +++- 3 files changed, 30 insertions(+), 63 deletions(-) diff --git a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs index 7d04cc0..74303ed 100644 --- a/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs +++ b/test/DeltaQ.BsDiff.Tests/BsDiffTests.cs @@ -1,30 +1,6 @@ -/* - * BsDiffTests.cs for DeltaQ - * Copyright (c) 2014 J. Zebedee - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ -using DeltaQ.BsDiff; +using DeltaQ.BsDiff; +using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; -using Microsoft.Toolkit.HighPerformance.Extensions; using System; using System.Collections.Generic; using System.IO; diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs index 37495d7..928207d 100644 --- a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs +++ b/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs @@ -24,23 +24,21 @@ public void Dispose() FinalizeCrosscheck(); } -#if NET461 - private static void RandomFillBuffer(byte[] buffer) - { - var rand = new Random(63 * 13 * 63 * 13); - rand.NextBytes(buffer); - } -#else private static SpanOwner GetOwnedRandomBuffer(int size) { var rand = new Random(63 * 13 * 63 * 13); var owner = SpanOwner.Allocate(size); +#if NETFRAMEWORK + var buf = new byte[size]; + rand.NextBytes(buf); + buf.CopyTo(owner.Span); +#else rand.NextBytes(owner.Span); +#endif return owner; } -#endif private static void Verify(ReadOnlySpan T, ReadOnlySpan SA) { @@ -78,7 +76,14 @@ public void CheckShruggy() Verify(T, SA); } - public static IEnumerable FuzzFiles => FuzzFilesInner.Select(fuzzFile => new object[] { Path.Join(FuzzFilesBasePath, fuzzFile) }); + public static IEnumerable FuzzFiles + => FuzzFilesInner.Select(fuzzFile => new object[] { +#if NETCOREAPP3_1_OR_GREATER + Path.Join(FuzzFilesBasePath, fuzzFile) +#else + Path.Combine(FuzzFilesBasePath, fuzzFile) +#endif + }); private static IEnumerable FuzzFilesInner { get @@ -134,32 +139,12 @@ public void CheckRandomBuffer(int size) { var ldss = new LibDivSufSort(); -#if NET461 - var ownedT = ArrayPool.Shared.Rent(size); - try -#else - using (var ownedT = GetOwnedRandomBuffer(size)) -#endif - { -#if NET461 - RandomFillBuffer(ownedT); - ReadOnlySpan T = ownedT; -#else - ReadOnlySpan T = ownedT.Span; -#endif - using (var ownedSA 
= SpanOwner.Allocate(size, AllocationMode.Clear)) - { - var SA = ownedSA.Span; - ldss.Sort(T, SA); - Verify(T, SA); - } - } -#if NET461 - finally - { - ArrayPool.Shared.Return(ownedT); - } -#endif + using var ownedT = GetOwnedRandomBuffer(size); + ReadOnlySpan T = ownedT.Span; + using var ownedSA = SpanOwner.Allocate(size, AllocationMode.Clear); + var SA = ownedSA.Span; + ldss.Sort(T, SA); + Verify(T, SA); } } } diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs index d3f27c0..015e33f 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs @@ -18,12 +18,18 @@ private static void RandomFillBuffer(byte[] buffer) rand.NextBytes(buffer); } #else - private static MemoryOwner GetOwnedRandomBuffer(int size) + private static SpanOwner GetOwnedRandomBuffer(int size) { var rand = new Random(63 * 13 * 63 * 13); - var owner = MemoryOwner.Allocate(size); + var owner = SpanOwner.Allocate(size); +#if NETFRAMEWORK + var buf = new byte[size]; + rand.NextBytes(buf); + buf.CopyTo(owner.Span); +#else rand.NextBytes(owner.Span); +#endif return owner; } From bdda038d0d85e9ad9ac91a937b6a8a73d114209f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 10:05:57 -0500 Subject: [PATCH 247/325] Interim commit of DQ.CLI from fuzz testing --- src/DeltaQ.CLI/DeltaQ.CLI.csproj | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj b/src/DeltaQ.CLI/DeltaQ.CLI.csproj index 8ece8d4..6b732f3 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CLI/DeltaQ.CLI.csproj @@ -5,20 +5,24 @@ net6.0 DeltaQ jzebedee - dq - false + dq + true + ../../pkg - + + + + - + From a3694a9a066e5354f26a230e2ab2531030e134f3 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 27 Dec 2021 10:06:19 -0500 Subject: [PATCH 248/325] Update DQ.CLI --- src/DeltaQ.CLI/Program.cs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CLI/Program.cs index 81aadbe..2603457 100644 --- a/src/DeltaQ.CLI/Program.cs +++ b/src/DeltaQ.CLI/Program.cs @@ -1,7 +1,9 @@ using System; using System.IO; using System.Text; +using DeltaQ.SuffixSorting; using DeltaQ.SuffixSorting.LibDivSufSort; +using DeltaQ.SuffixSorting.SAIS; using Microsoft.Extensions.CommandLineUtils; using SharpFuzz; @@ -23,6 +25,12 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) throw ex; } } + + var result = DeltaQ.Tests.LDSSChecker.Check(input, sa, true); + if (result != DeltaQ.Tests.LDSSChecker.ResultCode.Done) + { + throw new InvalidOperationException($"Input failed with result code {result}"); + } } const string HelpOptions = "-?|-h|--help"; @@ -58,7 +66,7 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) using var ms = new MemoryStream(); s.CopyTo(ms); - if(!ms.TryGetBuffer(out var T)) + if (!ms.TryGetBuffer(out var T)) { throw new InvalidOperationException(); } @@ -71,6 +79,7 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) }); }); +static ISuffixSort GetDefaultSort() => new LibDivSufSort(); app.Command("diff", command => { command.Description = "Diff two files"; @@ -79,16 +88,21 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) var oldFileArg = command.Argument("[oldfile]", ""); var newFileArg = command.Argument("[newfile]", ""); var deltaFileArg = command.Argument("[deltafile]", ""); - var algoArg = command.Option("-a|--algorithm ", "", CommandOptionType.SingleValue); + var algoArg = command.Option("-ss|--suffix-sort ", "Suffix sort library: [sais], [divsufsort]", CommandOptionType.SingleValue); command.OnExecute(() => { var oldFile = oldFileArg.Value; var newFile = newFileArg.Value; var deltaFile = deltaFileArg.Value; - var algo = algoArg.Value(); - 
DeltaQ.BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), new LibDivSufSort()); - Console.WriteLine($"Diff [algo:{algo}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); + ISuffixSort sort = algoArg.Value() switch + { + "sais" => new SAIS(), + "divsufsort" => new LibDivSufSort(), + _ => GetDefaultSort() + }; + DeltaQ.BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); + Console.WriteLine($"Diff [sort:{sort.GetType()}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); return 0; }); }); From 356d6df56f3537602682ae6558bae2554869620d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 11:04:11 -0500 Subject: [PATCH 249/325] Add StreamExtensions --- src/DeltaQ.Utility.Memory/StreamExtensions.cs | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/DeltaQ.Utility.Memory/StreamExtensions.cs diff --git a/src/DeltaQ.Utility.Memory/StreamExtensions.cs b/src/DeltaQ.Utility.Memory/StreamExtensions.cs new file mode 100644 index 0000000..25ef4c9 --- /dev/null +++ b/src/DeltaQ.Utility.Memory/StreamExtensions.cs @@ -0,0 +1,57 @@ +using System; +using System.Buffers; +using System.IO; + +namespace DeltaQ.Utility.Memory +{ + public static class StreamExtensions + { +#if !(NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER) + private static ArrayPool DefaultPool => ArrayPool.Shared; + + /// + /// Reads a sequence of bytes from a given instance. + /// + /// The source to read data from. + /// The target to write data to. + /// The number of bytes that have been read. 
+ public static int Read(this Stream stream, Span buffer) + { + byte[] array = DefaultPool.Rent(buffer.Length); + try + { + int bytesRead = stream.Read(array, 0, buffer.Length); + if (bytesRead > 0) + { + array.AsSpan(0, bytesRead).CopyTo(buffer); + } + + return bytesRead; + } + finally + { + DefaultPool.Return(array); + } + } + + /// + /// Writes a sequence of bytes to a given instance. + /// + /// The destination to write data to. + /// The source to read data from. + public static void Write(this Stream stream, ReadOnlySpan buffer) + { + byte[] array = DefaultPool.Rent(buffer.Length); + try + { + buffer.CopyTo(array); + stream.Write(array, 0, buffer.Length); + } + finally + { + DefaultPool.Return(array); + } + } +#endif + } +} From 8253877e8429296d35a5560de976522b1ab8ca59 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 18:25:54 -0500 Subject: [PATCH 250/325] Match new ISuffixSort signature in LDSS --- src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index f1c57ba..9a87c3f 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -4,7 +4,10 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort { - public partial class LibDivSufSort : ISuffixSort + /// + /// An implementation of the divsufsort suffix array construction algorithm. 
+ /// + public class LibDivSufSort : ISuffixSort { public IMemoryOwner Sort(ReadOnlySpan textBuffer) { @@ -15,7 +18,7 @@ public IMemoryOwner Sort(ReadOnlySpan textBuffer) return owner; } - public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) + public void Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { if(textBuffer.Length != suffixBuffer.Length) { @@ -23,7 +26,6 @@ public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) } DivSufSort.divsufsort(textBuffer, suffixBuffer); - return suffixBuffer.Length; } private static void ThrowHelper() => throw new ArgumentException("Text and suffix buffers should have the same length"); From 8fcc1f5d3b89940bb0ad07ccc8a39095e8ffc9fb Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 20:14:33 -0500 Subject: [PATCH 251/325] Update all projects to target netstandards and use ProjectReferences --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 8 +++++--- .../DeltaQ.SuffixSorting.Abstractions.csproj | 4 +++- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 12 ++++++++---- .../DeltaQ.SuffixSorting.SAIS.csproj | 11 +++++++---- .../DeltaQ.Utility.Memory.csproj | 19 +++++++++++-------- src/deltaq/deltaq.csproj | 10 +++++----- 6 files changed, 39 insertions(+), 25 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 9d5827e..9de0c65 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -1,16 +1,18 @@  - net6.0;netstandard2.0;netstandard2.1 + netstandard2.0;netstandard2.1 DeltaQ jzebedee 0.3.0 latest + true + snupkg - - + + diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 968f92d..28873cf 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -1,12 +1,14 @@  - 
netstandard2.1;netstandard2.0 + netstandard2.0;netstandard2.1 DeltaQ jzebedee 0.4.1 dq.png RPL-1.5 + true + snupkg diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index 10fbbee..dba7790 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -1,15 +1,19 @@  - net6.0;netstandard2.0 + netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.5.0 + 0.5.2 latest + enable + true + snupkg - - + + + diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index ff22bde..4283305 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -1,15 +1,18 @@  - net6.0;netstandard2.0 + netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.3.0 + 0.3.2 + latest + true + snupkg - - + + diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 9c6603b..6995053 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -1,20 +1,23 @@  - net6.0;netstandard2.0 + netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.3.0 + 0.4.0 latest enable + true + snupkg - - - - + - - + + + + + + \ No newline at end of file diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 3d4aa29..2748a7b 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -1,17 +1,17 @@  - net6.0;netstandard2.0 + netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.2.0 - true + 0.2.1 latest - - + + + From 7430a4c672d32dd3314dd68cba23b469f32b3953 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 27 Dec 2021 20:15:06 -0500 Subject: [PATCH 252/325] Remove old comment --- src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs index d43daa9..48d76de 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/TrSort.cs @@ -355,7 +355,6 @@ private static void tr_introsort(SAPtr isaOffset, SAPtr isadOffset, Span SA // GEMINI while (true) { - //Debug.Assert(SA[isaOffset..] == ISA); ISA[SA[a]] = a; // cond (GEMINI) From 07f00c60b5ab75c6cb0e5bc966cc381bf830fcac Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 20:17:03 -0500 Subject: [PATCH 253/325] Fix comments in SsSort --- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 298a015..f1ed6dc 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1,11 +1,11 @@ //#define SS_ISQRT_LOOKUP -using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Diagnostics; using System.Runtime.CompilerServices; using Text = System.ReadOnlySpan; using Idx = System.Int32; using SAPtr = System.Int32; +using Microsoft.Toolkit.HighPerformance.Buffers; namespace DeltaQ.SuffixSorting.LibDivSufSort; using static Crosscheck; @@ -928,7 +928,6 @@ public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) { - //Debug.Assert(Size > 0); if (Size == 0) return false; ref SsStackItem item = ref Items[--Size]; @@ -1602,7 +1601,7 @@ private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx } /// - /// Fast sqrt, using lookup tables + /// Fast sqrt 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ss_isqrt(int x) @@ -1669,10 +1668,10 @@ private static int ss_isqrt(int x) => x switch { >= (SS_BLOCKSIZE * SS_BLOCKSIZE) => SS_BLOCKSIZE, -#if NETSTANDARD2_0 - _ => (int)Math.Sqrt(x) -#else +#if NETSTANDARD2_1_OR_GREATER _ => (int)MathF.Sqrt(x) +#else + _ => (int)Math.Sqrt(x) #endif }; #endif From 28ab061ca9e29bff7fb7551032b283fe426c3eb3 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Mon, 27 Dec 2021 20:18:09 -0500 Subject: [PATCH 254/325] Spanify ss_heapsort --- .../SsSort.cs | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index f1ed6dc..5349755 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -981,7 +981,7 @@ private static void ss_mintrosort(Text T, Span SA, SAPtr partitionOffset, S if (old_limit == 0) { SA_dump(SA[first..last], "before heapsort"); - ss_heapsort(T, tdOffset, SA, partitionOffset, first, last - first); + ss_heapsort(T[tdOffset..], SA[partitionOffset..], SA[first..], last - first); SA_dump(SA[first..last], "after heapsort"); } @@ -1519,20 +1519,16 @@ private static int ss_ilg(int n) } /// Simple top-down heapsort. 
- private static void ss_heapsort(Text T, Idx tdOffset, Span SA_top, SAPtr paOffset, SAPtr first, Idx size) + private static void ss_heapsort(Text T, ReadOnlySpan PA, Span SA, Idx size) { Idx i; var m = size; Idx t; - Text Td = T[tdOffset..]; - ReadOnlySpan PA = SA_top[paOffset..]; - var SA = SA_top[first..]; - if ((size % 2) == 0) { m -= 1; - if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) + if (T[PA[SA[m / 2]]] < T[PA[SA[m]]]) { SA.Swap(m, m / 2); } @@ -1541,13 +1537,13 @@ private static void ss_heapsort(Text T, Idx tdOffset, Span SA_top, SAPtr pa // LADY for (i = (m / 2) - 1; i >= 0; i--) { - ss_fixdown(Td, PA, SA, i, m); + ss_fixdown(T, PA, SA, i, m); } if ((size % 2) == 0) { SA.Swap(0, m); - ss_fixdown(Td, PA, SA, 0, m); + ss_fixdown(T, PA, SA, 0, m); } // TRUMPET @@ -1555,17 +1551,17 @@ private static void ss_heapsort(Text T, Idx tdOffset, Span SA_top, SAPtr pa { t = SA[0]; SA[0] = SA[i]; - ss_fixdown(Td, PA, SA, 0, i); + ss_fixdown(T, PA, SA, 0, i); SA[i] = t; } } - private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx i, Idx size) + private static void ss_fixdown(Text T, ReadOnlySpan PA, Span SA, Idx i, Idx size) { Idx j, v, c, d, e, k; v = SA[i]; - c = Td[PA[v]]; + c = T[PA[v]]; // BEAST while (true) @@ -1581,8 +1577,8 @@ private static void ss_fixdown(Text Td, ReadOnlySpan PA, Span SA, Idx k = j; j += 1; - d = Td[PA[SA[k]]]; - e = Td[PA[SA[j]]]; + d = T[PA[SA[k]]]; + e = T[PA[SA[j]]]; if (d < e) { k = j; @@ -1675,5 +1671,4 @@ private static int ss_isqrt(int x) #endif }; #endif - } - +} From a9e243d6b4e996cc31ac6b3165aae78abecdd1fa Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Mon, 27 Dec 2021 20:20:45 -0500 Subject: [PATCH 255/325] Use switch expression for ss_ilg --- src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 5349755..31413be 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -1507,16 +1507,11 @@ private static void ss_insertionsort(Text T, Span SA, int PA, int first, in /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ss_ilg(int n) - { - if ((n & 0xff00) > 0) - { - return 8 + lg_table[(n >> 8) & 0xff]; - } - else + => n & 0xff00 switch { - return 0 + lg_table[(n >> 0) & 0xff]; - } - } + > 0 => 8 + lg_table[n >> 8 & 0xff], + _ => 0 + lg_table[n >> 0 & 0xff] + }; /// Simple top-down heapsort. private static void ss_heapsort(Text T, ReadOnlySpan PA, Span SA, Idx size) From de2d534404f23fb4d78bcf9de19e245241c48c8d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 07:56:36 -0500 Subject: [PATCH 256/325] Add 3pn to SAIS --- .../THIRD-PARTY-NOTICES.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt diff --git a/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt new file mode 100644 index 0000000..3661273 --- /dev/null +++ b/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt @@ -0,0 +1,12 @@ +DeltaQ uses third-party libraries or other resources that may be distributed under a different license from DeltaQ. + +The attached notices are provided for information only. + +License notice for SAIS-CSharp +------------------------------- + +Copyright (c) 2010 Yuta Mori. All Rights Reserved. +Licensed under the MIT license.
+ +Available at +https://sites.google.com/site/yuta256/sais \ No newline at end of file From bc4427e389fa4c5940983320a10487b784570c68 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 07:58:07 -0500 Subject: [PATCH 257/325] Update SAIS Remove inline copyright notices Use AlphabetSize constant Use Index/Range Match new ISuffixSort signature Use ThrowHelper for mismatched text/suffix buffers --- src/DeltaQ.SuffixSorting.SAIS/SAIS.cs | 109 +++++++++----------------- 1 file changed, 39 insertions(+), 70 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs index 12c5e67..7866bc5 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs +++ b/src/DeltaQ.SuffixSorting.SAIS/SAIS.cs @@ -1,54 +1,4 @@ -/* - * SAIS.cs for DeltaQ - * Copyright (c) 2014 J. Zebedee - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -/* - * SAIS.cs for SAIS-CSharp - * Copyright (c) 2010 Yuta Mori. 
All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -using Microsoft.Toolkit.HighPerformance.Buffers; +using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Buffers; using System.Runtime.CompilerServices; @@ -60,15 +10,18 @@ namespace DeltaQ.SuffixSorting.SAIS /// public class SAIS : ISuffixSort { - private const int MinBucketSize = 256; + private const int AlphabetSize = byte.MaxValue + 1; + private const int MinBucketSize = byte.MaxValue + 1; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void GetCounts(IntAccessor T, Span c, int n, int k) { - c.Slice(0, k).Clear(); + c[..k].Clear(); for (int i = 0; i < n; ++i) + { c[T[i]]++; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -90,7 +43,10 @@ private static void LMS_sort(IntAccessor T, Span sa, Span c, Span /* compute SAl */ if (c == b) + { GetCounts(T, c, n, k); + } + GetBuckets(c, b, k, false); /* find starts of buckets */ j = n - 1; @@ -118,7 +74,10 @@ private static void LMS_sort(IntAccessor T, Span sa, Span c, Span /* compute SAs */ if (c == b) + { GetCounts(T, c, n, k); + } + GetBuckets(c, b, k, true); /* find ends of buckets */ for (i = n - 1, bb = b[c1 = 0]; 0 <= i; --i) @@ -228,7 +187,10 @@ private static void InduceSA(IntAccessor T, Span sa, Span c, Span /* compute SAl */ if (c == b) + { GetCounts(T, c, n, k); + } + GetBuckets(c, b, k, false); /* find starts of buckets */ j = n - 1; @@ -251,7 +213,10 @@ private static void InduceSA(IntAccessor T, Span sa, Span c, Span /* compute SAs */ if (c == b) + { GetCounts(T, c, n, k); + } + GetBuckets(c, b, k, true); /* find ends of buckets */ for (i = n - 1, bb = b[c1 = 0]; 0 <= i; --i) @@ -288,7 +253,7 @@ private void sais_main(IntAccessor T, Span sa, int fs, int n, int k) c = new int[k];// ArrayPool.Shared.Rent(k); if (k <= fs) { - b = sa.Slice(n + fs - k, sa.Length - (n + fs - k)); + b = sa[(n + fs - k)..]; flags = 1; } else @@ -299,10 +264,10 @@ private void sais_main(IntAccessor T, Span sa, int fs, int n, int k) } else if (k <= fs) { - c = sa.Slice(n + 
fs - k, sa.Length - (n + fs - k)); + c = sa[(n + fs - k)..]; if (k <= fs - k) { - b = sa.Slice(n + fs - k * 2, sa.Length - (n + fs - k * 2)); + b = sa[(n + fs - k * 2)..]; flags = 0; } else if (k <= MinBucketSize * 4) @@ -327,7 +292,7 @@ sort all the LMS-substrings */ GetCounts(T, c, n, k); GetBuckets(c, b, k, true); /* find ends of buckets */ - sa.Slice(0, n).Clear(); + sa[..n].Clear(); bb = -1; i = n - 1; @@ -409,7 +374,7 @@ sort all the LMS-substrings */ } } - sais_main(new IntAccessor(sa.Slice(m + newfs, sa.Length - (m + newfs))), sa, newfs, m, name); + sais_main(new IntAccessor(sa[(m + newfs)..]), sa, newfs, m, name); i = n - 1; j = m * 2 - 1; @@ -502,10 +467,12 @@ public MemoryOwner Sort(ReadOnlySpan textBuffer) IMemoryOwner ISuffixSort.Sort(ReadOnlySpan textBuffer) => Sort(textBuffer); - public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) + public void Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { - if (suffixBuffer.Length < textBuffer.Length) - throw new ArgumentException("Output span must have length greater than or equal to input span", nameof(suffixBuffer)); + if (suffixBuffer.Length != textBuffer.Length) + { + ThrowHelper(); + } if (textBuffer.Length <= 1) { @@ -513,12 +480,14 @@ public int Sort(ReadOnlySpan textBuffer, Span suffixBuffer) { suffixBuffer[0] = 0; } + return; } - else sais_main(new IntAccessor(textBuffer), suffixBuffer, 0, textBuffer.Length, 256); - - return textBuffer.Length; + + sais_main(new IntAccessor(textBuffer), suffixBuffer, 0, textBuffer.Length, AlphabetSize); } + private static void ThrowHelper() => throw new ArgumentException("Text and suffix buffers should have the same length"); + private ref struct IntAccessor { private readonly ReadOnlySpan intSpan; @@ -528,16 +497,16 @@ private ref struct IntAccessor [MethodImpl(MethodImplOptions.AggressiveInlining)] public IntAccessor(ReadOnlySpan buffer) { - this.byteSpan = buffer; - this.intSpan = default; - this.packedIndex = true; + byteSpan = buffer; + intSpan = default; 
+ packedIndex = true; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public IntAccessor(ReadOnlySpan buffer) { - this.byteSpan = default; - this.intSpan = buffer; - this.packedIndex = false; + byteSpan = default; + intSpan = buffer; + packedIndex = false; } public int this[int index] From fd14a6bebee2ff69f70cfaba40b321e8bf900da0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 07:59:52 -0500 Subject: [PATCH 258/325] Remove StreamExtensions --- src/DeltaQ.Utility.Memory/StreamExtensions.cs | 57 ------------------- 1 file changed, 57 deletions(-) delete mode 100644 src/DeltaQ.Utility.Memory/StreamExtensions.cs diff --git a/src/DeltaQ.Utility.Memory/StreamExtensions.cs b/src/DeltaQ.Utility.Memory/StreamExtensions.cs deleted file mode 100644 index 25ef4c9..0000000 --- a/src/DeltaQ.Utility.Memory/StreamExtensions.cs +++ /dev/null @@ -1,57 +0,0 @@ -using System; -using System.Buffers; -using System.IO; - -namespace DeltaQ.Utility.Memory -{ - public static class StreamExtensions - { -#if !(NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER) - private static ArrayPool DefaultPool => ArrayPool.Shared; - - /// - /// Reads a sequence of bytes from a given instance. - /// - /// The source to read data from. - /// The target to write data to. - /// The number of bytes that have been read. - public static int Read(this Stream stream, Span buffer) - { - byte[] array = DefaultPool.Rent(buffer.Length); - try - { - int bytesRead = stream.Read(array, 0, buffer.Length); - if (bytesRead > 0) - { - array.AsSpan(0, bytesRead).CopyTo(buffer); - } - - return bytesRead; - } - finally - { - DefaultPool.Return(array); - } - } - - /// - /// Writes a sequence of bytes to a given instance. - /// - /// The destination to write data to. - /// The source to read data from. 
- public static void Write(this Stream stream, ReadOnlySpan buffer) - { - byte[] array = DefaultPool.Rent(buffer.Length); - try - { - buffer.CopyTo(array); - stream.Write(array, 0, buffer.Length); - } - finally - { - DefaultPool.Return(array); - } - } -#endif - } -} From 903de88edd263b19f3fb365cc7e9c06861cdfde8 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:00:25 -0500 Subject: [PATCH 259/325] Update preprocessor directives in DQ.Utility.Memory --- src/DeltaQ.Utility.Memory/Index.cs | 4 ++-- src/DeltaQ.Utility.Memory/NullableAttributes.cs | 2 +- src/DeltaQ.Utility.Memory/Range.cs | 4 ++-- src/DeltaQ.Utility.Memory/ThrowHelper.cs | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.Utility.Memory/Index.cs b/src/DeltaQ.Utility.Memory/Index.cs index 066dc96..1fe0b41 100644 --- a/src/DeltaQ.Utility.Memory/Index.cs +++ b/src/DeltaQ.Utility.Memory/Index.cs @@ -3,7 +3,7 @@ using System.Runtime.CompilerServices; -#if NETCOREAPP3_0_OR_GREATER +#if NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER [assembly: TypeForwardedTo(typeof(System.Index))] #else // Licensed to the .NET Foundation under one or more agreements. 
@@ -147,7 +147,7 @@ public override string ToString() private string ToStringFromEnd() { -#if (!NETSTANDARD2_0 && !NETFRAMEWORK) +#if !NETSTANDARD2_0 && !NETFRAMEWORK Span span = stackalloc char[11]; // 1 for ^ and 10 for longest possible uint value bool formatted = ((uint)Value).TryFormat(span.Slice(1), out int charsWritten); Debug.Assert(formatted); diff --git a/src/DeltaQ.Utility.Memory/NullableAttributes.cs b/src/DeltaQ.Utility.Memory/NullableAttributes.cs index 41025ab..05d0f4e 100644 --- a/src/DeltaQ.Utility.Memory/NullableAttributes.cs +++ b/src/DeltaQ.Utility.Memory/NullableAttributes.cs @@ -1,7 +1,7 @@ //Generated 2021-12-26 //https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/Diagnostics/CodeAnalysis/NullableAttributes.cs -#if NETSTANDARD2_0 +#if !(NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER) // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. diff --git a/src/DeltaQ.Utility.Memory/Range.cs b/src/DeltaQ.Utility.Memory/Range.cs index 5959171..6971d3f 100644 --- a/src/DeltaQ.Utility.Memory/Range.cs +++ b/src/DeltaQ.Utility.Memory/Range.cs @@ -3,7 +3,7 @@ using System.Runtime.CompilerServices; -#if NETCOREAPP3_0_OR_GREATER +#if NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER [assembly: TypeForwardedTo(typeof(System.Range))] #else // Licensed to the .NET Foundation under one or more agreements. @@ -64,7 +64,7 @@ public override int GetHashCode() /// Converts the value of the current Range object to its equivalent string representation. 
public override string ToString() { -#if (!NETSTANDARD2_0 && !NETFRAMEWORK) +#if !NETSTANDARD2_0 && !NETFRAMEWORK Span span = stackalloc char[2 + (2 * 11)]; // 2 for "..", then for each index 1 for '^' and 10 for longest possible uint int pos = 0; diff --git a/src/DeltaQ.Utility.Memory/ThrowHelper.cs b/src/DeltaQ.Utility.Memory/ThrowHelper.cs index ef94640..5a513b0 100644 --- a/src/DeltaQ.Utility.Memory/ThrowHelper.cs +++ b/src/DeltaQ.Utility.Memory/ThrowHelper.cs @@ -1,7 +1,7 @@ //Generated 2021-12-26 //https://raw.githubusercontent.com/dotnet/runtime/84680bf557210114ea5ca823386cd49691c4cac6/src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs -#if NETSTANDARD2_0 +#if !(NETCOREAPP3_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER) // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. From 917c1628383b77a9f10abfa579c9302784a9399b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:00:55 -0500 Subject: [PATCH 260/325] Update .editorconfig --- .editorconfig | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.editorconfig b/.editorconfig index 9a17955..b4fe15b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -110,15 +110,6 @@ dotnet_style_object_initializer = true:suggestion #prefer inferred tuple element names dotnet_style_prefer_inferred_tuple_names = true:suggestion -#Style - implicit and explicit types - -#prefer var over explicit type in all cases, unless overridden by another code style rule -csharp_style_var_elsewhere = true:suggestion -#prefer var is used to declare variables with built-in system types such as int -csharp_style_var_for_built_in_types = true:suggestion -#prefer var when the type is already mentioned on the right-hand side of a declaration expression -csharp_style_var_when_type_is_apparent = true:suggestion - #Style - language keyword and framework type options #prefer the language keyword for local variables, 
method parameters, and class members, instead of the type name, for types that have a keyword to represent them @@ -142,7 +133,7 @@ csharp_preferred_modifier_order = public,private,internal,static,readonly,sealed #Style - qualification options #prefer fields to be prefaced with this. in C# or Me. in Visual Basic -dotnet_style_qualification_for_field = true:suggestion +dotnet_style_qualification_for_field = false:suggestion #prefer methods not to be prefaced with this. or Me. in Visual Basic dotnet_style_qualification_for_method = false:suggestion #prefer properties not to be prefaced with this. or Me. in Visual Basic From 2c67a374ee4a0e7b38d34ac4e6162800c515e622 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:01:42 -0500 Subject: [PATCH 261/325] Compress icon --- assets/{dq-icon.png => dq-icon-orig.png} | Bin assets/dq.png | Bin 0 -> 2437 bytes 2 files changed, 0 insertions(+), 0 deletions(-) rename assets/{dq-icon.png => dq-icon-orig.png} (100%) create mode 100644 assets/dq.png diff --git a/assets/dq-icon.png b/assets/dq-icon-orig.png similarity index 100% rename from assets/dq-icon.png rename to assets/dq-icon-orig.png diff --git a/assets/dq.png b/assets/dq.png new file mode 100644 index 0000000000000000000000000000000000000000..d9561e088068c2941101ca08af67ecc1d21bebb5 GIT binary patch literal 2437 zcma)8c|4R07k=NF!3@onB{XGMDxxSMVGLueGuBKCBWsq+y_T$ZhN8IHmr6oe<4c4r z6~j=LgceJ(RCFz6g!$}z@66=)eSd!MZ|1ybp7T8CJm>s1aU@G)G(UzP006WJ!O$9d z)7W1m9EwZhDXRbg<0F~d8X5bWL&*R2gf#=uhxmBG`ms`xMPn!g$@L_{&yl9ftKaq+$uYA_>eL7 zI-LGoB!clvc;hu^C}exbIa|fq4mu871sQ%0gh2eAZd?M}Ed)YU&V?Zwo3OK81m^+} z2f7UV>}{O0a7I8h?xv71oB;$tN8sX6tDkCN_)-!oRk^N#{TmIU-iKA!LixLef>?O)r+E= z_pc_dxjc(^B3=Lh-e?m;tZm50w!6#J_Zkw{BN@8uye71_Hw|DYS(L1EW`>%9go*6U zC;TyLU0z))`e|bP>FANdaH4h~)m2s3Op2*A?9tV*T9tqQb$=3Fvvv63>VH)KfhJ%DS+>K2Cliz_Q*+{KDak;M4$p!zU2(_iM6FA$>aF130~ zCau9KG#WUY#_Q?d-PoR7ZD@aDA!ii{j#}6^F{(D|cu)sLcy61x9W3oJlcCQjuzX)& 
zm3UwgsS0$V(ggvFdN`_Fv6u=YU&YaC)8KXbCv!et-Y3(X)e78hm2%B*f7+DxmWo1y zc!2H~XDpwT{jj@&Uqdm!DNY2ArLj7I=lYw{LB%~$h%v?cdRObW7|r2mIfw@--#T%9sl=0)v&e#E zxL@p-gV&tysZ?b~u-eoMJIBHr76#x*e^SVaCuUaGcALswuZd^A^&RmPDH!Y4nJZxC z2Dy5RfltDAjQEgky(b z<9k{CNs+X}N_O6pLI_R8z}++5>6QGXbi8`ajT1PbEBiEtBYV9-9@0zm{sb#R?>_0g z+_eVWGm6L|7Y)Xfel+-H%-6yBEX_GG3QrckA2b=+pdMrit3db_t#HKxZPqNMP;#=1>jEU6ezD zvdtHq`v}MMEmy^1t@XndZ6DHG0Oaqn%9uOf0KzJ4NuQRH!Ww9Bl6&=or<&?2>UJ8j zqNYYydf%2kbMCf4a?pfN4C0mi%-CWMqHw;0WW}GD^bQ0qUnStn8@wB)juMKel@(3p z9~~Em4SX^<;D`0j(zsRBKABizO!hhdCw=`9dPBfrdeM+&c=p64yQsMkn_kqksjoc3 zVLx^o6YJEPX;?afIfu*FB`(eY0z&5Rjj|m?mK_(5tRIy&s_| zde5Z!=hQN{h8N4)nJg1n_@TL#py?Wpa-a8qmR}Y%h#^B z?aDa6StRRmS-O9eZ}}&y8>~zg_fFf+EVmdFrXWU&H|`rer*Zbat`X`-jv z2a`S`5Sk(zPB`rFrkDQgtC#B53_q;Hi6^;>VDd7tY!cRg;IC-ldJDDABBYTQSEZv! z2{2rK?In~`-r2kDW9oL< zg?eB_B{1hR7h+$6$cnqnaB|6u7lCev%geHY+&z9|yJ^MoqGComx-ud7U$oQiIJsYxdA2mKQ-7!B>(^b literal 0 HcmV?d00001 From 391a2030220416537cdea700c44e307ddc0d64ba Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:02:35 -0500 Subject: [PATCH 262/325] Add original SVG and icon-generator HTML --- assets/dq-icon-view.html | 41 ++++++++++++++++++++++++++++++++++++++++ assets/dq-orig.svg | 5 +++++ 2 files changed, 46 insertions(+) create mode 100644 assets/dq-icon-view.html create mode 100644 assets/dq-orig.svg diff --git a/assets/dq-icon-view.html b/assets/dq-icon-view.html new file mode 100644 index 0000000..33bbe07 --- /dev/null +++ b/assets/dq-icon-view.html @@ -0,0 +1,41 @@ + + + + + + + + + Δ + δ + + + + \ No newline at end of file diff --git a/assets/dq-orig.svg b/assets/dq-orig.svg new file mode 100644 index 0000000..184bb1e --- /dev/null +++ b/assets/dq-orig.svg @@ -0,0 +1,5 @@ +Δδ \ No newline at end of file From 5d6acfae015891d45806ff7fccbd7d8c5c42ea8f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 28 Dec 2021 08:03:08 -0500 Subject: [PATCH 263/325] Remove non-prod assets --- assets/dq-icon-orig.png | Bin 4971 -> 0 bytes assets/dq-icon-view.html | 41 --------------------------------------- assets/dq-orig.svg | 5 ----- 3 files changed, 46 deletions(-) delete mode 100644 assets/dq-icon-orig.png delete mode 100644 assets/dq-icon-view.html delete mode 100644 assets/dq-orig.svg diff --git a/assets/dq-icon-orig.png b/assets/dq-icon-orig.png deleted file mode 100644 index 6e9e5196b2e0cec2ff68aef9a69bc67740f5a95d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4971 zcmV-x6O`org>6q#iq_DW7CBi|($xvUz?cE1v zvrbKt*FOLKgz?0cUp69<}ZPloA z0GMR;J2c>syoY6A!+>>brds_1ptWmjkt&kiKX{SiSj+;q1ZR-^n)M5Sta+)h#W^8| zpbN7=V@Mu}u3n!2n0RYijM}uDNAty2d&NlcZMw2lMT+Ylh5$x&@07jt;xE{-_ z-Dd!R9hiA0f?a(9Ae$d8?9?NXXx5g^qO;MAgm)F0))xRW=5Mg5oe+&a^v9hlp0A$Q= zO#kVKwf7bPut~&i#LB)t0Fbdx!}Nc3ESi(@DAC?{M=Wi{&`5m%z$iS6qu;hM=!=@j zV;0yarnVw)ruG5QW{wv{@n;Q9k)!7iHR#kN#o{P2yX6HnVe%{<~~B&ydJ@I1+}vY2@ijNG`PvfVPF(ieehmzoHb2Tj7g+Nf_p1p!~XuT>W>% z4Rj!WpbkoH1Hdq^z!-dAF%B};iSE3a}n4Mw&-Mszkm zsO&?632GPs8o?e?PQ{u1XZ`fgHVep`aY1x^g2z|cwb}%LVI7NLkDHa!xjAwe7OsqK zo-5;#!y3dHqgN>Vl2C#g1%OFczM~a7y$YK@=M7?}uor?w7PE|rlv&I&9W1=6z@+v7 zpgVXUEn7>>JLq#)opLP$LcWG0A)otM%pj;e02n6u>3iSHOIg5IiKrltk}+FxdDO|G zZz&H$dv2*`Sel56No@gO`l2t1Aw93Em7(d%%{B{&2e90XSNAJ<6tx8aqi_)nIp=z+ zQHnPZ&9k|dsz( ze1h_17G6bhfSBFH<9^1{A(zQ2Lf+w6XS==w!_N&$CYHWl=GlY2g_gg^+JAT?is9P~eu5clSR{R9VwyYv zWEYGS_K=s9@}=$=M&I}Y|NU7mhpAocXBh3+s;>4oav=>=&0Fbc`!}04brA(_U z@)w|YOXErdKh^Q;E-eq_%YRIt_UKV5@+5v@)xTxUPAmgVb_RPImM!({rc?kgL2>|K zXa##*jty0LckUG&LSOV*8mgitqcXziIQ-l1R@LR31ztqozoOBnSV@urfb+~MwOl-8;6tcX6FXzjk;hg#_o1}HW@1~IcUYb!1lM+z+^EP%B$nL~u z&|nxl78cb9fX*h%7DvM2lfF=|H-LhE{bfbc)$P3wTBQi%s{jI?%pyX32r&D2#FAy4 zU71uD02%WXn0>ENidZ`Od9D2fjBS*B)C5k0OiRHi0N_UauPPgvx&@*r0s2lo;%KR= z3jo9XAH>_zQ$$;2pe#xS<8|z#Gd)M1V5iL>b#I0+@-QZ)D9ac}S&Ug=Zw^Lbc~lPo zhP5v~h51SWN?$*jdc9n}5jY|!UtrkcbR;C6r5VOB$+^RisoA|hJ?W8R&e)g3b~tSP 
z>H$C}*h9$cvGbnKJ}6sq;Oh;6PBG1Qhmt5LnFYy5&YwZDF|Gz0EbUqrX28K%x)oFtqI!bv2a{0}Wt^9SR zJbDE00`z3;ioI|tPTlcr0l^+p73^_4z67TumdX;yC5P8-BgwdWJBIl*{*B2!-98An zggj2D>d{z5!KFCwCXM-<;yV2eWxu@N;_&5H3nQE`@EL!Gg6fVQcaJHJg28E3nh0Z1( zUK-8yU(+-z`6AOx@Ys#7U&rOjYmF`$X@VE8>7Ted^Uvki!Df(>P4bkO?+2ov0d#^r zgnbS+Ksw19X%=wv33UEK^@c@t%JB9yHcUF*?4j3`lIkRU4C=%fN;Y}~(E^|i;Z;SA z@G5_(cblricm=sRf|Rw`d;D|KYbJj%LfY=pQ@N7rX#ikHB^47vv;fcv_OQieSoi$C z=;^i;&_hywvvY;}`d94**n9?Y*(i6!W{{4Uk(cNI&}MEAd%0SO?(}-%qOpG0P_hfq zSL^g_j34I^;zQH7+tIJrHC#0mMgjf;GNU5X`nxS|!AB8xQ>)cfbO6u`_P8Ix9zh)~ z-9XSk#?M_9?f8?Ii?+YxrOs(bYgj?US5g>|%a zS`G*V6dTwZ#b*)El zq5**3RCo*oR9_A2tFoLV!YN#w29)9t{)r!@pYvMxgAIPUpN2^@-`e`!_Ml7^Tg0rE z0t+t}Cj0;}Ov;q~Yh|aSTjR)9K*^;E2!36-!wo(OZCKa{+C1+F+F~J=0l3dcSE2xb zMzDvSMVK^?lDu_8>pTTukUMu}^lxQNTJwK^pTB9}seQc}P;XQlit8enl!9U-DZBv4 zSbHPOhdMepy070$NnU2;Aoc_IwhWWr5ImG+#H73kCnf4%B!wFQ>X$vFybc1zoiNF5 z@aeQb1EmJ7AN6PRqlKNK>uZkEgT2xa4n|Hq_yCa0Z;4A0n*O~@cX|sLeqK|NG5+0c z@acqb0?U-|)Y*)=6epogSmcnYqec5Tru4O|_lOSwTEQN25j=i)(ygd9OQ(XH@N-)G zd8d4*z8Ds+b!}V~GZZPGz^q9YPWHkG01aEX6is;5lXC7vUEHjMbYXNde!Nq@n>3u8 z!H81h@&F)fO^3s0Wum1@wY-hBY(&%}5B_B2p)-HwJ%MvBz$iOpm^1?&&(nmBK-p#{ zFXY}H@cgRKPv=klyI>8=N%*NW!b5-groYUa@ zyUFvRG+(4`U6bGxbFHsB?!>PG0Nxo@d^Qvg0EV?QcDm}s49_neB5pgn$7kPk$twrn z&vmeF8{z0UH-mQ1o}zOfb%2d#kHY~VT(E~`7d^=;eFVeX@;Q1{$-^+oW&Ly>*79B) zr4<1-`YiFp9i}bShl}$yPE5ENVU;Mj#rSK~Wro%`!M8im?%dep^EpoD`OYTJ% z8~{4O9yXST&gMuA7!ay&c2ABkpZL*HmJCOBz+@=!iI*qC5H;jw^y=k0ip4d%m3kX0AQ_PYFoWlFaTr=XF(X% zapW^}awtb9U3iqI|1j?0#M&z}n18o0FM)MF8eN6iDSTYxLVsKV0bsrd-?!nOeU)-5 z?AKf=)ts6t10YO_7?n#nohUZc9TO#8#<~R8<v+&O512$f$T$rqHjvu<+v;kYp+Q1O%%#m&XVE$XOikJkJHKquHkh`uj-@Ce(V zOWPb>g&(iwcB^ZWROT2{lP%x7cNggO-V6}TVJc^So(nLhxu&fi+3*6<3UE;ZCp z+yHJ&>La3#jP5jjgF5`20%+R58$Xr5pngz@A32Ny;vgj7=_2Gz>kj}r!5%hV=;~}z z`QQ0bnHO!}FIg1}K!71!^n%UBdOd~P2g_|Y$chBlSI#V>kikz9TVHiIktbkLJF4Fl86rYD2& z^8o-2kDsD_yqMk0l`|)ZJXD8f1X_Z2TL}iI0Mha)hGvPyu?%{pG=_y^tj)AkO1I=% zN*(F}p!&fcNz+qJw)}A9oF37mpg2jbW-AaKd2R(L7ffm 
zYU)x0AZw0+)vuhj+g;ZI5m>W>$I)@4fISNWKUj*r3^svv{IXI5KaRbnIlHbM$ z>7fD6`vD>MgRj5>KYH~BK*kyY+x44%7!J}_RRg}0O?m@BE7+r|0uMG`sezv5!^NEa zKhRZ-0stCEKi!rF>Eh9VZT}QbI6u;D%pCxF!5+Hp4ARA^0UsN3cL3-Fd&DW!U|T*k zAdkal&?`UEDqhY2&<*zR(WODs7&Rb~2B*Dg2b~t30iYA?p~HQUrg|FiPQC9j1Ly{O pR8O?QmUT2B<&AL3qkK)C<^RC91k8ASQs@8x002ovPDHLkV1jfPLAL+^ diff --git a/assets/dq-icon-view.html b/assets/dq-icon-view.html deleted file mode 100644 index 33bbe07..0000000 --- a/assets/dq-icon-view.html +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - Δ - δ - - - - \ No newline at end of file diff --git a/assets/dq-orig.svg b/assets/dq-orig.svg deleted file mode 100644 index 184bb1e..0000000 --- a/assets/dq-orig.svg +++ /dev/null @@ -1,5 +0,0 @@ -Δδ \ No newline at end of file From b1f1fc8efc3b70fda288f54be7b89fc44ebb950a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:04:11 -0500 Subject: [PATCH 264/325] Update README for 2.0 RC --- README.md | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index cebbece..425ac3f 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,29 @@ -#deltaq +# deltaq logo DeltaQ -Fast and portable delta encoding library for .NET +Fast and portable delta encoding for .NET. -deltaq is a [.NET Core](https://dotnet.github.io/) class library +## About + +DeltaQ provides fast and portable delta encoding for .NET in 100% safe, managed code. + +DeltaQ is available for use as a library in .NET and .NET Framework, and as a cross-platform command-line tool, `dq`, which can be used to perform delta operations (similar to `bsdiff` or `xdelta`). ## Installing -* Manual install: Download from the [Releases](https://github.com/jzebedee/deltaq/releases/) -* NuGet install: Follow instructions on the [NuGet page](https://www.nuget.org/packages/deltaq/) or enter ```Install-Package deltaq``` in the Package Manager console. 
+### `dq` command-line tool + +`> ` `dotnet tool install DeltaQ.CLI -g` + +### `DeltaQ` library + +`> ` `dotnet add package DeltaQ` + +## Usage + +### `dq` command-line tool -### Supported formats -|Format|Create patches|Apply patches| -|------|--------------|-------------| -|bsdiff|Yes|Yes| -|vcdiff|No|No| +TBW -### Roadmap +### `DeltaQ` library -* Add support for applying VCDIFF patches. VCDIFF format is defined in [RFC 3284](https://tools.ietf.org/html/rfc3284) with several existing implementations. Jon Skeet's [MiscUtil](http://www.yoda.arachsys.com/csharp/miscutil/) already has an implementation of the patch portion of VCDIFF, but much more work is needed to create a C# patch generator. -* Add platform-specific libraries to make usage as simple as possible. There's also room to support memory-mapped files and similar significant optimizations. +TBW \ No newline at end of file From 163f195a1373262f2e1f4bfc6cd3716ee571174f Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:04:40 -0500 Subject: [PATCH 265/325] Remove net461 directives in SAISTests --- .../SAISTests.cs | 65 ++----------------- 1 file changed, 7 insertions(+), 58 deletions(-) diff --git a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs index 015e33f..6ed9d56 100644 --- a/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs +++ b/test/DeltaQ.SuffixSorting.SAIS.Tests/SAISTests.cs @@ -11,13 +11,6 @@ public class SAISTests { private readonly SAIS _sais = new SAIS(); -#if NET461 - private static void RandomFillBuffer(byte[] buffer) - { - var rand = new Random(63 * 13 * 63 * 13); - rand.NextBytes(buffer); - } -#else private static SpanOwner GetOwnedRandomBuffer(int size) { var rand = new Random(63 * 13 * 63 * 13); @@ -33,7 +26,6 @@ private static SpanOwner GetOwnedRandomBuffer(int size) return owner; } -#endif [Theory] [InlineData(0)] @@ -44,61 +36,18 @@ private static SpanOwner GetOwnedRandomBuffer(int size) [InlineData(16)] 
[InlineData(32)] [InlineData(51)] + [InlineData(0x8000 - 1)] [InlineData(0x8000)] - [InlineData(0x80000)] - [InlineData(0x800000)] public void CheckRandomBuffer(int size) { -#if NET461 - var ownedT = ArrayPool.Shared.Rent(size); - try -#else - using (var ownedT = GetOwnedRandomBuffer(size)) -#endif - { -#if NET461 - RandomFillBuffer(ownedT); - Span T = ownedT; -#else - Span T = ownedT.Span; -#endif - using (var ownedSA = _sais.Sort(T)) - { - Span SA = ownedSA.Span; - var result = Check(T, SA, T.Length, false); - Assert.Equal(0, result); - } - } -#if NET461 - finally - { - ArrayPool.Shared.Return(ownedT); - } -#endif - } + using var ownedT = GetOwnedRandomBuffer(size); + Span T = ownedT.Span; - [Theory] - [InlineData(0)] - [InlineData(1)] - [InlineData(2)] - [InlineData(4)] - [InlineData(8)] - [InlineData(16)] - [InlineData(32)] - [InlineData(51)] - [InlineData(0x1000)] - public void CheckRandomBufferContinuous(int size) - { - const int repetitions = 2_000; - for (int i = 0; i < repetitions; i++) - { - CheckRandomBuffer(size); + using var ownedSA = _sais.Sort(T); + Span SA = ownedSA.Span; - if (i % 100 == 0) - { - System.Diagnostics.Debug.WriteLine("Gen0:{0} Gen1:{1} Gen2:{2}", GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2)); - } - } + var result = Check(T, SA, T.Length, false); + Assert.Equal(0, result); } } } From 9c530b3aef755c61f9b5b883463f71f3385b284b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 28 Dec 2021 08:13:56 -0500 Subject: [PATCH 266/325] Remove inline license notices in DQ.BsDiff --- src/DeltaQ.BsDiff/Diff.cs | 33 +++-------------------- src/DeltaQ.BsDiff/Patch.cs | 31 ++------------------- src/DeltaQ.BsDiff/SpanExtensions.cs | 26 +----------------- src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt | 12 +++++++++ 4 files changed, 18 insertions(+), 84 deletions(-) create mode 100644 src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index b45c18b..b3f8839 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -1,31 +1,4 @@ -/* - * BsDiff.cs for DeltaQ - * Copyright (c) 2014 J. Zebedee - * - * BsDiff.net is Copyright 2010 Logos Bible Software - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -using bz2core; +using bz2core; using DeltaQ.SuffixSorting; using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; @@ -139,14 +112,14 @@ 0 32 Header { if ((scsc + lastoffset < oldData.Length) && (oldData[scsc + lastoffset] == newData[scsc])) oldscore++; - } + } if ((len == oldscore && len != 0) || (len > oldscore + 8)) break; if ((scan + lastoffset < oldData.Length) && (oldData[scan + lastoffset] == newData[scan])) oldscore--; - } + } if (len != oldscore || scan == newData.Length) { diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 89bef4f..1cd1166 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -1,31 +1,4 @@ -/* - * BsPatch.cs for DeltaQ - * Copyright (c) 2014 J. Zebedee - * - * BsDiff.net is Copyright 2010 Logos Bible Software - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -using Microsoft.Toolkit.HighPerformance; +using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.IO; @@ -102,7 +75,7 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre if (controlLength < 0 || diffLength < 0 || newSize < 0) throw new InvalidOperationException("Corrupt patch"); - } + } // prepare to read three parts of the patch in parallel Stream diff --git a/src/DeltaQ.BsDiff/SpanExtensions.cs b/src/DeltaQ.BsDiff/SpanExtensions.cs index c784165..016dd75 100644 --- a/src/DeltaQ.BsDiff/SpanExtensions.cs +++ b/src/DeltaQ.BsDiff/SpanExtensions.cs @@ -1,28 +1,4 @@ -/* - * Extensions.cs for DeltaQ - * Copyright (c) 2014 J. Zebedee - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ -using System; +using System; using System.Collections.Generic; using System.IO; using System.Runtime.CompilerServices; diff --git a/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt new file mode 100644 index 0000000..d271984 --- /dev/null +++ b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt @@ -0,0 +1,12 @@ +DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. + +The attached notices are provided for information only. + +License notice for BsDiff.net +------------------------------- + +Copyright 2010 Logos Bible Software +Licensed under the MIT license. + +Available at +https://github.com/LogosBible/bsdiff.net \ No newline at end of file From 7c7c315fe84b23df65bb62c848bf20977994e2a5 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 08:15:43 -0500 Subject: [PATCH 267/325] Update DQ.BsDiff.Diff for new ISuffixSort signature --- src/DeltaQ.BsDiff/Diff.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index b3f8839..03eff6e 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -4,7 +4,6 @@ using Microsoft.Toolkit.HighPerformance.Buffers; using System; using System.Buffers; -using System.Diagnostics; using System.IO; namespace DeltaQ.BsDiff @@ -89,8 +88,7 @@ 0 32 Header using (var extraStream = GetEncodingStream(msExtra, true)) { Span I = saOwner.Span; - var sortLen = suffixSort.Sort(oldData, I[..^1]); - Debug.Assert(sortLen == oldData.Length); + suffixSort.Sort(oldData, I[..^1]); var scan = 0; var pos = 0; From 028c078bc6b3e265a733d201ba45b07490b87ad1 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 28 Dec 2021 08:17:38 -0500 Subject: [PATCH 268/325] Update benchmarks for multitarget testing --- .../DeltaQ.Benchmarks.csproj | 28 +++++++-------- .../LibDivSufSortBenchmarks.cs | 36 ------------------- bench/DeltaQ.Benchmarks/Log2Benchmarks.cs | 25 +++---------- bench/DeltaQ.Benchmarks/Program.cs | 28 +++++++++++++-- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 27 +++++--------- .../SuffixSortingBenchmarks.cs | 36 +++++++++++++++++++ bench/DeltaQ.Benchmarks/SwapBenchmarks.cs | 5 ++- 7 files changed, 92 insertions(+), 93 deletions(-) delete mode 100644 bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs create mode 100644 bench/DeltaQ.Benchmarks/SuffixSortingBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj index 7046611..a3ca878 100644 --- a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj +++ b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj @@ -2,32 +2,30 @@ Exe + net6.0 enable enable - - - - - PreserveNewest - - + latest - - - - - - - + pdbonly + true + - + + + PreserveNewest + + + + + diff --git a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs b/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs deleted file mode 100644 index 655870d..0000000 --- a/bench/DeltaQ.Benchmarks/LibDivSufSortBenchmarks.cs +++ /dev/null @@ -1,36 +0,0 @@ -using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Engines; -using DeltaQ.SuffixSorting.LibDivSufSort; - -namespace DeltaQ.Benchmarks -{ - [SimpleJob(RunStrategy.Throughput)] - public class LibDivSufSortBenchmarks - { - private static readonly byte[][] _assets = Directory.EnumerateFiles("./assets/").Select(File.ReadAllBytes).ToArray(); - - [Benchmark(Baseline = true)] - public void ldss() - { - //SsSort.new_ss_pivot_feature_flag = false; - - var ldss = new LibDivSufSort(); - foreach (var asset in _assets) - { - ldss.Sort(asset).Dispose(); - } - } - - //[Benchmark] - //public void ss_pivot_new() - //{ - // 
SsSort.new_ss_pivot_feature_flag = true; - - // var ldss = new LibDivSufSort(); - // foreach (var asset in _assets) - // { - // ldss.Sort(asset).Dispose(); - // } - //} - } -} diff --git a/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs b/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs index 73802f5..5d041b9 100644 --- a/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs +++ b/bench/DeltaQ.Benchmarks/Log2Benchmarks.cs @@ -3,32 +3,15 @@ using Idx = System.Int32; using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; namespace DeltaQ.Benchmarks { - [RyuJitX64Job] - //[RyuJitX86Job] - [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + //[HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + [SimpleJob(RunStrategy.Throughput)] public class Log2Benchmarks { private const int Step = 1; - //public Log2Benchmarks() - //{ - // //sanity check range - // for (int i = 1; i < int.MaxValue; i++) - // { - // var x = tr_ilg(i); - // var y = Log2(i); - // var z = Math.ILogB(i); - // //var a = MathF.ILogB(i); - // var a = (int)Math.Log2(i); - // //var a = (int)MathF.Log2(i); - // if (x != y || y != z || z != a) - // { - // throw new InvalidOperationException($"{i} did not match"); - // } - // } - //} [Benchmark(Baseline = true)] public void tr_ilg() @@ -52,6 +35,7 @@ public void Log2() GC.KeepAlive(y); } +#if NETCOREAPP3_0_OR_GREATER [Benchmark] public void MathLog2() { @@ -95,6 +79,7 @@ public void MathFILogB() } GC.KeepAlive(y); } +#endif [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int tr_ilg(int n) diff --git a/bench/DeltaQ.Benchmarks/Program.cs b/bench/DeltaQ.Benchmarks/Program.cs index 3b9be9a..be0f80b 100644 --- a/bench/DeltaQ.Benchmarks/Program.cs +++ b/bench/DeltaQ.Benchmarks/Program.cs @@ -1,4 +1,26 @@ -using BenchmarkDotNet.Running; -using DeltaQ.Benchmarks; +using BenchmarkDotNet.Configs; +using 
BenchmarkDotNet.Diagnostics.Windows; +using BenchmarkDotNet.Running; +using Microsoft.Diagnostics.Tracing; +using Microsoft.Diagnostics.Tracing.Parsers; +using Microsoft.Diagnostics.Tracing.Session; -BenchmarkRunner.Run(); \ No newline at end of file +IConfig? config = null; +if (args.Any() && args[0] == "etl") +{ + var bdnDefaults = (ClrTraceEventParser.Keywords)167993uL; + var keywords = bdnDefaults | ClrTraceEventParser.Keywords.GCSampledObjectAllocationHigh | ClrTraceEventParser.Keywords.Threading; + var providers = new (Guid, TraceEventLevel, ulong, TraceEventProviderOptions?)[] + { + (ClrTraceEventParser.ProviderGuid, TraceEventLevel.Verbose, (ulong)keywords, new TraceEventProviderOptions + { + StacksEnabled = true, + }), + (new Guid("0866B2B8-5CEF-5DB9-2612-0C0FFD814A44"), TraceEventLevel.Informational, ulong.MaxValue, null) + }; + var profilerConfig = new EtwProfilerConfig(providers: providers); + var profiler = new EtwProfiler(profilerConfig); + config = DefaultConfig.Instance.AddDiagnoser(profiler); +} + +BenchmarkSwitcher.FromAssemblies(new[] { typeof(Program).Assembly }).Run(args, config); \ No newline at end of file diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs index f0b8bee..b1a864d 100644 --- a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs @@ -3,26 +3,15 @@ using Idx = System.Int32; using static DeltaQ.SuffixSorting.LibDivSufSort.Utils; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; namespace DeltaQ.Benchmarks { - [RyuJitX64Job] - //[RyuJitX86Job] - [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + //[HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + [SimpleJob(RunStrategy.Throughput)] public class SqrtBenchmarks { private const int Step = 1; - //public SqrtBenchmarks() - //{ - // //sanity check range - // for (int i = 0; i < SS_BLOCKSIZE * 
SS_BLOCKSIZE; i++) - // { - // var sqrtFast = ss_isqrt(i); - // var sqrtD = (int)Math.Sqrt(i); - // var sqrtF = (int)MathF.Sqrt(i); - // if (sqrtFast != sqrtD || sqrtD != sqrtF) throw new InvalidOperationException($"{i} did not match"); - // } - //} [Benchmark(Baseline = true)] public void SqrtsSS() @@ -46,6 +35,7 @@ public void SqrtsMath() GC.KeepAlive(y); } +#if NETCOREAPP3_0_OR_GREATER [Benchmark] public void SqrtsMathF() { @@ -58,23 +48,24 @@ public void SqrtsMathF() } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ss_isqrt_math(int x) + private static int ss_isqrt_mathf(int x) { if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } - return (int)Math.Sqrt(x); + return (int)MathF.Sqrt(x); } +#endif [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ss_isqrt_mathf(int x) + private static int ss_isqrt_math(int x) { if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } - return (int)MathF.Sqrt(x); + return (int)Math.Sqrt(x); } private const Idx SS_BLOCKSIZE = 1024; diff --git a/bench/DeltaQ.Benchmarks/SuffixSortingBenchmarks.cs b/bench/DeltaQ.Benchmarks/SuffixSortingBenchmarks.cs new file mode 100644 index 0000000..a15a9e1 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/SuffixSortingBenchmarks.cs @@ -0,0 +1,36 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.LibDivSufSort; +using DeltaQ.SuffixSorting.SAIS; + +namespace DeltaQ.Benchmarks +{ + //[MemoryDiagnoser] + [HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + public class SuffixSortingBenchmarks + { + private const string AbsoluteAssetsPath = @"assets/"; + public static IEnumerable Assets { get; } = Directory.EnumerateFiles(AbsoluteAssetsPath) + .Select(file => new object[] { Path.GetFileName(file), File.ReadAllBytes(file) }) + .ToArray(); + + private static readonly ISuffixSort LDSS = new 
LibDivSufSort(); + private static readonly ISuffixSort SAIS = new SAIS(); + + [ArgumentsSource(nameof(Assets))] + [Benchmark] + public void ldss(string name, byte[] asset) + { + LDSS.Sort(asset).Dispose(); + } + + [ArgumentsSource(nameof(Assets))] + [Benchmark(Baseline = true)] + public void sais(string name, byte[] asset) + { + SAIS.Sort(asset).Dispose(); + } + } +} diff --git a/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs b/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs index 8eff492..0206a7d 100644 --- a/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/SwapBenchmarks.cs @@ -1,9 +1,12 @@ using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; using System.Runtime.CompilerServices; namespace DeltaQ.Benchmarks { - [SimpleJob] + //[HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + [SimpleJob(RunStrategy.Throughput)] public class SwapBenchmarks { [Benchmark] From 9b06d052aeac4981d0881d980cc6e9719244de36 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 28 Dec 2021 08:23:02 -0500 Subject: [PATCH 269/325] Use ValueTuple for SsStackItem and stackalloc it Stackalloc should be safe here, as `sizeof(SsStackItem)=sizeof(int)*4=16` and `SS_STACK_SIZE=16`, putting us exactly at the 256 byte stackalloc cutoff --- .../SsSort.cs | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs index 31413be..3306223 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/SsSort.cs @@ -10,6 +10,7 @@ namespace DeltaQ.SuffixSorting.LibDivSufSort; using static Crosscheck; using static Utils; +using SsStackItem = System.ValueTuple;//(SAPtr a, SAPtr b, SAPtr c, Idx d); internal static class SsSort { @@ -159,6 +160,7 @@ private static int ss_compare(Text T, ReadOnlySpan SAp1, SAPtr p1, ReadOnly var U2 = depth + SAp2[p2]; var U1n = SAp1[p1 + 1] + 2; var U2n = SAp2[p2 + 1] + 2; + //return T[U1..U1n].SequenceCompareTo(T[U2..U2n]); while ((U1 < U1n) && (U2 < U2n) && (T[U1] == T[U2])) { @@ -893,21 +895,14 @@ private static void ss_mergeforward(Text T, Span SA, SAPtr PA, SAPtr first, } } - private struct SsStackItem - { - public SAPtr a; - public SAPtr b; - public SAPtr c; - public Idx d; - } - private const int SS_STACK_SIZE = 16; private const int MERGE_STACK_SIZE = 32; private ref struct SsStack { - public readonly Span Items; - public int Size; + private readonly Span Items; + private int Size; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public SsStack(Span items) { Items = items; @@ -919,22 +914,19 @@ public void Push(SAPtr a, SAPtr b, SAPtr c, Idx d) { Trace.Assert(Size < Items.Length); ref SsStackItem item = ref Items[Size++]; - item.a = a; - item.b = b; - item.c = c; - item.d = d; + item = (a, b, c, d); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Pop(ref SAPtr a, ref SAPtr b, ref SAPtr c, ref Idx d) { - if 
(Size == 0) return false; + if (Size == 0) + { + return false; + } ref SsStackItem item = ref Items[--Size]; - a = item.a; - b = item.b; - c = item.c; - d = item.d; + (a, b, c, d) = item; return true; } } @@ -948,8 +940,7 @@ private static void ss_mintrosort(Text T, Span SA, SAPtr partitionOffset, S { var PA = SA[partitionOffset..]; - using var stackOwner = SpanOwner.Allocate(SS_STACK_SIZE); - var stack = new SsStack(stackOwner.Span); + var stack = new SsStack(stackalloc SsStackItem[SS_STACK_SIZE]); SAPtr a, b, c, d, e, f; From 6a54cbe1d121716c21e53861491f23d6fc91a1f5 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 20:51:40 -0500 Subject: [PATCH 270/325] Fix misspelled LDSS test project (how long has that been there?) --- deltaq.sln | 4 ++-- .../Crosscheck.cs | 0 .../DeltaQ.SuffixSorting.LibDivSufSort.Tests.csproj} | 0 .../LDSSChecker.cs | 0 .../LibDivSufSortTests.cs | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename test/{DeltaQ.SuffixSorting.LivDivSufSort.Tests => DeltaQ.SuffixSorting.LibDivSufSort.Tests}/Crosscheck.cs (100%) rename test/{DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj => DeltaQ.SuffixSorting.LibDivSufSort.Tests/DeltaQ.SuffixSorting.LibDivSufSort.Tests.csproj} (100%) rename test/{DeltaQ.SuffixSorting.LivDivSufSort.Tests => DeltaQ.SuffixSorting.LibDivSufSort.Tests}/LDSSChecker.cs (100%) rename test/{DeltaQ.SuffixSorting.LivDivSufSort.Tests => DeltaQ.SuffixSorting.LibDivSufSort.Tests}/LibDivSufSortTests.cs (100%) diff --git a/deltaq.sln b/deltaq.sln index d7bf58a..634666d 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -29,11 +29,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Utility.Memory", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LibDivSufSort", "src\DeltaQ.SuffixSorting.LibDivSufSort\DeltaQ.SuffixSorting.LibDivSufSort.csproj", "{E89B007E-0BDE-4642-B40F-CCB7569F88B8}" EndProject 
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LivDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LivDivSufSort.Tests\DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.LibDivSufSort.Tests", "test\DeltaQ.SuffixSorting.LibDivSufSort.Tests\DeltaQ.SuffixSorting.LibDivSufSort.Tests.csproj", "{5486E391-BFF9-4ED9-8383-032AE249C588}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "bench", "bench", "{BF7CD739-6B0C-424D-A5CD-7970D80915E4}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeltaQ.Benchmarks", "bench\DeltaQ.Benchmarks\DeltaQ.Benchmarks.csproj", "{A5FCA064-2EED-4CDB-93D7-FF16E3314885}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Benchmarks", "bench\DeltaQ.Benchmarks\DeltaQ.Benchmarks.csproj", "{A5FCA064-2EED-4CDB-93D7-FF16E3314885}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{09BBE319-2E1C-4878-AA44-FCC730167792}" ProjectSection(SolutionItems) = preProject diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs b/test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/Crosscheck.cs similarity index 100% rename from test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/Crosscheck.cs rename to test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/Crosscheck.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj b/test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/DeltaQ.SuffixSorting.LibDivSufSort.Tests.csproj similarity index 100% rename from test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/DeltaQ.SuffixSorting.LivDivSufSort.Tests.csproj rename to test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/DeltaQ.SuffixSorting.LibDivSufSort.Tests.csproj diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs b/test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/LDSSChecker.cs 
similarity index 100% rename from test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LDSSChecker.cs rename to test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/LDSSChecker.cs diff --git a/test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs b/test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/LibDivSufSortTests.cs similarity index 100% rename from test/DeltaQ.SuffixSorting.LivDivSufSort.Tests/LibDivSufSortTests.cs rename to test/DeltaQ.SuffixSorting.LibDivSufSort.Tests/LibDivSufSortTests.cs From 7dcf732f2154e34181b706774a37b29785bb836b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 20:51:56 -0500 Subject: [PATCH 271/325] Add pkg folder to ignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9491a2f..4de518a 100644 --- a/.gitignore +++ b/.gitignore @@ -360,4 +360,7 @@ MigrationBackup/ .ionide/ # Fody - auto-generated XML schema -FodyWeavers.xsd \ No newline at end of file +FodyWeavers.xsd + +# DeltaQ +/pkg \ No newline at end of file From ca92f1e0f5451ef014de98ecd57bae72c9b19bfc Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 20:52:24 -0500 Subject: [PATCH 272/325] Add 3pn to DQ.Utility.Memory --- src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt diff --git a/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt new file mode 100644 index 0000000..71fd1bf --- /dev/null +++ b/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt @@ -0,0 +1,12 @@ +DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. + +The attached notices are provided for information only. + +License notice for .NET +------------------------------- + +Copyright (c) .NET Foundation and Contributors. All rights reserved. +Licensed under the MIT license. 
+ +Available at +https://raw.githubusercontent.com/dotnet/runtime/main/LICENSE.TXT \ No newline at end of file From 58ef58f7506bb57a43cf8a2b36d49cda027b8cb1 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 20:58:32 -0500 Subject: [PATCH 273/325] Prepare DQ.Utility.Memory for RC --- .../DeltaQ.Utility.Memory.csproj | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 6995053..9816ca9 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -4,15 +4,37 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.4.0 + 2.0.0 + DeltaQ utility library to support buffer and memory management + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + latest enable + + + true + true true snupkg + + + + true + + + + + + + + + - + From 8df010faf8a1b531bc024cb0eb5be9c63cc5ed5e Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Tue, 28 Dec 2021 21:48:46 -0500 Subject: [PATCH 274/325] Update diff description --- src/DeltaQ.CLI/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CLI/Program.cs index 2603457..9e0f032 100644 --- a/src/DeltaQ.CLI/Program.cs +++ b/src/DeltaQ.CLI/Program.cs @@ -82,7 +82,7 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) static ISuffixSort GetDefaultSort() => new LibDivSufSort(); app.Command("diff", command => { - command.Description = "Diff two files"; + command.Description = "Generate a delta (difference) between two files"; command.HelpOption(HelpOptions); var oldFileArg = command.Argument("[oldfile]", ""); From ee9455fcbae744fb5a242aa08be74027f3b62c17 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Tue, 28 Dec 2021 21:57:35 -0500 Subject: [PATCH 275/325] Rename DQ.CLI to DQ.CommandLine --- deltaq.sln | 2 +- src/DeltaQ.CommandLine/Commands.Delta.cs | 12 ++++++++++++ .../DeltaQ.CommandLine.csproj} | 8 +++----- src/{DeltaQ.CLI => DeltaQ.CommandLine}/Program.cs | 0 src/{DeltaQ.CLI => DeltaQ.CommandLine}/fuzz.sh | 0 5 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 src/DeltaQ.CommandLine/Commands.Delta.cs rename src/{DeltaQ.CLI/DeltaQ.CLI.csproj => DeltaQ.CommandLine/DeltaQ.CommandLine.csproj} (81%) rename src/{DeltaQ.CLI => DeltaQ.CommandLine}/Program.cs (100%) rename src/{DeltaQ.CLI => DeltaQ.CommandLine}/fuzz.sh (100%) diff --git a/deltaq.sln b/deltaq.sln index 634666d..4db531f 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -7,7 +7,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.CLI", "src\DeltaQ.CLI\DeltaQ.CLI.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.CommandLine", "src\DeltaQ.CommandLine\DeltaQ.CommandLine.csproj", "{2E9A6A2A-438E-45DD-BDBC-8156A70B284F}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.BsDiff", "src\DeltaQ.BsDiff\DeltaQ.BsDiff.csproj", "{C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1}" EndProject diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.Delta.cs new file mode 100644 index 0000000..9fe7bc1 --- /dev/null +++ b/src/DeltaQ.CommandLine/Commands.Delta.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace DeltaQ.CLI +{ + public static partial class Commands + { + } +} diff --git a/src/DeltaQ.CLI/DeltaQ.CLI.csproj 
b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj similarity index 81% rename from src/DeltaQ.CLI/DeltaQ.CLI.csproj rename to src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 6b732f3..2d7ba74 100644 --- a/src/DeltaQ.CLI/DeltaQ.CLI.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -7,16 +7,15 @@ jzebedee dq true - ../../pkg - + + - - + - diff --git a/src/DeltaQ.CLI/Program.cs b/src/DeltaQ.CommandLine/Program.cs similarity index 100% rename from src/DeltaQ.CLI/Program.cs rename to src/DeltaQ.CommandLine/Program.cs diff --git a/src/DeltaQ.CLI/fuzz.sh b/src/DeltaQ.CommandLine/fuzz.sh similarity index 100% rename from src/DeltaQ.CLI/fuzz.sh rename to src/DeltaQ.CommandLine/fuzz.sh From cdffec2c6cfff31049f8f128f866520952fc92b0 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 04:25:10 -0500 Subject: [PATCH 276/325] Break out DQ.CommandLine Place fuzzing behavior behind the FUZZ define constant --- src/DeltaQ.CommandLine/Commands.Delta.cs | 45 +++++++-- src/DeltaQ.CommandLine/Defaults.cs | 11 +++ .../DeltaQ.CommandLine.csproj | 30 ++++-- .../Fuzzing/Commands.Fuzz.cs | 38 ++++++++ .../Fuzzing/SuffixSortingVerifier.cs | 23 +++++ src/DeltaQ.CommandLine/Program.cs | 92 ++----------------- 6 files changed, 139 insertions(+), 100 deletions(-) create mode 100644 src/DeltaQ.CommandLine/Defaults.cs create mode 100644 src/DeltaQ.CommandLine/Fuzzing/Commands.Fuzz.cs create mode 100644 src/DeltaQ.CommandLine/Fuzzing/SuffixSortingVerifier.cs diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.Delta.cs index 9fe7bc1..9d02f9b 100644 --- a/src/DeltaQ.CommandLine/Commands.Delta.cs +++ b/src/DeltaQ.CommandLine/Commands.Delta.cs @@ -1,12 +1,39 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.LibDivSufSort; +using DeltaQ.SuffixSorting.SAIS; +using Microsoft.Extensions.CommandLineUtils; 
+using System; +using System.IO; -namespace DeltaQ.CLI +namespace DeltaQ.CommandLine; +using static Defaults; + +internal static partial class Commands { - public static partial class Commands + public static Action DeltaCommand { get; } = command => { - } -} + command.Description = "Generate a delta (difference) between two files"; + command.HelpOption(HelpOptions); + + var oldFileArg = command.Argument("[oldfile]", ""); + var newFileArg = command.Argument("[newfile]", ""); + var deltaFileArg = command.Argument("[deltafile]", ""); + var algoArg = command.Option("-ss|--suffix-sort ", "Suffix sort library: [sais], [divsufsort]", CommandOptionType.SingleValue); + + command.OnExecute(() => + { + var oldFile = oldFileArg.Value; + var newFile = newFileArg.Value; + var deltaFile = deltaFileArg.Value; + ISuffixSort sort = algoArg.Value() switch + { + "sais" => new SAIS(), + "divsufsort" => new LibDivSufSort(), + _ => GetDefaultSort() + }; + BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); + Console.WriteLine($"Diff [sort:{sort.GetType()}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); + return 0; + }); + }; +} \ No newline at end of file diff --git a/src/DeltaQ.CommandLine/Defaults.cs b/src/DeltaQ.CommandLine/Defaults.cs new file mode 100644 index 0000000..7a38a9b --- /dev/null +++ b/src/DeltaQ.CommandLine/Defaults.cs @@ -0,0 +1,11 @@ +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.LibDivSufSort; + +namespace DeltaQ.CommandLine; + +internal static class Defaults +{ + public const string HelpOptions = "-?|-h|--help"; + public static ISuffixSort GetDefaultSort() => new LibDivSufSort(); + +} diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 2d7ba74..940a46e 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -11,16 +11,34 @@ - - + + + + + + + + + + + + + + + + diff --git 
a/src/DeltaQ.CommandLine/Fuzzing/Commands.Fuzz.cs b/src/DeltaQ.CommandLine/Fuzzing/Commands.Fuzz.cs new file mode 100644 index 0000000..f6a3cfd --- /dev/null +++ b/src/DeltaQ.CommandLine/Fuzzing/Commands.Fuzz.cs @@ -0,0 +1,38 @@ +using DeltaQ.CommandLine.Fuzzing; +using DeltaQ.SuffixSorting.LibDivSufSort; +using Microsoft.Extensions.CommandLineUtils; +using SharpFuzz; +using System; +using System.IO; + +namespace DeltaQ.CommandLine; +using static Defaults; +using static SuffixSortingVerifier; + +internal static partial class Commands +{ + public static Action FuzzCommand { get; } = command => + { + command.Description = "Fuzzit"; + command.HelpOption(HelpOptions); + + command.OnExecute(() => + { + Fuzzer.Run((Stream s) => + { + using var ms = new MemoryStream(); + s.CopyTo(ms); + + if (!ms.TryGetBuffer(out var T)) + { + throw new InvalidOperationException(); + } + + var ldss = new LibDivSufSort(); + using var ownedSA = ldss.Sort(T); + Verify(T, ownedSA.Memory.Span); + }); + return 0; + }); + }; +} \ No newline at end of file diff --git a/src/DeltaQ.CommandLine/Fuzzing/SuffixSortingVerifier.cs b/src/DeltaQ.CommandLine/Fuzzing/SuffixSortingVerifier.cs new file mode 100644 index 0000000..364cc9f --- /dev/null +++ b/src/DeltaQ.CommandLine/Fuzzing/SuffixSortingVerifier.cs @@ -0,0 +1,23 @@ +using System; + +namespace DeltaQ.CommandLine.Fuzzing; + +internal static class SuffixSortingVerifier +{ + public static void Verify(ReadOnlySpan input, ReadOnlySpan sa) + { + for (int i = 0; i < input.Length - 1; i++) + { + var cur = input[sa[i]..]; + var next = input[sa[i + 1]..]; + var cmp = cur.SequenceCompareTo(next); + if (!(cmp < 0)) + { + var ex = new InvalidOperationException("Input was unsorted"); + ex.Data["i"] = i; + ex.Data["j"] = i + 1; + throw ex; + } + } + } +} diff --git a/src/DeltaQ.CommandLine/Program.cs b/src/DeltaQ.CommandLine/Program.cs index 9e0f032..df777a4 100644 --- a/src/DeltaQ.CommandLine/Program.cs +++ b/src/DeltaQ.CommandLine/Program.cs @@ -1,39 +1,7 @@ 
using System; -using System.IO; -using System.Text; -using DeltaQ.SuffixSorting; -using DeltaQ.SuffixSorting.LibDivSufSort; -using DeltaQ.SuffixSorting.SAIS; +using DeltaQ.CommandLine; using Microsoft.Extensions.CommandLineUtils; -using SharpFuzz; - -static void Verify(ReadOnlySpan input, ReadOnlySpan sa) -{ - //ref byte suff(int index) => ref input[sa[index]]; - for (int i = 0; i < input.Length - 1; i++) - { - //if(!(suff(i) < suff(i + 1))) - var cur = input[sa[i]..]; - var next = input[sa[i + 1]..]; - var cmp = cur.SequenceCompareTo(next); - if (!(cmp < 0)) - //if (!(cur < next)) - { - var ex = new InvalidOperationException("Input was unsorted"); - ex.Data["i"] = i; - ex.Data["j"] = i + 1; - throw ex; - } - } - - var result = DeltaQ.Tests.LDSSChecker.Check(input, sa, true); - if (result != DeltaQ.Tests.LDSSChecker.ResultCode.Done) - { - throw new InvalidOperationException($"Input failed with result code {result}"); - } -} - -const string HelpOptions = "-?|-h|--help"; +using static DeltaQ.CommandLine.Defaults; // Description of the application var app = new CommandLineApplication() @@ -54,58 +22,12 @@ static void Verify(ReadOnlySpan input, ReadOnlySpan sa) return 0; }); -app.Command("fuzz", command => -{ - command.Description = "Fuzzit"; - command.HelpOption(HelpOptions); - - command.OnExecute(() => - { - Fuzzer.Run((Stream s) => - { - using var ms = new MemoryStream(); - s.CopyTo(ms); - - if (!ms.TryGetBuffer(out var T)) - { - throw new InvalidOperationException(); - } - - var ldss = new LibDivSufSort(); - using var ownedSA = ldss.Sort(T); - Verify(T, ownedSA.Memory.Span); - }); - return 0; - }); -}); - -static ISuffixSort GetDefaultSort() => new LibDivSufSort(); -app.Command("diff", command => -{ - command.Description = "Generate a delta (difference) between two files"; - command.HelpOption(HelpOptions); - - var oldFileArg = command.Argument("[oldfile]", ""); - var newFileArg = command.Argument("[newfile]", ""); - var deltaFileArg = 
command.Argument("[deltafile]", ""); - var algoArg = command.Option("-ss|--suffix-sort ", "Suffix sort library: [sais], [divsufsort]", CommandOptionType.SingleValue); +#if FUZZ +app.Command("fuzz", Commands.FuzzCommand); +#endif - command.OnExecute(() => - { - var oldFile = oldFileArg.Value; - var newFile = newFileArg.Value; - var deltaFile = deltaFileArg.Value; - ISuffixSort sort = algoArg.Value() switch - { - "sais" => new SAIS(), - "divsufsort" => new LibDivSufSort(), - _ => GetDefaultSort() - }; - DeltaQ.BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); - Console.WriteLine($"Diff [sort:{sort.GetType()}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); - return 0; - }); -}); +app.Command("delta", Commands.DeltaCommand); +app.Command("diff", Commands.DeltaCommand); try { From 45f10d772a79cc2d276317f9c1c764ec48273e6c Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 04:32:04 -0500 Subject: [PATCH 277/325] Use assembly version in DQ.CommandLine --- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 1 + src/DeltaQ.CommandLine/Program.cs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 940a46e..4b7a692 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -5,6 +5,7 @@ net6.0 DeltaQ jzebedee + 0.1.0 dq true diff --git a/src/DeltaQ.CommandLine/Program.cs b/src/DeltaQ.CommandLine/Program.cs index df777a4..209fe31 100644 --- a/src/DeltaQ.CommandLine/Program.cs +++ b/src/DeltaQ.CommandLine/Program.cs @@ -12,7 +12,7 @@ }; app.HelpOption(HelpOptions); -app.VersionOption("--version", "0.1.0"); +app.VersionOption("--version", typeof(Program).Assembly.GetName().Version.ToString()); //No args app.OnExecute(() => From cf2b5472d7a0a046f86857792265cbaae1613dda Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 04:33:50 -0500 Subject: [PATCH 278/325] None include Fuzzing when FUZZ is not defined --- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 4b7a692..4804c64 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -39,6 +39,7 @@ + From 94c4e6b0b76d4d797df0470f497295e0b01cad69 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 04:58:08 -0500 Subject: [PATCH 279/325] Add pretty output to delta command --- src/DeltaQ.CommandLine/Commands.Delta.cs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.Delta.cs index 9d02f9b..877a283 100644 --- a/src/DeltaQ.CommandLine/Commands.Delta.cs +++ b/src/DeltaQ.CommandLine/Commands.Delta.cs @@ -31,8 +31,28 @@ internal static partial class Commands "divsufsort" => new LibDivSufSort(), _ => GetDefaultSort() }; - BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); - Console.WriteLine($"Diff [sort:{sort.GetType()}]: old:{oldFile} new:{newFile} delta:{deltaFile}"); + Console.WriteLine("Generating BsDiff delta between"); + Console.WriteLine($@"Old file: ""{oldFile}"""); + Console.WriteLine($@"New file: ""{newFile}"""); + Console.WriteLine($"with suffix sort {sort.GetType().Name}"); + Console.WriteLine(); + try + { + var sw = System.Diagnostics.Stopwatch.StartNew(); + BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); + sw.Stop(); + + Console.WriteLine($"Finished in {sw.Elapsed}"); + Console.WriteLine($@"Delta file: ""{deltaFile}"""); + var deltaFileInfo = new FileInfo(deltaFile); + var deltaFileRatio = (double)deltaFileInfo.Length / (new 
FileInfo(oldFile).Length + new FileInfo(newFile).Length); + Console.WriteLine($@"Delta size: {deltaFileInfo.Length} ({deltaFileRatio:0.00%})"); + } + catch + { + Console.Error.WriteLine("Failed to create delta"); + throw; + } return 0; }); }; From e6fa669a3cb7ad921cd8735b8792c7f95b7d6922 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 05:04:54 -0500 Subject: [PATCH 280/325] Add and use Humanizer --- src/DeltaQ.CommandLine/Commands.Delta.cs | 7 ++++--- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.Delta.cs index 877a283..e73b468 100644 --- a/src/DeltaQ.CommandLine/Commands.Delta.cs +++ b/src/DeltaQ.CommandLine/Commands.Delta.cs @@ -1,6 +1,7 @@ using DeltaQ.SuffixSorting; using DeltaQ.SuffixSorting.LibDivSufSort; using DeltaQ.SuffixSorting.SAIS; +using Humanizer; using Microsoft.Extensions.CommandLineUtils; using System; using System.IO; @@ -41,12 +42,12 @@ internal static partial class Commands var sw = System.Diagnostics.Stopwatch.StartNew(); BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); sw.Stop(); - - Console.WriteLine($"Finished in {sw.Elapsed}"); + + Console.WriteLine($"Finished in {sw.Elapsed.Humanize()} [{sw.Elapsed}]"); Console.WriteLine($@"Delta file: ""{deltaFile}"""); var deltaFileInfo = new FileInfo(deltaFile); var deltaFileRatio = (double)deltaFileInfo.Length / (new FileInfo(oldFile).Length + new FileInfo(newFile).Length); - Console.WriteLine($@"Delta size: {deltaFileInfo.Length} ({deltaFileRatio:0.00%})"); + Console.WriteLine($@"Delta size: {deltaFileInfo.Length.Bytes()} ({deltaFileRatio:0.00%})"); } catch { diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 4804c64..44c17d9 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ 
b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -11,6 +11,7 @@ + From 422def73a42cd7db63f67a505c75722b69dd7877 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 05:48:18 -0500 Subject: [PATCH 281/325] Fix BsPatch not correctly slicing input buffer during diff chunk --- src/DeltaQ.BsDiff/Patch.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 1cd1166..852052e 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -133,7 +133,7 @@ private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Strea while (addSize > 0) { var diffBytesRead = diff.Read(diffBuffer.SliceUpTo((int)addSize)); - var inputBytesRead = input.Read(inputBuffer); + var inputBytesRead = input.Read(inputBuffer.SliceUpTo((int)addSize)); if (inputBytesRead != diffBytesRead) throw new InvalidOperationException("Corrupt patch"); From 99bd0dab2d25279c7f41fedcb6f96d512b10b5c4 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 05:48:40 -0500 Subject: [PATCH 282/325] Add descriptions to DeltaCommand --- src/DeltaQ.CommandLine/Commands.Delta.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.Delta.cs index e73b468..a5346d8 100644 --- a/src/DeltaQ.CommandLine/Commands.Delta.cs +++ b/src/DeltaQ.CommandLine/Commands.Delta.cs @@ -16,9 +16,9 @@ internal static partial class Commands command.Description = "Generate a delta (difference) between two files"; command.HelpOption(HelpOptions); - var oldFileArg = command.Argument("[oldfile]", ""); - var newFileArg = command.Argument("[newfile]", ""); - var deltaFileArg = command.Argument("[deltafile]", ""); + var oldFileArg = command.Argument("[oldfile]", "Original file (input)"); + var newFileArg = command.Argument("[newfile]", "New file (input)"); + var deltaFileArg = command.Argument("[deltafile]", "Delta file (output)"); var algoArg = command.Option("-ss|--suffix-sort ", "Suffix sort library: [sais], [divsufsort]", CommandOptionType.SingleValue); command.OnExecute(() => From 41ca482769610acabbd9fc1d5ba39b206b1d6107 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 05:50:08 -0500 Subject: [PATCH 283/325] Add ApplyCommand --- src/DeltaQ.CommandLine/Commands.Apply.cs | 54 ++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/DeltaQ.CommandLine/Commands.Apply.cs diff --git a/src/DeltaQ.CommandLine/Commands.Apply.cs b/src/DeltaQ.CommandLine/Commands.Apply.cs new file mode 100644 index 0000000..2888202 --- /dev/null +++ b/src/DeltaQ.CommandLine/Commands.Apply.cs @@ -0,0 +1,54 @@ +using Humanizer; +using Microsoft.Extensions.CommandLineUtils; +using System; +using System.IO; +using System.IO.MemoryMappedFiles; + +namespace DeltaQ.CommandLine; +using static Defaults; + +internal static partial class Commands +{ + public static Action ApplyCommand { get; } = command => + { + command.Description = "Apply a delta (patch) to an original file and generate an output file"; + command.HelpOption(HelpOptions); + + var oldFileArg = command.Argument("[oldfile]", "Original file (input)"); + var deltaFileArg = command.Argument("[deltafile]", "Delta file (input)"); + var newFileArg = command.Argument("[newfile]", "New file (output)"); + + command.OnExecute(() => + { + var oldFile = oldFileArg.Value; + var newFile = newFileArg.Value; + var deltaFile = deltaFileArg.Value; + Console.WriteLine("Applying BsDiff delta between"); + Console.WriteLine($@"Old file: ""{oldFile}"""); + Console.WriteLine($@"Delta file: ""{deltaFile}"""); + Console.WriteLine(); + try + { + var sw = System.Diagnostics.Stopwatch.StartNew(); + { + using var fsInput = File.OpenRead(oldFile); + using var fsDelta = MemoryMappedFile.CreateFromFile(deltaFile, FileMode.Open, null, 0, MemoryMappedFileAccess.Read); + using var fsOutput = File.Create(newFile); + BsDiff.Patch.Apply(fsInput, OpenPatch, fsOutput); + + Stream OpenPatch(long offset, long length) => fsDelta.CreateViewStream(offset, length, MemoryMappedFileAccess.Read); + } + sw.Stop(); + + Console.WriteLine($"Finished in {sw.Elapsed.Humanize()} [{sw.Elapsed}]"); + 
Console.WriteLine($@"New file: ""{newFile}"""); + } + catch + { + Console.Error.WriteLine("Failed to apply delta"); + throw; + } + return 0; + }); + }; +} \ No newline at end of file From 888461f237abcdb7cbf72ccbf0edc78a536b516a Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 05:50:40 -0500 Subject: [PATCH 284/325] Add apply command to dq commands --- src/DeltaQ.CommandLine/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.CommandLine/Program.cs b/src/DeltaQ.CommandLine/Program.cs index 209fe31..1bbe8a0 100644 --- a/src/DeltaQ.CommandLine/Program.cs +++ b/src/DeltaQ.CommandLine/Program.cs @@ -27,7 +27,7 @@ #endif app.Command("delta", Commands.DeltaCommand); -app.Command("diff", Commands.DeltaCommand); +app.Command("apply", Commands.ApplyCommand); try { From c161695382cb3e3a01a138fae68710aa8f5a535b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 05:51:38 -0500 Subject: [PATCH 285/325] Move fuzz.sh under Fuzzing --- src/DeltaQ.CommandLine/{ => Fuzzing}/fuzz.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/DeltaQ.CommandLine/{ => Fuzzing}/fuzz.sh (100%) diff --git a/src/DeltaQ.CommandLine/fuzz.sh b/src/DeltaQ.CommandLine/Fuzzing/fuzz.sh similarity index 100% rename from src/DeltaQ.CommandLine/fuzz.sh rename to src/DeltaQ.CommandLine/Fuzzing/fuzz.sh From 13d368ab572084780deef4558409a522eb239a5c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 07:06:46 -0500 Subject: [PATCH 286/325] Add BsSpanWriteBenchmarks --- .../BsSpanWriteBenchmarks.cs | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 bench/DeltaQ.Benchmarks/BsSpanWriteBenchmarks.cs diff --git a/bench/DeltaQ.Benchmarks/BsSpanWriteBenchmarks.cs b/bench/DeltaQ.Benchmarks/BsSpanWriteBenchmarks.cs new file mode 100644 index 0000000..98049a1 --- /dev/null +++ b/bench/DeltaQ.Benchmarks/BsSpanWriteBenchmarks.cs @@ -0,0 +1,90 @@ +using BenchmarkDotNet.Attributes; +using Idx = System.Int32; +using BenchmarkDotNet.Engines; + +namespace DeltaQ.Benchmarks +{ + //[HardwareCounters(HardwareCounter.BranchInstructions, HardwareCounter.BranchMispredictions)] + [SimpleJob(RunStrategy.Throughput)] + public class BsSpanWriteBenchmarks + { + public static IEnumerable Numbers + { + get + { + yield return long.MinValue; + yield return long.MinValue / 2; + yield return long.MinValue / 3; + yield return 0; + yield return long.MaxValue; + yield return long.MaxValue / 2; + yield return long.MaxValue / 3; + } + } + + [ArgumentsSource(nameof(Numbers))] + [Benchmark(Baseline = true)] + public void SpanWrite(long y) + { + Span span = stackalloc byte[sizeof(long)]; + WritePackedLong(span, y); + } + + [ArgumentsSource(nameof(Numbers))] + [Benchmark] + public void SpanWriteStartHigh(long y) + { + Span span = stackalloc byte[sizeof(long)]; + WritePackedLongStartHigh(span, y); + } + + public static void WritePackedLong(Span span, long y) + { + if (y < 0) + { + y = -y; + + span[0] = (byte)y; + span[1] = (byte)(y >>= 8); + span[2] = (byte)(y >>= 8); + span[3] = (byte)(y >>= 8); + span[4] = (byte)(y >>= 8); + span[5] = (byte)(y >>= 8); + span[6] = (byte)(y >>= 8); + span[7] = (byte)((y >> 8) | 0x80); + } + else + { + span[0] = (byte)y; + span[1] = (byte)(y >>= 8); + span[2] = (byte)(y >>= 8); + span[3] = (byte)(y >>= 8); + span[4] = (byte)(y >>= 8); + span[5] = (byte)(y >>= 8); + span[6] = (byte)(y >>= 8); + span[7] = (byte)(y >> 
8); + } + } + + public static void WritePackedLongStartHigh(Span span, long y) + { + if (y < 0) + { + y = -y; + span[7] = (byte)((y >> 56) | 0x80); + } + else + { + span[7] = (byte)(y >> 56); + } + + span[6] = (byte)(y >> 48); + span[5] = (byte)(y >> 40); + span[4] = (byte)(y >> 32); + span[3] = (byte)(y >> 24); + span[2] = (byte)(y >> 16); + span[1] = (byte)(y >> 8); + span[0] = (byte)y; + } + } +} From ba78a607b42aa3e8d56a379b24e93561dc749c3b Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 07:07:22 -0500 Subject: [PATCH 287/325] Add sqq_table to SqrtBenchmarks in case it's not defined in Utils --- bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs index b1a864d..c794e91 100644 --- a/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs +++ b/bench/DeltaQ.Benchmarks/SqrtBenchmarks.cs @@ -133,5 +133,27 @@ private static int ss_isqrt(int x) return y; } } + + private static readonly Idx[] sqq_table_array = new[] + { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, + 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, + 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, + 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, + 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, + 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, + 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 
218, 218, 219, 219, 220, 221, + 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, + 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, + 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, + 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 + }; + internal static ReadOnlySpan sqq_table => sqq_table_array; + } } From 2ad17d798acb53b9219c5a9f57ec75403de590f9 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 07:16:05 -0500 Subject: [PATCH 288/325] Start from high end in WritePackedLong to skip bounds check --- src/DeltaQ.BsDiff/SpanExtensions.cs | 37 +++++++++++++---------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/DeltaQ.BsDiff/SpanExtensions.cs b/src/DeltaQ.BsDiff/SpanExtensions.cs index 016dd75..b6b3a10 100644 --- a/src/DeltaQ.BsDiff/SpanExtensions.cs +++ b/src/DeltaQ.BsDiff/SpanExtensions.cs @@ -9,29 +9,26 @@ internal static class SpanExtensions { public static void WritePackedLong(this Span span, long y) { - if (y < 0) + // Write to highest index first so the JIT skips bounds checks on subsequent writes. 
+ unchecked { - y = -y; + if (y < 0) + { + y = -y; + span[7] = (byte)((y >> 56) | 0x80); + } + else + { + span[7] = (byte)(y >> 56); + } + span[6] = (byte)(y >> 48); + span[5] = (byte)(y >> 40); + span[4] = (byte)(y >> 32); + span[3] = (byte)(y >> 24); + span[2] = (byte)(y >> 16); + span[1] = (byte)(y >> 8); span[0] = (byte)y; - span[1] = (byte)(y >>= 8); - span[2] = (byte)(y >>= 8); - span[3] = (byte)(y >>= 8); - span[4] = (byte)(y >>= 8); - span[5] = (byte)(y >>= 8); - span[6] = (byte)(y >>= 8); - span[7] = (byte)((y >> 8) | 0x80); - } - else - { - span[0] = (byte)y; - span[1] = (byte)(y >>= 8); - span[2] = (byte)(y >>= 8); - span[3] = (byte)(y >>= 8); - span[4] = (byte)(y >>= 8); - span[5] = (byte)(y >>= 8); - span[6] = (byte)(y >>= 8); - span[7] = (byte)(y >> 8); } } From 71c03b32ba15d9182a78350df71681f5a3905662 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 09:22:40 -0500 Subject: [PATCH 289/325] Use SharpZipLib for Bzip2 support in BsDiff --- src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs | 193 -- src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs | 71 - src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs | 1090 --------- src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs | 2008 ----------------- .../Bzip2/Checksums/IChecksum.cs | 93 - .../Bzip2/Checksums/StrangeCrc.cs | 192 -- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 4 + src/DeltaQ.BsDiff/Diff.cs | 245 +- 8 files changed, 120 insertions(+), 3776 deletions(-) delete mode 100644 src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs delete mode 100644 src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs delete mode 100644 src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs delete mode 100644 src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs delete mode 100644 src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs delete mode 100644 src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs deleted file mode 100644 index 250786f..0000000 --- 
a/src/DeltaQ.BsDiff/Bzip2/BZip2Constants.cs +++ /dev/null @@ -1,193 +0,0 @@ -// BZip2Constants.cs -// Copyright (C) 2001 Mike Krueger -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. -// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. 
- -namespace bz2core -{ - - /// - /// Defines internal values for both compression and decompression - /// - internal static class BZip2Constants - { - /// - /// Random numbers used to randomise repetitive blocks - /// - public readonly static int[] RandomNumbers = { - 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, - 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, - 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, - 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, - 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, - 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, - 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, - 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, - 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, - 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, - 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, - 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, - 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, - 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, - 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, - 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, - 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, - 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, - 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, - 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, - 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, - 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, - 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, - 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, - 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, - 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, - 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, - 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, - 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, - 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, - 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, - 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, - 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, - 978, 321, 576, 617, 626, 502, 
894, 679, 243, 440, - 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, - 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, - 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, - 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, - 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, - 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, - 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, - 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, - 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, - 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, - 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, - 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, - 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, - 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, - 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, - 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, - 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, - 936, 638 - }; - - /// - /// When multiplied by compression parameter (1-9) gives the block size for compression - /// 9 gives the best compression but uses the most memory. 
- /// - public const int BaseBlockSize = 100000; - - /// - /// Backend constant - /// - public const int MaximumAlphaSize = 258; - - /// - /// Backend constant - /// - public const int MaximumCodeLength = 23; - - /// - /// Backend constant - /// - public const int RunA = 0; - - /// - /// Backend constant - /// - public const int RunB = 1; - - /// - /// Backend constant - /// - public const int GroupCount = 6; - - /// - /// Backend constant - /// - public const int GroupSize = 50; - - /// - /// Backend constant - /// - public const int NumberOfIterations = 4; - - /// - /// Backend constant - /// - public const int MaximumSelectors = (2 + (900000 / GroupSize)); - - /// - /// Backend constant - /// - public const int OvershootBytes = 20; - } -} - -/* This file was derived from a file containing this license: - * - * This file is a part of bzip2 and/or libbzip2, a program and - * library for lossless, block-sorting data compression. - * - * Copyright (C) 1996-1998 Julian R Seward. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. The origin of this software must not be misrepresented; you must - * not claim that you wrote the original software. If you use this - * software in a product, an acknowledgment in the product - * documentation would be appreciated but is not required. - * - * 3. Altered source versions must be plainly marked as such, and must - * not be misrepresented as being the original software. - * - * 4. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Java version ported by Keiron Liddle, Aftex Software 1999-2001 - */ diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs deleted file mode 100644 index 3684eee..0000000 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2Exception.cs +++ /dev/null @@ -1,71 +0,0 @@ -// BZip2.cs -// -// Copyright 2004 John Reilly -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. 
Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. -// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. - -using System; - -namespace bz2core -{ - /// - /// BZip2Exception represents exceptions specific to Bzip2 algorithm - /// - public class BZip2Exception : Exception - { - /// - /// Initialise a new instance of BZip2Exception. - /// - public BZip2Exception() - { - } - - /// - /// Initialise a new instance of BZip2Exception with its message set to message. - /// - /// The message describing the error. - public BZip2Exception(string message) - : base(message) - { - } - - /// - /// Initialise an instance of BZip2Exception - /// - /// A message describing the error. - /// The exception that is the cause of the current exception. 
- public BZip2Exception(string message, Exception exception) - : base(message, exception) - { - } - } -} diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs deleted file mode 100644 index 24537cb..0000000 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2InputStream.cs +++ /dev/null @@ -1,1090 +0,0 @@ -// BZip2InputStream.cs -// -// Copyright (C) 2001 Mike Krueger -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. -// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. 
If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. - -using System; -using System.IO; -using bz2core.Checksums; - -namespace bz2core -{ - - /// - /// An input stream that decompresses files in the BZip2 format - /// - public class BZip2InputStream : Stream - { - #region Constants - const int START_BLOCK_STATE = 1; - const int RAND_PART_A_STATE = 2; - const int RAND_PART_B_STATE = 3; - const int RAND_PART_C_STATE = 4; - const int NO_RAND_PART_A_STATE = 5; - const int NO_RAND_PART_B_STATE = 6; - const int NO_RAND_PART_C_STATE = 7; - #endregion - #region Constructors - /// - /// Construct instance for reading from stream - /// - /// Data source - public BZip2InputStream(Stream stream) - { - IsStreamOwner = true; - // init arrays - for (var i = 0; i < BZip2Constants.GroupCount; ++i) - { - limit[i] = new int[BZip2Constants.MaximumAlphaSize]; - baseArray[i] = new int[BZip2Constants.MaximumAlphaSize]; - perm[i] = new int[BZip2Constants.MaximumAlphaSize]; - } - - BsSetStream(stream); - Initialize(); - InitBlock(); - SetupBlock(); - } - - #endregion - - /// - /// Get/set flag indicating ownership of underlying stream. - /// - public bool IsStreamOwner { get; set; } - - #region Stream Overrides - /// - /// Gets a value indicating if the stream supports reading - /// - public override bool CanRead - { - get - { - return baseStream.CanRead; - } - } - - /// - /// Gets a value indicating whether the current stream supports seeking. - /// - public override bool CanSeek - { - get - { - return baseStream.CanSeek; - } - } - - /// - /// Gets a value indicating whether the current stream supports writing. - /// This property always returns false - /// - public override bool CanWrite - { - get - { - return false; - } - } - - /// - /// Gets the length in bytes of the stream. 
- /// - public override long Length - { - get - { - return baseStream.Length; - } - } - - /// - /// Gets or sets the streams position. - /// Setting the position is not supported and will throw a NotSupportException - /// - /// Any attempt to set the position - public override long Position - { - get - { - return baseStream.Position; - } - set - { - throw new NotSupportedException("BZip2InputStream position cannot be set"); - } - } - - /// - /// Flushes the stream. - /// - public override void Flush() - { - if (baseStream != null) - { - baseStream.Flush(); - } - } - - /// - /// Set the streams position. This operation is not supported and will throw a NotSupportedException - /// - /// A byte offset relative to the parameter. - /// A value of type indicating the reference point used to obtain the new position. - /// The new position of the stream. - /// Any access - public override long Seek(long offset, SeekOrigin origin) - { - throw new NotSupportedException("BZip2InputStream Seek not supported"); - } - - /// - /// Sets the length of this stream to the given value. - /// This operation is not supported and will throw a NotSupportedExceptionortedException - /// - /// The new length for the stream. - /// Any access - public override void SetLength(long value) - { - throw new NotSupportedException("BZip2InputStream SetLength not supported"); - } - - /// - /// Writes a block of bytes to this stream using data from a buffer. - /// This operation is not supported and will throw a NotSupportedException - /// - /// The buffer to source data from. - /// The offset to start obtaining data from. - /// The number of bytes of data to write. - /// Any access - public override void Write(byte[] buffer, int offset, int count) - { - throw new NotSupportedException("BZip2InputStream Write not supported"); - } - - /// - /// Writes a byte to the current position in the file stream. 
- /// This operation is not supported and will throw a NotSupportedException - /// - /// The value to write. - /// Any access - public override void WriteByte(byte value) - { - throw new NotSupportedException("BZip2InputStream WriteByte not supported"); - } - - /// - /// Read a sequence of bytes and advances the read position by one byte. - /// - /// Array of bytes to store values in - /// Offset in array to begin storing data - /// The maximum number of bytes to read - /// The total number of bytes read into the buffer. This might be less - /// than the number of bytes requested if that number of bytes are not - /// currently available or zero if the end of the stream is reached. - /// - public override int Read(byte[] buffer, int offset, int count) - { - if (buffer == null) - { - throw new ArgumentNullException(nameof(buffer)); - } - - for (var i = 0; i < count; ++i) - { - var rb = ReadByte(); - if (rb == -1) - { - return i; - } - buffer[offset + i] = (byte)rb; - } - return count; - } - - /// - /// Read a byte from stream advancing position - /// - /// byte read or -1 on end of stream - public override int ReadByte() - { - if (streamEnd) - { - return -1; // ok - } - - var retChar = currentChar; - switch (currentState) - { - case RAND_PART_B_STATE: - SetupRandPartB(); - break; - case RAND_PART_C_STATE: - SetupRandPartC(); - break; - case NO_RAND_PART_B_STATE: - SetupNoRandPartB(); - break; - case NO_RAND_PART_C_STATE: - SetupNoRandPartC(); - break; - case START_BLOCK_STATE: - case NO_RAND_PART_A_STATE: - case RAND_PART_A_STATE: - break; - } - return retChar; - } - - #endregion - - void MakeMaps() - { - nInUse = 0; - for (var i = 0; i < 256; ++i) - { - if (inUse[i]) - { - seqToUnseq[nInUse] = (byte)i; - unseqToSeq[i] = (byte)nInUse; - nInUse++; - } - } - } - - void Initialize() - { - var magic1 = BsGetUChar(); - var magic2 = BsGetUChar(); - - var magic3 = BsGetUChar(); - var magic4 = BsGetUChar(); - - if (magic1 == 'B' && magic2 == 'Z' && magic3 == 'h' && magic4 >= 
'1' && magic4 <= '9') - { - SetDecompressStructureSizes(magic4 - '0'); - computedCombinedCRC = 0; - } - else - { - streamEnd = true; - } - } - - void InitBlock() - { - var magic1 = BsGetUChar(); - var magic2 = BsGetUChar(); - var magic3 = BsGetUChar(); - var magic4 = BsGetUChar(); - var magic5 = BsGetUChar(); - var magic6 = BsGetUChar(); - - if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) - { - Complete(); - return; - } - - if (magic1 == 0x31 && magic2 == 0x41 && magic3 == 0x59 && magic4 == 0x26 && magic5 == 0x53 && magic6 == 0x59) - { - storedBlockCRC = BsGetInt32(); - - blockRandomised = (BsR(1) == 1); - - GetAndMoveToFrontDecode(); - - mCrc.Reset(); - currentState = START_BLOCK_STATE; - } - else - { - BadBlockHeader(); - streamEnd = true; - } - } - - void EndBlock() - { - computedBlockCRC = (int)mCrc.Value; - - // -- A bad CRC is considered a fatal error. -- - if (storedBlockCRC != computedBlockCRC) - { - CrcError(); - } - - // 1528150659 - computedCombinedCRC = ((computedCombinedCRC << 1) & 0xFFFFFFFF) | (computedCombinedCRC >> 31); - computedCombinedCRC = computedCombinedCRC ^ (uint)computedBlockCRC; - } - - void Complete() - { - storedCombinedCRC = BsGetInt32(); - if (storedCombinedCRC != (int)computedCombinedCRC) - { - CrcError(); - } - - streamEnd = true; - } - - void BsSetStream(Stream stream) - { - baseStream = stream; - bsLive = 0; - bsBuff = 0; - } - - void FillBuffer() - { - var thech = 0; - - try - { - thech = baseStream.ReadByte(); - } - catch (Exception) //??? ! 
- { - CompressedStreamEOF(); - } - - if (thech == -1) - { - CompressedStreamEOF(); - } - - bsBuff = (bsBuff << 8) | (thech & 0xFF); - bsLive += 8; - } - - int BsR(int n) - { - while (bsLive < n) - { - FillBuffer(); - } - - var v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1); - bsLive -= n; - return v; - } - - char BsGetUChar() - { - return (char)BsR(8); - } - - int BsGetIntVS(int numBits) - { - return BsR(numBits); - } - - int BsGetInt32() - { - var result = BsR(8); - result = (result << 8) | BsR(8); - result = (result << 8) | BsR(8); - result = (result << 8) | BsR(8); - return result; - } - - void RecvDecodingTables() - { - var len = new char[BZip2Constants.GroupCount][]; - for (var i = 0; i < BZip2Constants.GroupCount; ++i) - { - len[i] = new char[BZip2Constants.MaximumAlphaSize]; - } - - var inUse16 = new bool[16]; - - //--- Receive the mapping table --- - for (var i = 0; i < 16; i++) - { - inUse16[i] = (BsR(1) == 1); - } - - for (var i = 0; i < 16; i++) - { - if (inUse16[i]) - { - for (var j = 0; j < 16; j++) - { - inUse[i * 16 + j] = (BsR(1) == 1); - } - } - else - { - for (var j = 0; j < 16; j++) - { - inUse[i * 16 + j] = false; - } - } - } - - MakeMaps(); - var alphaSize = nInUse + 2; - - //--- Now the selectors --- - var nGroups = BsR(3); - var nSelectors = BsR(15); - - for (var i = 0; i < nSelectors; i++) - { - var j = 0; - while (BsR(1) == 1) - { - j++; - } - selectorMtf[i] = (byte)j; - } - - //--- Undo the MTF values for the selectors. 
--- - var pos = new byte[BZip2Constants.GroupCount]; - for (var v = 0; v < nGroups; v++) - { - pos[v] = (byte)v; - } - - for (var i = 0; i < nSelectors; i++) - { - int v = selectorMtf[i]; - var tmp = pos[v]; - while (v > 0) - { - pos[v] = pos[v - 1]; - v--; - } - pos[0] = tmp; - selector[i] = tmp; - } - - //--- Now the coding tables --- - for (var t = 0; t < nGroups; t++) - { - var curr = BsR(5); - for (var i = 0; i < alphaSize; i++) - { - while (BsR(1) == 1) - { - if (BsR(1) == 0) - { - curr++; - } - else - { - curr--; - } - } - len[t][i] = (char)curr; - } - } - - //--- Create the Huffman decoding tables --- - for (var t = 0; t < nGroups; t++) - { - var minLen = 32; - var maxLen = 0; - for (var i = 0; i < alphaSize; i++) - { - maxLen = Math.Max(maxLen, len[t][i]); - minLen = Math.Min(minLen, len[t][i]); - } - HbCreateDecodeTables(limit[t], baseArray[t], perm[t], len[t], minLen, maxLen, alphaSize); - minLens[t] = minLen; - } - } - - void GetAndMoveToFrontDecode() - { - var yy = new byte[256]; - - var limitLast = BZip2Constants.BaseBlockSize * blockSize100k; - origPtr = BsGetIntVS(24); - - RecvDecodingTables(); - var EOB = nInUse + 1; - var groupNo = -1; - var groupPos = 0; - - /*-- - Setting up the unzftab entries here is not strictly - necessary, but it does save having to do it later - in a separate pass, and so saves a block's worth of - cache misses. 
- --*/ - for (var i = 0; i <= 255; i++) - { - unzftab[i] = 0; - } - - for (var i = 0; i <= 255; i++) - { - yy[i] = (byte)i; - } - - last = -1; - - if (groupPos == 0) - { - groupNo++; - groupPos = BZip2Constants.GroupSize; - } - - groupPos--; - int zt = selector[groupNo]; - var zn = minLens[zt]; - var zvec = BsR(zn); - int zj; - - while (zvec > limit[zt][zn]) - { - if (zn > 20) - { // the longest code - throw new BZip2Exception("Bzip data error"); - } - zn++; - while (bsLive < 1) - { - FillBuffer(); - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - zvec = (zvec << 1) | zj; - } - if (zvec - baseArray[zt][zn] < 0 || zvec - baseArray[zt][zn] >= BZip2Constants.MaximumAlphaSize) - { - throw new BZip2Exception("Bzip data error"); - } - var nextSym = perm[zt][zvec - baseArray[zt][zn]]; - - while (true) - { - if (nextSym == EOB) - { - break; - } - - if (nextSym == BZip2Constants.RunA || nextSym == BZip2Constants.RunB) - { - var s = -1; - var n = 1; - do - { - if (nextSym == BZip2Constants.RunA) - { - s += (0 + 1) * n; - } - else if (nextSym == BZip2Constants.RunB) - { - s += (1 + 1) * n; - } - - n <<= 1; - - if (groupPos == 0) - { - groupNo++; - groupPos = BZip2Constants.GroupSize; - } - - groupPos--; - - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = BsR(zn); - - while (zvec > limit[zt][zn]) - { - zn++; - while (bsLive < 1) - { - FillBuffer(); - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - baseArray[zt][zn]]; - } while (nextSym == BZip2Constants.RunA || nextSym == BZip2Constants.RunB); - - s++; - var ch = seqToUnseq[yy[0]]; - unzftab[ch] += s; - - while (s > 0) - { - last++; - ll8[last] = ch; - s--; - } - - if (last >= limitLast) - { - BlockOverrun(); - } - } - else - { - last++; - if (last >= limitLast) - { - BlockOverrun(); - } - - var tmp = yy[nextSym - 1]; - unzftab[seqToUnseq[tmp]]++; - ll8[last] = seqToUnseq[tmp]; - - for (var j = nextSym - 1; j > 0; --j) - { - yy[j] = yy[j - 1]; - } - yy[0] 
= tmp; - - if (groupPos == 0) - { - groupNo++; - groupPos = BZip2Constants.GroupSize; - } - - groupPos--; - zt = selector[groupNo]; - zn = minLens[zt]; - zvec = BsR(zn); - while (zvec > limit[zt][zn]) - { - zn++; - while (bsLive < 1) - { - FillBuffer(); - } - zj = (bsBuff >> (bsLive - 1)) & 1; - bsLive--; - zvec = (zvec << 1) | zj; - } - nextSym = perm[zt][zvec - baseArray[zt][zn]]; - } - } - } - - void SetupBlock() - { - var cftab = new int[257]; - - cftab[0] = 0; - Array.Copy(unzftab, 0, cftab, 1, 256); - - for (var i = 1; i <= 256; i++) - { - cftab[i] += cftab[i - 1]; - } - - for (var i = 0; i <= last; i++) - { - var ch = ll8[i]; - tt[cftab[ch]] = i; - cftab[ch]++; - } - - tPos = tt[origPtr]; - - count = 0; - i2 = 0; - ch2 = 256; /*-- not a char and not EOF --*/ - - if (blockRandomised) - { - rNToGo = 0; - rTPos = 0; - SetupRandPartA(); - } - else - { - SetupNoRandPartA(); - } - } - - void SetupRandPartA() - { - if (i2 <= last) - { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) - { - rNToGo = BZip2Constants.RandomNumbers[rTPos]; - rTPos++; - if (rTPos == 512) - { - rTPos = 0; - } - } - rNToGo--; - ch2 ^= (rNToGo == 1) ? 1 : 0; - i2++; - - currentChar = ch2; - currentState = RAND_PART_B_STATE; - mCrc.Update(ch2); - } - else - { - EndBlock(); - InitBlock(); - SetupBlock(); - } - } - - void SetupNoRandPartA() - { - if (i2 <= last) - { - chPrev = ch2; - ch2 = ll8[tPos]; - tPos = tt[tPos]; - i2++; - - currentChar = ch2; - currentState = NO_RAND_PART_B_STATE; - mCrc.Update(ch2); - } - else - { - EndBlock(); - InitBlock(); - SetupBlock(); - } - } - - void SetupRandPartB() - { - if (ch2 != chPrev) - { - currentState = RAND_PART_A_STATE; - count = 1; - SetupRandPartA(); - } - else - { - count++; - if (count >= 4) - { - z = ll8[tPos]; - tPos = tt[tPos]; - if (rNToGo == 0) - { - rNToGo = BZip2Constants.RandomNumbers[rTPos]; - rTPos++; - if (rTPos == 512) - { - rTPos = 0; - } - } - rNToGo--; - z ^= (byte)((rNToGo == 1) ? 
1 : 0); - j2 = 0; - currentState = RAND_PART_C_STATE; - SetupRandPartC(); - } - else - { - currentState = RAND_PART_A_STATE; - SetupRandPartA(); - } - } - } - - void SetupRandPartC() - { - if (j2 < z) - { - currentChar = ch2; - mCrc.Update(ch2); - j2++; - } - else - { - currentState = RAND_PART_A_STATE; - i2++; - count = 0; - SetupRandPartA(); - } - } - - void SetupNoRandPartB() - { - if (ch2 != chPrev) - { - currentState = NO_RAND_PART_A_STATE; - count = 1; - SetupNoRandPartA(); - } - else - { - count++; - if (count >= 4) - { - z = ll8[tPos]; - tPos = tt[tPos]; - currentState = NO_RAND_PART_C_STATE; - j2 = 0; - SetupNoRandPartC(); - } - else - { - currentState = NO_RAND_PART_A_STATE; - SetupNoRandPartA(); - } - } - } - - void SetupNoRandPartC() - { - if (j2 < z) - { - currentChar = ch2; - mCrc.Update(ch2); - j2++; - } - else - { - currentState = NO_RAND_PART_A_STATE; - i2++; - count = 0; - SetupNoRandPartA(); - } - } - - void SetDecompressStructureSizes(int newSize100k) - { - if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) - { - throw new BZip2Exception("Invalid block size"); - } - - blockSize100k = newSize100k; - - if (newSize100k == 0) - { - return; - } - - var n = BZip2Constants.BaseBlockSize * newSize100k; - ll8 = new byte[n]; - tt = new int[n]; - } - - static void CompressedStreamEOF() - { - throw new EndOfStreamException("BZip2 input stream end of compressed stream"); - } - - static void BlockOverrun() - { - throw new BZip2Exception("BZip2 input stream block overrun"); - } - - static void BadBlockHeader() - { - throw new BZip2Exception("BZip2 input stream bad block header"); - } - - static void CrcError() - { - throw new BZip2Exception("BZip2 input stream crc error"); - } - - static void HbCreateDecodeTables(int[] limit, int[] baseArray, int[] perm, char[] length, int minLen, int maxLen, int alphaSize) - { - var pp = 0; - - for (var i = minLen; i <= maxLen; ++i) - { - for (var j = 0; j < alphaSize; ++j) - { - if 
(length[j] == i) - { - perm[pp] = j; - ++pp; - } - } - } - - for (var i = 0; i < BZip2Constants.MaximumCodeLength; i++) - { - baseArray[i] = 0; - } - - for (var i = 0; i < alphaSize; i++) - { - ++baseArray[length[i] + 1]; - } - - for (var i = 1; i < BZip2Constants.MaximumCodeLength; i++) - { - baseArray[i] += baseArray[i - 1]; - } - - for (var i = 0; i < BZip2Constants.MaximumCodeLength; i++) - { - limit[i] = 0; - } - - var vec = 0; - - for (var i = minLen; i <= maxLen; i++) - { - vec += (baseArray[i + 1] - baseArray[i]); - limit[i] = vec - 1; - vec <<= 1; - } - - for (var i = minLen + 1; i <= maxLen; i++) - { - baseArray[i] = ((limit[i - 1] + 1) << 1) - baseArray[i]; - } - } - - #region Instance Fields - /*-- - index of the last char in the block, so - the block size == last + 1. - --*/ - int last; - - /*-- - index in zptr[] of original string after sorting. - --*/ - int origPtr; - - /*-- - always: in the range 0 .. 9. - The current block size is 100000 * this number. - --*/ - int blockSize100k; - - bool blockRandomised; - - int bsBuff; - int bsLive; - readonly IChecksum mCrc = new StrangeCRC(); - - readonly bool[] inUse = new bool[256]; - int nInUse; - - readonly byte[] seqToUnseq = new byte[256]; - readonly byte[] unseqToSeq = new byte[256]; - - readonly byte[] selector = new byte[BZip2Constants.MaximumSelectors]; - readonly byte[] selectorMtf = new byte[BZip2Constants.MaximumSelectors]; - - int[] tt; - byte[] ll8; - - /*-- - freq table collected to save a pass over the data - during decompression. 
- --*/ - readonly int[] unzftab = new int[256]; - - readonly int[][] limit = new int[BZip2Constants.GroupCount][]; - readonly int[][] baseArray = new int[BZip2Constants.GroupCount][]; - readonly int[][] perm = new int[BZip2Constants.GroupCount][]; - readonly int[] minLens = new int[BZip2Constants.GroupCount]; - - Stream baseStream; - bool streamEnd; - - int currentChar = -1; - - int currentState = START_BLOCK_STATE; - - int storedBlockCRC, storedCombinedCRC; - int computedBlockCRC; - uint computedCombinedCRC; - - int count, chPrev, ch2; - int tPos; - int rNToGo; - int rTPos; - int i2, j2; - byte z; - - #endregion - } -} -/* This file was derived from a file containing this license: - * - * This file is a part of bzip2 and/or libbzip2, a program and - * library for lossless, block-sorting data compression. - * - * Copyright (C) 1996-1998 Julian R Seward. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. The origin of this software must not be misrepresented; you must - * not claim that you wrote the original software. If you use this - * software in a product, an acknowledgment in the product - * documentation would be appreciated but is not required. - * - * 3. Altered source versions must be plainly marked as such, and must - * not be misrepresented as being the original software. - * - * 4. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Java version ported by Keiron Liddle, Aftex Software 1999-2001 - */ diff --git a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs b/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs deleted file mode 100644 index 7d2dfbe..0000000 --- a/src/DeltaQ.BsDiff/Bzip2/BZip2OutputStream.cs +++ /dev/null @@ -1,2008 +0,0 @@ -// BZip2OutputStream.cs -// -// Copyright (C) 2001 Mike Krueger -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. 
-// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. - -using System; -using System.IO; -using bz2core.Checksums; - -namespace bz2core -{ - - // TODO: Update to BZip2 1.0.1, 1.0.2 - - /// - /// An output stream that compresses into the BZip2 format - /// including file header chars into another stream. - /// - public class BZip2OutputStream : Stream - { - #region Constants - const int SETMASK = (1 << 21); - const int CLEARMASK = (~SETMASK); - const int GREATER_ICOST = 15; - const int LESSER_ICOST = 0; - const int SMALL_THRESH = 20; - const int DEPTH_THRESH = 10; - - /*-- - If you are ever unlucky/improbable enough - to get a stack overflow whilst sorting, - increase the following constant and try - again. In practice I have never seen the - stack go above 27 elems, so the following - limit seems very generous. - --*/ - const int QSORT_STACK_SIZE = 1000; - - /*-- - Knuth's increments seem to work better - than Incerpi-Sedgewick here. Possibly - because the number of elems to sort is - usually small, typically <= 20. 
- --*/ - readonly int[] increments = - { - 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 - }; - #endregion - - #region Constructors - /// - /// Construct a default output stream with maximum block size - /// - /// The stream to write BZip data onto. - public BZip2OutputStream(Stream stream) - : this(stream, 9) - { - } - - /// - /// Initialise a new instance of the - /// for the specified stream, using the given blocksize. - /// - /// The stream to write compressed data to. - /// The block size to use. - /// - /// Valid block sizes are in the range 1..9, with 1 giving - /// the lowest compression and 9 the highest. - /// - public BZip2OutputStream(Stream stream, int blockSize) - { - IsStreamOwner = true; - BsSetStream(stream); - - workFactor = 50; - if (blockSize > 9) - { - blockSize = 9; - } - - if (blockSize < 1) - { - blockSize = 1; - } - blockSize100k = blockSize; - AllocateCompressStructures(); - Initialize(); - InitBlock(); - } - #endregion - - #region Destructor - /// - /// Ensures that resources are freed and other cleanup operations - /// are performed when the garbage collector reclaims the BZip2OutputStream. - /// - ~BZip2OutputStream() - { - Dispose(false); - } - #endregion - - /// - /// Get/set flag indicating ownership of underlying stream. 
- /// - public bool IsStreamOwner { get; set; } - - #region Stream overrides - /// - /// Gets a value indicating whether the current stream supports reading - /// - public override bool CanRead - { - get - { - return false; - } - } - - /// - /// Gets a value indicating whether the current stream supports seeking - /// - public override bool CanSeek - { - get - { - return false; - } - } - - /// - /// Gets a value indicating whether the current stream supports writing - /// - public override bool CanWrite - { - get - { - return baseStream.CanWrite; - } - } - - /// - /// Gets the length in bytes of the stream - /// - public override long Length - { - get - { - return baseStream.Length; - } - } - - /// - /// Gets or sets the current position of this stream. - /// - public override long Position - { - get - { - return baseStream.Position; - } - set - { - throw new NotSupportedException("BZip2OutputStream position cannot be set"); - } - } - - /// - /// Sets the current position of this stream to the given value. - /// - /// The point relative to the offset from which to being seeking. - /// The reference point from which to begin seeking. - /// The new position in the stream. - public override long Seek(long offset, SeekOrigin origin) - { - throw new NotSupportedException("BZip2OutputStream Seek not supported"); - } - - /// - /// Sets the length of this stream to the given value. - /// - /// The new stream length. - public override void SetLength(long value) - { - throw new NotSupportedException("BZip2OutputStream SetLength not supported"); - } - - /// - /// Read a byte from the stream advancing the position. - /// - /// The byte read cast to an int; -1 if end of stream. - public override int ReadByte() - { - throw new NotSupportedException("BZip2OutputStream ReadByte not supported"); - } - - /// - /// Read a block of bytes - /// - /// The buffer to read into. - /// The offset in the buffer to start storing data at. - /// The maximum number of bytes to read. 
- /// The total number of bytes read. This might be less than the number of bytes - /// requested if that number of bytes are not currently available, or zero - /// if the end of the stream is reached. - public override int Read(byte[] buffer, int offset, int count) - { - throw new NotSupportedException("BZip2OutputStream Read not supported"); - } - - /// - /// Write a block of bytes to the stream - /// - /// The buffer containing data to write. - /// The offset of the first byte to write. - /// The number of bytes to write. - public override void Write(byte[] buffer, int offset, int count) - { - if (buffer == null) - { - throw new ArgumentNullException(nameof(buffer)); - } - - if (offset < 0) - { - throw new ArgumentOutOfRangeException(nameof(offset)); - } - - if (count < 0) - { - throw new ArgumentOutOfRangeException(nameof(count)); - } - - if (buffer.Length - offset < count) - { - throw new ArgumentException("Offset/count out of range"); - } - - for (var i = 0; i < count; ++i) - { - WriteByte(buffer[offset + i]); - } - } - - /// - /// Write a byte to the stream. - /// - /// The byte to write to the stream. - public override void WriteByte(byte value) - { - var b = (256 + value) % 256; - if (currentChar != -1) - { - if (currentChar == b) - { - runLength++; - if (runLength > 254) - { - WriteRun(); - currentChar = -1; - runLength = 0; - } - } - else - { - WriteRun(); - runLength = 1; - currentChar = b; - } - } - else - { - currentChar = b; - runLength++; - } - } - - #endregion - void MakeMaps() - { - nInUse = 0; - for (var i = 0; i < 256; i++) - { - if (inUse[i]) - { - seqToUnseq[nInUse] = (char)i; - unseqToSeq[i] = (char)nInUse; - nInUse++; - } - } - } - - /// - /// Get the number of bytes written to output. 
- /// - void WriteRun() - { - if (last < allowableBlockSize) - { - inUse[currentChar] = true; - for (var i = 0; i < runLength; i++) - { - mCrc.Update(currentChar); - } - - switch (runLength) - { - case 1: - block[++last + 1] = (byte)currentChar; - break; - case 2: - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - break; - case 3: - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - break; - default: - inUse[runLength - 4] = true; - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)currentChar; - block[++last + 1] = (byte)(runLength - 4); - break; - } - } - else - { - EndBlock(); - InitBlock(); - WriteRun(); - } - } - - /// - /// Get the number of bytes written to the output. - /// - public int BytesWritten - { - get { return bytesOut; } - } - - /// - /// Releases the unmanaged resources used by the and optionally releases the managed resources. - /// - /// true to release both managed and unmanaged resources; false to release only unmanaged resources. - override protected void Dispose(bool disposing) - { - if (IsStreamOwner) - base.Dispose(disposing); - - if (!disposed_) - { - disposed_ = true; - - if (runLength > 0) - { - WriteRun(); - } - - currentChar = -1; - EndBlock(); - EndCompression(); - Flush(); - } - } - - /// - /// Flush output buffers - /// - public override void Flush() - { - baseStream.Flush(); - } - - void Initialize() - { - bytesOut = 0; - nBlocksRandomised = 0; - - /*--- Write header `magic' bytes indicating file-format == huffmanised, - followed by a digit indicating blockSize100k. 
- ---*/ - - BsPutUChar('B'); - BsPutUChar('Z'); - - BsPutUChar('h'); - BsPutUChar('0' + blockSize100k); - - combinedCRC = 0; - } - - void InitBlock() - { - mCrc.Reset(); - last = -1; - - for (var i = 0; i < 256; i++) - { - inUse[i] = false; - } - - /*--- 20 is just a paranoia constant ---*/ - allowableBlockSize = BZip2Constants.BaseBlockSize * blockSize100k - 20; - } - - void EndBlock() - { - if (last < 0) - { // dont do anything for empty files, (makes empty files compatible with original Bzip) - return; - } - - blockCRC = unchecked((uint)mCrc.Value); - combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31); - combinedCRC ^= blockCRC; - - /*-- sort the block and establish position of original string --*/ - DoReversibleTransformation(); - - /*-- - A 6-byte block header, the value chosen arbitrarily - as 0x314159265359 :-). A 32 bit value does not really - give a strong enough guarantee that the value will not - appear by chance in the compressed datastream. Worst-case - probability of this event, for a 900k block, is about - 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. - For a compressed file of size 100Gb -- about 100000 blocks -- - only a 48-bit marker will do. NB: normal compression/ - decompression do *not* rely on these statistical properties. - They are only important when trying to recover blocks from - damaged files. - --*/ - BsPutUChar(0x31); - BsPutUChar(0x41); - BsPutUChar(0x59); - BsPutUChar(0x26); - BsPutUChar(0x53); - BsPutUChar(0x59); - - /*-- Now the block's CRC, so it is in a known place. --*/ - unchecked - { - BsPutInt((int)blockCRC); - } - - /*-- Now a single bit indicating randomisation. --*/ - if (blockRandomised) - { - BsW(1, 1); - nBlocksRandomised++; - } - else - { - BsW(1, 0); - } - - /*-- Finally, block's contents proper. --*/ - MoveToFrontCodeAndSend(); - } - - void EndCompression() - { - /*-- - Now another magic 48-bit number, 0x177245385090, to - indicate the end of the last block. (sqrt(pi), if - you want to know. 
I did want to use e, but it contains - too much repetition -- 27 18 28 18 28 46 -- for me - to feel statistically comfortable. Call me paranoid.) - --*/ - BsPutUChar(0x17); - BsPutUChar(0x72); - BsPutUChar(0x45); - BsPutUChar(0x38); - BsPutUChar(0x50); - BsPutUChar(0x90); - - unchecked - { - BsPutInt((int)combinedCRC); - } - - BsFinishedWithStream(); - } - - void BsSetStream(Stream stream) - { - baseStream = stream; - bsLive = 0; - bsBuff = 0; - bytesOut = 0; - } - - void BsFinishedWithStream() - { - while (bsLive > 0) - { - baseStream.WriteByte((byte)(bsBuff >> 24)); // write 8-bit - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; - } - } - - void BsW(int n, int v) - { - while (bsLive >= 8) - { - baseStream.WriteByte((byte)(bsBuff >> 24)); // write 8-bit - bsBuff <<= 8; - bsLive -= 8; - bytesOut++; - } - - bsBuff |= (v << (32 - bsLive - n)); - bsLive += n; - } - - void BsPutUChar(int c) - { - BsW(8, c); - } - - void BsPutInt(int u) - { - BsW(8, (u >> 24) & 0xFF); - BsW(8, (u >> 16) & 0xFF); - BsW(8, (u >> 8) & 0xFF); - BsW(8, u & 0xFF); - } - - void BsPutIntVS(int numBits, int c) - { - BsW(numBits, c); - } - - void SendMTFValues() - { - var len = new char[BZip2Constants.GroupCount][]; - for (var i = 0; i < BZip2Constants.GroupCount; ++i) - { - len[i] = new char[BZip2Constants.MaximumAlphaSize]; - } - - int ge; - int iter; - int nSelectors = 0; - int nGroups; - - int alphaSize = nInUse + 2; - for (var t = 0; t < BZip2Constants.GroupCount; t++) - { - for (var v = 0; v < alphaSize; v++) - { - len[t][v] = (char)GREATER_ICOST; - } - } - - /*--- Decide how many coding tables to use ---*/ - if (nMTF <= 0) - { - Panic(); - } - - if (nMTF < 200) - { - nGroups = 2; - } - else if (nMTF < 600) - { - nGroups = 3; - } - else if (nMTF < 1200) - { - nGroups = 4; - } - else if (nMTF < 2400) - { - nGroups = 5; - } - else - { - nGroups = 6; - } - - /*--- Generate an initial set of coding tables ---*/ - var nPart = nGroups; - var remF = nMTF; - var gs = 0; - while (nPart > 0) - { - var 
tFreq = remF / nPart; - var aFreq = 0; - ge = gs - 1; - while (aFreq < tFreq && ge < alphaSize - 1) - { - ge++; - aFreq += mtfFreq[ge]; - } - - if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) - { - aFreq -= mtfFreq[ge]; - ge--; - } - - for (var v = 0; v < alphaSize; v++) - { - if (v >= gs && v <= ge) - { - len[nPart - 1][v] = (char)LESSER_ICOST; - } - else - { - len[nPart - 1][v] = (char)GREATER_ICOST; - } - } - - nPart--; - gs = ge + 1; - remF -= aFreq; - } - - var rfreq = new int[BZip2Constants.GroupCount][]; - for (var i = 0; i < BZip2Constants.GroupCount; ++i) - { - rfreq[i] = new int[BZip2Constants.MaximumAlphaSize]; - } - - var fave = new int[BZip2Constants.GroupCount]; - var cost = new short[BZip2Constants.GroupCount]; - /*--- - Iterate up to N_ITERS times to improve the tables. - ---*/ - for (iter = 0; iter < BZip2Constants.NumberOfIterations; ++iter) - { - for (var t = 0; t < nGroups; ++t) - { - fave[t] = 0; - } - - for (var t = 0; t < nGroups; ++t) - { - for (var v = 0; v < alphaSize; ++v) - { - rfreq[t][v] = 0; - } - } - - nSelectors = 0; - gs = 0; - while (true) - { - /*--- Set group start & end marks. --*/ - if (gs >= nMTF) - { - break; - } - ge = gs + BZip2Constants.GroupSize - 1; - if (ge >= nMTF) - { - ge = nMTF - 1; - } - - /*-- - Calculate the cost of this group as coded - by each of the coding tables. 
- --*/ - for (var t = 0; t < nGroups; t++) - { - cost[t] = 0; - } - - if (nGroups == 6) - { - short cost1, cost2, cost3, cost4, cost5; - short cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; - for (var i = gs; i <= ge; ++i) - { - var icv = szptr[i]; - cost0 += (short)len[0][icv]; - cost1 += (short)len[1][icv]; - cost2 += (short)len[2][icv]; - cost3 += (short)len[3][icv]; - cost4 += (short)len[4][icv]; - cost5 += (short)len[5][icv]; - } - cost[0] = cost0; - cost[1] = cost1; - cost[2] = cost2; - cost[3] = cost3; - cost[4] = cost4; - cost[5] = cost5; - } - else - { - for (var i = gs; i <= ge; ++i) - { - var icv = szptr[i]; - for (var t = 0; t < nGroups; t++) - { - cost[t] += (short)len[t][icv]; - } - } - } - - /*-- - Find the coding table which is best for this group, - and record its identity in the selector table. - --*/ - int bc = int.MaxValue; - int bt = -1; - for (var t = 0; t < nGroups; ++t) - { - if (cost[t] < bc) - { - bc = cost[t]; - bt = t; - } - } - fave[bt]++; - selector[nSelectors] = (char)bt; - nSelectors++; - - /*-- - Increment the symbol frequencies for the selected table. - --*/ - for (var i = gs; i <= ge; ++i) - { - ++rfreq[bt][szptr[i]]; - } - - gs = ge + 1; - } - - /*-- - Recompute the tables based on the accumulated frequencies. - --*/ - for (var t = 0; t < nGroups; ++t) - { - HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20); - } - } - - if (!(nGroups < 8)) - { - Panic(); - } - - if (!(nSelectors < 0x8000 && nSelectors <= (2 + (900000 / BZip2Constants.GroupSize)))) - { - Panic(); - } - - /*--- Compute MTF values for the selectors. 
---*/ - var pos = new char[BZip2Constants.GroupCount]; - - for (var i = 0; i < nGroups; i++) - { - pos[i] = (char)i; - } - - for (var i = 0; i < nSelectors; i++) - { - char ll_i = selector[i]; - var j = 0; - char tmp = pos[j]; - while (ll_i != tmp) - { - j++; - char tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - } - pos[0] = tmp; - selectorMtf[i] = (char)j; - } - - var code = new int[BZip2Constants.GroupCount][]; - - for (var i = 0; i < BZip2Constants.GroupCount; ++i) - { - code[i] = new int[BZip2Constants.MaximumAlphaSize]; - } - - /*--- Assign actual codes for the tables. --*/ - for (var t = 0; t < nGroups; t++) - { - int minLen = 32; - int maxLen = 0; - for (var i = 0; i < alphaSize; i++) - { - if (len[t][i] > maxLen) - { - maxLen = len[t][i]; - } - if (len[t][i] < minLen) - { - minLen = len[t][i]; - } - } - if (maxLen > 20) - { - Panic(); - } - if (minLen < 1) - { - Panic(); - } - HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); - } - - /*--- Transmit the mapping table. ---*/ - var inUse16 = new bool[16]; - for (var i = 0; i < 16; ++i) - { - inUse16[i] = false; - for (var j = 0; j < 16; ++j) - { - if (inUse[i * 16 + j]) - { - inUse16[i] = true; - } - } - } - - for (var i = 0; i < 16; ++i) - { - BsW(1, inUse16[i] ? 1 : 0); - } - - for (var i = 0; i < 16; ++i) - { - if (inUse16[i]) - { - for (var j = 0; j < 16; ++j) - { - BsW(1, inUse[i * 16 + j] ? 1 : 0); - } - } - } - - /*--- Now the selectors. ---*/ - BsW(3, nGroups); - BsW(15, nSelectors); - for (var i = 0; i < nSelectors; ++i) - { - for (var j = 0; j < selectorMtf[i]; ++j) - { - BsW(1, 1); - } - BsW(1, 0); - } - - /*--- Now the coding tables. 
---*/ - for (var t = 0; t < nGroups; ++t) - { - int curr = len[t][0]; - BsW(5, curr); - for (var i = 0; i < alphaSize; ++i) - { - while (curr < len[t][i]) - { - BsW(2, 2); - curr++; /* 10 */ - } - while (curr > len[t][i]) - { - BsW(2, 3); - curr--; /* 11 */ - } - BsW(1, 0); - } - } - - /*--- And finally, the block data proper ---*/ - int selCtr = 0; - gs = 0; - while (true) - { - if (gs >= nMTF) - { - break; - } - ge = gs + BZip2Constants.GroupSize - 1; - if (ge >= nMTF) - { - ge = nMTF - 1; - } - - for (var i = gs; i <= ge; i++) - { - BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]); - } - - gs = ge + 1; - ++selCtr; - } - if (selCtr != nSelectors) - { - Panic(); - } - } - - void MoveToFrontCodeAndSend() - { - BsPutIntVS(24, origPtr); - GenerateMTFValues(); - SendMTFValues(); - } - - void SimpleSort(int lo, int hi, int d) - { - int bigN = hi - lo + 1; - if (bigN < 2) - { - return; - } - - int hp = 0; - while (increments[hp] < bigN) - { - hp++; - } - hp--; - - for (; hp >= 0; hp--) - { - int h = increments[hp]; - - int i = lo + h; - while (true) - { - /*-- copy 1 --*/ - if (i > hi) - break; - int v = zptr[i]; - int j = i; - while (FullGtU(zptr[j - h] + d, v + d)) - { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) - break; - } - zptr[j] = v; - i++; - - /*-- copy 2 --*/ - if (i > hi) - { - break; - } - v = zptr[i]; - j = i; - while (FullGtU(zptr[j - h] + d, v + d)) - { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) - { - break; - } - } - zptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) - { - break; - } - v = zptr[i]; - j = i; - while (FullGtU(zptr[j - h] + d, v + d)) - { - zptr[j] = zptr[j - h]; - j = j - h; - if (j <= (lo + h - 1)) - { - break; - } - } - zptr[j] = v; - i++; - - if (workDone > workLimit && firstAttempt) - { - return; - } - } - } - } - - void Vswap(int p1, int p2, int n) - { - while (n > 0) - { - int temp = zptr[p1]; - zptr[p1] = zptr[p2]; - zptr[p2] = temp; - p1++; - p2++; - n--; - } - } - - 
void QSort3(int loSt, int hiSt, int dSt) - { - var stack = new StackElement[QSORT_STACK_SIZE]; - - var sp = 0; - - stack[sp].ll = loSt; - stack[sp].hh = hiSt; - stack[sp].dd = dSt; - sp++; - - while (sp > 0) - { - if (sp >= QSORT_STACK_SIZE) - { - Panic(); - } - - sp--; - int lo = stack[sp].ll; - int hi = stack[sp].hh; - int d = stack[sp].dd; - - if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) - { - SimpleSort(lo, hi, d); - if (workDone > workLimit && firstAttempt) - { - return; - } - continue; - } - - int med = Med3(block[zptr[lo] + d + 1], - block[zptr[hi] + d + 1], - block[zptr[(lo + hi) >> 1] + d + 1]); - - int ltLo; - int unLo = ltLo = lo; - int gtHi; - int unHi = gtHi = hi; - - int n; - while (true) - { - while (true) - { - if (unLo > unHi) - { - break; - } - n = block[zptr[unLo] + d + 1] - med; - if (n == 0) - { - var temp = zptr[unLo]; - zptr[unLo] = zptr[ltLo]; - zptr[ltLo] = temp; - ltLo++; - unLo++; - continue; - } - if (n > 0) - { - break; - } - unLo++; - } - - while (true) - { - if (unLo > unHi) - { - break; - } - n = block[zptr[unHi] + d + 1] - med; - if (n == 0) - { - var temp = zptr[unHi]; - zptr[unHi] = zptr[gtHi]; - zptr[gtHi] = temp; - gtHi--; - unHi--; - continue; - } - if (n < 0) - { - break; - } - unHi--; - } - - if (unLo > unHi) - { - break; - } - - { - var temp = zptr[unLo]; - zptr[unLo] = zptr[unHi]; - zptr[unHi] = temp; - unLo++; - unHi--; - } - } - - if (gtHi < ltLo) - { - stack[sp].ll = lo; - stack[sp].hh = hi; - stack[sp].dd = d + 1; - sp++; - continue; - } - - n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); - Vswap(lo, unLo - n, n); - int m = ((hi - gtHi) < (gtHi - unHi)) ? 
(hi - gtHi) : (gtHi - unHi); - Vswap(unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack[sp].ll = lo; - stack[sp].hh = n; - stack[sp].dd = d; - sp++; - - stack[sp].ll = n + 1; - stack[sp].hh = m - 1; - stack[sp].dd = d + 1; - sp++; - - stack[sp].ll = m; - stack[sp].hh = hi; - stack[sp].dd = d; - sp++; - } - } - - void MainSort() - { - int i; - var runningOrder = new int[256]; - var copy = new int[256]; - var bigDone = new bool[256]; - - /*-- - In the various block-sized structures, live data runs - from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, - set up the overshoot area for block. - --*/ - - // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); - for (i = 0; i < BZip2Constants.OvershootBytes; i++) - { - block[last + i + 2] = block[(i % (last + 1)) + 1]; - } - for (i = 0; i <= last + BZip2Constants.OvershootBytes; i++) - { - quadrant[i] = 0; - } - - block[0] = block[last + 1]; - - if (last < 4000) - { - /*-- - Use simpleSort(), since the full sorting mechanism - has quite a large constant overhead. - --*/ - for (i = 0; i <= last; i++) - { - zptr[i] = i; - } - firstAttempt = false; - workDone = workLimit = 0; - SimpleSort(0, last, 0); - } - else - { - for (i = 0; i <= 255; i++) - { - bigDone[i] = false; - } - for (i = 0; i <= 65536; i++) - { - ftab[i] = 0; - } - - int c1 = block[0]; - int c2; - for (i = 0; i <= last; i++) - { - c2 = block[i + 1]; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - - for (i = 1; i <= 65536; i++) - { - ftab[i] += ftab[i - 1]; - } - - c1 = block[1]; - int j; - for (i = 0; i < last; i++) - { - c2 = block[i + 2]; - j = (c1 << 8) + c2; - c1 = c2; - ftab[j]--; - zptr[ftab[j]] = i; - } - - j = ((block[last + 1]) << 8) + (block[1]); - ftab[j]--; - zptr[ftab[j]] = last; - - /*-- - Now ftab contains the first loc of every small bucket. - Calculate the running order, from smallest to largest - big bucket. 
- --*/ - - for (i = 0; i <= 255; i++) - { - runningOrder[i] = i; - } - - var h = 1; - do - { - h = 3 * h + 1; - } while (h <= 256); - do - { - h = h / 3; - for (i = h; i <= 255; i++) - { - int vv = runningOrder[i]; - j = i; - while ((ftab[((runningOrder[j - h]) + 1) << 8] - ftab[(runningOrder[j - h]) << 8]) > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) - { - runningOrder[j] = runningOrder[j - h]; - j = j - h; - if (j <= (h - 1)) - { - break; - } - } - runningOrder[j] = vv; - } - } while (h != 1); - - /*-- - The main sorting loop. - --*/ - for (i = 0; i <= 255; i++) - { - - /*-- - Process big buckets, starting with the least full. - --*/ - int ss = runningOrder[i]; - - /*-- - Complete the big bucket [ss] by quicksorting - any unsorted small buckets [ss, j]. Hopefully - previous pointer-scanning phases have already - completed many of the small buckets [ss, j], so - we don't have to sort them at all. - --*/ - for (j = 0; j <= 255; j++) - { - int sb = (ss << 8) + j; - if ((ftab[sb] & SETMASK) != SETMASK) - { - var lo = ftab[sb] & CLEARMASK; - var hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) - { - QSort3(lo, hi, 2); - if (workDone > workLimit && firstAttempt) - { - return; - } - } - ftab[sb] |= SETMASK; - } - } - - /*-- - The ss big bucket is now done. Record this fact, - and update the quadrant descriptors. Remember to - update quadrants in the overshoot area too, if - necessary. The "if (i < 255)" test merely skips - this updating for the last bucket processed, since - updating for the last bucket is pointless. 
- --*/ - bigDone[ss] = true; - - if (i < 255) - { - var bbStart = ftab[ss << 8] & CLEARMASK; - var bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - var shifts = 0; - - while ((bbSize >> shifts) > 65534) - { - shifts++; - } - - for (j = 0; j < bbSize; j++) - { - var a2update = zptr[bbStart + j]; - var qVal = (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < BZip2Constants.OvershootBytes) - { - quadrant[a2update + last + 1] = qVal; - } - } - - if (!(((bbSize - 1) >> shifts) <= 65535)) - { - Panic(); - } - } - - /*-- - Now scan this big bucket so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - --*/ - for (j = 0; j <= 255; j++) - { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } - - for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) - { - c1 = block[zptr[j]]; - if (!bigDone[c1]) - { - zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1; - copy[c1]++; - } - } - - for (j = 0; j <= 255; j++) - { - ftab[(j << 8) + ss] |= SETMASK; - } - } - } - } - - void RandomiseBlock() - { - int i; - var rNToGo = 0; - var rTPos = 0; - for (i = 0; i < 256; i++) - { - inUse[i] = false; - } - - for (i = 0; i <= last; i++) - { - if (rNToGo == 0) - { - rNToGo = BZip2Constants.RandomNumbers[rTPos]; - rTPos++; - if (rTPos == 512) - { - rTPos = 0; - } - } - rNToGo--; - block[i + 1] ^= (byte)((rNToGo == 1) ? 
1 : 0); - // handle 16 bit signed numbers - block[i + 1] &= 0xFF; - - inUse[block[i + 1]] = true; - } - } - - void DoReversibleTransformation() - { - workLimit = workFactor * last; - workDone = 0; - blockRandomised = false; - firstAttempt = true; - - MainSort(); - - if (workDone > workLimit && firstAttempt) - { - RandomiseBlock(); - workLimit = workDone = 0; - blockRandomised = true; - firstAttempt = false; - MainSort(); - } - - origPtr = -1; - for (var i = 0; i <= last; i++) - { - if (zptr[i] == 0) - { - origPtr = i; - break; - } - } - - if (origPtr == -1) - { - Panic(); - } - } - - bool FullGtU(int i1, int i2) - { - byte c1 = 0, c2 = 0; - for (int i = 0; c1 == c2 && i < 6; i1++, i2++, i++) - { - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - } - - if (c1 != c2) - return c1 > c2; - - int k = last + 1; - - do - { - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) - { - return c1 > c2; - } - int s1 = quadrant[i1]; - int s2 = quadrant[i2]; - if (s1 != s2) - { - return s1 > s2; - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) - { - return c1 > c2; - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) - { - return s1 > s2; - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) - { - return c1 > c2; - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) - { - return s1 > s2; - } - i1++; - i2++; - - c1 = block[i1 + 1]; - c2 = block[i2 + 1]; - if (c1 != c2) - { - return c1 > c2; - } - s1 = quadrant[i1]; - s2 = quadrant[i2]; - if (s1 != s2) - { - return s1 > s2; - } - i1++; - i2++; - - if (i1 > last) - { - i1 -= last; - i1--; - } - if (i2 > last) - { - i2 -= last; - i2--; - } - - k -= 4; - ++workDone; - } while (k >= 0); - - return false; - } - - void AllocateCompressStructures() - { - var n = BZip2Constants.BaseBlockSize * blockSize100k; - block = new byte[(n + 1 + BZip2Constants.OvershootBytes)]; - quadrant = new int[(n + BZip2Constants.OvershootBytes)]; - zptr = new int[n]; - ftab = new 
int[65537]; - - if (block == null || quadrant == null || zptr == null || ftab == null) - { - // int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; - // compressOutOfMemory ( totalDraw, n ); - } - - /* - The back end needs a place to store the MTF values - whilst it calculates the coding tables. We could - put them in the zptr array. However, these values - will fit in a short, so we overlay szptr at the - start of zptr, in the hope of reducing the number - of cache misses induced by the multiple traversals - of the MTF values when calculating coding tables. - Seems to improve compression speed by about 1%. - */ - // szptr = zptr; - - - szptr = new short[2 * n]; - } - - void GenerateMTFValues() - { - var yy = new char[256]; - int i; - - MakeMaps(); - int EOB = nInUse + 1; - - for (i = 0; i <= EOB; i++) - { - mtfFreq[i] = 0; - } - - int wr = 0; - int zPend = 0; - for (i = 0; i < nInUse; i++) - { - yy[i] = (char)i; - } - - - for (i = 0; i <= last; i++) - { - char ll_i = unseqToSeq[block[zptr[i]]]; - - int j = 0; - char tmp = yy[j]; - while (ll_i != tmp) - { - j++; - char tmp2 = tmp; - tmp = yy[j]; - yy[j] = tmp2; - } - yy[0] = tmp; - - if (j == 0) - { - zPend++; - } - else - { - if (zPend > 0) - { - zPend--; - while (true) - { - switch (zPend % 2) - { - case 0: - szptr[wr] = BZip2Constants.RunA; - wr++; - mtfFreq[BZip2Constants.RunA]++; - break; - case 1: - szptr[wr] = BZip2Constants.RunB; - wr++; - mtfFreq[BZip2Constants.RunB]++; - break; - } - if (zPend < 2) - { - break; - } - zPend = (zPend - 2) / 2; - } - zPend = 0; - } - szptr[wr] = (short)(j + 1); - wr++; - mtfFreq[j + 1]++; - } - } - - if (zPend > 0) - { - zPend--; - while (true) - { - switch (zPend % 2) - { - case 0: - szptr[wr] = BZip2Constants.RunA; - wr++; - mtfFreq[BZip2Constants.RunA]++; - break; - case 1: - szptr[wr] = BZip2Constants.RunB; - wr++; - mtfFreq[BZip2Constants.RunB]++; - break; - } - if (zPend < 2) - { - break; - } - zPend = (zPend - 2) / 2; - } - } - - 
szptr[wr] = (short)EOB; - wr++; - mtfFreq[EOB]++; - - nMTF = wr; - } - - static void Panic() - { - throw new BZip2Exception("BZip2 output stream panic"); - } - - static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen) - { - /*-- - Nodes and heap entries run from 1. Entry 0 - for both the heap and nodes is a sentinel. - --*/ - - var heap = new int[BZip2Constants.MaximumAlphaSize + 2]; - var weight = new int[BZip2Constants.MaximumAlphaSize * 2]; - var parent = new int[BZip2Constants.MaximumAlphaSize * 2]; - - for (var i = 0; i < alphaSize; ++i) - { - weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; - } - - while (true) - { - int nNodes = alphaSize; - int nHeap = 0; - - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (var i = 1; i <= alphaSize; ++i) - { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - var zz = nHeap; - var tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) - { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - } - if (!(nHeap < (BZip2Constants.MaximumAlphaSize + 2))) - { - Panic(); - } - - while (nHeap > 1) - { - int n1 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - var zz = 1; - int yy; - var tmp = heap[zz]; - while (true) - { - yy = zz << 1; - if (yy > nHeap) - { - break; - } - if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) - { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) - { - break; - } - - heap[zz] = heap[yy]; - zz = yy; - } - heap[zz] = tmp; - int n2 = heap[1]; - heap[1] = heap[nHeap]; - nHeap--; - - zz = 1; - tmp = heap[zz]; - while (true) - { - yy = zz << 1; - if (yy > nHeap) - { - break; - } - if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) - { - yy++; - } - if (weight[tmp] < weight[heap[yy]]) - { - break; - } - heap[zz] = heap[yy]; - zz = yy; - } - heap[zz] = tmp; - nNodes++; - parent[n1] = parent[n2] = nNodes; - - weight[nNodes] = (int)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) | - 1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 
0x000000ff)) ? (weight[n1] & 0x000000ff) : (weight[n2] & 0x000000ff)); - - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - - zz = nHeap; - tmp = heap[zz]; - while (weight[tmp] < weight[heap[zz >> 1]]) - { - heap[zz] = heap[zz >> 1]; - zz >>= 1; - } - heap[zz] = tmp; - } - if (!(nNodes < (BZip2Constants.MaximumAlphaSize * 2))) - { - Panic(); - } - - bool tooLong = false; - int j; - for (var i = 1; i <= alphaSize; ++i) - { - j = 0; - int k = i; - while (parent[k] >= 0) - { - k = parent[k]; - j++; - } - len[i - 1] = (char)j; - if (j > maxLen) - { - tooLong = true; - } - } - - if (!tooLong) - { - break; - } - - for (var i = 1; i < alphaSize; ++i) - { - j = weight[i] >> 8; - j = 1 + (j / 2); - weight[i] = j << 8; - } - } - } - - static void HbAssignCodes(int[] code, char[] length, int minLen, int maxLen, int alphaSize) - { - var vec = 0; - for (var n = minLen; n <= maxLen; ++n) - { - for (var i = 0; i < alphaSize; ++i) - { - if (length[i] == n) - { - code[i] = vec; - ++vec; - } - } - vec <<= 1; - } - } - - static byte Med3(byte a, byte b, byte c) - { - if (a > b) - { - byte t = a; - a = b; - b = t; - } - if (b > c) - { - /* - t = b; - */ - b = c; - /* - c = t; - */ - } - if (a > b) - { - b = a; - } - return b; - } - - struct StackElement - { - public int ll; - public int hh; - public int dd; - } - - #region Instance Fields - - /*-- - index of the last char in the block, so - the block size == last + 1. - --*/ - int last; - - /*-- - index in zptr[] of original string after sorting. - --*/ - int origPtr; - - /*-- - always: in the range 0 .. 9. - The current block size is 100000 * this number. 
- --*/ - readonly int blockSize100k; - - bool blockRandomised; - - int bytesOut; - int bsBuff; - int bsLive; - readonly IChecksum mCrc = new StrangeCRC(); - - readonly bool[] inUse = new bool[256]; - int nInUse; - - readonly char[] seqToUnseq = new char[256]; - readonly char[] unseqToSeq = new char[256]; - - readonly char[] selector = new char[BZip2Constants.MaximumSelectors]; - readonly char[] selectorMtf = new char[BZip2Constants.MaximumSelectors]; - - byte[] block; - int[] quadrant; - int[] zptr; - short[] szptr; - int[] ftab; - - int nMTF; - - readonly int[] mtfFreq = new int[BZip2Constants.MaximumAlphaSize]; - - /* - * Used when sorting. If too many long comparisons - * happen, we stop sorting, randomise the block - * slightly, and try again. - */ - readonly int workFactor; - int workDone; - int workLimit; - bool firstAttempt; - int nBlocksRandomised; - - int currentChar = -1; - int runLength; - uint blockCRC, combinedCRC; - int allowableBlockSize; - Stream baseStream; - bool disposed_; - #endregion - } -} - -/* This file was derived from a file containing this license: - * - * This file is a part of bzip2 and/or libbzip2, a program and - * library for lossless, block-sorting data compression. - * - * Copyright (C) 1996-1998 Julian R Seward. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. The origin of this software must not be misrepresented; you must - * not claim that you wrote the original software. If you use this - * software in a product, an acknowledgment in the product - * documentation would be appreciated but is not required. - * - * 3. Altered source versions must be plainly marked as such, and must - * not be misrepresented as being the original software. - * - * 4. 
The name of the author may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Java version ported by Keiron Liddle, Aftex Software 1999-2001 - */ diff --git a/src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs b/src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs deleted file mode 100644 index f6f9af9..0000000 --- a/src/DeltaQ.BsDiff/Bzip2/Checksums/IChecksum.cs +++ /dev/null @@ -1,93 +0,0 @@ -// IChecksum.cs - Interface to compute a data checksum -// Copyright (C) 2001 Mike Krueger -// -// This file was translated from java, it was part of the GNU Classpath -// Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. -// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. - -namespace bz2core.Checksums -{ - - /// - /// Interface to compute a data checksum used by checked input/output streams. - /// A data checksum can be updated by one byte or with a byte array. After each - /// update the value of the current checksum can be returned by calling - /// getValue. The complete checksum object can also be reset - /// so it can be used again with new data. - /// - public interface IChecksum - { - /// - /// Returns the data checksum computed so far. - /// - long Value - { - get; - } - - /// - /// Resets the data checksum as if no update was ever called. - /// - void Reset(); - - /// - /// Adds one byte to the data checksum. - /// - /// - /// the data value to add. The high byte of the int is ignored. 
- /// - void Update(int value); - - /// - /// Updates the data checksum with the bytes taken from the array. - /// - /// - /// buffer an array of bytes - /// - void Update(byte[] buffer); - - /// - /// Adds the byte array to the data checksum. - /// - /// - /// The buffer which contains the data - /// - /// - /// The offset in the buffer where the data starts - /// - /// - /// the number of data bytes to add. - /// - void Update(byte[] buffer, int offset, int count); - } -} diff --git a/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs b/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs deleted file mode 100644 index f785788..0000000 --- a/src/DeltaQ.BsDiff/Bzip2/Checksums/StrangeCrc.cs +++ /dev/null @@ -1,192 +0,0 @@ -// StrangeCRC.cs - computes a crc used in the bziplib -// -// Copyright (C) 2001 Mike Krueger -// -// This file was translated from java, it was part of the GNU Classpath -// Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// Linking this library statically or dynamically with other modules is -// making a combined work based on this library. Thus, the terms and -// conditions of the GNU General Public License cover the whole -// combination. 
-// -// As a special exception, the copyright holders of this library give you -// permission to link this library with independent modules to produce an -// executable, regardless of the license terms of these independent -// modules, and to copy and distribute the resulting executable under -// terms of your choice, provided that you also meet, for each linked -// independent module, the terms and conditions of the license of that -// module. An independent module is a module which is not derived from -// or based on this library. If you modify this library, you may extend -// this exception to your version of the library, but you are not -// obligated to do so. If you do not wish to do so, delete this -// exception statement from your version. - -using System; - -namespace bz2core.Checksums -{ - /// - /// Bzip2 checksum algorithm - /// - public class StrangeCRC : IChecksum - { - readonly static uint[] crc32Table = { - 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, - 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, - 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, - 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, - 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, - 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, - 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, - 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, - 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, - 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, - 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, - 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, - 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, - 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, - 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, - 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, - 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, - 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, - 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, - 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, - 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, - 0x6b93dddb, 
0x6f52c06c, 0x6211e6b5, 0x66d0fb02, - 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, - 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, - 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, - 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, - 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, - 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, - 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, - 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, - 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, - 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, - 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, - 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, - 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, - 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, - 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, - 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, - 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, - 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, - 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, - 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, - 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, - 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, - 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, - 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, - 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, - 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, - 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, - 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, - 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, - 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, - 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, - 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, - 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, - 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, - 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, - 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, - 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, - 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, - 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, - 0x9abc8bd5, 
0x9e7d9662, 0x933eb0bb, 0x97ffad0c, - 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, - 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 - }; - - int globalCrc; - - /// - /// Initialise a default instance of - /// - public StrangeCRC() - { - Reset(); - } - - /// - /// Reset the state of Crc. - /// - public void Reset() - { - globalCrc = -1; - } - - /// - /// Get the current Crc value. - /// - public long Value - { - get - { - return ~globalCrc; - } - } - - /// - /// Update the Crc value. - /// - /// data update is based on - public void Update(int value) - { - int index = (globalCrc >> 24) ^ value; - if (index < 0) - index += 256; - - globalCrc = (int)((globalCrc << 8) ^ crc32Table[index]); - } - - /// - /// Update Crc based on a block of data - /// - /// The buffer containing data to update the crc with. - public void Update(byte[] buffer) - { - if (buffer == null) - throw new ArgumentNullException(nameof(buffer)); - - Update(buffer, 0, buffer.Length); - } - - /// - /// Update Crc based on a portion of a block of data - /// - /// block of data - /// index of first byte to use - /// number of bytes to use - public void Update(byte[] buffer, int offset, int count) - { - if (buffer == null) - throw new ArgumentNullException(nameof(buffer)); - if (offset < 0) - throw new ArgumentOutOfRangeException(nameof(offset), "cannot be less than zero"); - if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), "cannot be less than zero"); - if (offset + count > buffer.Length) - throw new ArgumentOutOfRangeException(nameof(count)); - - for (var i = 0; i < count; i++) - { - Update(buffer[offset + i]); - } - } - } -} diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 9de0c65..4e556f4 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -10,6 +10,10 @@ snupkg + + + + diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index 03eff6e..8677a55 100644 --- 
a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -1,5 +1,5 @@ -using bz2core; -using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting; +using ICSharpCode.SharpZipLib.BZip2; using Microsoft.Toolkit.HighPerformance; using Microsoft.Toolkit.HighPerformance.Buffers; using System; @@ -11,19 +11,18 @@ namespace DeltaQ.BsDiff public static class Diff { internal const int HeaderSize = 32; + + private const int HeaderOffsetSig = 0; + private const int HeaderOffsetCtrl = sizeof(long) * 1; + private const int HeaderOffsetDiff = sizeof(long) * 2; + private const int HeaderOffsetNewData = sizeof(long) * 3; + internal const long Signature = 0x3034464649445342; //"BSDIFF40" internal static Stream GetEncodingStream(Stream stream, bool output) - { - if (output) - { - return new BZip2OutputStream(stream) { IsStreamOwner = false }; - } - else - { - return new BZip2InputStream(stream); - } - } + => output + ? new BZip2OutputStream(stream) + : new BZip2InputStream(stream); /// /// Creates a BSDIFF-format patch from two byte buffers @@ -59,15 +58,9 @@ 0 32 Header ?? ?? Bzip2ed diff block ?? ?? 
Bzip2ed extra block */ Span header = stackalloc byte[HeaderSize]; + header[HeaderOffsetSig..].WritePackedLong(Signature); - Span header_signature = header[..sizeof(long)]; - header_signature.WritePackedLong(Signature); - - Span header_compressed_ctrl = header.Slice(sizeof(long), sizeof(long)); - Span header_compressed_diff = header.Slice(sizeof(long) * 2, sizeof(long)); - - Span header_newdata_len = header.Slice(sizeof(long) * 3, sizeof(long)); - header_newdata_len.WritePackedLong(newData.Length); + header[HeaderOffsetNewData..].WritePackedLong(newData.Length); var startPosition = output.Position; output.Write(header); @@ -78,148 +71,142 @@ 0 32 Header //the memory allocated for the suffix array MUST be at least (n+1) //this is only required for bsdiff, so we allocate it ourselves //instead of using the ISuffixSort overloads that only require allocations of (n) - using (MemoryOwner saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear)) - using (var msControl = new MemoryStream()) - using (var msDiff = new MemoryStream()) - using (var msExtra = new MemoryStream()) + using var saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear); + + using var ctrlSink = new ArrayPoolBufferWriter(); + using var diffSink = new ArrayPoolBufferWriter(); + using var extraSink = new ArrayPoolBufferWriter(); + { - using (var ctrlStream = GetEncodingStream(msControl, true)) - using (var diffStream = GetEncodingStream(msDiff, true)) - using (var extraStream = GetEncodingStream(msExtra, true)) + using var ctrlEncStream = GetEncodingStream(ctrlSink.AsStream(), true); + using var diffEncStream = GetEncodingStream(diffSink.AsStream(), true); + using var extraEncStream = GetEncodingStream(extraSink.AsStream(), true); + + Span I = saOwner.Span; + suffixSort.Sort(oldData, I[..^1]); + + var scan = 0; + var pos = 0; + var len = 0; + var lastscan = 0; + var lastpos = 0; + var lastoffset = 0; + + // compute the differences, writing ctrl as we go + while (scan < 
newData.Length) { - Span I = saOwner.Span; - suffixSort.Sort(oldData, I[..^1]); - - var scan = 0; - var pos = 0; - var len = 0; - var lastscan = 0; - var lastpos = 0; - var lastoffset = 0; - - // compute the differences, writing ctrl as we go - while (scan < newData.Length) + var oldscore = 0; + + for (var scsc = scan += len; scan < newData.Length; scan++) { - var oldscore = 0; + len = Search(I, oldData, newData[scan..], 0, oldData.Length, out pos); - for (var scsc = scan += len; scan < newData.Length; scan++) + for (; scsc < scan + len; scsc++) { - len = Search(I, oldData, newData[scan..], 0, oldData.Length, out pos); + if ((scsc + lastoffset < oldData.Length) && (oldData[scsc + lastoffset] == newData[scsc])) + oldscore++; + } - for (; scsc < scan + len; scsc++) - { - if ((scsc + lastoffset < oldData.Length) && (oldData[scsc + lastoffset] == newData[scsc])) - oldscore++; - } + if ((len == oldscore && len != 0) || (len > oldscore + 8)) + break; - if ((len == oldscore && len != 0) || (len > oldscore + 8)) - break; + if ((scan + lastoffset < oldData.Length) && (oldData[scan + lastoffset] == newData[scan])) + oldscore--; + } - if ((scan + lastoffset < oldData.Length) && (oldData[scan + lastoffset] == newData[scan])) - oldscore--; + if (len != oldscore || scan == newData.Length) + { + var s = 0; + var sf = 0; + var lenf = 0; + for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length);) + { + if (oldData[lastpos + i] == newData[lastscan + i]) + s++; + i++; + if (s * 2 - i > sf * 2 - lenf) + { + sf = s; + lenf = i; } + } - if (len != oldscore || scan == newData.Length) + var lenb = 0; + if (scan < newData.Length) { - var s = 0; - var sf = 0; - var lenf = 0; - for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length);) + s = 0; + var sb = 0; + for (var i = 1; (scan >= lastscan + i) && (pos >= i); i++) { - if (oldData[lastpos + i] == newData[lastscan + i]) + if (oldData[pos - i] == newData[scan - i]) s++; - i++; - if (s * 2 - i > sf * 2 - lenf) 
+ if (s * 2 - i > sb * 2 - lenb) { - sf = s; - lenf = i; + sb = s; + lenb = i; } } + } - var lenb = 0; - if (scan < newData.Length) + if (lastscan + lenf > scan - lenb) + { + var overlap = (lastscan + lenf) - (scan - lenb); + s = 0; + var ss = 0; + var lens = 0; + for (var i = 0; i < overlap; i++) { - s = 0; - var sb = 0; - for (var i = 1; (scan >= lastscan + i) && (pos >= i); i++) + if (newData[lastscan + lenf - overlap + i] == oldData[lastpos + lenf - overlap + i]) + s++; + if (newData[scan - lenb + i] == oldData[pos - lenb + i]) + s--; + if (s > ss) { - if (oldData[pos - i] == newData[scan - i]) - s++; - if (s * 2 - i > sb * 2 - lenb) - { - sb = s; - lenb = i; - } + ss = s; + lens = i + 1; } } - if (lastscan + lenf > scan - lenb) - { - var overlap = (lastscan + lenf) - (scan - lenb); - s = 0; - var ss = 0; - var lens = 0; - for (var i = 0; i < overlap; i++) - { - if (newData[lastscan + lenf - overlap + i] == oldData[lastpos + lenf - overlap + i]) - s++; - if (newData[scan - lenb + i] == oldData[pos - lenb + i]) - s--; - if (s > ss) - { - ss = s; - lens = i + 1; - } - } - - lenf += lens - overlap; - lenb -= lens; - } + lenf += lens - overlap; + lenb -= lens; + } - //write diff string - for (var i = 0; i < lenf; i++) - diffStream.WriteByte((byte)(newData[lastscan + i] - oldData[lastpos + i])); + //write diff string + for (var i = 0; i < lenf; i++) + diffEncStream.WriteByte((byte)(newData[lastscan + i] - oldData[lastpos + i])); - //write extra string - var extraLength = (scan - lenb) - (lastscan + lenf); - if (extraLength > 0) - extraStream.Write(newData.Slice(lastscan + lenf, extraLength)); + //write extra string + var extraLength = (scan - lenb) - (lastscan + lenf); + if (extraLength > 0) + extraEncStream.Write(newData.Slice(lastscan + lenf, extraLength)); - //write ctrl block - buf.WritePackedLong(lenf); - ctrlStream.Write(buf); + //write ctrl block + buf.WritePackedLong(lenf); + ctrlEncStream.Write(buf); - buf.WritePackedLong(extraLength); - 
ctrlStream.Write(buf); + buf.WritePackedLong(extraLength); + ctrlEncStream.Write(buf); - buf.WritePackedLong((pos - lenb) - (lastpos + lenf)); - ctrlStream.Write(buf); + buf.WritePackedLong((pos - lenb) - (lastpos + lenf)); + ctrlEncStream.Write(buf); - lastscan = scan - lenb; - lastpos = pos - lenb; - lastoffset = pos - scan; - } + lastscan = scan - lenb; + lastpos = pos - lenb; + lastoffset = pos - scan; } } + } - //write compressed ctrl data - msControl.Seek(0, SeekOrigin.Begin); - msControl.CopyTo(output); - - // compute size of compressed ctrl data - header_compressed_ctrl.WritePackedLong(msControl.Length); - - // write compressed diff data - msDiff.Seek(0, SeekOrigin.Begin); - msDiff.CopyTo(output); + //write compressed ctrl data + output.Write(ctrlSink.WrittenSpan); + header[HeaderOffsetCtrl..].WritePackedLong(ctrlSink.WrittenCount); - // compute size of compressed diff data - header_compressed_diff.WritePackedLong(msDiff.Length); + // write compressed diff data + output.Write(diffSink.WrittenSpan); + header[HeaderOffsetDiff..].WritePackedLong(diffSink.WrittenCount); - // write compressed extra data - msExtra.Seek(0, SeekOrigin.Begin); - msExtra.CopyTo(output); - } + // write compressed extra data + output.Write(extraSink.WrittenSpan); // seek to the beginning, write the header, then seek back to end var endPosition = output.Position; From 3c90a3a3851540e9314cc738f545b3962f6e2c92 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 09:41:50 -0500 Subject: [PATCH 290/325] Rename delta CLI options to bsdiff and bspatch --- .../{Commands.Delta.cs => Commands.BsDiff.cs} | 24 +++++++++++++------ ...{Commands.Apply.cs => Commands.BsPatch.cs} | 10 ++++---- src/DeltaQ.CommandLine/Program.cs | 4 ++-- 3 files changed, 25 insertions(+), 13 deletions(-) rename src/DeltaQ.CommandLine/{Commands.Delta.cs => Commands.BsDiff.cs} (72%) rename src/DeltaQ.CommandLine/{Commands.Apply.cs => Commands.BsPatch.cs} (84%) diff --git a/src/DeltaQ.CommandLine/Commands.Delta.cs b/src/DeltaQ.CommandLine/Commands.BsDiff.cs similarity index 72% rename from src/DeltaQ.CommandLine/Commands.Delta.cs rename to src/DeltaQ.CommandLine/Commands.BsDiff.cs index a5346d8..ae48f33 100644 --- a/src/DeltaQ.CommandLine/Commands.Delta.cs +++ b/src/DeltaQ.CommandLine/Commands.BsDiff.cs @@ -4,6 +4,7 @@ using Humanizer; using Microsoft.Extensions.CommandLineUtils; using System; +using System.Diagnostics; using System.IO; namespace DeltaQ.CommandLine; @@ -11,9 +12,9 @@ namespace DeltaQ.CommandLine; internal static partial class Commands { - public static Action DeltaCommand { get; } = command => + public static Action BsDiffCommand { get; } = command => { - command.Description = "Generate a delta (difference) between two files"; + command.Description = "Generate a BSDIFF-compatible delta (difference) between two files"; command.HelpOption(HelpOptions); var oldFileArg = command.Argument("[oldfile]", "Original file (input)"); @@ -35,14 +36,23 @@ internal static partial class Commands Console.WriteLine("Generating BsDiff delta between"); Console.WriteLine($@"Old file: ""{oldFile}"""); Console.WriteLine($@"New file: ""{newFile}"""); - Console.WriteLine($"with suffix sort {sort.GetType().Name}"); + if (algoArg.HasValue()) + { + Console.WriteLine($"with suffix sort {sort.GetType().Name}"); + } Console.WriteLine(); try { - var sw = System.Diagnostics.Stopwatch.StartNew(); - 
BsDiff.Diff.Create(File.ReadAllBytes(oldFile), File.ReadAllBytes(newFile), File.Create(deltaFile), sort); - sw.Stop(); - + Stopwatch sw; + { + var oldBytes = File.ReadAllBytes(oldFile); + var newBytes = File.ReadAllBytes(newFile); + using var fsDelta = File.Create(deltaFile); + sw = Stopwatch.StartNew(); + BsDiff.Diff.Create(oldBytes, newBytes, fsDelta, sort); + sw.Stop(); + } + Console.WriteLine($"Finished in {sw.Elapsed.Humanize()} [{sw.Elapsed}]"); Console.WriteLine($@"Delta file: ""{deltaFile}"""); var deltaFileInfo = new FileInfo(deltaFile); diff --git a/src/DeltaQ.CommandLine/Commands.Apply.cs b/src/DeltaQ.CommandLine/Commands.BsPatch.cs similarity index 84% rename from src/DeltaQ.CommandLine/Commands.Apply.cs rename to src/DeltaQ.CommandLine/Commands.BsPatch.cs index 2888202..0d87e52 100644 --- a/src/DeltaQ.CommandLine/Commands.Apply.cs +++ b/src/DeltaQ.CommandLine/Commands.BsPatch.cs @@ -1,6 +1,7 @@ using Humanizer; using Microsoft.Extensions.CommandLineUtils; using System; +using System.Diagnostics; using System.IO; using System.IO.MemoryMappedFiles; @@ -9,9 +10,9 @@ namespace DeltaQ.CommandLine; internal static partial class Commands { - public static Action ApplyCommand { get; } = command => + public static Action BsPatchCommand { get; } = command => { - command.Description = "Apply a delta (patch) to an original file and generate an output file"; + command.Description = "Apply a BSDIFF-compatible delta (patch) to an original file and generate an output file"; command.HelpOption(HelpOptions); var oldFileArg = command.Argument("[oldfile]", "Original file (input)"); @@ -29,16 +30,17 @@ internal static partial class Commands Console.WriteLine(); try { - var sw = System.Diagnostics.Stopwatch.StartNew(); + Stopwatch sw; { using var fsInput = File.OpenRead(oldFile); using var fsDelta = MemoryMappedFile.CreateFromFile(deltaFile, FileMode.Open, null, 0, MemoryMappedFileAccess.Read); using var fsOutput = File.Create(newFile); + sw = Stopwatch.StartNew(); 
BsDiff.Patch.Apply(fsInput, OpenPatch, fsOutput); + sw.Stop(); Stream OpenPatch(long offset, long length) => fsDelta.CreateViewStream(offset, length, MemoryMappedFileAccess.Read); } - sw.Stop(); Console.WriteLine($"Finished in {sw.Elapsed.Humanize()} [{sw.Elapsed}]"); Console.WriteLine($@"New file: ""{newFile}"""); diff --git a/src/DeltaQ.CommandLine/Program.cs b/src/DeltaQ.CommandLine/Program.cs index 1bbe8a0..88f18dd 100644 --- a/src/DeltaQ.CommandLine/Program.cs +++ b/src/DeltaQ.CommandLine/Program.cs @@ -26,8 +26,8 @@ app.Command("fuzz", Commands.FuzzCommand); #endif -app.Command("delta", Commands.DeltaCommand); -app.Command("apply", Commands.ApplyCommand); +app.Command("bsdiff", Commands.BsDiffCommand); +app.Command("bspatch", Commands.BsPatchCommand); try { From b6c6fe5d21ad8e2b38a42a4511eb6ccbd4a9e4de Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 09:49:02 -0500 Subject: [PATCH 291/325] Update 3pn for DQ.BsDiff --- src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt index d271984..c2efaaf 100644 --- a/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt @@ -9,4 +9,14 @@ Copyright 2010 Logos Bible Software Licensed under the MIT license. Available at -https://github.com/LogosBible/bsdiff.net \ No newline at end of file +https://github.com/LogosBible/bsdiff.net + + +License notice for SharpZipLib +------------------------------- + +Copyright 2000-2018 SharpZipLib Contributors +Licensed under the MIT license. + +Available at +https://raw.githubusercontent.com/icsharpcode/SharpZipLib/master/LICENSE.txt \ No newline at end of file From 99f5cb1e433524b6d9f409ebe470fbaec61c2164 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 09:49:26 -0500 Subject: [PATCH 292/325] Use file scoped namespaces in DQ.BsDiff --- src/DeltaQ.BsDiff/Diff.cs | 409 ++++++++++++++-------------- src/DeltaQ.BsDiff/Patch.cs | 271 +++++++++--------- src/DeltaQ.BsDiff/SpanExtensions.cs | 74 +++-- 3 files changed, 374 insertions(+), 380 deletions(-) diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index 8677a55..f264263 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -6,261 +6,260 @@ using System.Buffers; using System.IO; -namespace DeltaQ.BsDiff +namespace DeltaQ.BsDiff; + +public static class Diff { - public static class Diff + internal const int HeaderSize = 32; + + private const int HeaderOffsetSig = 0; + private const int HeaderOffsetCtrl = sizeof(long) * 1; + private const int HeaderOffsetDiff = sizeof(long) * 2; + private const int HeaderOffsetNewData = sizeof(long) * 3; + + internal const long Signature = 0x3034464649445342; //"BSDIFF40" + + internal static Stream GetEncodingStream(Stream stream, bool output) + => output + ? new BZip2OutputStream(stream) + : new BZip2InputStream(stream); + + /// + /// Creates a BSDIFF-format patch from two byte buffers + /// + /// Byte buffer of the original (older) data + /// Byte buffer of the changed (newer) data + /// Seekable, writable stream where the patch will be written + /// Suffix sort implementation to use for comparison + public static void Create(ReadOnlySpan oldData, ReadOnlySpan newData, Stream output, ISuffixSort suffixSort) { - internal const int HeaderSize = 32; - - private const int HeaderOffsetSig = 0; - private const int HeaderOffsetCtrl = sizeof(long) * 1; - private const int HeaderOffsetDiff = sizeof(long) * 2; - private const int HeaderOffsetNewData = sizeof(long) * 3; - - internal const long Signature = 0x3034464649445342; //"BSDIFF40" - - internal static Stream GetEncodingStream(Stream stream, bool output) - => output - ? 
new BZip2OutputStream(stream) - : new BZip2InputStream(stream); - - /// - /// Creates a BSDIFF-format patch from two byte buffers - /// - /// Byte buffer of the original (older) data - /// Byte buffer of the changed (newer) data - /// Seekable, writable stream where the patch will be written - /// Suffix sort implementation to use for comparison - public static void Create(ReadOnlySpan oldData, ReadOnlySpan newData, Stream output, ISuffixSort suffixSort) - { - // check arguments - if (oldData == null) - throw new ArgumentNullException(nameof(oldData)); - if (newData == null) - throw new ArgumentNullException(nameof(newData)); - if (output == null) - throw new ArgumentNullException(nameof(output)); - if (suffixSort == null) - throw new ArgumentNullException(nameof(suffixSort)); - if (!output.CanSeek) - throw new ArgumentException("Output stream must be seekable.", nameof(output)); - if (!output.CanWrite) - throw new ArgumentException("Output stream must be writable.", nameof(output)); - - /* Header is - 0 8 "BSDIFF40" - 8 8 length of bzip2ed ctrl block - 16 8 length of bzip2ed diff block - 24 8 length of new file - File is - 0 32 Header - 32 ?? Bzip2ed ctrl block - ?? ?? Bzip2ed diff block - ?? ?? 
Bzip2ed extra block */ - Span header = stackalloc byte[HeaderSize]; - header[HeaderOffsetSig..].WritePackedLong(Signature); - - header[HeaderOffsetNewData..].WritePackedLong(newData.Length); - - var startPosition = output.Position; - output.Write(header); - - //backing for ctrl writes - Span buf = stackalloc byte[sizeof(long)]; - - //the memory allocated for the suffix array MUST be at least (n+1) - //this is only required for bsdiff, so we allocate it ourselves - //instead of using the ISuffixSort overloads that only require allocations of (n) - using var saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear); - - using var ctrlSink = new ArrayPoolBufferWriter(); - using var diffSink = new ArrayPoolBufferWriter(); - using var extraSink = new ArrayPoolBufferWriter(); + // check arguments + if (oldData == null) + throw new ArgumentNullException(nameof(oldData)); + if (newData == null) + throw new ArgumentNullException(nameof(newData)); + if (output == null) + throw new ArgumentNullException(nameof(output)); + if (suffixSort == null) + throw new ArgumentNullException(nameof(suffixSort)); + if (!output.CanSeek) + throw new ArgumentException("Output stream must be seekable.", nameof(output)); + if (!output.CanWrite) + throw new ArgumentException("Output stream must be writable.", nameof(output)); + + /* Header is + 0 8 "BSDIFF40" + 8 8 length of bzip2ed ctrl block + 16 8 length of bzip2ed diff block + 24 8 length of new file + File is + 0 32 Header + 32 ?? Bzip2ed ctrl block + ?? ?? Bzip2ed diff block + ?? ?? 
Bzip2ed extra block */ + Span header = stackalloc byte[HeaderSize]; + header[HeaderOffsetSig..].WritePackedLong(Signature); + + header[HeaderOffsetNewData..].WritePackedLong(newData.Length); + + var startPosition = output.Position; + output.Write(header); + + //backing for ctrl writes + Span buf = stackalloc byte[sizeof(long)]; + + //the memory allocated for the suffix array MUST be at least (n+1) + //this is only required for bsdiff, so we allocate it ourselves + //instead of using the ISuffixSort overloads that only require allocations of (n) + using var saOwner = MemoryOwner.Allocate(oldData.Length + 1, AllocationMode.Clear); + + using var ctrlSink = new ArrayPoolBufferWriter(); + using var diffSink = new ArrayPoolBufferWriter(); + using var extraSink = new ArrayPoolBufferWriter(); + { + using var ctrlEncStream = GetEncodingStream(ctrlSink.AsStream(), true); + using var diffEncStream = GetEncodingStream(diffSink.AsStream(), true); + using var extraEncStream = GetEncodingStream(extraSink.AsStream(), true); + + Span I = saOwner.Span; + suffixSort.Sort(oldData, I[..^1]); + + var scan = 0; + var pos = 0; + var len = 0; + var lastscan = 0; + var lastpos = 0; + var lastoffset = 0; + + // compute the differences, writing ctrl as we go + while (scan < newData.Length) { - using var ctrlEncStream = GetEncodingStream(ctrlSink.AsStream(), true); - using var diffEncStream = GetEncodingStream(diffSink.AsStream(), true); - using var extraEncStream = GetEncodingStream(extraSink.AsStream(), true); - - Span I = saOwner.Span; - suffixSort.Sort(oldData, I[..^1]); - - var scan = 0; - var pos = 0; - var len = 0; - var lastscan = 0; - var lastpos = 0; - var lastoffset = 0; - - // compute the differences, writing ctrl as we go - while (scan < newData.Length) + var oldscore = 0; + + for (var scsc = scan += len; scan < newData.Length; scan++) { - var oldscore = 0; + len = Search(I, oldData, newData[scan..], 0, oldData.Length, out pos); - for (var scsc = scan += len; scan < 
newData.Length; scan++) + for (; scsc < scan + len; scsc++) { - len = Search(I, oldData, newData[scan..], 0, oldData.Length, out pos); + if ((scsc + lastoffset < oldData.Length) && (oldData[scsc + lastoffset] == newData[scsc])) + oldscore++; + } - for (; scsc < scan + len; scsc++) - { - if ((scsc + lastoffset < oldData.Length) && (oldData[scsc + lastoffset] == newData[scsc])) - oldscore++; - } + if ((len == oldscore && len != 0) || (len > oldscore + 8)) + break; - if ((len == oldscore && len != 0) || (len > oldscore + 8)) - break; + if ((scan + lastoffset < oldData.Length) && (oldData[scan + lastoffset] == newData[scan])) + oldscore--; + } - if ((scan + lastoffset < oldData.Length) && (oldData[scan + lastoffset] == newData[scan])) - oldscore--; + if (len != oldscore || scan == newData.Length) + { + var s = 0; + var sf = 0; + var lenf = 0; + for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length);) + { + if (oldData[lastpos + i] == newData[lastscan + i]) + s++; + i++; + if (s * 2 - i > sf * 2 - lenf) + { + sf = s; + lenf = i; + } } - if (len != oldscore || scan == newData.Length) + var lenb = 0; + if (scan < newData.Length) { - var s = 0; - var sf = 0; - var lenf = 0; - for (var i = 0; (lastscan + i < scan) && (lastpos + i < oldData.Length);) + s = 0; + var sb = 0; + for (var i = 1; (scan >= lastscan + i) && (pos >= i); i++) { - if (oldData[lastpos + i] == newData[lastscan + i]) + if (oldData[pos - i] == newData[scan - i]) s++; - i++; - if (s * 2 - i > sf * 2 - lenf) + if (s * 2 - i > sb * 2 - lenb) { - sf = s; - lenf = i; + sb = s; + lenb = i; } } + } - var lenb = 0; - if (scan < newData.Length) + if (lastscan + lenf > scan - lenb) + { + var overlap = (lastscan + lenf) - (scan - lenb); + s = 0; + var ss = 0; + var lens = 0; + for (var i = 0; i < overlap; i++) { - s = 0; - var sb = 0; - for (var i = 1; (scan >= lastscan + i) && (pos >= i); i++) + if (newData[lastscan + lenf - overlap + i] == oldData[lastpos + lenf - overlap + i]) + s++; + if 
(newData[scan - lenb + i] == oldData[pos - lenb + i]) + s--; + if (s > ss) { - if (oldData[pos - i] == newData[scan - i]) - s++; - if (s * 2 - i > sb * 2 - lenb) - { - sb = s; - lenb = i; - } + ss = s; + lens = i + 1; } } - if (lastscan + lenf > scan - lenb) - { - var overlap = (lastscan + lenf) - (scan - lenb); - s = 0; - var ss = 0; - var lens = 0; - for (var i = 0; i < overlap; i++) - { - if (newData[lastscan + lenf - overlap + i] == oldData[lastpos + lenf - overlap + i]) - s++; - if (newData[scan - lenb + i] == oldData[pos - lenb + i]) - s--; - if (s > ss) - { - ss = s; - lens = i + 1; - } - } - - lenf += lens - overlap; - lenb -= lens; - } + lenf += lens - overlap; + lenb -= lens; + } - //write diff string - for (var i = 0; i < lenf; i++) - diffEncStream.WriteByte((byte)(newData[lastscan + i] - oldData[lastpos + i])); + //write diff string + for (var i = 0; i < lenf; i++) + diffEncStream.WriteByte((byte)(newData[lastscan + i] - oldData[lastpos + i])); - //write extra string - var extraLength = (scan - lenb) - (lastscan + lenf); - if (extraLength > 0) - extraEncStream.Write(newData.Slice(lastscan + lenf, extraLength)); + //write extra string + var extraLength = (scan - lenb) - (lastscan + lenf); + if (extraLength > 0) + extraEncStream.Write(newData.Slice(lastscan + lenf, extraLength)); - //write ctrl block - buf.WritePackedLong(lenf); - ctrlEncStream.Write(buf); + //write ctrl block + buf.WritePackedLong(lenf); + ctrlEncStream.Write(buf); - buf.WritePackedLong(extraLength); - ctrlEncStream.Write(buf); + buf.WritePackedLong(extraLength); + ctrlEncStream.Write(buf); - buf.WritePackedLong((pos - lenb) - (lastpos + lenf)); - ctrlEncStream.Write(buf); + buf.WritePackedLong((pos - lenb) - (lastpos + lenf)); + ctrlEncStream.Write(buf); - lastscan = scan - lenb; - lastpos = pos - lenb; - lastoffset = pos - scan; - } + lastscan = scan - lenb; + lastpos = pos - lenb; + lastoffset = pos - scan; } } + } - //write compressed ctrl data - output.Write(ctrlSink.WrittenSpan); - 
header[HeaderOffsetCtrl..].WritePackedLong(ctrlSink.WrittenCount); + //write compressed ctrl data + output.Write(ctrlSink.WrittenSpan); + header[HeaderOffsetCtrl..].WritePackedLong(ctrlSink.WrittenCount); - // write compressed diff data - output.Write(diffSink.WrittenSpan); - header[HeaderOffsetDiff..].WritePackedLong(diffSink.WrittenCount); + // write compressed diff data + output.Write(diffSink.WrittenSpan); + header[HeaderOffsetDiff..].WritePackedLong(diffSink.WrittenCount); - // write compressed extra data - output.Write(extraSink.WrittenSpan); + // write compressed extra data + output.Write(extraSink.WrittenSpan); - // seek to the beginning, write the header, then seek back to end - var endPosition = output.Position; - output.Position = startPosition; - output.Write(header); - output.Position = endPosition; - } + // seek to the beginning, write the header, then seek back to end + var endPosition = output.Position; + output.Position = startPosition; + output.Write(header); + output.Position = endPosition; + } - private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) - => left.SequenceCompareTo(right); + private static int CompareBytes(ReadOnlySpan left, ReadOnlySpan right) + => left.SequenceCompareTo(right); - private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan newData) + private static int MatchLength(ReadOnlySpan oldData, ReadOnlySpan newData) + { + int i; + for (i = 0; i < oldData.Length && i < newData.Length; i++) { - int i; - for (i = 0; i < oldData.Length && i < newData.Length; i++) - { - if (oldData[i] != newData[i]) - break; - } - - return i; + if (oldData[i] != newData[i]) + break; } - private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadOnlySpan newData, int start, int end, out int pos) + return i; + } + + private static int Search(ReadOnlySpan I, ReadOnlySpan oldData, ReadOnlySpan newData, int start, int end, out int pos) + { + while (true) { - while (true) + if (end - start < 2) { - if (end - start < 2) - 
{ - var x = MatchLength(oldData[I[start]..], newData); - var y = MatchLength(oldData[I[end]..], newData); + var x = MatchLength(oldData[I[start]..], newData); + var y = MatchLength(oldData[I[end]..], newData); - if (x > y) - { - pos = I[start]; - return x; - } - else - { - pos = I[end]; - return y; - } - } - - var midPoint = start + (end - start) / 2; - if (CompareBytes(oldData[I[midPoint]..], newData) < 0) + if (x > y) { - start = midPoint; + pos = I[start]; + return x; } else { - end = midPoint; + pos = I[end]; + return y; } } + + var midPoint = start + (end - start) / 2; + if (CompareBytes(oldData[I[midPoint]..], newData) < 0) + { + start = midPoint; + } + else + { + end = midPoint; + } } } } diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 852052e..7cd59db 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -3,167 +3,166 @@ using System; using System.IO; -namespace DeltaQ.BsDiff +namespace DeltaQ.BsDiff; + +public static class Patch { - public static class Patch + /// + /// Opens a BSDIFF-format patch at a specific position + /// + /// Zero-based offset into the patch + /// Length of the Stream from offset, or 0 for the rest of the patch + /// Readable, seekable stream with specified offset and length + public delegate Stream OpenPatchStream(long offset, long length); + + /// + /// Applies a BSDIFF-format patch to an original and produces the updated version + /// + /// Byte array of the original (older) data + /// Byte array of the BSDIFF-format patch data + /// Writable stream where the updated data will be written + public static void Apply(ReadOnlyMemory input, ReadOnlyMemory diff, Stream output) { - /// - /// Opens a BSDIFF-format patch at a specific position - /// - /// Zero-based offset into the patch - /// Length of the Stream from offset, or 0 for the rest of the patch - /// Readable, seekable stream with specified offset and length - public delegate Stream OpenPatchStream(long offset, long length); - - 
/// - /// Applies a BSDIFF-format patch to an original and produces the updated version - /// - /// Byte array of the original (older) data - /// Byte array of the BSDIFF-format patch data - /// Writable stream where the updated data will be written - public static void Apply(ReadOnlyMemory input, ReadOnlyMemory diff, Stream output) - { - var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); + var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); - // prepare to read three parts of the patch in parallel - using var inputStream = input.AsStream(); - ApplyInternal(newSize, inputStream, controlStream, diffStream, extraStream, output); - return; + // prepare to read three parts of the patch in parallel + using var inputStream = input.AsStream(); + ApplyInternal(newSize, inputStream, controlStream, diffStream, extraStream, output); + return; - Stream openPatchStream(long offset, long length) - => diff.Slice((int)offset, length > 0 ? (int)length : diff.Length - (int)offset).AsStream(); - } + Stream openPatchStream(long offset, long length) + => diff.Slice((int)offset, length > 0 ? 
(int)length : diff.Length - (int)offset).AsStream(); + } - /// - /// Applies a BSDIFF-format patch to an original and produces the updated version - /// - /// Readable, seekable stream of the original (older) data - /// - /// Writable stream where the updated data will be written - public static void Apply(Stream input, OpenPatchStream openPatchStream, Stream output) - { - var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); + /// + /// Applies a BSDIFF-format patch to an original and produces the updated version + /// + /// Readable, seekable stream of the original (older) data + /// + /// Writable stream where the updated data will be written + public static void Apply(Stream input, OpenPatchStream openPatchStream, Stream output) + { + var newSize = CreatePatchStreams(openPatchStream, out Stream controlStream, out Stream diffStream, out Stream extraStream); - // prepare to read three parts of the patch in parallel - ApplyInternal(newSize, input, controlStream, diffStream, extraStream, output); - } + // prepare to read three parts of the patch in parallel + ApplyInternal(newSize, input, controlStream, diffStream, extraStream, output); + } - private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stream ctrl, out Stream diff, out Stream extra) + private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stream ctrl, out Stream diff, out Stream extra) + { + // read header + long controlLength, diffLength, newSize; + using (var headerStream = openPatchStream(0, Diff.HeaderSize)) { - // read header - long controlLength, diffLength, newSize; - using (var headerStream = openPatchStream(0, Diff.HeaderSize)) - { - // check patch stream capabilities - if (!headerStream.CanRead) - throw new ArgumentException("Patch stream must be readable", nameof(openPatchStream)); - if (!headerStream.CanSeek) - throw new ArgumentException("Patch stream must be seekable", 
nameof(openPatchStream)); - - Span header = stackalloc byte[Diff.HeaderSize]; - headerStream.Read(header); - - // check for appropriate magic - var signature = header.ReadPackedLong(); - if (signature != Diff.Signature) - throw new InvalidOperationException("Corrupt patch"); + // check patch stream capabilities + if (!headerStream.CanRead) + throw new ArgumentException("Patch stream must be readable", nameof(openPatchStream)); + if (!headerStream.CanSeek) + throw new ArgumentException("Patch stream must be seekable", nameof(openPatchStream)); + + Span header = stackalloc byte[Diff.HeaderSize]; + headerStream.Read(header); + + // check for appropriate magic + var signature = header.ReadPackedLong(); + if (signature != Diff.Signature) + throw new InvalidOperationException("Corrupt patch"); + + // read lengths from header + controlLength = header[sizeof(long)..].ReadPackedLong(); + diffLength = header[(sizeof(long) * 2)..].ReadPackedLong(); + newSize = header[(sizeof(long) * 3)..].ReadPackedLong(); + + if (controlLength < 0 || diffLength < 0 || newSize < 0) + throw new InvalidOperationException("Corrupt patch"); + } - // read lengths from header - controlLength = header[sizeof(long)..].ReadPackedLong(); - diffLength = header[(sizeof(long) * 2)..].ReadPackedLong(); - newSize = header[(sizeof(long) * 3)..].ReadPackedLong(); + // prepare to read three parts of the patch in parallel + Stream + compressedControlStream = openPatchStream(Diff.HeaderSize, controlLength), + compressedDiffStream = openPatchStream(Diff.HeaderSize + controlLength, diffLength), + compressedExtraStream = openPatchStream(Diff.HeaderSize + controlLength + diffLength, 0); - if (controlLength < 0 || diffLength < 0 || newSize < 0) - throw new InvalidOperationException("Corrupt patch"); - } + // decompress each part (to read it) + ctrl = Diff.GetEncodingStream(compressedControlStream, false); + diff = Diff.GetEncodingStream(compressedDiffStream, false); + extra = 
Diff.GetEncodingStream(compressedExtraStream, false); - // prepare to read three parts of the patch in parallel - Stream - compressedControlStream = openPatchStream(Diff.HeaderSize, controlLength), - compressedDiffStream = openPatchStream(Diff.HeaderSize + controlLength, diffLength), - compressedExtraStream = openPatchStream(Diff.HeaderSize + controlLength + diffLength, 0); + return newSize; + } - // decompress each part (to read it) - ctrl = Diff.GetEncodingStream(compressedControlStream, false); - diff = Diff.GetEncodingStream(compressedDiffStream, false); - extra = Diff.GetEncodingStream(compressedExtraStream, false); + private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Stream diff, Stream extra, Stream output, int bufferSize = 0x1000) + { + if (!input.CanRead) + throw new ArgumentException("Input stream must be readable", nameof(input)); + if (!input.CanSeek) + throw new ArgumentException("Input stream must be seekable", nameof(input)); + if (!output.CanWrite) + throw new ArgumentException("Output stream must be writable", nameof(output)); + + using (ctrl) + using (diff) + using (extra) + { + using var diffBufferOwner = SpanOwner.Allocate(bufferSize); + using var inputBufferOwner = SpanOwner.Allocate(bufferSize); - return newSize; - } + Span ctrlBuffer = stackalloc byte[sizeof(long) * 3]; - private static void ApplyInternal(long newSize, Stream input, Stream ctrl, Stream diff, Stream extra, Stream output, int bufferSize = 0x1000) - { - if (!input.CanRead) - throw new ArgumentException("Input stream must be readable", nameof(input)); - if (!input.CanSeek) - throw new ArgumentException("Input stream must be seekable", nameof(input)); - if (!output.CanWrite) - throw new ArgumentException("Output stream must be writable", nameof(output)); - - using (ctrl) - using (diff) - using (extra) + var diffBuffer = diffBufferOwner.Span; + var inputBuffer = inputBufferOwner.Span; + while (output.Position < newSize) { - using var diffBufferOwner = 
SpanOwner.Allocate(bufferSize); - using var inputBufferOwner = SpanOwner.Allocate(bufferSize); + //read control data: + // set of triples (x,y,z) meaning + ctrl.Read(ctrlBuffer); + + // add x bytes from oldfile to x bytes from the diff block; + var addSize = ctrlBuffer.ReadPackedLong(); + // copy y bytes from the extra block; + var copySize = ctrlBuffer[sizeof(long)..].ReadPackedLong(); + // seek forwards in oldfile by z bytes; + var seekAmount = ctrlBuffer[(sizeof(long) * 2)..].ReadPackedLong(); + + // sanity-check + if (output.Position + addSize > newSize) + throw new InvalidOperationException("Corrupt patch"); - Span ctrlBuffer = stackalloc byte[sizeof(long) * 3]; + // read diff string in chunks - var diffBuffer = diffBufferOwner.Span; - var inputBuffer = inputBufferOwner.Span; - while (output.Position < newSize) + while (addSize > 0) { - //read control data: - // set of triples (x,y,z) meaning - ctrl.Read(ctrlBuffer); - - // add x bytes from oldfile to x bytes from the diff block; - var addSize = ctrlBuffer.ReadPackedLong(); - // copy y bytes from the extra block; - var copySize = ctrlBuffer[sizeof(long)..].ReadPackedLong(); - // seek forwards in oldfile by z bytes; - var seekAmount = ctrlBuffer[(sizeof(long) * 2)..].ReadPackedLong(); - - // sanity-check - if (output.Position + addSize > newSize) - throw new InvalidOperationException("Corrupt patch"); - - // read diff string in chunks - - while (addSize > 0) - { - var diffBytesRead = diff.Read(diffBuffer.SliceUpTo((int)addSize)); - var inputBytesRead = input.Read(inputBuffer.SliceUpTo((int)addSize)); - - if (inputBytesRead != diffBytesRead) - throw new InvalidOperationException("Corrupt patch"); + var diffBytesRead = diff.Read(diffBuffer.SliceUpTo((int)addSize)); + var inputBytesRead = input.Read(inputBuffer.SliceUpTo((int)addSize)); - // add old data to diff string - for (var i = 0; i < diffBytesRead; i++) - diffBuffer[i] += inputBuffer[i]; + if (inputBytesRead != diffBytesRead) + throw new 
InvalidOperationException("Corrupt patch"); - output.Write(diffBuffer[..diffBytesRead]); - addSize -= diffBytesRead; - } + // add old data to diff string + for (var i = 0; i < diffBytesRead; i++) + diffBuffer[i] += inputBuffer[i]; - // sanity-check - if (output.Position + copySize > newSize) - throw new InvalidOperationException("Corrupt patch"); + output.Write(diffBuffer[..diffBytesRead]); + addSize -= diffBytesRead; + } - // read extra string in chunks - while (copySize > 0) - { - var bytesRead = extra.Read(diffBuffer.SliceUpTo((int)copySize)); - output.Write(diffBuffer[..bytesRead]); - copySize -= bytesRead; - } + // sanity-check + if (output.Position + copySize > newSize) + throw new InvalidOperationException("Corrupt patch"); - // adjust position - input.Seek(seekAmount, SeekOrigin.Current); + // read extra string in chunks + while (copySize > 0) + { + var bytesRead = extra.Read(diffBuffer.SliceUpTo((int)copySize)); + output.Write(diffBuffer[..bytesRead]); + copySize -= bytesRead; } - } - output.Flush(); + // adjust position + input.Seek(seekAmount, SeekOrigin.Current); + } } + + output.Flush(); } } diff --git a/src/DeltaQ.BsDiff/SpanExtensions.cs b/src/DeltaQ.BsDiff/SpanExtensions.cs index b6b3a10..ccaf8e2 100644 --- a/src/DeltaQ.BsDiff/SpanExtensions.cs +++ b/src/DeltaQ.BsDiff/SpanExtensions.cs @@ -1,51 +1,47 @@ using System; -using System.Collections.Generic; -using System.IO; -using System.Runtime.CompilerServices; -namespace DeltaQ.BsDiff +namespace DeltaQ.BsDiff; + +internal static class SpanExtensions { - internal static class SpanExtensions + public static void WritePackedLong(this Span span, long y) { - public static void WritePackedLong(this Span span, long y) + // Write to highest index first so the JIT skips bounds checks on subsequent writes. + unchecked { - // Write to highest index first so the JIT skips bounds checks on subsequent writes. 
- unchecked + if (y < 0) { - if (y < 0) - { - y = -y; - span[7] = (byte)((y >> 56) | 0x80); - } - else - { - span[7] = (byte)(y >> 56); - } - - span[6] = (byte)(y >> 48); - span[5] = (byte)(y >> 40); - span[4] = (byte)(y >> 32); - span[3] = (byte)(y >> 24); - span[2] = (byte)(y >> 16); - span[1] = (byte)(y >> 8); - span[0] = (byte)y; + y = -y; + span[7] = (byte)((y >> 56) | 0x80); + } + else + { + span[7] = (byte)(y >> 56); } - } - - public static long ReadPackedLong(this Span span) - { - long y = span[7] & 0x7F; - y <<= 8; y += span[6]; - y <<= 8; y += span[5]; - y <<= 8; y += span[4]; - y <<= 8; y += span[3]; - y <<= 8; y += span[2]; - y <<= 8; y += span[1]; - y <<= 8; y += span[0]; - return (span[7] & 0x80) != 0 ? -y : y; + span[6] = (byte)(y >> 48); + span[5] = (byte)(y >> 40); + span[4] = (byte)(y >> 32); + span[3] = (byte)(y >> 24); + span[2] = (byte)(y >> 16); + span[1] = (byte)(y >> 8); + span[0] = (byte)y; } + } - public static Span SliceUpTo(this Span span, int max) => span[..Math.Min(span.Length, max)]; + public static long ReadPackedLong(this Span span) + { + long y = span[7] & 0x7F; + y <<= 8; y += span[6]; + y <<= 8; y += span[5]; + y <<= 8; y += span[4]; + y <<= 8; y += span[3]; + y <<= 8; y += span[2]; + y <<= 8; y += span[1]; + y <<= 8; y += span[0]; + + return (span[7] & 0x80) != 0 ? -y : y; } + + public static Span SliceUpTo(this Span span, int max) => span[..Math.Min(span.Length, max)]; } From 92b31a7df1487cba708f2b2db99c1fb10a875fbe Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 09:54:26 -0500 Subject: [PATCH 293/325] Break out BsDiff constants --- src/DeltaQ.BsDiff/Constants.cs | 13 +++++++++++++ src/DeltaQ.BsDiff/Diff.cs | 10 +--------- src/DeltaQ.BsDiff/Patch.cs | 19 ++++++++++--------- 3 files changed, 24 insertions(+), 18 deletions(-) create mode 100644 src/DeltaQ.BsDiff/Constants.cs diff --git a/src/DeltaQ.BsDiff/Constants.cs b/src/DeltaQ.BsDiff/Constants.cs new file mode 100644 index 0000000..38cde8e --- /dev/null +++ b/src/DeltaQ.BsDiff/Constants.cs @@ -0,0 +1,13 @@ +namespace DeltaQ.BsDiff; + +internal static class Constants +{ + public const int HeaderSize = 32; + + public const int HeaderOffsetSig = 0; + public const int HeaderOffsetCtrl = sizeof(long) * 1; + public const int HeaderOffsetDiff = sizeof(long) * 2; + public const int HeaderOffsetNewData = sizeof(long) * 3; + + public const long Signature = 0x3034464649445342; //"BSDIFF40" +} diff --git a/src/DeltaQ.BsDiff/Diff.cs b/src/DeltaQ.BsDiff/Diff.cs index f264263..571b8fb 100644 --- a/src/DeltaQ.BsDiff/Diff.cs +++ b/src/DeltaQ.BsDiff/Diff.cs @@ -7,18 +7,10 @@ using System.IO; namespace DeltaQ.BsDiff; +using static Constants; public static class Diff { - internal const int HeaderSize = 32; - - private const int HeaderOffsetSig = 0; - private const int HeaderOffsetCtrl = sizeof(long) * 1; - private const int HeaderOffsetDiff = sizeof(long) * 2; - private const int HeaderOffsetNewData = sizeof(long) * 3; - - internal const long Signature = 0x3034464649445342; //"BSDIFF40" - internal static Stream GetEncodingStream(Stream stream, bool output) => output ? 
new BZip2OutputStream(stream) diff --git a/src/DeltaQ.BsDiff/Patch.cs b/src/DeltaQ.BsDiff/Patch.cs index 7cd59db..97c73f4 100644 --- a/src/DeltaQ.BsDiff/Patch.cs +++ b/src/DeltaQ.BsDiff/Patch.cs @@ -4,6 +4,7 @@ using System.IO; namespace DeltaQ.BsDiff; +using static Constants; public static class Patch { @@ -52,7 +53,7 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre { // read header long controlLength, diffLength, newSize; - using (var headerStream = openPatchStream(0, Diff.HeaderSize)) + using (var headerStream = openPatchStream(0, HeaderSize)) { // check patch stream capabilities if (!headerStream.CanRead) @@ -60,18 +61,18 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre if (!headerStream.CanSeek) throw new ArgumentException("Patch stream must be seekable", nameof(openPatchStream)); - Span header = stackalloc byte[Diff.HeaderSize]; + Span header = stackalloc byte[HeaderSize]; headerStream.Read(header); // check for appropriate magic var signature = header.ReadPackedLong(); - if (signature != Diff.Signature) + if (signature != Signature) throw new InvalidOperationException("Corrupt patch"); // read lengths from header - controlLength = header[sizeof(long)..].ReadPackedLong(); - diffLength = header[(sizeof(long) * 2)..].ReadPackedLong(); - newSize = header[(sizeof(long) * 3)..].ReadPackedLong(); + controlLength = header[HeaderOffsetCtrl..].ReadPackedLong(); + diffLength = header[HeaderOffsetDiff..].ReadPackedLong(); + newSize = header[HeaderOffsetNewData..].ReadPackedLong(); if (controlLength < 0 || diffLength < 0 || newSize < 0) throw new InvalidOperationException("Corrupt patch"); @@ -79,9 +80,9 @@ private static long CreatePatchStreams(OpenPatchStream openPatchStream, out Stre // prepare to read three parts of the patch in parallel Stream - compressedControlStream = openPatchStream(Diff.HeaderSize, controlLength), - compressedDiffStream = openPatchStream(Diff.HeaderSize + controlLength, 
diffLength), - compressedExtraStream = openPatchStream(Diff.HeaderSize + controlLength + diffLength, 0); + compressedControlStream = openPatchStream(HeaderSize, controlLength), + compressedDiffStream = openPatchStream(HeaderSize + controlLength, diffLength), + compressedExtraStream = openPatchStream(HeaderSize + controlLength + diffLength, 0); // decompress each part (to read it) ctrl = Diff.GetEncodingStream(compressedControlStream, false); From bed648c10b47d4e6772bb14b93a72d5b14bdab9d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:23:33 -0500 Subject: [PATCH 294/325] Prepare DQ.SAIS package for RC --- .../DeltaQ.SuffixSorting.SAIS.csproj | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 4283305..f23cb95 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -4,12 +4,35 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.3.2 + 2.0.0 + DeltaQ implementation of the suffix array induced sort (SAIS) algorithm + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + latest + enable + + + true + true true snupkg + + + true + + + + + + + + + + From 5dd7c60dd01103ddefc4e29dcd5b00be15b7dbe8 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 10:24:01 -0500 Subject: [PATCH 295/325] Prepare DQ.LDSS package for RC --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index dba7790..d2435b6 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -4,13 +4,35 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.5.2 - latest - enable + 2.0.0 + DeltaQ implementation of the DivSufSort suffix sorting algorithm + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + + latest + enable + + + true + true true snupkg + + + true + + + + + + + + + + From 8dca589d4d08bd4ae24e6dce62569c078f4b2d70 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:27:28 -0500 Subject: [PATCH 296/325] Prepare DQ.SS.Abstractions package for RC --- .../DeltaQ.SuffixSorting.Abstractions.csproj | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 28873cf..2564f61 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -4,15 +4,28 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.4.1 + 2.0.0 + DeltaQ utility library containing abstractions for suffix sorting algorithms + https://github.com/jzebedee/deltaq dq.png RPL-1.5 + + latest + enable + + + true + true true snupkg - - + + + true + + + @@ -20,4 +33,8 @@ + + + + From e1f66f573df692803ffa00d95e3bb3a7ad8d1afb Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 10:52:19 -0500 Subject: [PATCH 297/325] Enable trimming for DQ.Utility.Memory --- src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 9816ca9..7b6bc24 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -12,6 +12,7 @@ latest enable + true true From dcd6f431aea3a3b47e6f3b0b35fba835353c2ff2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:52:38 -0500 Subject: [PATCH 298/325] Enable trimming for DQ.SS.Abstractions --- .../DeltaQ.SuffixSorting.Abstractions.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 2564f61..176a3e1 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -12,6 +12,7 @@ latest enable + true true From 3eaf1b3b00d9a1abaac09c3c3bf139baa272e510 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:52:51 -0500 Subject: [PATCH 299/325] Enable trimming for DQ.SS.SAIS --- src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index f23cb95..5ca7e37 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -12,6 +12,7 @@ latest enable + true true From 02820d5be0d5855f8528205a9df4b2ee14eafd4b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 10:53:06 -0500 Subject: [PATCH 300/325] Enable trimming for DQ.SS.LibDivSufSort --- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index d2435b6..3b27c55 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -12,6 +12,7 @@ latest enable + true true From f801596cb9769a2605a369a27c5a15a18f07c921 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:53:38 -0500 Subject: [PATCH 301/325] Prepare DQ.BsDiff package for RC --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 4e556f4..9a3d4ad 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -4,12 +4,36 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.3.0 + 2.0.0 + DeltaQ implementation of BSDIFF and BSPATCH compatible delta encoding + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + latest + enable + true + + + true + true true snupkg + + + true + + + + + + + + + + From 2e7ae3c8744a89ea64d5d0d81a4b3b1585e17fe8 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 10:53:55 -0500 Subject: [PATCH 302/325] Prepare DQ package for RC --- src/deltaq/deltaq.csproj | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/deltaq/deltaq.csproj b/src/deltaq/deltaq.csproj index 2748a7b..ea297a2 100644 --- a/src/deltaq/deltaq.csproj +++ b/src/deltaq/deltaq.csproj @@ -4,10 +4,36 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 0.2.1 + 2.0.0 + DeltaQ provides fast and portable delta encoding for .NET in 100% safe, managed code + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + latest + enable + true + + + true + true + true + snupkg + + + true + + + + + + + + + + From 863c30a508cd05246bd0c0af070146d9f14611f4 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 10:54:35 -0500 Subject: [PATCH 303/325] Prepare DQ.CLI package for RC --- .../DeltaQ.CommandLine.csproj | 38 +++++++++++++++++-- src/DeltaQ.CommandLine/Program.cs | 2 +- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 44c17d9..2a8fd5f 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -1,15 +1,47 @@  + dq Exe net6.0 DeltaQ jzebedee - 0.1.0 - dq - true + 2.0.0 + DeltaQ binary diff and patch tool + https://github.com/jzebedee/deltaq + dq.png + RPL-1.5 + + latest + enable + true + true + true + true + embedded + dq + true + + + true + true + true + snupkg + + + true + + + + + + + + + + diff --git a/src/DeltaQ.CommandLine/Program.cs b/src/DeltaQ.CommandLine/Program.cs index 88f18dd..737b5a4 100644 --- a/src/DeltaQ.CommandLine/Program.cs +++ b/src/DeltaQ.CommandLine/Program.cs @@ -12,7 +12,7 @@ }; app.HelpOption(HelpOptions); -app.VersionOption("--version", typeof(Program).Assembly.GetName().Version.ToString()); +app.VersionOption("--version", typeof(Program).Assembly.GetName().Version!.ToString()); //No 
args app.OnExecute(() => From d8dc4b66c8734f8ce147ddbb09dbc723ae767879 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 11:00:03 -0500 Subject: [PATCH 304/325] Disable SingleFile/R2R/Trim which won't be used as a tool Might set these up in a CI build to support downloading stand-alone binaries from GH releases --- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index 2a8fd5f..e865de2 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -1,7 +1,7 @@  - dq + Exe net6.0 DeltaQ @@ -14,11 +14,11 @@ latest enable - true - true - true - true - embedded + + + + + dq true From db1eb2c250cc9fb74bb01ece8fb23fe98ed5494d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 11:09:25 -0500 Subject: [PATCH 305/325] Add 3pn for DQ.SS.LDSS --- .../THIRD-PARTY-NOTICES.txt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt new file mode 100644 index 0000000..a8ee76c --- /dev/null +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt @@ -0,0 +1,22 @@ +DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. + +The attached notices are provided for information only. + +License notice for LibDivSufSort +------------------------------- + +Copyright (c) 2003 Yuta Mori. All rights reserved. +Licensed under the MIT license. + +Available at +https://raw.githubusercontent.com/y-256/libdivsufsort/master/LICENSE + + +License notice for divsufsort (Rust) +------------------------------- + +Copyright (c) 2019 Amos Wenger. 
All rights reserved. +Licensed under the MIT license. + +Available at +https://raw.githubusercontent.com/fasterthanlime/stringsearch/master/crates/divsufsort/LICENSE \ No newline at end of file From 69a3faaf816654cfa43e3f631442cb761a3916bb Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 11:24:37 -0500 Subject: [PATCH 306/325] Add 3pn to DQ.CLI --- .../THIRD-PARTY-NOTICES.txt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt diff --git a/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt new file mode 100644 index 0000000..a4bebe9 --- /dev/null +++ b/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt @@ -0,0 +1,21 @@ +DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. + +The attached notices are provided for information only. + +License notice for Humanizer.Core +------------------------------- + +Copyright (c) .NET Foundation and Contributors +Licensed under the MIT license. + +Available at +https://raw.githubusercontent.com/Humanizr/Humanizer/main/LICENSE + + +License notice for Microsoft.Extensions.CommandLineUtils +------------------------------- + +Licensed under the MICROSOFT .NET LIBRARY license. + +Available at +https://www.microsoft.com/web/webpi/eula/net_library_eula_enu.htm \ No newline at end of file From b24f4f3c3bcba50c5c800658cb0cc6474b63ab99 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 11:49:58 -0500 Subject: [PATCH 307/325] Update 3pns --- src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt | 2 +- src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt | 2 +- src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt | 2 +- src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt | 2 +- src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt index c2efaaf..5978ca0 100644 --- a/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.BsDiff/THIRD-PARTY-NOTICES.txt @@ -1,4 +1,4 @@ -DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. +DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. The attached notices are provided for information only. diff --git a/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt index a4bebe9..c79f6a2 100644 --- a/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.CommandLine/THIRD-PARTY-NOTICES.txt @@ -1,4 +1,4 @@ -DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. +DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. The attached notices are provided for information only. diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt index a8ee76c..6724cb3 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/THIRD-PARTY-NOTICES.txt @@ -1,4 +1,4 @@ -DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. 
+DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. The attached notices are provided for information only. diff --git a/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt index 3661273..faed743 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.SuffixSorting.SAIS/THIRD-PARTY-NOTICES.txt @@ -1,4 +1,4 @@ -DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. +DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. The attached notices are provided for information only. diff --git a/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt b/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt index 71fd1bf..36ca0a5 100644 --- a/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt +++ b/src/DeltaQ.Utility.Memory/THIRD-PARTY-NOTICES.txt @@ -1,4 +1,4 @@ -DeltaQ uses third-party libraries or other resources that may be distributed under a different licenese from DeltaQ. +DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. The attached notices are provided for information only. From 9f698da4e76512358aaddbbcbae8773a5ba7b07c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 11:50:16 -0500 Subject: [PATCH 308/325] Add license and readme to solution --- deltaq.sln | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deltaq.sln b/deltaq.sln index 4db531f..4dec870 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -38,6 +38,8 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{09BBE319-2E1C-4878-AA44-FCC730167792}" ProjectSection(SolutionItems) = preProject .editorconfig = .editorconfig + LICENSE.md = LICENSE.md + README.md = README.md EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{8B749DE9-2B73-49DE-912E-DE5E17ADA029}" From 523c65fbf1057176fff3433f433a2e74d79e911d Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 11:59:33 -0500 Subject: [PATCH 309/325] Update license --- LICENSE.md | 104 ++++------------------------------------------------- 1 file changed, 6 insertions(+), 98 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index 85c7f28..14643dd 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,103 +1,11 @@ -## Notes +## Third Party Notice -deltaq contains source code reproductions or derivatives from -Colin Percival's bsdiff and bspatch, -Logos Bible Software's bsdiff.net, and Yuta Mori's implementation of Suffix Array Induced Sort (SAIS). +DeltaQ uses third-party libraries or other resources that may be distributed under a different license than DeltaQ. -## deltaq license +Please review the third party notices distributed with DeltaQ for more information. -The MIT License (MIT) +## DeltaQ License -Copyright (c) 2014 J. Zebedee +Copyright © 2014-2021 J. 
Zebedee -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -## bsdiff and bspatch license - -Copyright 2003-2005 Colin Percival -All rights reserved - -Redistribution and use in source and binary forms, with or without -modification, are permitted providing that the following conditions -are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -## bsdiff.net license - -Copyright 2010 Logos Bible Software - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -## Yuta Mori's SAIS-CSharp license - -SAIS.cs for SAIS-CSharp -Copyright (c) 2010 Yuta Mori. All Rights Reserved. 
- -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +DeltaQ is released under the terms of the [Reciprocal Public License 1.5 (RPL-1.5)](https://www.opensource.org/licenses/rpl1.5.txt). From 5591e332c226bf727c5283f5eef891317619443c Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 12:23:20 -0500 Subject: [PATCH 310/325] Remove DQ.VCDIFF Don't worry, it's coming back later --- deltaq.sln | 7 ------- src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj | 10 ---------- 2 files changed, 17 deletions(-) delete mode 100644 src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj diff --git a/deltaq.sln b/deltaq.sln index 4dec870..0e9bfb0 100644 --- a/deltaq.sln +++ b/deltaq.sln @@ -15,8 +15,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.Abstra EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.SuffixSorting.SAIS", "src\DeltaQ.SuffixSorting.SAIS\DeltaQ.SuffixSorting.SAIS.csproj", "{0C1531C0-427B-42BE-B781-E83D7B377537}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.VCDiff", "src\DeltaQ.VCDiff\DeltaQ.VCDiff.csproj", "{C889CB97-5D73-4D53-8249-DD7BFD402475}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{8B14206D-43D5-4740-96BF-3772DC4C3A6B}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{03F00ECA-08B1-47A4-8ACE-4624E31741BA}" @@ -78,10 +76,6 @@ Global {0C1531C0-427B-42BE-B781-E83D7B377537}.Debug|Any CPU.Build.0 = Debug|Any CPU {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.ActiveCfg = Release|Any CPU {0C1531C0-427B-42BE-B781-E83D7B377537}.Release|Any CPU.Build.0 = Release|Any CPU - {C889CB97-5D73-4D53-8249-DD7BFD402475}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {C889CB97-5D73-4D53-8249-DD7BFD402475}.Debug|Any CPU.Build.0 = Debug|Any CPU - {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.ActiveCfg = Release|Any CPU - {C889CB97-5D73-4D53-8249-DD7BFD402475}.Release|Any CPU.Build.0 = Release|Any CPU {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Debug|Any CPU.Build.0 = Debug|Any CPU {279B6F7C-7FB0-42AA-8804-8FF64A990A9A}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -117,7 +111,6 @@ Global {C8834EFF-AB77-42D5-8EA2-0AAB88DFEDA1} 
= {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {D81A3696-DBC3-46EA-8CB4-C7C3FA96564B} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {0C1531C0-427B-42BE-B781-E83D7B377537} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} - {C889CB97-5D73-4D53-8249-DD7BFD402475} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} {279B6F7C-7FB0-42AA-8804-8FF64A990A9A} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {2D37444E-3C89-4E1E-A0E6-C009F205EA84} = {03F00ECA-08B1-47A4-8ACE-4624E31741BA} {96F1E46E-53CB-4463-82E2-0F81BEB87080} = {8B14206D-43D5-4740-96BF-3772DC4C3A6B} diff --git a/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj b/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj deleted file mode 100644 index ffd38c4..0000000 --- a/src/DeltaQ.VCDiff/DeltaQ.VCDiff.csproj +++ /dev/null @@ -1,10 +0,0 @@ - - - - net5.0;netstandard2.0 - DeltaQ - jzebedee - true - - - From f78f87b1dc0363f0bc567bcb040e959e66cee8ca Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 19:47:19 -0500 Subject: [PATCH 311/325] Fix casing --- deltaq.sln => DeltaQ.sln | 0 src/{deltaq/deltaq.csproj => DeltaQ/DeltaQ.csproj} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename deltaq.sln => DeltaQ.sln (100%) rename src/{deltaq/deltaq.csproj => DeltaQ/DeltaQ.csproj} (100%) diff --git a/deltaq.sln b/DeltaQ.sln similarity index 100% rename from deltaq.sln rename to DeltaQ.sln diff --git a/src/deltaq/deltaq.csproj b/src/DeltaQ/DeltaQ.csproj similarity index 100% rename from src/deltaq/deltaq.csproj rename to src/DeltaQ/DeltaQ.csproj From 52b60fb4ed77ac4d6116902200f5faf1ee712e8e Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 19:47:51 -0500 Subject: [PATCH 312/325] Fix casing in paths --- DeltaQ.sln | 2 +- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DeltaQ.sln b/DeltaQ.sln index 0e9bfb0..69e8903 100644 --- a/DeltaQ.sln +++ b/DeltaQ.sln @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.31825.309 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\deltaq\DeltaQ.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ", "src\DeltaQ\DeltaQ.csproj", "{CE1513B6-2F66-4E62-BDD1-0C41D4433A51}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeltaQ.Tests", "test\DeltaQ.Tests\DeltaQ.Tests.csproj", "{784B81AE-E39B-497B-90AE-AA7EC4B98E50}" EndProject diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index e865de2..f36a3e9 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -48,7 +48,7 @@ - + From 3bbf6ae402c824d4782d01058674a64657d14c48 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 19:55:47 -0500 Subject: [PATCH 313/325] Update CI --- .github/workflows/ci.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 62da666..bf2fcf9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,30 +3,30 @@ on: [push] jobs: build: runs-on: ubuntu-latest - strategy: - matrix: - dotnet-version: [ '6.0.x', '5.0.x', '3.1.x' ] steps: - uses: actions/checkout@v2 - name: Setup .NET SDK ${{ matrix.dotnet }} uses: actions/setup-dotnet@v1.7.2 with: - dotnet-version: ${{ matrix.dotnet-version }} + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x - name: Install dependencies run: dotnet restore - name: Build run: dotnet build --no-restore --configuration Release - name: Test - run: dotnet test --no-restore --no-build --configuration Release --logger trx --results-directory "TestResults-${{ matrix.dotnet-version }}" + run: dotnet test --no-restore --no-build --configuration Release --logger trx --results-directory "TestResults" - name: Upload test results uses: actions/upload-artifact@v2 with: - name: dotnet-results-${{ matrix.dotnet-version }} - path: TestResults-${{ matrix.dotnet-version }} + name: TestResults + path: TestResults # Use always() to always run this step to publish test results when there are test failures if: ${{ always() }} - name: Pack - run: dotnet pack --no-restore --no-build --version-suffix CI-$GITHUB_RUN_ID --output pkg + run: dotnet pack --no-restore --configuration Release --version-suffix CI-$GITHUB_RUN_ID --output pkg - name: Upload package uses: actions/upload-artifact@v2 with: From 1780b2263aa42fbf5cb7336a94ce52c0f334545b Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 19:58:03 -0500 Subject: [PATCH 314/325] Update CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf2fcf9..15b0242 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Setup .NET SDK ${{ matrix.dotnet }} + - name: Setup .NET SDKs uses: actions/setup-dotnet@v1.7.2 with: dotnet-version: | From 07949d41329a912eb59a247302699b0eb6bd62dd Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 20:00:24 -0500 Subject: [PATCH 315/325] Update CI --- .github/workflows/ci.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15b0242..6344724 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,13 +5,18 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Setup .NET SDKs - uses: actions/setup-dotnet@v1.7.2 + - name: Setup .NET SDK - 6.0.x + uses: actions/setup-dotnet@v1 with: - dotnet-version: | - 3.1.x - 5.0.x - 6.0.x + dotnet-version: '6.0.x' + - name: Setup .NET SDK - 5.0.x + uses: actions/setup-dotnet@v1 + with: + dotnet-version: '5.0.x' + - name: Setup .NET SDK - 3.1.x + uses: actions/setup-dotnet@v1 + with: + dotnet-version: '3.1.x' - name: Install dependencies run: dotnet restore - name: Build From e22bb71929afe4c4ccea5696de3901a94338ee5f Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 20:09:55 -0500 Subject: [PATCH 316/325] Update CI --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6344724..d62777f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,8 @@ name: CI build-test-pack on: [push] jobs: build: - runs-on: ubuntu-latest + # In order to run netframework test targets + runs-on: windows-latest steps: - uses: actions/checkout@v2 - name: Setup .NET SDK - 6.0.x From 2db7bd8097a8f53f1643d57f01ee63d8a305bd69 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Wed, 29 Dec 2021 20:18:16 -0500 Subject: [PATCH 317/325] Disable packing in DQ.Benchmarks --- bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj index a3ca878..9a36464 100644 --- a/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj +++ b/bench/DeltaQ.Benchmarks/DeltaQ.Benchmarks.csproj @@ -7,6 +7,7 @@ enable enable latest + false pdbonly true From c0e9ee0279060878dfd3b24ed68d2f1899658054 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Wed, 29 Dec 2021 20:25:57 -0500 Subject: [PATCH 318/325] Use bash shell even on windows runner --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d62777f..9dc8752 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,10 +20,13 @@ jobs: dotnet-version: '3.1.x' - name: Install dependencies run: dotnet restore + shell: bash - name: Build run: dotnet build --no-restore --configuration Release + shell: bash - name: Test run: dotnet test --no-restore --no-build --configuration Release --logger trx --results-directory "TestResults" + shell: bash - name: Upload test results uses: actions/upload-artifact@v2 with: @@ -33,8 +36,9 @@ jobs: if: ${{ always() }} - name: Pack run: dotnet pack --no-restore --configuration Release --version-suffix CI-$GITHUB_RUN_ID --output pkg + shell: bash - name: Upload package uses: actions/upload-artifact@v2 with: - name: DeltaQ-$GITHUB_RUN_ID + name: DeltaQ-${{ env.GITHUB_RUN_ID }} path: pkg/*.* From b7d61d0a579e246fba577bce12a651b722fc96a2 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 30 Dec 2021 12:36:24 -0500 Subject: [PATCH 319/325] Remove unnecessary check in DQ.LDSS.Sort --- src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs index 9a87c3f..ad21e4d 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/LibDivSufSort.cs @@ -13,7 +13,7 @@ public IMemoryOwner Sort(ReadOnlySpan textBuffer) { var owner = MemoryOwner.Allocate(textBuffer.Length); - Sort(textBuffer, suffixBuffer: owner.Span); + DivSufSort.divsufsort(textBuffer, owner.Span); return owner; } From 37f8ada4bf06992f3d11afa66a92663be577b08e Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 30 Dec 2021 12:36:39 -0500 Subject: [PATCH 320/325] Update readme with CLI instructions --- README.md | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 425ac3f..82bb690 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ # deltaq logo DeltaQ -Fast and portable delta encoding for .NET. - -## About - -DeltaQ provides fast and portable delta encoding for .NET in 100% safe, managed code. +Fast and portable delta encoding for .NET in 100% safe, managed code. DeltaQ is available for use as a library in .NET and .NET Framework, and as a cross-platform command-line tool, `dq`, which can be used to perform delta operations (similar to `bsdiff` or `xdelta`). @@ -12,17 +8,54 @@ DeltaQ is available for use as a library in .NET and .NET Framework, and as a cr ### `dq` command-line tool -`> ` `dotnet tool install DeltaQ.CLI -g` +`> dotnet tool install DeltaQ.CommandLine -g` ### `DeltaQ` library -`> ` `dotnet add package DeltaQ` +`> dotnet add package DeltaQ` ## Usage ### `dq` command-line tool -TBW +#### Create a binary delta (diff) with BsDiff + +`dq bsdiff <oldfile> <newfile> <deltafile>` + +Here's an example of `dq` creating a `bsdiff` delta for patching file `app_v1.exe` into `app_v2.exe`: +``` +> ls -sh +total 32M +16M app_v1.exe 17M app_v2.exe + +> dq bsdiff app_v1.exe app_v2.exe v1_to_v2.delta +Generating BsDiff delta between +Old file: "app_v1.exe" +New file: "app_v2.exe" + +Delta file: "v1_to_v2.delta" +Delta size: 4.28 MB (13.49%) +``` + +#### Apply a binary delta (patch) with BsDiff + +`dq bspatch <oldfile> <deltafile> <newfile>` + +Instead of distributing the large `app_v2.exe` when it's time to upgrade, `dq` can recreate it by applying the much smaller delta file `v1_to_v2.delta` to the original `app_v1.exe`: + +``` +> dq bspatch app_v1.exe v1_to_v2.delta generated_app_v2.exe +Applying BsDiff delta between +Old file: "app_v1.exe" +Delta file: "v1_to_v2.delta" + +New file: "generated_app_v2.exe" +``` +``` +> 
sha256sum app_v2.exe generated_app_v2.exe +fab165a6e604dc7f9265d13013b6fb06319faec4eaa251a8a6d74a7e30e38dc6 app_v2.exe +fab165a6e604dc7f9265d13013b6fb06319faec4eaa251a8a6d74a7e30e38dc6 generated_app_v2.exe +``` ### `DeltaQ` library From 19254281defb64f9012ecd7777540d8bc5bf8f31 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 30 Dec 2021 13:04:21 -0500 Subject: [PATCH 321/325] Set AssemblyVersion and VersionPrefix instead of Version --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 3 ++- src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 3 ++- .../DeltaQ.SuffixSorting.Abstractions.csproj | 3 ++- .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 3 ++- src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj | 3 ++- src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj | 3 ++- src/DeltaQ/DeltaQ.csproj | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 9a3d4ad..397c337 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ implementation of BSDIFF and BSPATCH compatible delta encoding https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index f36a3e9..a975a81 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -6,7 +6,8 @@ net6.0 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ binary diff and patch tool https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 176a3e1..37bf6a6 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ 
b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ utility library containing abstractions for suffix sorting algorithms https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index 3b27c55..df8cc2b 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ implementation of the DivSufSort suffix sorting algorithm https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 5ca7e37..1f3517f 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ implementation of the suffix array induced sort (SAIS) algorithm https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 7b6bc24..1867088 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ utility library to support buffer and memory management https://github.com/jzebedee/deltaq dq.png diff --git a/src/DeltaQ/DeltaQ.csproj b/src/DeltaQ/DeltaQ.csproj index ea297a2..9930c3f 100644 --- a/src/DeltaQ/DeltaQ.csproj +++ 
b/src/DeltaQ/DeltaQ.csproj @@ -4,7 +4,8 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0 + 2.0.0.0 + 2.0.0 DeltaQ provides fast and portable delta encoding for .NET in 100% safe, managed code https://github.com/jzebedee/deltaq dq.png From 4d22c8ca25af5774ac0704b3c564ca6ade64c348 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 30 Dec 2021 13:04:56 -0500 Subject: [PATCH 322/325] Add no-build back to CI pack step --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9dc8752..9767b5b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,10 +35,10 @@ jobs: # Use always() to always run this step to publish test results when there are test failures if: ${{ always() }} - name: Pack - run: dotnet pack --no-restore --configuration Release --version-suffix CI-$GITHUB_RUN_ID --output pkg + run: dotnet pack --no-restore --no-build --configuration Release --version-suffix CI-$GITHUB_RUN_ID --output pkg shell: bash - name: Upload package uses: actions/upload-artifact@v2 with: - name: DeltaQ-${{ env.GITHUB_RUN_ID }} + name: DeltaQ-$GITHUB_RUN_ID path: pkg/*.* From 1a0cca5fd4c9c3fb683bf2161e43c534ad7b0df6 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 30 Dec 2021 13:15:35 -0500 Subject: [PATCH 323/325] Remove AssemblyVersion It should get set from Version, which is built from VersionPrefix + VersionSuffix --- src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj | 1 - src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj | 1 - .../DeltaQ.SuffixSorting.Abstractions.csproj | 1 - .../DeltaQ.SuffixSorting.LibDivSufSort.csproj | 1 - src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj | 1 - src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj | 1 - src/DeltaQ/DeltaQ.csproj | 1 - 7 files changed, 7 deletions(-) diff --git a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj index 397c337..469ca20 100644 --- a/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj +++ b/src/DeltaQ.BsDiff/DeltaQ.BsDiff.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ implementation of BSDIFF and BSPATCH compatible delta encoding https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj index a975a81..ea63399 100644 --- a/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj +++ b/src/DeltaQ.CommandLine/DeltaQ.CommandLine.csproj @@ -6,7 +6,6 @@ net6.0 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ binary diff and patch tool https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj index 37bf6a6..469f80e 100644 --- a/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj +++ b/src/DeltaQ.SuffixSorting.Abstractions/DeltaQ.SuffixSorting.Abstractions.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ utility library containing abstractions for suffix sorting algorithms https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj 
b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj index df8cc2b..f29257d 100644 --- a/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj +++ b/src/DeltaQ.SuffixSorting.LibDivSufSort/DeltaQ.SuffixSorting.LibDivSufSort.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ implementation of the DivSufSort suffix sorting algorithm https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj index 1f3517f..a263cb2 100644 --- a/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj +++ b/src/DeltaQ.SuffixSorting.SAIS/DeltaQ.SuffixSorting.SAIS.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ implementation of the suffix array induced sort (SAIS) algorithm https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj index 1867088..c1ee065 100644 --- a/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj +++ b/src/DeltaQ.Utility.Memory/DeltaQ.Utility.Memory.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ utility library to support buffer and memory management https://github.com/jzebedee/deltaq diff --git a/src/DeltaQ/DeltaQ.csproj b/src/DeltaQ/DeltaQ.csproj index 9930c3f..3474ffc 100644 --- a/src/DeltaQ/DeltaQ.csproj +++ b/src/DeltaQ/DeltaQ.csproj @@ -4,7 +4,6 @@ netstandard2.0;netstandard2.1 DeltaQ jzebedee - 2.0.0.0 2.0.0 DeltaQ provides fast and portable delta encoding for .NET in 100% safe, managed code https://github.com/jzebedee/deltaq From 926567e77e735cf9c440e8ad5244c9c8b1bd3a09 Mon Sep 17 00:00:00 2001 From: "J. 
Zebedee" Date: Thu, 30 Dec 2021 13:15:49 -0500 Subject: [PATCH 324/325] Rename pkg artifact in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9767b5b..9dc6b71 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,5 +40,5 @@ jobs: - name: Upload package uses: actions/upload-artifact@v2 with: - name: DeltaQ-$GITHUB_RUN_ID + name: DeltaQ-pkg path: pkg/*.* From 66ced17bdb5bec5811bb680d265846253de72421 Mon Sep 17 00:00:00 2001 From: "J. Zebedee" Date: Thu, 30 Dec 2021 16:27:50 -0500 Subject: [PATCH 325/325] Update readme --- README.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 82bb690..b8a7cbc 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,24 @@ Fast and portable delta encoding for .NET in 100% safe, managed code. DeltaQ is available for use as a library in .NET and .NET Framework, and as a cross-platform command-line tool, `dq`, which can be used to perform delta operations (similar to `bsdiff` or `xdelta`). +## Support + +Discussion and technical support is available on Discord. 
+ +[![Discord](https://img.shields.io/discord/359127425558249482)](https://discord.gg/FkRPyz6kcD) + ## Installing ### `dq` command-line tool +[![DeltaQ.CommandLine nuget package](https://img.shields.io/nuget/v/DeltaQ.CommandLine.svg?style=flat)](https://www.nuget.org/packages/DeltaQ.CommandLine) + `> dotnet tool install DeltaQ.CommandLine -g` ### `DeltaQ` library +[![DeltaQ nuget package](https://img.shields.io/nuget/v/DeltaQ.svg?style=flat)](https://www.nuget.org/packages/DeltaQ) + `> dotnet add package DeltaQ` ## Usage @@ -59,4 +69,44 @@ fab165a6e604dc7f9265d13013b6fb06319faec4eaa251a8a6d74a7e30e38dc6 generated_app_ ### `DeltaQ` library -TBW \ No newline at end of file +The `DeltaQ` package contains all currently supported delta encoding and suffix sorting providers, for use in your own .NET projects. + +#### Example: bsdiff and bspatch files + +```cs +using System.IO; +using DeltaQ.BsDiff; +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.LibDivSufSort; + +void MakeDelta() { + var oldData = File.ReadAllBytes("oldfile.txt"); + var newData = File.ReadAllBytes("newfile.txt"); + using var outStream = File.Create("old_to_new.delta"); + ISuffixSort suffixSorter = new LibDivSufSort(); + + Diff.Create(oldData, newData, outStream, suffixSorter); +} + +void UseDelta() { + var oldData = File.ReadAllBytes("oldfile.txt"); + var deltaData = File.ReadAllBytes("old_to_new.delta"); + using var outStream = File.Create("generated_newfile.txt"); + + Patch.Apply(oldData, deltaData, outStream); +} +``` + +#### Example: Suffix sorting with LibDivSufSort + +```cs +using DeltaQ.SuffixSorting; +using DeltaQ.SuffixSorting.LibDivSufSort; + +ISuffixSort suffixSorter = new LibDivSufSort(); + +ReadOnlySpan<byte> text = new byte[] { 1, 2, 3, 4 }; +using var ownedSuffixArray = suffixSorter.Sort(text); +ReadOnlySpan<int> sortedSuffixes = ownedSuffixArray.Memory.Span; +``` +