From 18f59b1b8ca37c5b1481672534263051785b5925 Mon Sep 17 00:00:00 2001 From: Leandro Fernandes Date: Mon, 6 Nov 2023 19:48:49 -0300 Subject: [PATCH] update (#16) --- source/Benchmarks/Benchmarks.csproj | 31 ++--- source/Benchmarks/CsvDataBinderBenchmarks.cs | 74 ++++++++++-- source/Benchmarks/CsvSum.cs | 35 +++++- source/Benchmarks/CsvWriterBenchmarks.cs | 113 ++++--------------- 4 files changed, 140 insertions(+), 113 deletions(-) diff --git a/source/Benchmarks/Benchmarks.csproj b/source/Benchmarks/Benchmarks.csproj index 5416461..7a5a21d 100644 --- a/source/Benchmarks/Benchmarks.csproj +++ b/source/Benchmarks/Benchmarks.csproj @@ -7,7 +7,8 @@ - + + @@ -33,7 +34,7 @@ - + @@ -58,9 +59,9 @@ - + - + @@ -70,16 +71,16 @@ - - - - - ix - - - + + + + + ix + + + diff --git a/source/Benchmarks/CsvDataBinderBenchmarks.cs b/source/Benchmarks/CsvDataBinderBenchmarks.cs index cc49fc5..bb595f1 100644 --- a/source/Benchmarks/CsvDataBinderBenchmarks.cs +++ b/source/Benchmarks/CsvDataBinderBenchmarks.cs @@ -1,10 +1,12 @@ -using BenchmarkDotNet.Attributes; +using Ben.Collections.Specialized; +using BenchmarkDotNet.Attributes; using Cesil; using CsvHelper.Configuration; using Dapper; using RecordParser.Builders.Reader; +using RecordParser.Extensions; +using RecordParser.Parsers; using System; -using System.Data; using System.Data.Common; using System.Globalization; using System.Linq; @@ -73,10 +75,9 @@ public void TinyCsvManual() } } - [Benchmark] - public async Task RecordParserAsync() + private static IVariableLengthReader BuildReader(bool pooled) { - var parser = new VariableLengthReaderSequentialBuilder() + var builder = new VariableLengthReaderSequentialBuilder() .Map(x => x.Region) .Map(x => x.Country) .Map(x => x.ItemType) @@ -90,9 +91,18 @@ public async Task RecordParserAsync() .Map(x => x.UnitCost) .Map(x => x.TotalRevenue) .Map(x => x.TotalCost) - .Map(x => x.TotalProfit) - .Build(",", CultureInfo.InvariantCulture);//, () => r); + .Map(x => x.TotalProfit); + + if (pooled) + builder.DefaultTypeConvert(new InternPool().Intern); + + return builder.Build(",", CultureInfo.InvariantCulture); + } + [Benchmark] + public async Task RecordParserAsync_Manual() + { + var parser = BuildReader(pooled: true); using var stream = TestData.GetUtf8Stream(); var records = RecordParserSupport.ProcessFile(stream, parser.Parse); await foreach (var record in records) @@ -101,6 +111,56 @@ public async Task RecordParserAsync() } } + [Benchmark] + [Arguments(true)] + [Arguments(false)] + public void RecordParser_Native_Sequential(bool pooled) + { + var tr = TestData.GetTextReader(); + var parser = BuildReader(pooled); + var options = new VariableLengthReaderOptions + { + HasHeader = true, + ContainsQuotedFields = false, + ParallelismOptions = new () + { + Enabled = false, + } + }; + + var records = tr.ReadRecords(parser, options); + foreach (var record in records) + { + + } + } + + [Benchmark] + [Arguments(true)] + [Arguments(false)] + public void RecordParser_Native_Parallel(bool ordered) + { + var tr = TestData.GetTextReader(); + var parser = BuildReader(pooled: false); + var options = new VariableLengthReaderOptions + { + HasHeader = true, + ContainsQuotedFields = false, + ParallelismOptions = new () + { + Enabled = true, + EnsureOriginalOrdering = ordered, + MaxDegreeOfParallelism = 4, + } + }; + + var records = tr.ReadRecords(parser, options); + foreach (var record in records) + { + + } + } + [Benchmark] public void SylvanData() { diff --git a/source/Benchmarks/CsvSum.cs b/source/Benchmarks/CsvSum.cs index 2dfb7d6..35dbf68 100644 --- a/source/Benchmarks/CsvSum.cs +++ b/source/Benchmarks/CsvSum.cs @@ -1,10 +1,10 @@ using BenchmarkDotNet.Attributes; using nietras.SeparatedValues; -using System.Buffers; +using RecordParser.Builders.Reader; +using RecordParser.Extensions; using System.Data; using System.Globalization; - namespace Benchmarks; [MemoryDiagnoser] @@ -26,6 +26,37 @@ public decimal SylvanData() return a; } + [Benchmark] + [Arguments(true)] + [Arguments(false)] + public decimal RecordParser(bool parallel) + { + var parser = new VariableLengthReaderBuilder() + .Map(x => x, indexColumn: 13) + .Build(",", CultureInfo.InvariantCulture); + + var options = new VariableLengthReaderOptions + { + HasHeader = true, + ContainsQuotedFields = false, + ParallelismOptions = new () + { + Enabled = parallel, + MaxDegreeOfParallelism = 4, + EnsureOriginalOrdering = false + } + }; + + var a = 0m; + using var tr = TestData.GetTextReader(); + foreach (var profit in tr.ReadRecords(parser, options)) + { + a += profit; + } + + return a; + } + [Benchmark] public decimal SepCsv() { diff --git a/source/Benchmarks/CsvWriterBenchmarks.cs b/source/Benchmarks/CsvWriterBenchmarks.cs index adeffae..e6d80ff 100644 --- a/source/Benchmarks/CsvWriterBenchmarks.cs +++ b/source/Benchmarks/CsvWriterBenchmarks.cs @@ -1,5 +1,7 @@ using BenchmarkDotNet.Attributes; using CsvHelper.Configuration; +using RecordParser.Extensions; +using RecordParser.Parsers; using Sylvan.Data; using Sylvan.Data.Csv; using System; @@ -68,13 +70,8 @@ public void NaiveBroken() } } - [Benchmark] - public void RecordParserParallelX() + private static IVariableLengthWriter BuildWriter() { - using var tw = GetWriter(); - // I don't see a way to use this library without a `T`, so can't use DbDataReader directly. - var items = GetRecords(); - var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder(); builder.Map(x => x.Region); builder.Map(x => x.Country); @@ -91,7 +88,16 @@ public void RecordParserParallelX() builder.Map(x => x.TotalCost); builder.Map(x => x.TotalProfit); - var csv = builder.Build(","); + return builder.Build(","); + } + + [Benchmark] + public void RecordParser_Parallel_Manual() + { + using var tw = GetWriter(); + // I don't see a way to use this library without a `T`, so can't use DbDataReader directly. + var items = GetRecords(); + var csv = BuildWriter(); var parallelism = 4; var buffers = Enumerable @@ -137,95 +143,24 @@ public void RecordParserParallelX() } [Benchmark] - public async Task RecordParserAsync() - { - using var tw = GetWriter(); - // I don't see a way to use this library without a `T`, so can't use DbDataReader directly. - var items = GetRecords(); - - var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder(); - builder.Map(x => x.Region); - builder.Map(x => x.Country); - builder.Map(x => x.ItemType); - builder.Map(x => x.SalesChannel); - builder.Map(x => x.OrderPriority); - builder.Map(x => x.OrderDate); - builder.Map(x => x.OrderId); - builder.Map(x => x.ShipDate); - builder.Map(x => x.UnitsSold); - builder.Map(x => x.UnitPrice); - builder.Map(x => x.UnitCost); - builder.Map(x => x.TotalRevenue); - builder.Map(x => x.TotalCost); - builder.Map(x => x.TotalProfit); - - var csv = builder.Build(","); - - var charsWritten = 0; - var pow = 8; - var buffer = ArrayPool.Shared.Rent((int)Math.Pow(2, pow)); - foreach (var item in items) - { - retry: - - if (csv.TryFormat(item, buffer, out charsWritten)) - { - await tw.WriteLineAsync(buffer, 0, charsWritten); - } - else - { - ArrayPool.Shared.Return(buffer); - pow++; - buffer = ArrayPool.Shared.Rent((int)Math.Pow(2, pow)); - goto retry; - } - } - } - - [Benchmark] - public void RecordParser() + [Arguments(false, null)] + [Arguments(true, true)] + [Arguments(true, false)] + public void RecordParser_Native(bool parallel, bool? asOrdered) { using var tw = GetWriter(); // I don't see a way to use this library without a `T`, so can't use DbDataReader directly. var items = GetRecords(); + var csv = BuildWriter(); - var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder(); - builder.Map(x => x.Region); - builder.Map(x => x.Country); - builder.Map(x => x.ItemType); - builder.Map(x => x.SalesChannel); - builder.Map(x => x.OrderPriority); - builder.Map(x => x.OrderDate); - builder.Map(x => x.OrderId); - builder.Map(x => x.ShipDate); - builder.Map(x => x.UnitsSold); - builder.Map(x => x.UnitPrice); - builder.Map(x => x.UnitCost); - builder.Map(x => x.TotalRevenue); - builder.Map(x => x.TotalCost); - builder.Map(x => x.TotalProfit); - - var csv = builder.Build(","); - - var charsWritten = 0; - var pow = 8; - var buffer = ArrayPool.Shared.Rent((int)Math.Pow(2, pow)); - foreach (var item in items) + var options = new ParallelismOptions { - retry: + Enabled = parallel, + MaxDegreeOfParallelism = 4, + EnsureOriginalOrdering = asOrdered ?? true + }; - if (csv.TryFormat(item, buffer, out charsWritten)) - { - tw.WriteLine(buffer, 0, charsWritten); - } - else - { - ArrayPool.Shared.Return(buffer); - pow++; - buffer = ArrayPool.Shared.Rent((int)Math.Pow(2, pow)); - goto retry; - } - } + tw.WriteRecords(items, csv.TryFormat, options); } [Benchmark]