Skip to content

Commit

Permalink
update (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
leandromoh authored Nov 6, 2023
1 parent 4558c5d commit 18f59b1
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 113 deletions.
31 changes: 16 additions & 15 deletions source/Benchmarks/Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="Ben.StringIntern" Version="0.1.8" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="Csv" Version="2.0.93" />
<PackageReference Include="CsvHelper" Version="30.0.1" />
<PackageReference Include="CsvTextFieldParser" Version="1.2.2" />
Expand All @@ -33,7 +34,7 @@
<PackageReference Include="NReco.Csv" Version="1.0.2" />
<PackageReference Include="FSharp.Data" Version="6.2.0" />
<PackageReference Include="Microsoft.VisualBasic" Version="10.4.0-preview.18571.3" />
<PackageReference Include="RecordParser" Version="1.3.0" />
<PackageReference Include="RecordParser" Version="2.1.0" />
<PackageReference Include="SoftCircuits.CsvParser" Version="4.1.0" />
<PackageReference Include="Aspose.Cells" Version="23.8.0" />
<PackageReference Include="ClosedXML" Version="0.102.1" />
Expand All @@ -58,9 +59,9 @@
<PackageReference Include="TinyCsvParser" Version="2.7.0" />
<PackageReference Include="Cesil" Version="0.9.0" />

<PackageReference Include="Sep" Version="0.2.2" />
<PackageReference Include="Sep" Version="0.2.2" />

<PackageReference Include="XlsxHelper" Version="2.0.0" />
<PackageReference Include="XlsxHelper" Version="2.0.0" />

</ItemGroup>

Expand All @@ -70,16 +71,16 @@
</None>
</ItemGroup>

<!--
fix some issue with IAsyncEnumerable resolving to the wrong assembly
https://github.com/grpc/grpc-dotnet/issues/329
-->
<Target Name="ChangeAliasesOfReactiveExtensions" BeforeTargets="FindReferenceAssembliesForReferences;ResolveReferences">
<ItemGroup>
<ReferencePath Condition="'%(FileName)' == 'System.Interactive.Async'">
<Aliases>ix</Aliases>
</ReferencePath>
</ItemGroup>
</Target>
<!--
Work around IAsyncEnumerable resolving to the wrong assembly by giving the
System.Interactive.Async reference the extern alias 'ix'.
See https://github.com/grpc/grpc-dotnet/issues/329
-->
<Target Name="ChangeAliasesOfReactiveExtensions" BeforeTargets="FindReferenceAssembliesForReferences;ResolveReferences">
<ItemGroup>
<ReferencePath Condition="'%(FileName)' == 'System.Interactive.Async'">
<Aliases>ix</Aliases>
</ReferencePath>
</ItemGroup>
</Target>

</Project>
74 changes: 67 additions & 7 deletions source/Benchmarks/CsvDataBinderBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
using BenchmarkDotNet.Attributes;
using Ben.Collections.Specialized;
using BenchmarkDotNet.Attributes;
using Cesil;
using CsvHelper.Configuration;
using Dapper;
using RecordParser.Builders.Reader;
using RecordParser.Extensions;
using RecordParser.Parsers;
using System;
using System.Data;
using System.Data.Common;
using System.Globalization;
using System.Linq;
Expand Down Expand Up @@ -73,10 +75,9 @@ public void TinyCsvManual()
}
}

[Benchmark]
public async Task RecordParserAsync()
private static IVariableLengthReader<SalesRecord> BuildReader(bool pooled)
{
var parser = new VariableLengthReaderSequentialBuilder<SalesRecord>()
var builder = new VariableLengthReaderSequentialBuilder<SalesRecord>()
.Map(x => x.Region)
.Map(x => x.Country)
.Map(x => x.ItemType)
Expand All @@ -90,9 +91,18 @@ public async Task RecordParserAsync()
.Map(x => x.UnitCost)
.Map(x => x.TotalRevenue)
.Map(x => x.TotalCost)
.Map(x => x.TotalProfit)
.Build(",", CultureInfo.InvariantCulture);//, () => r);
.Map(x => x.TotalProfit);

if (pooled)
builder.DefaultTypeConvert(new InternPool().Intern);

return builder.Build(",", CultureInfo.InvariantCulture);
}

[Benchmark]
public async Task RecordParserAsync_Manual()
{
var parser = BuildReader(pooled: true);
using var stream = TestData.GetUtf8Stream();
var records = RecordParserSupport.ProcessFile(stream, parser.Parse);
await foreach (var record in records)
Expand All @@ -101,6 +111,56 @@ public async Task RecordParserAsync()
}
}

/// <summary>
/// Benchmarks RecordParser's <c>ReadRecords</c> extension with parallelism disabled.
/// </summary>
/// <param name="pooled">
/// Forwarded to <c>BuildReader</c>, which registers an <c>InternPool</c>-based
/// default type converter on the parser when this is <c>true</c>.
/// </param>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public void RecordParser_Native_Sequential(bool pooled)
{
    // Dispose the reader at the end of the iteration so repeated benchmark
    // invocations do not accumulate open readers.
    using var tr = TestData.GetTextReader();
    var parser = BuildReader(pooled);
    var options = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new()
        {
            Enabled = false,
        }
    };

    var records = tr.ReadRecords(parser, options);

    // The loop body is intentionally empty: enumerating the sequence is the work being measured.
    foreach (var record in records)
    {
    }
}

/// <summary>
/// Benchmarks RecordParser's <c>ReadRecords</c> extension with parallelism enabled
/// and a maximum degree of parallelism of 4.
/// </summary>
/// <param name="ordered">
/// Assigned to <c>EnsureOriginalOrdering</c> in the parallelism options.
/// </param>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public void RecordParser_Native_Parallel(bool ordered)
{
    // Dispose the reader at the end of the iteration so repeated benchmark
    // invocations do not accumulate open readers. The foreach below runs to
    // completion before the reader goes out of scope.
    using var tr = TestData.GetTextReader();
    var parser = BuildReader(pooled: false);
    var options = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new()
        {
            Enabled = true,
            EnsureOriginalOrdering = ordered,
            MaxDegreeOfParallelism = 4,
        }
    };

    var records = tr.ReadRecords(parser, options);

    // The loop body is intentionally empty: enumerating the sequence is the work being measured.
    foreach (var record in records)
    {
    }
}

[Benchmark]
public void SylvanData()
{
Expand Down
35 changes: 33 additions & 2 deletions source/Benchmarks/CsvSum.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
using BenchmarkDotNet.Attributes;
using nietras.SeparatedValues;
using System.Buffers;
using RecordParser.Builders.Reader;
using RecordParser.Extensions;
using System.Data;
using System.Globalization;


namespace Benchmarks;

[MemoryDiagnoser]
Expand All @@ -26,6 +26,37 @@ public decimal SylvanData()
return a;
}

/// <summary>
/// Sums the decimal value found at column index 13 of every record read through
/// RecordParser's <c>ReadRecords</c> extension.
/// </summary>
/// <param name="parallel">Assigned to <c>Enabled</c> in the parallelism options.</param>
/// <returns>The accumulated total of the parsed column.</returns>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public decimal RecordParser(bool parallel)
{
    // Each record is just the single decimal taken from column 13.
    var columnMapping = new VariableLengthReaderBuilder<decimal>()
        .Map(x => x, indexColumn: 13);
    var reader = columnMapping.Build(",", CultureInfo.InvariantCulture);

    var readOptions = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new()
        {
            Enabled = parallel,
            MaxDegreeOfParallelism = 4,
            // Addition is order-independent, so the original ordering is not requested.
            EnsureOriginalOrdering = false
        }
    };

    decimal total = 0m;
    using var input = TestData.GetTextReader();
    var values = input.ReadRecords(reader, readOptions);
    foreach (var value in values)
    {
        total = total + value;
    }

    return total;
}

[Benchmark]
public decimal SepCsv()
{
Expand Down
113 changes: 24 additions & 89 deletions source/Benchmarks/CsvWriterBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using BenchmarkDotNet.Attributes;
using CsvHelper.Configuration;
using RecordParser.Extensions;
using RecordParser.Parsers;
using Sylvan.Data;
using Sylvan.Data.Csv;
using System;
Expand Down Expand Up @@ -68,13 +70,8 @@ public void NaiveBroken()
}
}

[Benchmark]
public void RecordParserParallelX()
private static IVariableLengthWriter<SalesRecord> BuildWriter()
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();

var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
builder.Map(x => x.Region);
builder.Map(x => x.Country);
Expand All @@ -91,7 +88,16 @@ public void RecordParserParallelX()
builder.Map(x => x.TotalCost);
builder.Map(x => x.TotalProfit);

var csv = builder.Build(",");
return builder.Build(",");
}

[Benchmark]
public void RecordParser_Parallel_Manual()
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();
var csv = BuildWriter();

var parallelism = 4;
var buffers = Enumerable
Expand Down Expand Up @@ -137,95 +143,24 @@ public void RecordParserParallelX()
}

[Benchmark]
/// <summary>
/// Benchmarks writing every record by formatting it into a pooled <c>char</c> buffer
/// with the RecordParser writer and emitting each formatted line asynchronously.
/// </summary>
/// <returns>A task that completes once all records have been written.</returns>
public async Task RecordParserAsync()
{
    using var tw = GetWriter();
    // I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
    var items = GetRecords();

    var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
    builder.Map(x => x.Region);
    builder.Map(x => x.Country);
    builder.Map(x => x.ItemType);
    builder.Map(x => x.SalesChannel);
    builder.Map(x => x.OrderPriority);
    builder.Map(x => x.OrderDate);
    builder.Map(x => x.OrderId);
    builder.Map(x => x.ShipDate);
    builder.Map(x => x.UnitsSold);
    builder.Map(x => x.UnitPrice);
    builder.Map(x => x.UnitCost);
    builder.Map(x => x.TotalRevenue);
    builder.Map(x => x.TotalCost);
    builder.Map(x => x.TotalProfit);

    var csv = builder.Build(",");

    // Start with a request for 2^8 chars and double the request whenever a record does not fit.
    var pow = 8;
    var buffer = ArrayPool<char>.Shared.Rent(1 << pow);
    try
    {
        foreach (var item in items)
        {
            int charsWritten;
            while (!csv.TryFormat(item, buffer, out charsWritten))
            {
                // Rent the larger buffer before returning the old one so that
                // `buffer` always refers to an array we still own if Rent throws.
                pow++;
                var larger = ArrayPool<char>.Shared.Rent(1 << pow);
                ArrayPool<char>.Shared.Return(buffer);
                buffer = larger;
            }

            await tw.WriteLineAsync(buffer, 0, charsWritten);
        }
    }
    finally
    {
        // The original never handed the last rented buffer back to the pool.
        ArrayPool<char>.Shared.Return(buffer);
    }
}

[Benchmark]
public void RecordParser()
[Arguments(false, null)]
[Arguments(true, true)]
[Arguments(true, false)]
public void RecordParser_Native(bool parallel, bool? asOrdered)
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();
var csv = BuildWriter();

var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
builder.Map(x => x.Region);
builder.Map(x => x.Country);
builder.Map(x => x.ItemType);
builder.Map(x => x.SalesChannel);
builder.Map(x => x.OrderPriority);
builder.Map(x => x.OrderDate);
builder.Map(x => x.OrderId);
builder.Map(x => x.ShipDate);
builder.Map(x => x.UnitsSold);
builder.Map(x => x.UnitPrice);
builder.Map(x => x.UnitCost);
builder.Map(x => x.TotalRevenue);
builder.Map(x => x.TotalCost);
builder.Map(x => x.TotalProfit);

var csv = builder.Build(",");

var charsWritten = 0;
var pow = 8;
var buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
foreach (var item in items)
var options = new ParallelismOptions
{
retry:
Enabled = parallel,
MaxDegreeOfParallelism = 4,
EnsureOriginalOrdering = asOrdered ?? true
};

if (csv.TryFormat(item, buffer, out charsWritten))
{
tw.WriteLine(buffer, 0, charsWritten);
}
else
{
ArrayPool<char>.Shared.Return(buffer);
pow++;
buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
goto retry;
}
}
tw.WriteRecords(items, csv.TryFormat, options);
}

[Benchmark]
Expand Down

0 comments on commit 18f59b1

Please sign in to comment.