Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update/record-parser2.1.0 #16

Merged
merged 1 commit into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions source/Benchmarks/Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="Ben.StringIntern" Version="0.1.8" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="Csv" Version="2.0.93" />
<PackageReference Include="CsvHelper" Version="30.0.1" />
<PackageReference Include="CsvTextFieldParser" Version="1.2.2" />
Expand All @@ -33,7 +34,7 @@
<PackageReference Include="NReco.Csv" Version="1.0.2" />
<PackageReference Include="FSharp.Data" Version="6.2.0" />
<PackageReference Include="Microsoft.VisualBasic" Version="10.4.0-preview.18571.3" />
<PackageReference Include="RecordParser" Version="1.3.0" />
<PackageReference Include="RecordParser" Version="2.1.0" />
<PackageReference Include="SoftCircuits.CsvParser" Version="4.1.0" />
<PackageReference Include="Aspose.Cells" Version="23.8.0" />
<PackageReference Include="ClosedXML" Version="0.102.1" />
Expand All @@ -58,9 +59,9 @@
<PackageReference Include="TinyCsvParser" Version="2.7.0" />
<PackageReference Include="Cesil" Version="0.9.0" />

<PackageReference Include="Sep" Version="0.2.2" />
<PackageReference Include="Sep" Version="0.2.2" />

<PackageReference Include="XlsxHelper" Version="2.0.0" />
<PackageReference Include="XlsxHelper" Version="2.0.0" />

</ItemGroup>

Expand All @@ -70,16 +71,16 @@
</None>
</ItemGroup>

<!--
fix some issue with IAsyncEnumerable resolving to the wrong assembly
https://github.com/grpc/grpc-dotnet/issues/329
-->
<Target Name="ChangeAliasesOfReactiveExtensions" BeforeTargets="FindReferenceAssembliesForReferences;ResolveReferences">
<ItemGroup>
<ReferencePath Condition="'%(FileName)' == 'System.Interactive.Async'">
<Aliases>ix</Aliases>
</ReferencePath>
</ItemGroup>
</Target>
<!--
fix some issue with IAsyncEnumerable resolving to the wrong assembly
https://github.com/grpc/grpc-dotnet/issues/329
-->
<Target Name="ChangeAliasesOfReactiveExtensions" BeforeTargets="FindReferenceAssembliesForReferences;ResolveReferences">
<ItemGroup>
<ReferencePath Condition="'%(FileName)' == 'System.Interactive.Async'">
<Aliases>ix</Aliases>
</ReferencePath>
</ItemGroup>
</Target>

</Project>
74 changes: 67 additions & 7 deletions source/Benchmarks/CsvDataBinderBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
using BenchmarkDotNet.Attributes;
using Ben.Collections.Specialized;
using BenchmarkDotNet.Attributes;
using Cesil;
using CsvHelper.Configuration;
using Dapper;
using RecordParser.Builders.Reader;
using RecordParser.Extensions;
using RecordParser.Parsers;
using System;
using System.Data;
using System.Data.Common;
using System.Globalization;
using System.Linq;
Expand Down Expand Up @@ -73,10 +75,9 @@ public void TinyCsvManual()
}
}

[Benchmark]
public async Task RecordParserAsync()
private static IVariableLengthReader<SalesRecord> BuildReader(bool pooled)
{
var parser = new VariableLengthReaderSequentialBuilder<SalesRecord>()
var builder = new VariableLengthReaderSequentialBuilder<SalesRecord>()
.Map(x => x.Region)
.Map(x => x.Country)
.Map(x => x.ItemType)
Expand All @@ -90,9 +91,18 @@ public async Task RecordParserAsync()
.Map(x => x.UnitCost)
.Map(x => x.TotalRevenue)
.Map(x => x.TotalCost)
.Map(x => x.TotalProfit)
.Build(",", CultureInfo.InvariantCulture);//, () => r);
.Map(x => x.TotalProfit);

if (pooled)
builder.DefaultTypeConvert(new InternPool().Intern);

return builder.Build(",", CultureInfo.InvariantCulture);
}

[Benchmark]
public async Task RecordParserAsync_Manual()
{
var parser = BuildReader(pooled: true);
using var stream = TestData.GetUtf8Stream();
var records = RecordParserSupport.ProcessFile(stream, parser.Parse);
await foreach (var record in records)
Expand All @@ -101,6 +111,56 @@ public async Task RecordParserAsync()
}
}

/// <summary>
/// Benchmarks RecordParser's built-in <c>ReadRecords</c> extension with parallelism disabled.
/// </summary>
/// <param name="pooled">
/// Forwarded to <c>BuildReader</c>; selects whether the reader is built with the
/// string-interning default type converter.
/// </param>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public void RecordParser_Native_Sequential(bool pooled)
{
    // Dispose the reader at the end of every invocation so repeated benchmark
    // iterations do not accumulate open readers (matches the CsvSum benchmark).
    using var tr = TestData.GetTextReader();
    var parser = BuildReader(pooled);
    var options = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new ()
        {
            Enabled = false,
        }
    };

    var records = tr.ReadRecords(parser, options);

    // The records are intentionally discarded: only the cost of enumerating
    // (reading and parsing) is being measured.
    foreach (var record in records)
    {

    }
}

/// <summary>
/// Benchmarks RecordParser's built-in <c>ReadRecords</c> extension with parallelism enabled
/// (maximum degree of parallelism 4), using the non-pooled reader.
/// </summary>
/// <param name="ordered">
/// Value assigned to <c>EnsureOriginalOrdering</c> in the parallelism options.
/// </param>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public void RecordParser_Native_Parallel(bool ordered)
{
    // Dispose the reader at the end of every invocation so repeated benchmark
    // iterations do not accumulate open readers (matches the CsvSum benchmark).
    using var tr = TestData.GetTextReader();
    var parser = BuildReader(pooled: false);
    var options = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new ()
        {
            Enabled = true,
            EnsureOriginalOrdering = ordered,
            MaxDegreeOfParallelism = 4,
        }
    };

    var records = tr.ReadRecords(parser, options);

    // The records are intentionally discarded: only the cost of enumerating
    // (reading and parsing) is being measured.
    foreach (var record in records)
    {

    }
}

[Benchmark]
public void SylvanData()
{
Expand Down
35 changes: 33 additions & 2 deletions source/Benchmarks/CsvSum.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
using BenchmarkDotNet.Attributes;
using nietras.SeparatedValues;
using System.Buffers;
using RecordParser.Builders.Reader;
using RecordParser.Extensions;
using System.Data;
using System.Globalization;


namespace Benchmarks;

[MemoryDiagnoser]
Expand All @@ -26,6 +26,37 @@ public decimal SylvanData()
return a;
}

/// <summary>
/// Sums the decimal value found at column index 13 of every record, reading the
/// test data through RecordParser's <c>ReadRecords</c> extension.
/// </summary>
/// <param name="parallel">Value assigned to <c>ParallelismOptions.Enabled</c>.</param>
/// <returns>The accumulated sum of the parsed column values.</returns>
[Benchmark]
[Arguments(true)]
[Arguments(false)]
public decimal RecordParser(bool parallel)
{
    // Reader configuration: header row present, no quoted fields, and (when
    // enabled) up to 4 workers without preserving the original record order.
    var readerOptions = new VariableLengthReaderOptions
    {
        HasHeader = true,
        ContainsQuotedFields = false,
        ParallelismOptions = new ()
        {
            Enabled = parallel,
            MaxDegreeOfParallelism = 4,
            EnsureOriginalOrdering = false
        }
    };

    // Each record is reduced to a single decimal taken from column index 13.
    var columnParser = new VariableLengthReaderBuilder<decimal>()
        .Map(value => value, indexColumn: 13)
        .Build(",", CultureInfo.InvariantCulture);

    var total = 0m;
    using var reader = TestData.GetTextReader();
    foreach (var amount in reader.ReadRecords(columnParser, readerOptions))
    {
        total += amount;
    }

    return total;
}

[Benchmark]
public decimal SepCsv()
{
Expand Down
113 changes: 24 additions & 89 deletions source/Benchmarks/CsvWriterBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using BenchmarkDotNet.Attributes;
using CsvHelper.Configuration;
using RecordParser.Extensions;
using RecordParser.Parsers;
using Sylvan.Data;
using Sylvan.Data.Csv;
using System;
Expand Down Expand Up @@ -68,13 +70,8 @@ public void NaiveBroken()
}
}

[Benchmark]
public void RecordParserParallelX()
private static IVariableLengthWriter<SalesRecord> BuildWriter()
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();

var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
builder.Map(x => x.Region);
builder.Map(x => x.Country);
Expand All @@ -91,7 +88,16 @@ public void RecordParserParallelX()
builder.Map(x => x.TotalCost);
builder.Map(x => x.TotalProfit);

var csv = builder.Build(",");
return builder.Build(",");
}

[Benchmark]
public void RecordParser_Parallel_Manual()
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();
var csv = BuildWriter();

var parallelism = 4;
var buffers = Enumerable
Expand Down Expand Up @@ -137,95 +143,24 @@ public void RecordParserParallelX()
}

[Benchmark]
public async Task RecordParserAsync()
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();

var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
builder.Map(x => x.Region);
builder.Map(x => x.Country);
builder.Map(x => x.ItemType);
builder.Map(x => x.SalesChannel);
builder.Map(x => x.OrderPriority);
builder.Map(x => x.OrderDate);
builder.Map(x => x.OrderId);
builder.Map(x => x.ShipDate);
builder.Map(x => x.UnitsSold);
builder.Map(x => x.UnitPrice);
builder.Map(x => x.UnitCost);
builder.Map(x => x.TotalRevenue);
builder.Map(x => x.TotalCost);
builder.Map(x => x.TotalProfit);

var csv = builder.Build(",");

var charsWritten = 0;
var pow = 8;
var buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
foreach (var item in items)
{
retry:

if (csv.TryFormat(item, buffer, out charsWritten))
{
await tw.WriteLineAsync(buffer, 0, charsWritten);
}
else
{
ArrayPool<char>.Shared.Return(buffer);
pow++;
buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
goto retry;
}
}
}

[Benchmark]
public void RecordParser()
[Arguments(false, null)]
[Arguments(true, true)]
[Arguments(true, false)]
public void RecordParser_Native(bool parallel, bool? asOrdered)
{
using var tw = GetWriter();
// I don't see a way to use this library without a `T`, so can't use DbDataReader directly.
var items = GetRecords();
var csv = BuildWriter();

var builder = new RecordParser.Builders.Writer.VariableLengthWriterSequentialBuilder<SalesRecord>();
builder.Map(x => x.Region);
builder.Map(x => x.Country);
builder.Map(x => x.ItemType);
builder.Map(x => x.SalesChannel);
builder.Map(x => x.OrderPriority);
builder.Map(x => x.OrderDate);
builder.Map(x => x.OrderId);
builder.Map(x => x.ShipDate);
builder.Map(x => x.UnitsSold);
builder.Map(x => x.UnitPrice);
builder.Map(x => x.UnitCost);
builder.Map(x => x.TotalRevenue);
builder.Map(x => x.TotalCost);
builder.Map(x => x.TotalProfit);

var csv = builder.Build(",");

var charsWritten = 0;
var pow = 8;
var buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
foreach (var item in items)
var options = new ParallelismOptions
{
retry:
Enabled = parallel,
MaxDegreeOfParallelism = 4,
EnsureOriginalOrdering = asOrdered ?? true
};

if (csv.TryFormat(item, buffer, out charsWritten))
{
tw.WriteLine(buffer, 0, charsWritten);
}
else
{
ArrayPool<char>.Shared.Return(buffer);
pow++;
buffer = ArrayPool<char>.Shared.Rent((int)Math.Pow(2, pow));
goto retry;
}
}
tw.WriteRecords(items, csv.TryFormat, options);
}

[Benchmark]
Expand Down