Skip to content

Commit

Permalink
squashes 8 commits; implements the write portion of supporting multi-…
Browse files Browse the repository at this point in the history
…character value separators per #10 (comment); still leaves read changes to be made; test coverage will need to be expanded to cover new cases as well
  • Loading branch information
kevin-montrose committed Jun 1, 2020
1 parent cfd387f commit c18f80b
Show file tree
Hide file tree
Showing 21 changed files with 981 additions and 92 deletions.
4 changes: 2 additions & 2 deletions Cesil.Benchmark/Benchmarks/Internals/NeedsEncodeBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public class NeedsEncodeBenchmark

private List<string> Strings;

private char BasicContains_C1;
private string BasicContains_C1;
private char? BasicContains_C2;
private char? BasicContains_C3;

Expand All @@ -40,7 +40,7 @@ public unsafe void Initialize()

Strings = strRet;

BasicContains_C1 = ',';
BasicContains_C1 = ",";
BasicContains_C2 = '"';
BasicContains_C3 = null;

Expand Down
18 changes: 9 additions & 9 deletions Cesil.Tests/ConfigurationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,15 @@ public void CharLookupInBounds()
var cOpts =
Options.CreateBuilder(Options.Default)
.WithEscapedValueStartAndEnd(char.MinValue)
.WithValueSeparator(char.MaxValue)
.WithValueSeparator(char.MaxValue.ToString())
.WithEscapedValueEscapeCharacter('c')
.WithCommentCharacter('d')
.ToOptions();

var dOpts =
Options.CreateBuilder(Options.Default)
.WithEscapedValueStartAndEnd('"')
.WithValueSeparator(',')
.WithValueSeparator(','.ToString())
.WithEscapedValueEscapeCharacter('"')
.WithCommentCharacter('#')
.ToOptions();
Expand Down Expand Up @@ -294,7 +294,7 @@ public void OptionsEquality()
.WithReadHeader(rh)
.WithRowEnding(re)
.WithTypeDescriber(typeDesc)
.WithValueSeparator(valSepChar)
.WithValueSeparator(valSepChar.ToString())
.WithWriteBufferSizeHint(writeHint)
.WithWriteHeader(wh)
.WithWriteTrailingRowEnding(wt)
Expand Down Expand Up @@ -457,9 +457,9 @@ private static void _SingleColumn<T>(string n)
[Fact]
public void OptionsValidation()
{
Assert.Throws<InvalidOperationException>(() => Options.CreateBuilder(Options.Default).WithValueSeparator(',').WithEscapedValueStartAndEnd(',').ToOptions());
Assert.Throws<InvalidOperationException>(() => Options.CreateBuilder(Options.Default).WithValueSeparator(','.ToString()).WithEscapedValueStartAndEnd(',').ToOptions());

Assert.Throws<InvalidOperationException>(() => Options.CreateBuilder(Options.Default).WithValueSeparator(',').WithCommentCharacter(',').ToOptions());
Assert.Throws<InvalidOperationException>(() => Options.CreateBuilder(Options.Default).WithValueSeparator(','.ToString()).WithCommentCharacter(',').ToOptions());

Assert.Throws<InvalidOperationException>(() => Options.CreateBuilder(Options.Default).WithEscapedValueStartAndEnd(',').WithCommentCharacter(',').ToOptions());

Expand All @@ -474,7 +474,7 @@ public void OptionsValidation()

Assert.Throws<InvalidOperationException>(
() =>
Options.CreateBuilder().WithValueSeparator(',')
Options.CreateBuilder().WithValueSeparator(','.ToString())
.WithRowEnding(RowEnding.CarriageReturnLineFeed)
.WithEscapedValueStartAndEnd('"')
.WithEscapedValueEscapeCharacter('"')
Expand All @@ -494,7 +494,7 @@ public void OptionsValidation()

Assert.Throws<InvalidOperationException>(
() =>
Options.CreateBuilder().WithValueSeparator(',')
Options.CreateBuilder().WithValueSeparator(','.ToString())
.WithRowEnding(RowEnding.CarriageReturnLineFeed)
.WithEscapedValueStartAndEnd('"')
.WithEscapedValueEscapeCharacter('"')
Expand Down Expand Up @@ -569,15 +569,15 @@ public void OptionsValidation()
() =>
Options.CreateBuilder(Options.Default)
.WithWhitespaceTreatmentInternal(WhitespaceTreatments.Trim)
.WithValueSeparator(' ')
.WithValueSeparator(' '.ToString())
.ToOptions()
);

Assert.Throws<InvalidOperationException>(
() =>
Options.CreateBuilder(Options.Default)
.WithExtraColumnTreatmentInternal(0)
.WithValueSeparator(' ')
.WithValueSeparator(' '.ToString())
.ToOptions()
);

Expand Down
164 changes: 160 additions & 4 deletions Cesil.Tests/DynamicWriterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,84 @@ private static dynamic MakeDynamicRow(string csvStr)
}
}

// Checks that the synchronous dynamic writer places the multi-character value
// separator "#*#" between values, and wraps a value or header in quotes only when
// it contains the complete separator (a lone '#' or '*' is written as-is).
[Fact]
public void MultiCharacterValueSeparators()
{
    var opts = Options.CreateBuilder(Options.DynamicDefault).WithValueSeparator("#*#").ToOptions();

    // no escapes
    WriteAndCheck(
        new[]
        {
            "A,B\r\n123,foo",
            "A,B\r\n456,#",
            "A,B\r\n789,*"
        },
        "A#*#B\r\n123#*#foo\r\n456#*##\r\n789#*#*"
    );

    // escapes
    WriteAndCheck(
        new[]
        {
            "A,B\r\n123,foo#*#bar",
            "A,B\r\n456,#",
            "A,B\r\n789,*"
        },
        "A#*#B\r\n123#*#\"foo#*#bar\"\r\n456#*##\r\n789#*#*"
    );

    // in headers
    WriteAndCheck(
        new[]
        {
            "A#*#Escaped,B\r\n123,foo#*#bar",
            "A#*#Escaped,B\r\n456,#",
            "A#*#Escaped,B\r\n789,*"
        },
        "\"A#*#Escaped\"#*#B\r\n123#*#\"foo#*#bar\"\r\n456#*##\r\n789#*#*"
    );

    // Shared scaffold for every scenario above: turns each source CSV into a dynamic
    // row via MakeDynamicRow, writes the rows in order with `opts`, and compares the
    // text produced by each writer variant against `expected`.
    //
    // The helper takes CSV text rather than the dynamic rows themselves so that the
    // call to this local function is bound statically.
    void WriteAndCheck(string[] sourceCsvs, string expected)
    {
        // rows are built once, up front, and reused for every writer variant
        var rows = new dynamic[sourceCsvs.Length];
        for (var i = 0; i < sourceCsvs.Length; i++)
        {
            rows[i] = MakeDynamicRow(sourceCsvs[i]);
        }

        RunSyncDynamicWriterVariants(
            opts,
            (config, getWriter, getStr) =>
            {
                // both the csv writer and the underlying writer are disposed
                // before the output is read back
                using (var writer = getWriter())
                using (var csv = config.CreateWriter(writer))
                {
                    foreach (var row in rows)
                    {
                        csv.Write(row);
                    }
                }

                var res = getStr();
                Assert.Equal(expected, res);
            }
        );
    }
}

[Fact]
public void WriteCommentBeforeRow()
{
Expand Down Expand Up @@ -313,7 +391,7 @@ public void NoEscapes()
{
// no escapes at all (TSV)
{
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator('\t').WithEscapedValueStartAndEnd(null).WithEscapedValueEscapeCharacter(null).ToOptions();
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator("\t").WithEscapedValueStartAndEnd(null).WithEscapedValueEscapeCharacter(null).ToOptions();

// correct
RunSyncDynamicWriterVariants(
Expand Down Expand Up @@ -390,7 +468,7 @@ public void NoEscapes()

// escapes, but no escape for the escape start and end char
{
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator('\t').WithEscapedValueStartAndEnd('"').WithEscapedValueEscapeCharacter(null).ToOptions();
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator("\t").WithEscapedValueStartAndEnd('"').WithEscapedValueEscapeCharacter(null).ToOptions();

// correct
RunSyncDynamicWriterVariants(
Expand Down Expand Up @@ -1840,6 +1918,84 @@ public void WriteAll()

// async tests

// Async counterpart of the multi-character value separator test: checks that the
// asynchronous dynamic writer places "#*#" between values, and wraps a value or
// header in quotes only when it contains the complete separator (a lone '#' or '*'
// is written as-is).
[Fact]
public async Task MultiCharacterValueSeparatorsAsync()
{
    var opts = Options.CreateBuilder(Options.DynamicDefault).WithValueSeparator("#*#").ToOptions();

    // no escapes
    await WriteAndCheckAsync(
        new[]
        {
            "A,B\r\n123,foo",
            "A,B\r\n456,#",
            "A,B\r\n789,*"
        },
        "A#*#B\r\n123#*#foo\r\n456#*##\r\n789#*#*"
    );

    // escapes
    await WriteAndCheckAsync(
        new[]
        {
            "A,B\r\n123,foo#*#bar",
            "A,B\r\n456,#",
            "A,B\r\n789,*"
        },
        "A#*#B\r\n123#*#\"foo#*#bar\"\r\n456#*##\r\n789#*#*"
    );

    // in headers
    await WriteAndCheckAsync(
        new[]
        {
            "A#*#Escaped,B\r\n123,foo#*#bar",
            "A#*#Escaped,B\r\n456,#",
            "A#*#Escaped,B\r\n789,*"
        },
        "\"A#*#Escaped\"#*#B\r\n123#*#\"foo#*#bar\"\r\n456#*##\r\n789#*#*"
    );

    // Shared scaffold for every scenario above: turns each source CSV into a dynamic
    // row via MakeDynamicRow, writes the rows in order with `opts`, and compares the
    // text produced by each async writer variant against `expected`.
    //
    // The helper takes CSV text rather than the dynamic rows themselves so that the
    // call to this local function is bound statically.
    async Task WriteAndCheckAsync(string[] sourceCsvs, string expected)
    {
        // rows are built once, up front, and reused for every writer variant
        var rows = new dynamic[sourceCsvs.Length];
        for (var i = 0; i < sourceCsvs.Length; i++)
        {
            rows[i] = MakeDynamicRow(sourceCsvs[i]);
        }

        await RunAsyncDynamicWriterVariants(
            opts,
            async (config, getWriter, getStr) =>
            {
                // both the csv writer and the underlying writer are disposed
                // before the output is read back
                await using (var writer = getWriter())
                await using (var csv = config.CreateAsyncWriter(writer))
                {
                    foreach (var row in rows)
                    {
                        await csv.WriteAsync(row);
                    }
                }

                var res = await getStr();
                Assert.Equal(expected, res);
            }
        );
    }
}

[Fact]
public async Task WriteCommentBeforeRowAsync()
{
Expand Down Expand Up @@ -2094,7 +2250,7 @@ public async Task NoEscapesAsync()
{
// no escapes at all (TSV)
{
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator('\t').WithEscapedValueStartAndEnd(null).WithEscapedValueEscapeCharacter(null).ToOptions();
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator("\t").WithEscapedValueStartAndEnd(null).WithEscapedValueEscapeCharacter(null).ToOptions();

// correct
await RunAsyncDynamicWriterVariants(
Expand Down Expand Up @@ -2171,7 +2327,7 @@ await RunAsyncDynamicWriterVariants(

// escapes, but no escape for the escape start and end char
{
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator('\t').WithEscapedValueStartAndEnd('"').WithEscapedValueEscapeCharacter(null).ToOptions();
var opts = OptionsBuilder.CreateBuilder(Options.DynamicDefault).WithValueSeparator("\t").WithEscapedValueStartAndEnd('"').WithEscapedValueEscapeCharacter(null).ToOptions();

// correct
await RunAsyncDynamicWriterVariants(
Expand Down
41 changes: 35 additions & 6 deletions Cesil.Tests/NeedsEncodeHelper_AVX_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,35 @@ namespace Cesil.Tests
{
public class NeedsEncodeHelper_AVX_Tests
{
// Exercises ContainsCharRequiringEncoding with the three-character separator "#*#".
// `expected` is the index the helper should report, or -1 when nothing in `txt`
// should be flagged. Inputs are 15 to 19 characters long, i.e. on both sides of the
// 16-character mark (presumably the 256-bit chunk size used by the other tests in
// this class - confirm).
//
// NOTE(review): the helper's third constructor argument is '#', the same character
// the separator starts and ends with, yet a lone '#' is expected to yield -1 below.
// Confirm that this is the intended contract for that argument.
[Theory]
// no separator characters anywhere
[InlineData("0123456789ABCDE", -1)]
[InlineData("0123456789ABCDEF", -1)]
[InlineData("0123456789ABCDEFG", -1)]

// complete separator in the middle, at the very end, and at the very start
[InlineData("012#*#6789ABCDE", 3)]
[InlineData("0123456789ABC#*#", 13)]
[InlineData("#*#123456789ABCDEFG", 0)]

// incomplete separator at the start of the text
[InlineData("#123456789ABCDE", -1)]
[InlineData("#*23456789ABCDEF", -1)]
[InlineData("*#23456789ABCDEFG", -1)]

// incomplete separator at the end of the text
[InlineData("0123456789ABCD#", -1)]
[InlineData("0123456789ABCD#*", -1)]
[InlineData("0123456789ABCDEF#", -1)]
public unsafe void MultiCharacterValueSeparator(string txt, int expected)
{
    var helper = new NeedsEncodeHelper("#*#", '"', '#');

    // pin the string so the helper can scan it through a raw pointer
    fixed (char* start = txt)
    {
        Assert.Equal(expected, helper.ContainsCharRequiringEncoding(start, txt.Length));
    }
}

[Theory]
[InlineData("0123456789ABCDEFa", -1)]
[InlineData("0123456789ABCDEF,", 16)]
Expand All @@ -19,7 +48,7 @@ public unsafe void AwkwardLengths(string txt, int expected)

Assert.NotEqual(0, txt.Length % charsFor256Bits);

var state = new NeedsEncodeHelper(',', '"', '#');
var state = new NeedsEncodeHelper(",", '"', '#');

fixed (char* charPtr = txt)
{
Expand All @@ -45,7 +74,7 @@ public unsafe void Exactly256Bits(string txt, int expected)

Assert.Equal(0, txt.Length % charsFor256Bits);

var state = new NeedsEncodeHelper(',', '"', '#');
var state = new NeedsEncodeHelper(",", '"', '#');

fixed (char* charPtr = txt)
{
Expand All @@ -58,7 +87,7 @@ public unsafe void Exactly256Bits(string txt, int expected)
[Fact]
public unsafe void LessThan256Bits()
{
var state = new NeedsEncodeHelper(',', '"', '#');
var state = new NeedsEncodeHelper(",", '"', '#');

for (var len = 0; len < 16; len++)
{
Expand Down Expand Up @@ -98,9 +127,9 @@ public unsafe void LessThan256Bits()
[InlineData("world", -1)]
public unsafe void Simple(string txt, int expected)
{
var s1 = new NeedsEncodeHelper(',', '"', '#');
var s2 = new NeedsEncodeHelper(',', '"', null);
var s3 = new NeedsEncodeHelper(',', null, null);
var s1 = new NeedsEncodeHelper(",", '"', '#');
var s2 = new NeedsEncodeHelper(",", '"', null);
var s3 = new NeedsEncodeHelper(",", null, null);

fixed (char* charPtr = txt)
{
Expand Down
3 changes: 1 addition & 2 deletions Cesil.Tests/PublicInterfaceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1941,8 +1941,7 @@ public void ParameterNamesApproved()
[typeof(object).GetTypeInfo()] = new[] { "obj", "context", "row", "value" },
[typeof(int).GetTypeInfo()] = new[] { "index", "sizeHint" },
[typeof(int?).GetTypeInfo()] = new[] { "sizeHint" },
[typeof(string).GetTypeInfo()] = new[] { "name", "comment", "path", "data" },
[typeof(char).GetTypeInfo()] = new[] { "valueSeparator" },
[typeof(string).GetTypeInfo()] = new[] { "name", "comment", "path", "data", "valueSeparator" },
[typeof(char?).GetTypeInfo()] = new[] { "commentStart", "escapeStart", "escape" },

// system types
Expand Down
Loading

0 comments on commit c18f80b

Please sign in to comment.