Skip to content

Commit

Permalink
Expose GetFieldSpan on IParser
Browse files Browse the repository at this point in the history
Co-authored-by: JanEggers <[email protected]>
  • Loading branch information
Rob-Hague and JanEggers committed Apr 4, 2023
1 parent 9848510 commit 3ad60c6
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 141 deletions.
230 changes: 113 additions & 117 deletions src/CsvHelper/CsvParser.cs

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion src/CsvHelper/CsvReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Configuration;

namespace CsvHelper
{
Expand Down
27 changes: 17 additions & 10 deletions src/CsvHelper/FieldCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

Expand Down Expand Up @@ -34,26 +35,26 @@ public FieldCache(int initialSize = 128, int maxFieldSize = 128)
entries = new Entry[size];
}

public string GetField(char[] buffer, int start, int length)
public string GetField(ReadOnlySpan<char> buffer)
{
if (length == 0)
if (buffer.IsEmpty)
{
return string.Empty;
}

if (length > maxFieldSize)
if (buffer.Length > maxFieldSize)
{
return new string(buffer, start, length);
return buffer.ToString();
}

var hashCode = GetHashCode(buffer, start, length);
var hashCode = GetHashCode(buffer);
ref var bucket = ref GetBucket(hashCode);
int i = bucket - 1;
while ((uint)i < (uint)entries.Length)
{
ref var entry = ref entries[i];

if (entry.HashCode == hashCode && entry.Value.AsSpan().SequenceEqual(new Span<char>(buffer, start, length)))
if (entry.HashCode == hashCode && entry.Value.AsSpan().SequenceEqual(buffer))
{
return entry.Value;
}
Expand All @@ -70,25 +71,31 @@ public string GetField(char[] buffer, int start, int length)
ref var reference = ref entries[count];
reference.HashCode = hashCode;
reference.Next = bucket - 1;
reference.Value = new string(buffer, start, length);
reference.Value = buffer.ToString();
bucket = count + 1;
count++;

return reference.Value;
}

// NOTE(review): this hunk appears to show removed and added lines of the commit
// together without +/- markers (two signatures, two loop headers and two loop
// bodies below). Presumably the ReadOnlySpan<char>-based lines are the added
// ones - confirm against the raw diff.
/// <summary>
/// Computes the unsigned 32-bit hash of a field's characters. <c>GetField</c>
/// passes the result to <c>GetBucket</c> and stores it on the cache entry.
/// </summary>
/// <param name="buffer">The characters of the field to hash.</param>
/// <returns>The hash of <paramref name="buffer"/> as a <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private uint GetHashCode(char[] buffer, int start, int length)
private static uint GetHashCode(ReadOnlySpan<char> buffer)
{
unchecked
{
#if NET6_0_OR_GREATER
// Feed the span's underlying bytes to System.HashCode and reinterpret the
// resulting int as uint.
// NOTE(review): System.HashCode is documented as randomly seeded per process,
// so values from this branch should not be expected to repeat across runs - confirm
// nothing relies on a fixed hash value.
HashCode hash = new();
hash.AddBytes(MemoryMarshal.AsBytes(buffer));
return (uint)hash.ToHashCode();
#else
// Fallback: multiplicative hash seeded with 17, multiplying by 31 per character.
// Overflow wraps silently because of the enclosing unchecked block.
uint hash = 17;
for (var i = start; i < start + length; i++)
foreach (char c in buffer)
{
hash = hash * 31 + buffer[i];
hash = hash * 31 + c;
}

return hash;
#endif
}
}

Expand Down
25 changes: 23 additions & 2 deletions src/CsvHelper/IParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,21 @@ public interface IParser : IDisposable
/// <summary>
/// Gets the field at the specified index for the current row.
/// </summary>
/// <param name="index">The index.</param>
/// <returns>The field.</returns>
/// <param name="index">The index of the field in the current row.</param>
/// <returns>A <see cref="string"/> representing the field at the specified index in the current row.</returns>
string this[int index] { get; }

// NOTE(review): how long the returned span stays valid is not visible here;
// presumably only until the parser advances to another row - confirm against CsvParser.
/// <summary>
/// Gets a span over the field at the specified index in the current row.
/// </summary>
/// <param name="index">The index of the field in the current row.</param>
/// <returns>A span representing the field at the specified index in the current row.</returns>
/// <remarks>
/// When the <c>NET</c> compilation symbol is defined, this member has a default
/// implementation that returns the indexer's string for <paramref name="index"/>
/// converted to a span. When it is not defined, the member has no body and
/// implementers must supply it.
/// </remarks>
ReadOnlySpan<char> GetFieldSpan(int index)
#if NET
=> this[index]
#endif
;

/// <summary>
/// Gets the record for the current row. Note:
/// It is much more efficient to only get the fields you need. If
Expand All @@ -49,6 +60,16 @@ public interface IParser : IDisposable
/// </summary>
string RawRecord { get; }

/// <summary>
/// Gets a span over the raw record for the current row.
/// </summary>
/// <remarks>
/// When the <c>NET</c> compilation symbol is defined, this property has a default
/// implementation that returns <see cref="RawRecord"/> converted to a span. When it
/// is not defined, it is a get-only property that implementers must supply.
/// </remarks>
ReadOnlySpan<char> RawRecordSpan
#if NET
=> RawRecord;
#else
{ get; }
#endif

/// <summary>
/// Gets the CSV row the parser is currently on.
/// </summary>
Expand Down
13 changes: 13 additions & 0 deletions tests/CsvHelper.Tests/CsvParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1366,5 +1366,18 @@ public void RawRowCountWithSingleLineAndNoLineEndingTest()
Assert.Equal(1, parser.RawRow);
}
}

/// <summary>
/// After reading the two-field row <c>1,2</c>, asking the parser's indexer for
/// the supplied index must throw <see cref="IndexOutOfRangeException"/>.
/// </summary>
/// <param name="index">The field index to request; supplied by the inline data (-1 and 2).</param>
[Theory]
[InlineData(-1)]
[InlineData(2)]
public void Parser_IndexOutOfRangeException(int index)
{
    using (var input = new StringReader("1,2\r\n"))
    {
        using (var csv = new CsvParser(input, CultureInfo.InvariantCulture))
        {
            // Advance to the first (and only) row before touching any field.
            var advanced = csv.Read();
            Assert.True(advanced);

            // Reading the field is deferred so the assertion can observe the exception.
            Func<object> readField = () => csv[index];
            Assert.Throws<IndexOutOfRangeException>(readField);
        }
    }
}
}
}
4 changes: 4 additions & 0 deletions tests/CsvHelper.Tests/Mocks/ParserMock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public class ParserMock : IParser, IEnumerable<string[]>

public string RawRecord => string.Empty;

/// <summary>
/// Always returns an empty span, matching <c>RawRecord</c>, which always returns <c>string.Empty</c> in this mock.
/// </summary>
public ReadOnlySpan<char> RawRecordSpan => ReadOnlySpan<char>.Empty;

public int Row => row;

public int RawRow => row;
Expand All @@ -41,6 +43,8 @@ public class ParserMock : IParser, IEnumerable<string[]>

public string this[int index] => record[index];

/// <summary>
/// Returns the same element the indexer returns, <c>record[index]</c>, exposed as a character span.
/// </summary>
/// <param name="index">The position within <c>record</c> to read.</param>
public ReadOnlySpan<char> GetFieldSpan(int index) => record[index];

public ParserMock() : this(new CsvConfiguration(CultureInfo.InvariantCulture)) { }

public ParserMock(CsvConfiguration configuration)
Expand Down
30 changes: 30 additions & 0 deletions tests/CsvHelper.Tests/Parsing/BadDataTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,5 +149,35 @@ public void Read_AccessingParserRecordInBadDataFound_ThrowsParserException()

Assert.Throws<ParserException>(() => parser[1]);
}

/// <summary>
/// Reads one row of three fields and requests their spans in the order
/// 0, 1, 2, 1, 0, expecting the same text for a field every time it is requested.
/// </summary>
[Fact]
public void ConsecutiveBadDataTest()
{
    // Unescaped, the row is: """","two" "2,"three" "3
    // Per the original author, each of the 3 fields goes through the
    // processFieldBuffer. The point of the test is that a field's memory is
    // not overwritten while the other fields of the same row are processed.
    var input = "\"\"\"\",\"two\" \"2,\"three\" \"3\r\n";

    var settings = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        ProcessFieldBufferSize = 4,
        CacheFields = false,
        BadDataFound = null
    };

    // Field index paired with the text expected at that index, in request order.
    var probes = new[]
    {
        (Index: 0, Text: "\""),
        (Index: 1, Text: "two \"2"),
        (Index: 2, Text: "three \"3"),
        (Index: 1, Text: "two \"2"),
        (Index: 0, Text: "\""),
    };

    using (var source = new StringReader(input))
    {
        using (var csv = new CsvParser(source, settings))
        {
            var hasRow = csv.Read();
            Assert.True(hasRow);

            Assert.Equal(3, csv.Count);

            foreach (var probe in probes)
            {
                Assert.Equal(probe.Text, csv.GetFieldSpan(probe.Index).ToString());
            }

            // The input holds a single row, so a second read must report no more data.
            var hasSecondRow = csv.Read();
            Assert.False(hasSecondRow);
        }
    }
}
}
}
15 changes: 4 additions & 11 deletions tests/CsvHelper.Tests/Parsing/FieldCacheTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,30 +64,23 @@ public void Read_WithFieldCacheDisabled_ReturnsDifferentFieldInstance()
// Checks FieldCache.GetField with a cache constructed with an initial size of 1:
// every returned string must equal its input, and requesting the text "542008"
// a second time must return the same string instance as the first request.
// NOTE(review): this hunk appears to show removed and added lines of the commit
// together without +/- markers (field1..field4 are each assigned twice below).
// Presumably the single-argument GetField calls are the added ones - confirm
// against the raw diff.
[Fact]
public void Test1()
{
// "542008", "27721116", "98000820" have hash code 3769566006

var value1 = "542008";
var value2 = "27721116";
var value3 = "98000820";
var value4 = "542008";

var cache = new FieldCache(1);

var field1 = cache.GetField(value1.ToCharArray(), 0, value1.Length);
var field2 = cache.GetField(value2.ToCharArray(), 0, value2.Length);
var field3 = cache.GetField(value3.ToCharArray(), 0, value3.Length);
var field4 = cache.GetField(value4.ToCharArray(), 0, value4.Length);
var field1 = cache.GetField(value1);
var field2 = cache.GetField(value2);
var field3 = cache.GetField(value3);
var field4 = cache.GetField(value4);

// Content check: each result has the same text as its input.
Assert.Equal(value1, field1);
Assert.Equal(value2, field2);
Assert.Equal(value3, field3);
Assert.Equal(value4, field4);

Assert.NotSame(value1, field1);
Assert.NotSame(value2, field2);
Assert.NotSame(value3, field3);
Assert.NotSame(value4, field4);

// value1 and value4 hold the same text, so the second request must return
// the instance stored by the first one.
Assert.Same(field1, field4);
}
}
Expand Down

0 comments on commit 3ad60c6

Please sign in to comment.