Skip to content

Commit

Permalink
SepReader.Cols: Add Join/JoinToString(ReadOnlySpan<char> separator) (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
nietras authored Jan 31, 2025
1 parent f8c4697 commit 985457a
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 6 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,8 @@ namespace nietras.SeparatedValues
{
public int Count { get; }
public nietras.SeparatedValues.SepReader.Col this[int index] { get; }
public System.ReadOnlySpan<char> Join(System.ReadOnlySpan<char> separator) { }
public string JoinToString(System.ReadOnlySpan<char> separator) { }
public System.Span<T> Parse<T>()
where T : System.ISpanParsable<T> { }
public void Parse<T>(System.Span<T> span)
Expand Down
49 changes: 43 additions & 6 deletions src/Sep.Test/SepReaderColsTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ public class SepReaderColsTest
[TestMethod]
public void SepReaderColsTest_Length()
{
Run((cols, range) => Assert.AreEqual(range.GetOffsetAndLength(_colsCount).Length, cols.Count));
Run((cols, range) => Assert.AreEqual(range.GetOffsetAndLength(_colsCount).Length, cols.Count),
checkIndexOutOfRange: false);
}

[TestMethod]
Expand Down Expand Up @@ -57,7 +58,7 @@ public void SepReaderColsTest_Indexer_OutOfRange_Throws()
{
Assert.IsNotNull(e);
}
});
}, checkIndexOutOfRange: false);
}

[TestMethod]
Expand Down Expand Up @@ -120,7 +121,7 @@ public void SepReaderColsTest_Parse_IntoSpan_LengthWrong_Throws()
{
Assert.AreEqual($"'span':{colValues.Length} must have length/count {cols.Count} matching columns selected", e.Message);
}
});
}, checkIndexOutOfRange: false);
}

[TestMethod]
Expand Down Expand Up @@ -156,7 +157,7 @@ public void SepReaderColsTest_TryParse_IntoSpan_LengthWrong_Throws()
{
Assert.AreEqual($"'span':{colValues.Length} must have length/count {cols.Count} matching columns selected", e.Message);
}
});
}, checkIndexOutOfRange: false);
}

[TestMethod]
Expand All @@ -177,9 +178,21 @@ public void SepReaderColsTest_Select_ToStringDirect()
Run((cols, range) => CollectionAssert.AreEqual(_colTexts[range], cols.Select(c => c.ToStringDirect()).ToArray()));
}

[DataTestMethod]
[DataRow("")]
[DataRow("/")]
[DataRow("<SEP>")]
public void SepReaderColsTest_Join(string separator)
{
    // Span-returning Join must match string.Join over the same col texts.
    Run((cols, range) =>
    {
        var expected = string.Join(separator, _colTexts[range]);
        var actual = cols.Join(separator).ToString();
        Assert.AreEqual(expected, actual);
    });
    // String-returning JoinToString must match string.Join as well.
    Run((cols, range) =>
    {
        var expected = string.Join(separator, _colTexts[range]);
        var actual = cols.JoinToString(separator);
        Assert.AreEqual(expected, actual);
    });
}

// Returns the text of a single col by calling its ToString().
static string ToString(SepReader.Col col)
{
    return col.ToString();
}

static void Run(ColsTestAction action, string text = Text)
static void Run(ColsTestAction action, string text = Text, bool checkIndexOutOfRange = true)
{
var ranges = new Range[]
{
Expand All @@ -194,7 +207,26 @@ static void Run(ColsTestAction action, string text = Text)
2..2,
2.._colsCount,
};
using var reader = Sep.Reader().FromText(text);
{
using var reader = Sep.Reader().FromText(text);
Run(reader, ranges, action, checkIndexOutOfRange);
}
{
using var reader = Sep.Reader(o => o with { Unescape = true }).FromText(text);
Run(reader, ranges, action, checkIndexOutOfRange);
}
{
using var reader = Sep.Reader(o => o with { Trim = SepTrim.All }).FromText(text);
Run(reader, ranges, action, checkIndexOutOfRange);
}
{
using var reader = Sep.Reader(o => o with { Unescape = true, Trim = SepTrim.All }).FromText(text);
Run(reader, ranges, action, checkIndexOutOfRange);
}
}

static void Run(SepReader reader, Range[] ranges, ColsTestAction action, bool checkIndexOutOfRange)
{
Assert.IsTrue(reader.MoveNext());
var row = reader.Current;
action(row[_colNames], ..);
Expand All @@ -210,6 +242,11 @@ static void Run(ColsTestAction action, string text = Text)

action(row[range], range);
}
if (checkIndexOutOfRange)
{
// Ensure index out of range causes exception (note range is not same)
Assert.ThrowsException<IndexOutOfRangeException>(() => action(reader.Current[[-1]], 0..1));
}
}

delegate void ColsTestAction(SepReader.Cols cols, Range range);
Expand Down
6 changes: 6 additions & 0 deletions src/Sep/Internals/SepRange.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
using System.Diagnostics.CodeAnalysis;

namespace nietras.SeparatedValues;

/// <summary>
/// A contiguous range described by a start index and a length. Within this
/// change it is used to describe where a column's chars are located so that
/// columns can be sliced out of a char buffer when joining.
/// </summary>
/// <param name="Start">Index of the first element in the range.</param>
/// <param name="Length">Number of elements in the range.</param>
[ExcludeFromCodeCoverage]
readonly record struct SepRange(int Start, int Length);
8 changes: 8 additions & 0 deletions src/Sep/SepReader.Cols.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ public unsafe Span<T> Select<T>(delegate*<Col, T> selector) => IsIndices()
? _state.ColsSelect<T>(_colIndices, selector)
: _state.ColsSelect<T>(_colStartIfRange, _colIndices.Length, selector);

/// <summary>
/// Joins the text of the selected columns, placing
/// <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>
/// A span with the joined chars.
/// NOTE(review): the span is produced by reader state and presumably is only
/// valid while the current row is - confirm lifetime before documenting publicly.
/// </returns>
public ReadOnlySpan<char> Join(ReadOnlySpan<char> separator)
{
    if (IsIndices())
    {
        // Columns were selected by explicit indices.
        return _state.Join(_colIndices, separator);
    }
    // Columns were selected as a range; the length of _colIndices is passed
    // as the column count.
    return _state.Join(_colStartIfRange, _colIndices.Length, separator);
}

/// <summary>
/// Joins the text of the selected columns into a new string, placing
/// <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>The joined text as a string.</returns>
public string JoinToString(ReadOnlySpan<char> separator)
{
    if (IsIndices())
    {
        // Columns were selected by explicit indices.
        return _state.JoinToString(_colIndices, separator);
    }
    // Columns were selected as a range; the length of _colIndices is passed
    // as the column count.
    return _state.JoinToString(_colStartIfRange, _colIndices.Length, separator);
}

// A negative range start marks the selection as explicit indices rather
// than a range.
bool IsIndices()
{
    return _colStartIfRange < 0;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down
171 changes: 171 additions & 0 deletions src/Sep/SepReaderState.cs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,39 @@ internal int GetCachedColIndex(string colName)
#endregion

#region Col
/// <summary>
/// Gets the range (start and length, as indices into <c>_chars</c>) of the
/// column at <paramref name="index"/> in the current row.
/// </summary>
/// <param name="index">Zero-based column index within the current row.</param>
/// <returns>The start and length of the column's chars.</returns>
/// <remarks>
/// Calls <c>SepThrow.IndexOutOfRangeException()</c> when
/// <paramref name="index"/> is negative or not less than the current row's
/// column count.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal SepRange GetColRange(int index)
{
// Single unsigned compare rejects both negative and too large indices.
if ((uint)index >= (uint)_currentRowColCount) { SepThrow.IndexOutOfRangeException(); }
A.Assert(_currentRowColEndsOrInfosOffset >= 0);
// Translate the row-relative index to a position in the col ends/infos array.
index += _currentRowColEndsOrInfosOffset;
if (_colSpanFlags == 0)
{
// No flags set: array holds plain int col ends.
var colEnds = _colEndsOrColInfos;
var colStart = colEnds[index] + 1; // +1 since previous end
var colEnd = colEnds[index + 1];
A.Assert(colStart >= 0);
A.Assert(colEnd < _chars.Length);
A.Assert(colEnd >= colStart);
return new(colStart, colEnd - colStart);
}
else if (_colSpanFlags == UnescapeFlag) // Unquote/Unescape
{
// Same backing int array reinterpreted as SepColInfo entries.
ref var colInfos = ref Unsafe.As<int, SepColInfo>(ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos));
var colStart = Unsafe.Add(ref colInfos, index).ColEnd + 1; // +1 since previous end
ref var colInfo = ref Unsafe.Add(ref colInfos, index + 1);
// NOTE(review): quoteCountOrNegativeUnescapedLength is deconstructed but
// never used, so the range returned below is the raw (not unescaped)
// column. Confirm whether unescaping should be applied here so joined
// output matches what reading the column's span yields when unescape
// is enabled.
var (colEnd, quoteCountOrNegativeUnescapedLength) = colInfo;
A.Assert(colStart >= 0);
A.Assert(colEnd < _chars.Length);
A.Assert(colEnd >= colStart);
return new(colStart, colEnd - colStart);
}
else
{
// Any other flag combination is delegated to the trimmed path.
return GetColSpanTrimmedRange(index);
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal ReadOnlySpan<char> GetColSpan(int index)
{
Expand Down Expand Up @@ -415,6 +448,16 @@ ReadOnlySpan<char> GetColSpanTrimmed(int index)
}
}

//[MethodImpl(MethodImplOptions.NoInlining)]
/// <summary>
/// Gets the trimmed column span via <c>GetColSpanTrimmed</c> and converts it
/// to a start/length range relative to the start of <c>_chars</c>.
/// </summary>
/// <param name="index">Index forwarded unchanged to <c>GetColSpanTrimmed</c>.</param>
/// <returns>Start (in chars from the first element of <c>_chars</c>) and length of the span.</returns>
SepRange GetColSpanTrimmedRange(int index)
{
    var trimmed = GetColSpanTrimmed(index);
    // NOTE(review): assumes the trimmed span points into _chars - confirm
    // this holds for all trim/unescape combinations.
    ref var charsOrigin = ref MemoryMarshal.GetArrayDataReference(_chars);
    ref var trimmedOrigin = ref MemoryMarshal.GetReference(trimmed);
    var offsetInBytes = Unsafe.ByteOffset(ref charsOrigin, ref trimmedOrigin);
    // char is two bytes, hence shift right by one to go from bytes to chars.
    var startInChars = (int)(offsetInBytes >> 1);
    return new(startInChars, trimmed.Length);
}

// Only trim the default space character no other whitespace characters
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static ref char TrimSpace(ref char col, ref int length)
Expand Down Expand Up @@ -637,6 +680,38 @@ internal unsafe Span<T> ColsSelect<T>(ReadOnlySpan<int> colIndices, delegate*<Co
}
return span;
}

/// <summary>
/// Joins the columns at <paramref name="colIndices"/> of the current row,
/// placing <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="colIndices">Indices of the columns to join, in output order.</param>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>
/// An empty span for no columns, the column's own span for a single column,
/// otherwise a span holding the joined chars.
/// </returns>
[SkipLocalsInit]
internal ReadOnlySpan<char> Join(ReadOnlySpan<int> colIndices, scoped ReadOnlySpan<char> separator)
{
    // Upper bound for stack allocated ranges. Column count comes from the
    // input data, so an unbounded stackalloc could overflow the stack (which
    // cannot be caught) for very wide rows; beyond this use the heap.
    const int StackAllocMaxColCount = 256;

    var length = colIndices.Length;
    if (length == 0) { return []; }
    if (length == 1) { return GetColSpan(colIndices[0]); }
    Span<SepRange> colRanges = length <= StackAllocMaxColCount
        ? stackalloc SepRange[length]
        : new SepRange[length];
    GetColRanges(colIndices, colRanges);
    return Join(colRanges, separator);
}
/// <summary>
/// Joins the columns at <paramref name="colIndices"/> of the current row into
/// a string, placing <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="colIndices">Indices of the columns to join, in output order.</param>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>
/// <see cref="string.Empty"/> for no columns, the result of
/// <c>ToStringDefault</c> for a single column, otherwise the joined string.
/// </returns>
[SkipLocalsInit]
internal string JoinToString(ReadOnlySpan<int> colIndices, scoped ReadOnlySpan<char> separator)
{
    // Upper bound for stack allocated ranges. Column count comes from the
    // input data, so an unbounded stackalloc could overflow the stack (which
    // cannot be caught) for very wide rows; beyond this use the heap.
    const int StackAllocMaxColCount = 256;

    var length = colIndices.Length;
    if (length == 0) { return string.Empty; }
    if (length == 1) { return ToStringDefault(colIndices[0]); }
    Span<SepRange> colRanges = length <= StackAllocMaxColCount
        ? stackalloc SepRange[length]
        : new SepRange[length];
    GetColRanges(colIndices, colRanges);
    return JoinToString(colRanges, separator);
}

// Fills colRanges so that entry i holds the range of the column at
// colIndices[i]; both spans are expected to have the same length.
void GetColRanges(ReadOnlySpan<int> colIndices, Span<SepRange> colRanges)
{
    A.Assert(colIndices.Length == colRanges.Length);
    var slot = 0;
    foreach (var colIndex in colIndices)
    {
        colRanges[slot] = GetColRange(colIndex);
        ++slot;
    }
}
#endregion

#region Cols Range
Expand Down Expand Up @@ -727,6 +802,102 @@ internal unsafe Span<T> ColsSelect<T>(int colStart, int colCount, delegate*<Col,
}
return span;
}

/// <summary>
/// Joins <paramref name="colCount"/> consecutive columns of the current row
/// starting at <paramref name="colStart"/>, placing
/// <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="colStart">Index of the first column to join.</param>
/// <param name="colCount">Number of columns to join.</param>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>
/// An empty span for no columns, the column's own span for a single column,
/// otherwise a span holding the joined chars.
/// </returns>
[SkipLocalsInit]
internal ReadOnlySpan<char> Join(int colStart, int colCount, scoped ReadOnlySpan<char> separator)
{
    // Upper bound for stack allocated ranges. Column count comes from the
    // input data, so an unbounded stackalloc could overflow the stack (which
    // cannot be caught) for very wide rows; beyond this use the heap.
    const int StackAllocMaxColCount = 256;

    if (colCount == 0) { return []; }
    if (colCount == 1) { return GetColSpan(colStart); }
    Span<SepRange> colRanges = colCount <= StackAllocMaxColCount
        ? stackalloc SepRange[colCount]
        : new SepRange[colCount];
    GetColRanges(colStart, colRanges);
    return Join(colRanges, separator);
}
/// <summary>
/// Joins <paramref name="colCount"/> consecutive columns of the current row
/// starting at <paramref name="colStart"/> into a string, placing
/// <paramref name="separator"/> between consecutive columns.
/// </summary>
/// <param name="colStart">Index of the first column to join.</param>
/// <param name="colCount">Number of columns to join.</param>
/// <param name="separator">Chars inserted between columns; may be empty.</param>
/// <returns>
/// <see cref="string.Empty"/> for no columns, the result of
/// <c>ToStringDefault</c> for a single column, otherwise the joined string.
/// </returns>
[SkipLocalsInit]
internal string JoinToString(int colStart, int colCount, scoped ReadOnlySpan<char> separator)
{
    // Upper bound for stack allocated ranges. Column count comes from the
    // input data, so an unbounded stackalloc could overflow the stack (which
    // cannot be caught) for very wide rows; beyond this use the heap.
    const int StackAllocMaxColCount = 256;

    if (colCount == 0) { return string.Empty; }
    if (colCount == 1) { return ToStringDefault(colStart); }
    Span<SepRange> colRanges = colCount <= StackAllocMaxColCount
        ? stackalloc SepRange[colCount]
        : new SepRange[colCount];
    GetColRanges(colStart, colRanges);
    return JoinToString(colRanges, separator);
}

// Fills colRanges with the ranges of consecutive columns beginning at
// colStart; the number of columns is given by the length of colRanges.
void GetColRanges(int colStart, Span<SepRange> colRanges)
{
    var slot = 0;
    while (slot < colRanges.Length)
    {
        colRanges[slot] = GetColRange(colStart + slot);
        slot++;
    }
}
#endregion

#region Join
// Joins the given col ranges of _chars with separator in between into a
// buffer obtained from _arrayPool and returns that buffer as a span.
// NOTE(review): lifetime of the rented buffer is decided by _arrayPool,
// which is not visible here - confirm when it is returned/reused.
ReadOnlySpan<char> Join(scoped Span<SepRange> colRanges, scoped ReadOnlySpan<char> separator)
{
    var destination = _arrayPool.RentUniqueArrayAsSpan<char>(
        JoinTotalLength(colRanges, separator.Length));
    Join(_chars.AsSpan(), colRanges, separator, destination);
    return destination;
}

// Bundles the two spans needed when joining into a string so they can be
// handed to string.Create as a single state argument.
readonly ref struct JoinToStringState
{
    public JoinToStringState(ReadOnlySpan<SepRange> colRanges, ReadOnlySpan<char> separator)
    {
        ColRanges = colRanges;
        Separator = separator;
    }

    // Ranges of the columns to join.
    public ReadOnlySpan<SepRange> ColRanges { get; }

    // Chars to insert between columns.
    public ReadOnlySpan<char> Separator { get; }
}

// Creates a string of exactly the joined length and writes the columns
// (given as ranges of _chars) and separators directly into it.
string JoinToString(scoped ReadOnlySpan<SepRange> colRanges, scoped ReadOnlySpan<char> separator)
{
    var joinedLength = JoinTotalLength(colRanges, separator.Length);
#if NET9_0_OR_GREATER
    // Spans are passed through a ref struct state to the string.Create callback.
    var createState = new JoinToStringState(colRanges, separator);
    return string.Create(joinedLength, createState,
        (destination, s) => Join(_chars.AsSpan(), s.ColRanges, s.Separator, destination));
#else
    // Before .NET 9 no `allows ref struct`, so the spans cannot be passed as
    // state to string.Create. Instead create a string of the right length,
    // get a mutable span over its chars and join into that.
    var result = new string('\0', joinedLength);
    var destination = MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference<char>(result), result.Length);
    Join(_chars.AsSpan(), colRanges, separator, destination);
    return result;
#endif
}

// Copies each col range of chars into join in order, writing separator
// between consecutive cols (not before the first, not after the last).
// join is expected to have exactly the total joined length.
static void Join(ReadOnlySpan<char> chars,
    ReadOnlySpan<SepRange> colRanges, ReadOnlySpan<char> separator,
    Span<char> join)
{
    var lastIndex = colRanges.Length - 1;
    var written = 0;
    var colIndex = 0;
    while (colIndex <= lastIndex)
    {
        var range = colRanges[colIndex];
        var source = chars.Slice(range.Start, range.Length);
        source.CopyTo(join.Slice(written));
        written += source.Length;
        if (colIndex != lastIndex)
        {
            separator.CopyTo(join.Slice(written));
            written += separator.Length;
        }
        colIndex++;
    }
    A.Assert(written == join.Length);
}

/// <summary>
/// Computes the number of chars needed to hold all columns described by
/// <paramref name="colRanges"/> joined with a separator of
/// <paramref name="separatorLength"/> chars between consecutive columns.
/// </summary>
/// <param name="colRanges">Ranges of the columns to join.</param>
/// <param name="separatorLength">Length in chars of the separator.</param>
/// <returns>Sum of all range lengths plus separators; 0 when there are no ranges.</returns>
/// <exception cref="OverflowException">The total length does not fit in an <see cref="int"/>.</exception>
static int JoinTotalLength(ReadOnlySpan<SepRange> colRanges, int separatorLength)
{
    // Separators only go between cols, so none for zero or one col. Clamping
    // avoids a negative total when no ranges are given.
    var separatorCount = Math.Max(0, colRanges.Length - 1);
    // checked so an oversized join fails here with a clear error instead of
    // silently wrapping to a wrong (possibly small positive) buffer length.
    checked
    {
        var totalLength = separatorLength * separatorCount;
        foreach (var colRange in colRanges)
        {
            totalLength += colRange.Length;
        }
        return totalLength;
    }
}
#endregion

[ExcludeFromCodeCoverage]
Expand Down

0 comments on commit 985457a

Please sign in to comment.