From 3c24ab28a2b40f662c120abadc34ece882f56506 Mon Sep 17 00:00:00 2001
From: Ulimo
Date: Fri, 24 Jan 2025 10:01:24 +0100
Subject: [PATCH] Bug fix: fix union deserialize special case with only null values (#667)

When deserializing a union column with only null values the null counter from the inner null column was incorrectly used on the union column as well.
This change resets the null count.
---
 .../TreeStorage/ArrowToInternalVisitor.cs     |  2 ++
 .../ColumnStore/ArrowTests.cs                 | 33 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/src/FlowtideDotNet.Core/ColumnStore/TreeStorage/ArrowToInternalVisitor.cs b/src/FlowtideDotNet.Core/ColumnStore/TreeStorage/ArrowToInternalVisitor.cs
index 230de132b..d2a6fb784 100644
--- a/src/FlowtideDotNet.Core/ColumnStore/TreeStorage/ArrowToInternalVisitor.cs
+++ b/src/FlowtideDotNet.Core/ColumnStore/TreeStorage/ArrowToInternalVisitor.cs
@@ -200,6 +200,8 @@ public void Visit(DenseUnionArray array)
                 }
 
                 columns.Add(_dataColumn ?? throw new InvalidOperationException("Internal column is null"));
+                // Reset null counter
+                _nullCount = 0;
             }
 
             _dataColumn = new UnionColumn(columns, typeMemory, offsetMemory, array.Length, preAllocatedMemoryManager);
diff --git a/tests/FlowtideDotNet.Core.Tests/ColumnStore/ArrowTests.cs b/tests/FlowtideDotNet.Core.Tests/ColumnStore/ArrowTests.cs
index 1d26f050e..5012d7612 100644
--- a/tests/FlowtideDotNet.Core.Tests/ColumnStore/ArrowTests.cs
+++ b/tests/FlowtideDotNet.Core.Tests/ColumnStore/ArrowTests.cs
@@ -378,6 +378,39 @@ public void UnionSerializeDeserialize()
             Assert.Equal(1, deserializedBatch.Columns[0].GetValueAt(2, default).AsLong);
             Assert.Equal("2", deserializedBatch.Columns[0].GetValueAt(3, default).ToString());
             Assert.Equal(2, deserializedBatch.Columns[0].GetValueAt(4, default).AsLong);
+
+
+        }
+
+        [Fact]
+        public void UnionSerializeDeserializeWithOnlyNull()
+        {
+            Column column = new Column(GlobalMemoryManager.Instance);
+            column.Add(new StringValue("1"));
+            column.Add(NullValue.Instance);
+            column.Add(new Int64Value(1));
+
+            Column toInsertInto = new Column(GlobalMemoryManager.Instance);
+
+            toInsertInto.InsertRangeFrom(0, column, 1, 1);
+
+            var recordBatch = EventArrowSerializer.BatchToArrow(new EventBatchData(
+            [
+                toInsertInto
+            ]), toInsertInto.Count);
+
+            MemoryStream memoryStream = new MemoryStream();
+            var writer = new ArrowStreamWriter(memoryStream, recordBatch.Schema, true);
+            writer.WriteRecordBatch(recordBatch);
+            writer.Dispose();
+            memoryStream.Position = 0;
+            var reader = new ArrowStreamReader(memoryStream, new Apache.Arrow.Memory.NativeMemoryAllocator(), true);
+            var deserializedRecordBatch = reader.ReadNextRecordBatch();
+            var deserializedBatch = EventArrowSerializer.ArrowToBatch(deserializedRecordBatch, GlobalMemoryManager.Instance);
+
+            // This threw an error since null count was incorrectly set on the union column from the null column inside
+            deserializedBatch.Columns[0].RemoveAt(0);
+            Assert.Empty(deserializedBatch);
         }
 
         [Fact]