From 649a9259a42838c8cda8160f7fe5cc42ba8da114 Mon Sep 17 00:00:00 2001
From: Giorgi
Date: Tue, 5 Sep 2023 16:46:47 +0400
Subject: [PATCH] Start moving the reader to data chunks Api

---
Review notes (text between the "---" line and the first "diff --git" line is
ignored when the patch is applied):

* The line structure of the patch was restored and the generic type arguments
  that had been stripped from the text were filled back in.
* GetUInt16 and GetUInt64 now read 16 and 64 bits respectively; both used
  Marshal.ReadInt32, which over-reads for UInt16 and truncates UInt64.
* GetBoolean indexes the vector with sizeof(bool) (1 byte). The stripped call
  was presumably Marshal.SizeOf<bool>(), which reports 4 - TODO confirm.
* IsDBNull reads the vector validity mask instead of returning false ahead of
  unreachable code that used the legacy nullmask API.
* duckdb_vector_get_validity, duckdb_list_vector_get_child and
  duckdb_struct_vector_get_child return pointers and are declared as IntPtr.
* DuckDBString is pinned to 16 bytes and the pointer-form prefix is 4 bytes
  wide, so it no longer overlaps the ptr field at offset 8.
* Left as-is: InitReaderData only fetches chunk 0, and no call that releases
  the fetched chunk is added by this patch.

 DuckDB.NET.Bindings/DuckDBNativeObjects.cs | 22 ++++++
 DuckDB.NET.Bindings/NativeMethods.cs       | 36 +++++++++
 DuckDB.NET.Bindings/Utils.cs               | 23 +++---
 DuckDB.NET.Data/DuckDBDataReader.cs        | 99 +++++++++++++++++-----
 4 files changed, 148 insertions(+), 32 deletions(-)

diff --git a/DuckDB.NET.Bindings/DuckDBNativeObjects.cs b/DuckDB.NET.Bindings/DuckDBNativeObjects.cs
index 9c9a62a3..246f5cf5 100644
--- a/DuckDB.NET.Bindings/DuckDBNativeObjects.cs
+++ b/DuckDB.NET.Bindings/DuckDBNativeObjects.cs
@@ -232,4 +232,26 @@ private static DuckDBInterval FromTimeSpan(TimeSpan timeSpan)
                 , Convert.ToUInt64(timeSpan.Ticks / 10 - new TimeSpan(timeSpan.Days, 0, 0, 0).Ticks / 10)
             );
     }
+
+    [StructLayout(LayoutKind.Explicit, Size = 16)]
+    public struct DuckDBString
+    {
+        [FieldOffset(0)] public DuckDBStringPointer Pointer;
+        [FieldOffset(0)] public DuckDBStringInlined Inlined;
+    }
+
+    [StructLayout(LayoutKind.Explicit)]
+    public struct DuckDBStringPointer
+    {
+        [FieldOffset(0)] public uint length;
+        [FieldOffset(4)] public uint prefix;
+        [FieldOffset(8)] public IntPtr ptr;
+    }
+
+    [StructLayout(LayoutKind.Explicit)]
+    public struct DuckDBStringInlined
+    {
+        [FieldOffset(0)] public uint length;
+        [FieldOffset(4)] public IntPtr inlined;
+    }
 }
diff --git a/DuckDB.NET.Bindings/NativeMethods.cs b/DuckDB.NET.Bindings/NativeMethods.cs
index 7a7440de..4f040cc7 100644
--- a/DuckDB.NET.Bindings/NativeMethods.cs
+++ b/DuckDB.NET.Bindings/NativeMethods.cs
@@ -84,6 +84,36 @@ public static class Query
             public static extern IntPtr DuckDBResultError([In, Out] DuckDBResult result);
         }
 
+        public static class DataChunks
+        {
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_data_chunk_get_column_count")]
+            public static extern long DuckDBDataChunkGetColumnCount(IntPtr chunk);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_data_chunk_get_vector")]
+            public static extern IntPtr DuckDBDataChunkGetVector(IntPtr chunk, long columnIndex);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_data_chunk_get_size")]
+            public static extern long DuckDBDataChunkGetSize(IntPtr chunk);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_vector_get_column_type")]
+            public static extern IntPtr DuckDBVectorGetColumnType(IntPtr vector);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_vector_get_data")]
+            public static extern IntPtr DuckDBVectorGetData(IntPtr vector);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_vector_get_validity")]
+            public static extern IntPtr DuckDBVectorGetValidity(IntPtr vector);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_list_vector_get_child")]
+            public static extern IntPtr DuckDBListVectorGetChild(IntPtr vector);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_list_vector_get_size")]
+            public static extern long DuckDBListVectorGetSize(IntPtr vector);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_struct_vector_get_child")]
+            public static extern IntPtr DuckDBStructVectorGetChild(IntPtr vector, long index);
+        }
+
         public static class Types
         {
             [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_value_boolean")]
@@ -139,6 +169,12 @@ public static class Types
 
             [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_value_timestamp")]
             public static extern DuckDBTimestampStruct DuckDBValueTimestamp([In, Out] DuckDBResult result, long col, long row);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_result_get_chunk")]
+            public static extern IntPtr DuckDBResultGetChunk(DuckDBResult result, long chunkIndex);
+
+            [DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_result_chunk_count")]
+            public static extern long DuckDBResultChunkCount(DuckDBResult result);
         }
 
         public static class PreparedStatements
diff --git a/DuckDB.NET.Bindings/Utils.cs b/DuckDB.NET.Bindings/Utils.cs
index 323e8099..12ddf242 100644
--- a/DuckDB.NET.Bindings/Utils.cs
+++ b/DuckDB.NET.Bindings/Utils.cs
@@ -13,22 +13,25 @@ public static bool IsSuccess(this DuckDBState duckDBState)
             return duckDBState == DuckDBState.DuckDBSuccess;
         }
 
-        public static string ToManagedString(this IntPtr unmanagedString, bool freeWhenCopied = true)
+        public static string ToManagedString(this IntPtr unmanagedString, bool freeWhenCopied = true, int? length = null)
         {
             string result;
 #if NET6_0_OR_GREATER
-            result = Marshal.PtrToStringUTF8(unmanagedString);
+            result = length.HasValue ? Marshal.PtrToStringUTF8(unmanagedString, length.Value) : Marshal.PtrToStringUTF8(unmanagedString);
 #else
             if (unmanagedString == IntPtr.Zero)
             {
                 return "";
             }
 
-            var length = 0;
-
-            while (Marshal.ReadByte(unmanagedString, length) != 0)
+            if (length == null)
             {
-                length++;
+                length = 0;
+
+                while (Marshal.ReadByte(unmanagedString, length.Value) != 0)
+                {
+                    length++;
+                }
             }
 
             if (length == 0)
@@ -36,11 +39,11 @@ public static string ToManagedString(this IntPtr unmanagedString, bool freeWhenC
                 return string.Empty;
             }
 
-            var byteArray = new byte[length];
+            var byteArray = new byte[length.Value];
 
-            Marshal.Copy(unmanagedString, byteArray, 0, length);
+            Marshal.Copy(unmanagedString, byteArray, 0, length.Value);
 
-            result = Encoding.UTF8.GetString(byteArray, 0, length);
+            result = Encoding.UTF8.GetString(byteArray, 0, length.Value);
 #endif
             if (freeWhenCopied)
             {
@@ -93,4 +96,4 @@ internal static int GetMicrosecond(this TimeOnly timeOnly)
         }
 #endif
     }
-}
+}
\ No newline at end of file
diff --git a/DuckDB.NET.Data/DuckDBDataReader.cs b/DuckDB.NET.Data/DuckDBDataReader.cs
index 912a3fab..931d3150 100644
--- a/DuckDB.NET.Data/DuckDBDataReader.cs
+++ b/DuckDB.NET.Data/DuckDBDataReader.cs
@@ -12,6 +12,7 @@ namespace DuckDB.NET.Data
 {
     public class DuckDBDataReader : DbDataReader
     {
+        private const int InlineStringMaxLength = 12;
         private readonly DuckDbCommand command;
         private readonly CommandBehavior behavior;
 
@@ -25,6 +26,7 @@ public class DuckDBDataReader : DbDataReader
 
         private int fieldCount;
         private int recordsAffected;
+        private Dictionary<int, IntPtr> vectors = new();
 
         internal DuckDBDataReader(DuckDbCommand command, List<DuckDBResult> queryResults, CommandBehavior behavior)
         {
@@ -38,6 +40,18 @@ internal DuckDBDataReader(DuckDbCommand command, List<DuckDBResult> queryResults
 
         private void InitReaderData()
         {
+            var chunkCount = NativeMethods.Types.DuckDBResultChunkCount(currentResult);
+            var chunk = NativeMethods.Types.DuckDBResultGetChunk(currentResult, 0);
+
+            var columnCount = NativeMethods.DataChunks.DuckDBDataChunkGetColumnCount(chunk);
+
+            var size = NativeMethods.DataChunks.DuckDBDataChunkGetSize(chunk);
+
+            for (int i = 0; i < columnCount; i++)
+            {
+                vectors[i] = NativeMethods.DataChunks.DuckDBDataChunkGetVector(chunk, i);
+            }
+
             currentRow = -1;
             rowCount = NativeMethods.Query.DuckDBRowCount(currentResult);
             fieldCount = (int)NativeMethods.Query.DuckDBColumnCount(currentResult);
@@ -46,17 +60,20 @@ private void InitReaderData()
 
         public override bool GetBoolean(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueBoolean(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.ReadByte(data, currentRow * sizeof(bool)) != 0;
         }
 
         public override byte GetByte(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueUInt8(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.ReadByte(data, currentRow * Marshal.SizeOf<byte>());
         }
 
         private sbyte GetSByte(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueInt8(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return (sbyte)Marshal.ReadByte(data, currentRow * Marshal.SizeOf<sbyte>());
         }
 
         public override long GetBytes(int ordinal, long dataOffset, byte[] buffer, int bufferOffset, int length)
@@ -81,20 +98,23 @@ public override string GetDataTypeName(int ordinal)
 
         public override DateTime GetDateTime(int ordinal)
         {
-            var timestampStruct = NativeMethods.Types.DuckDBValueTimestamp(currentResult, ordinal, currentRow);
-
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            var timestampStruct = Marshal.PtrToStructure<DuckDBTimestampStruct>(data + currentRow * Marshal.SizeOf<DuckDBTimestampStruct>());
+
             return timestampStruct.ToDateTime();
         }
 
         private DuckDBDateOnly GetDateOnly(int ordinal)
         {
-            var date = NativeMethods.Types.DuckDBValueDate(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            var date = Marshal.PtrToStructure<DuckDBDate>(data + currentRow * Marshal.SizeOf<DuckDBDate>());
             return NativeMethods.DateTime.DuckDBFromDate(date);
         }
 
         private DuckDBTimeOnly GetTimeOnly(int ordinal)
         {
-            var time = NativeMethods.Types.DuckDBValueTime(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            var time = Marshal.PtrToStructure<DuckDBTime>(data + currentRow * Marshal.SizeOf<DuckDBTime>());
             return NativeMethods.DateTime.DuckDBFromTime(time);
         }
 
@@ -105,7 +125,8 @@ public override decimal GetDecimal(int ordinal)
 
         public override double GetDouble(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueDouble(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.PtrToStructure<double>(data + currentRow * Marshal.SizeOf<double>());
         }
 
         public override Type GetFieldType(int ordinal)
@@ -132,13 +153,15 @@ public override Type GetFieldType(int ordinal)
                 DuckDBType.DuckdbTypeVarchar => typeof(string),
                 DuckDBType.DuckdbTypeDecimal => typeof(decimal),
                 DuckDBType.DuckdbTypeBlob => typeof(Stream),
-                var type => throw new ArgumentException($"Unrecognised type {type} ({(int)type}) in column {ordinal + 1}")
+                var type => throw new ArgumentException(
+                    $"Unrecognised type {type} ({(int)type}) in column {ordinal + 1}")
             };
         }
 
         public override float GetFloat(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueFloat(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.PtrToStructure<float>(data + currentRow * Marshal.SizeOf<float>());
         }
 
         public override Guid GetGuid(int ordinal)
@@ -148,37 +171,46 @@ public override Guid GetGuid(int ordinal)
 
         public override short GetInt16(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueInt16(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.ReadInt16(data, currentRow * Marshal.SizeOf<short>());
         }
 
         public override int GetInt32(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueInt32(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.ReadInt32(data, currentRow * Marshal.SizeOf<int>());
         }
 
         public override long GetInt64(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueInt64(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return Marshal.ReadInt64(data, currentRow * Marshal.SizeOf<long>());
         }
 
         private ushort GetUInt16(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueUInt16(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return (ushort)Marshal.ReadInt16(data, currentRow * Marshal.SizeOf<ushort>());
         }
 
         private uint GetUInt32(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueUInt32(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return (uint)Marshal.ReadInt32(data, currentRow * Marshal.SizeOf<uint>());
         }
 
         private ulong GetUInt64(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueUInt64(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            return (ulong)Marshal.ReadInt64(data, currentRow * Marshal.SizeOf<ulong>());
         }
 
         private BigInteger GetBigInteger(int ordinal)
         {
-            return BigInteger.Parse(GetString(ordinal));
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            var hugeInt = Marshal.PtrToStructure<DuckDBHugeInt>(data + currentRow * Marshal.SizeOf<DuckDBHugeInt>());
+
+            return hugeInt.ToBigInteger();
         }
 
         public override string GetName(int ordinal)
@@ -203,9 +235,20 @@ public override int GetOrdinal(string name)
 
         public override string GetString(int ordinal)
         {
-            var unmanagedString = NativeMethods.Types.DuckDBValueVarchar(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            data += currentRow * Marshal.SizeOf<DuckDBString>();
+
+            var length = Marshal.ReadInt32(data);
 
-            return unmanagedString.ToManagedString();
+            if (length <= InlineStringMaxLength)
+            {
+                return (data + Marshal.SizeOf<int>()).ToManagedString(false, length);
+            }
+            else
+            {
+                var intPtr = Marshal.ReadIntPtr(data + Marshal.SizeOf<int>() * 2);
+                return intPtr.ToManagedString(false, length);
+            }
         }
 
         public override object GetValue(int ordinal)
@@ -243,7 +286,9 @@ public override object GetValue(int ordinal)
 
         private DuckDBInterval GetDuckDBInterval(int ordinal)
         {
-            return NativeMethods.Types.DuckDBValueInterval(currentResult, ordinal, currentRow);
+            var data = NativeMethods.DataChunks.DuckDBVectorGetData(vectors[ordinal]);
+            var interval = Marshal.PtrToStructure<DuckDBInterval>(data + currentRow * Marshal.SizeOf<DuckDBInterval>());
+            return interval;
         }
 
         public override int GetValues(object[] values)
@@ -264,6 +309,15 @@ public override Stream GetStream(int ordinal)
 
         public override bool IsDBNull(int ordinal)
         {
-            var nullMask = NativeMethods.Query.DuckDBNullmaskData(currentResult, ordinal);
-            return Marshal.ReadByte(nullMask, currentRow) != 0;
+            var validity = NativeMethods.DataChunks.DuckDBVectorGetValidity(vectors[ordinal]);
+
+            // A null validity pointer means the vector carries no mask, i.e. every row is valid.
+            if (validity == IntPtr.Zero)
+            {
+                return false;
+            }
+
+            // The mask stores one bit per row in 64-bit entries; a cleared bit marks a NULL value.
+            var entry = (ulong)Marshal.ReadInt64(validity, currentRow / 64 * sizeof(ulong));
+            return (entry & (1UL << (currentRow % 64))) == 0;
         }
@@ -330,6 +384,7 @@ public override DataTable GetSchemaTable()
                 rowData[4] = true;
                 table.Rows.Add(rowData);
             }
+
             return table;
         }
 