From 1394f53fef6cfe2b26acbdcd8cb0a4fe91710653 Mon Sep 17 00:00:00 2001 From: Krzysztof Nozderko Date: Mon, 14 Oct 2024 16:08:41 +0200 Subject: [PATCH] SNOW-1488701 Enable structured types, documentation for structured types and vector type (#1016) Co-authored-by: Piotr Bulawa --- README.md | 8 + .../IntegrationTests/StructuredArraysIT.cs | 32 +-- .../IntegrationTests/VectorTypesIT.cs | 27 --- .../Client/SnowflakeDbDataReader.cs | 33 +--- doc/StructuredTypes.md | 185 ++++++++++++++++++ doc/VectorType.md | 18 ++ 6 files changed, 233 insertions(+), 70 deletions(-) create mode 100644 doc/StructuredTypes.md create mode 100644 doc/VectorType.md diff --git a/README.md b/README.md index ed8b8341c..0378b5416 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,14 @@ Snowflake data types and their .NET types is covered in: [Data Types and Data Fo How execute a query, use query bindings, run queries synchronously and asynchronously: [Running Queries and Reading Results](doc/QueryingData.md) +## Structured types + +Using structured types: [Structured types](doc/StructuredTypes.md) + +## Vector type + +Using vector type: [Vector type](doc/VectorType.md) + ## Stage Files Using stage files within PUT/GET commands: diff --git a/Snowflake.Data.Tests/IntegrationTests/StructuredArraysIT.cs b/Snowflake.Data.Tests/IntegrationTests/StructuredArraysIT.cs index 142f1eae3..aee5e666e 100644 --- a/Snowflake.Data.Tests/IntegrationTests/StructuredArraysIT.cs +++ b/Snowflake.Data.Tests/IntegrationTests/StructuredArraysIT.cs @@ -28,7 +28,7 @@ public void TestSelectArray() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -54,7 +54,7 @@ public void TestSelectArrayOfObjects() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(2, array.Length); @@ -79,7 +79,7 @@ public void TestSelectArrayOfArrays() 
Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(2, array.Length); @@ -104,7 +104,7 @@ public void TestSelectArrayOfMap() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray>(0); + var array = reader.GetArray>(0); // assert Assert.AreEqual(1, array.Length); @@ -134,7 +134,7 @@ public void TestSelectSemiStructuredTypesInArray(string valueSfString, string ex Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.NotNull(array); @@ -159,7 +159,7 @@ public void TestSelectArrayOfIntegers() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -184,7 +184,7 @@ public void TestSelectArrayOfLong() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -209,7 +209,7 @@ public void TestSelectArrayOfFloats() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -234,7 +234,7 @@ public void TestSelectArrayOfDoubles() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -259,7 +259,7 @@ public void TestSelectArrayOfDoublesWithExponentNotation() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(2, array.Length); @@ -284,7 +284,7 @@ public void TestSelectStringArrayWithNulls() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -309,7 +309,7 @@ public void 
TestSelectIntArrayWithNulls() Assert.IsTrue(reader.Read()); // act - var array = reader.GetStucturedArray(0); + var array = reader.GetArray(0); // assert Assert.AreEqual(3, array.Length); @@ -334,7 +334,7 @@ public void TestSelectNullArray() Assert.IsTrue(reader.Read()); // act - var nullArray = reader.GetStucturedArray(0); + var nullArray = reader.GetArray(0); // assert Assert.IsNull(nullArray); @@ -358,7 +358,7 @@ public void TestThrowExceptionForInvalidArray() Assert.IsTrue(reader.Read()); // act - var thrown = Assert.Throws(() => reader.GetStucturedArray(0)); + var thrown = Assert.Throws(() => reader.GetArray(0)); // assert SnowflakeDbExceptionAssert.HasErrorCode(thrown, SFError.STRUCTURED_TYPE_READ_DETAILED_ERROR); @@ -384,7 +384,7 @@ public void TestThrowExceptionForInvalidArrayElement() Assert.IsTrue(reader.Read()); // act - var thrown = Assert.Throws(() => reader.GetStucturedArray(0)); + var thrown = Assert.Throws(() => reader.GetArray(0)); // assert SnowflakeDbExceptionAssert.HasErrorCode(thrown, SFError.STRUCTURED_TYPE_READ_ERROR); @@ -411,7 +411,7 @@ public void TestThrowExceptionForNextedInvalidElement() Assert.IsTrue(reader.Read()); // act - var thrown = Assert.Throws(() => reader.GetStucturedArray(0)); + var thrown = Assert.Throws(() => reader.GetArray(0)); // assert SnowflakeDbExceptionAssert.HasErrorCode(thrown, SFError.STRUCTURED_TYPE_READ_DETAILED_ERROR); diff --git a/Snowflake.Data.Tests/IntegrationTests/VectorTypesIT.cs b/Snowflake.Data.Tests/IntegrationTests/VectorTypesIT.cs index aa2475c86..07a24a91b 100644 --- a/Snowflake.Data.Tests/IntegrationTests/VectorTypesIT.cs +++ b/Snowflake.Data.Tests/IntegrationTests/VectorTypesIT.cs @@ -6,7 +6,6 @@ using Snowflake.Data.Client; using System.Data.Common; using Snowflake.Data.Core; -using Snowflake.Data.Tests.Util; using System; namespace Snowflake.Data.Tests.IntegrationTests @@ -345,32 +344,6 @@ public void TestThrowExceptionForInvalidIdentifierForFloatVector() } } - [Test] - public void 
TestThrowExceptionForInvalidVectorType() - { - using (DbConnection conn = new SnowflakeDbConnection()) - { - conn.ConnectionString = ConnectionString; - conn.Open(); - AlterSessionSettings(conn); - - using (DbCommand command = conn.CreateCommand()) - { - command.CommandText = "SELECT ARRAY_CONSTRUCT(1.1)::ARRAY(DOUBLE)"; - var reader = (SnowflakeDbDataReader)command.ExecuteReader(); - Assert.IsTrue(reader.Read()); - - // act - var thrown = Assert.Throws(() => reader.GetArray(0)); - - // assert - SnowflakeDbExceptionAssert.HasErrorCode(thrown, SFError.STRUCTURED_TYPE_READ_DETAILED_ERROR); - Assert.That(thrown.Message, Does.Contain("Failed to read structured type when getting an array")); - Assert.That(thrown.Message, Does.Contain("Method GetArray can be used only for vector types")); - } - } - } - private void AlterSessionSettings(DbConnection conn) { using (var command = conn.CreateCommand()) diff --git a/Snowflake.Data/Client/SnowflakeDbDataReader.cs b/Snowflake.Data/Client/SnowflakeDbDataReader.cs index 7d624bd80..7d475024a 100755 --- a/Snowflake.Data/Client/SnowflakeDbDataReader.cs +++ b/Snowflake.Data/Client/SnowflakeDbDataReader.cs @@ -253,7 +253,7 @@ public override int GetValues(object[] values) return count; } - internal T GetObject(int ordinal) + public T GetObject(int ordinal) where T : class, new() { try @@ -282,9 +282,11 @@ public T[] GetArray(int ordinal) { var rowType = resultSet.sfResultSetMetaData.rowTypes[ordinal]; var fields = rowType.fields; - if (fields == null || fields.Count == 0 || !JsonToStructuredTypeConverter.IsVectorType(rowType.type)) + var isArrayOrVector = JsonToStructuredTypeConverter.IsArrayType(rowType.type) || + JsonToStructuredTypeConverter.IsVectorType(rowType.type); + if (fields == null || fields.Count == 0 || !isArrayOrVector) { - throw new StructuredTypesReadingException($"Method GetArray<{typeof(T)}> can be used only for vector types"); + throw new StructuredTypesReadingException($"Method GetArray<{typeof(T)}> can be used 
only for structured array or vector types"); } var stringValue = GetString(ordinal); @@ -299,30 +301,7 @@ public T[] GetArray(int ordinal) } } - internal T[] GetStucturedArray(int ordinal) - { - try - { - var rowType = resultSet.sfResultSetMetaData.rowTypes[ordinal]; - var fields = rowType.fields; - if (fields == null || fields.Count == 0 || !JsonToStructuredTypeConverter.IsArrayType(rowType.type)) - { - throw new StructuredTypesReadingException($"Method GetArray<{typeof(T)}> can be used only for structured array"); - } - - var stringValue = GetString(ordinal); - var json = stringValue == null ? null : JArray.Parse(stringValue); - return JsonToStructuredTypeConverter.ConvertArray(fields, json); - } - catch (Exception e) - { - if (e is SnowflakeDbException) - throw; - throw StructuredTypesReadingHandler.ToSnowflakeDbException(e, "when getting an array"); - } - } - - internal Dictionary GetMap(int ordinal) + public Dictionary GetMap(int ordinal) { try { diff --git a/doc/StructuredTypes.md b/doc/StructuredTypes.md new file mode 100644 index 000000000..bc45d98c9 --- /dev/null +++ b/doc/StructuredTypes.md @@ -0,0 +1,185 @@ +## Concept + +Snowflake structured types documentation is available here: [Snowflake Structured Types Documentation](https://docs.snowflake.com/en/sql-reference/data-types-structured). + +Snowflake offers a way to store structured types which can be: +- objects, e.g. ```OBJECT(city VARCHAR, state VARCHAR)``` +- arrays, e.g. ```ARRAY(NUMBER)``` +- maps, e.g. ```MAP(VARCHAR, VARCHAR)``` + +The driver allows reading and casting such structured objects into customer classes. + +**Note**: Currently, reading structured types is available only for JSON result format. 
+ +## Enabling the feature + +Currently, reading structured types is available only for JSON result format, so you can make sure you are using JSON result format by: +```sql +ALTER SESSION SET DOTNET_QUERY_RESULT_FORMAT = JSON; +``` + +The structured types feature is enabled starting from v4.2.0 driver version. + +## Structured types vs semi-structured types + +The difference between structured types and semi-structured types is that structured types contain type definitions for given objects/arrays/maps. + +E.g. for a given object: +```sql +SELECT OBJECT_CONSTRUCT('city','San Mateo', 'state', 'CA')::OBJECT(city VARCHAR, state VARCHAR) +``` + +The part indicating the type of object is `::OBJECT(city VARCHAR, state VARCHAR)`. +This part of the definition is essential for structured types because it is used to convert the object into the customer class instance. + +Whereas the corresponding semi-structured type does not contain a detailed type definition, for instance: +```sql +SELECT OBJECT_CONSTRUCT('city','San Mateo', 'state', 'CA')::OBJECT +``` + +which means the semi-structured types are returned only as a JSON string. + +## Handling objects + +You can construct structured objects by using an object constructor and providing type details: + +```sql +SELECT OBJECT_CONSTRUCT('city','San Mateo', 'state', 'CA')::OBJECT(city VARCHAR, state VARCHAR) +``` + +You can read the object into your class by executing `T SnowflakeDbDataReader.GetObject<T>(int ordinal)` method: + +```csharp +var reader = (SnowflakeDbDataReader) command.ExecuteReader(); +Assert.IsTrue(reader.Read()); +var address = reader.GetObject<Address>(0);
+``` + +where `Address` is a customer class, e.g. +```csharp +public class Address +{ + public string city { get; set; } + public string state { get; set; } + public Zip zip { get; set; } +} +``` + +There are a few possible ways of constructing an object of a customer class. +The customer object (e.g. `Address`) can be created either: +- by the properties order, which is a default method +- by properties names +- by the constructor. + +### Creating objects by properties order + +Creating objects by properties order is a default construction method. +Objects are created by the non-parametrized constructor, and then the n-th Snowflake object field is converted into the n-th customer object property, one by one. + +You can annotate your class with `SnowflakeObject` annotation to make sure this creation method would be chosen (however it is not necessary since it is a default method): +```csharp +[SnowflakeObject(ConstructionMethod = SnowflakeObjectConstructionMethod.PROPERTIES_ORDER)] +public class Address +{ + public string city { get; set; } + public string state { get; set; } + public Zip zip { get; set; } +} +``` + +If you would like to skip any customer property, you could use a `[SnowflakeColumn(IgnoreForPropertyOrder = true)]` annotation for a given property. +For instance, the annotation used in the following class definition makes the `city` be skipped when mapping the properties: +```csharp +public class Address +{ + [SnowflakeColumn(IgnoreForPropertyOrder = true)] + public string city { get; set; } + public string state { get; set; } + public Zip zip { get; set; } +} +``` + +So, the first field from the database object would be mapped to the `state` property because `city` is skipped. + +### Creating objects by property names + +Using the `[SnowflakeObject(ConstructionMethod = SnowflakeObjectConstructionMethod.PROPERTIES_NAMES)]` annotation on the customer class can enable the creation of objects by their property names. 
+In this creation method, objects are created by the non-parametrized constructor, and then for each of the database object fields a property of the same name is set with the field value. +It is crucial that database object field names are the same as customer property names; otherwise, a given database object field value would not be set in the customer object. +You can use the annotation `SnowflakeColumn` to rename the customer object property to match the database object field name. + +In the example: + +```csharp +[SnowflakeObject(ConstructionMethod = SnowflakeObjectConstructionMethod.PROPERTIES_NAMES)] +public class Address +{ + [SnowflakeColumn(Name = "nearestCity")] + public string city { get; set; } + public string state { get; set; } + public Zip zip { get; set; } +} +``` + +the database object field `nearestCity` would be mapped to the `city` property of the `Address` class. + +### Creating objects by the constructor + +Using the `[SnowflakeObject(ConstructionMethod = SnowflakeObjectConstructionMethod.CONSTRUCTOR)]` annotation on the customer class enables the creation of objects by a constructor. +In this creation method, an object with all its fields is created by a constructor. +A constructor with exactly as many parameters as there are database object fields should exist because such a constructor would be chosen to instantiate a customer object. +Database object fields are mapped to customer object constructor parameters based on their order. 
+ +Example: +```csharp +[SnowflakeObject(ConstructionMethod = SnowflakeObjectConstructionMethod.CONSTRUCTOR)] +public class Address +{ + private string _city; + private string _state; + + public Address() + { + } + + public Address(string city, string state) + { + _city = city; + _state = state; + } +} +``` + +## Handling arrays + +You can construct structured arrays like this: + +```sql +SELECT ARRAY_CONSTRUCT('a', 'b', 'c')::ARRAY(TEXT) +``` + +You can read such a structured array using `T[] SnowflakeDbDataReader.GetArray<T>(int ordinal)` method to get an array of the specified type. + +```csharp +var reader = (SnowflakeDbDataReader) command.ExecuteReader(); +Assert.IsTrue(reader.Read()); +var array = reader.GetArray<string>(0); +``` + +## Handling maps + +You can construct structured maps like this: + +```sql +SELECT OBJECT_CONSTRUCT('5','San Mateo', '8', 'CA', '13', '01-234')::MAP(INTEGER, VARCHAR) +``` + +**Note**: The only possible map key types are: VARCHAR or NUMBER with scale 0. + +You can read a structured map using `Dictionary<TKey, TValue> SnowflakeDbDataReader.GetMap<TKey, TValue>(int ordinal)` method to get a dictionary with the specified key and value types. + +```csharp +var reader = (SnowflakeDbDataReader) command.ExecuteReader(); +Assert.IsTrue(reader.Read()); +var map = reader.GetMap<int, string>(0); +``` diff --git a/doc/VectorType.md b/doc/VectorType.md new file mode 100644 index 000000000..fcf3cdaa4 --- /dev/null +++ b/doc/VectorType.md @@ -0,0 +1,18 @@ +# Vector type + +A vector type represents a fixed-size array of either integer or float elements. +Examples: +- `[4, 5, 6]::VECTOR(INT, 3)` is a 3-element vector of integers +- `[1.1, 2.2]::VECTOR(FLOAT, 2)` is a 2-element vector of floats + +You can read more about vectors here: [Vector data types](https://docs.snowflake.com/en/sql-reference/data-types-vector). + +The driver allows reading a vector column into `int[]` or `float[]` arrays by calling the `T[] SnowflakeDbDataReader.GetArray<T>(int ordinal)` +method for either int or float types. 
+ +```csharp +var reader = (SnowflakeDbDataReader) command.ExecuteReader(); +Assert.IsTrue(reader.Read()); +int[] intVector = reader.GetArray<int>(0); +float[] floatVector = reader.GetArray<float>(1); +```