From 73acd3c4f02390501f7c6b0eb95859e964e13c00 Mon Sep 17 00:00:00 2001 From: Krisso <105232495+krisso-rtb@users.noreply.github.com> Date: Wed, 27 Sep 2023 21:50:18 +0200 Subject: [PATCH] [fastserde] Logical types are now supported for avro version 1.9+ (#516) A modelData parameter (GenericData or SpecificData instance) is passed to FastSerdeBase, FastSerdeCache and everything else in-between. The behavior is unchanged if the modelData is not passed in. --- .../fastserde/FastAvroSerdesBenchmark.java | 2 +- .../avro/fastserde/FloatArrayBenchmark.java | 2 +- .../src/test/avro/logicalTypesDefined.avsc | 38 +++ .../src/test/avro/logicalTypesTest1.avsc | 187 +++++++++++++ .../src/test/avro/logicalTypesUndefined.avsc | 34 +++ .../test/avro/logicalTypesWithDefaults.avsc | 202 ++++++++++++++ .../avro/fastserde/FastDatumReaderTest.java | 2 +- .../avro/fastserde/FastDatumWriterTest.java | 2 +- .../FastDeserializerDefaultsTest.java | 9 +- .../FastGenericDeserializerGeneratorTest.java | 8 +- .../FastGenericSerializerGeneratorTest.java | 2 +- .../avro/fastserde/FastSerdeCacheTest.java | 8 +- .../avro/fastserde/FastSerdeTestsSupport.java | 8 +- ...FastSpecificDeserializerGeneratorTest.java | 20 +- .../FastSpecificSerializerGeneratorTest.java | 2 +- .../avro/fastserde/FastStringableTest.java | 8 +- .../linkedin/avro/fastserde/UtilsTest.java | 55 ++++ .../types/FunctionThrowingIOException.java | 9 + .../logical/types/InMemoryEncoder.java | 175 ++++++++++++ .../types/LogicalTypesFastSerdeTest.java | 239 +++++++++++++++++ .../logical/types/LogicalTypesTestBase.java | 251 ++++++++++++++++++ .../UndefinedLogicalTypesFastSerdeTest.java | 117 ++++++++ .../micro/benchmark/SerDeMicroBenchmark.java | 7 +- fastserde/avro-fastserde-tests14/build.gradle | 3 +- fastserde/avro-fastserde-tests15/build.gradle | 3 +- fastserde/avro-fastserde-tests16/build.gradle | 3 +- fastserde/avro-fastserde-tests17/build.gradle | 3 +- fastserde/avro-fastserde-tests18/build.gradle | 4 +- 
.../fastserde/FastDatumReaderWriterUtil.java | 6 +- .../fastserde/FastDeserializerGenerator.java | 154 +++++++---- .../FastDeserializerGeneratorBase.java | 9 +- .../fastserde/FastGenericDatumReader.java | 37 ++- .../fastserde/FastGenericDatumWriter.java | 40 +-- .../FastGenericDeserializerGenerator.java | 9 +- .../FastGenericSerializerGenerator.java | 7 +- .../avro/fastserde/FastSerdeBase.java | 81 +++++- .../avro/fastserde/FastSerdeCache.java | 199 ++++++++++---- .../fastserde/FastSerializerGenerator.java | 109 +++++--- .../fastserde/FastSpecificDatumReader.java | 24 +- .../fastserde/FastSpecificDatumWriter.java | 20 +- .../FastSpecificDeserializerGenerator.java | 9 +- .../FastSpecificSerializerGenerator.java | 7 +- .../avro/fastserde/SchemaAssistant.java | 90 ++++++- .../com/linkedin/avro/fastserde/Utils.java | 36 ++- .../avro/generic/ColdGenericDatumReader.java | 15 ++ .../avro/generic/ColdSpecificDatumReader.java | 16 ++ .../avroutil1/compatibility/AvroAdapter.java | 4 + .../AvroCompatibilityHelper.java | 10 + .../compatibility/avro110/Avro110Adapter.java | 12 + .../compatibility/avro111/Avro111Adapter.java | 12 + .../compatibility/avro14/Avro14Adapter.java | 13 + .../backports/GenericDatumWriterExt.java | 24 ++ .../compatibility/avro15/Avro15Adapter.java | 12 + .../backports/GenericDatumReaderExt.java | 24 ++ .../backports/GenericDatumWriterExt.java | 24 ++ .../compatibility/avro16/Avro16Adapter.java | 12 + .../backports/GenericDatumReaderExt.java | 24 ++ .../backports/GenericDatumWriterExt.java | 24 ++ .../compatibility/avro17/Avro17Adapter.java | 12 + .../compatibility/avro18/Avro18Adapter.java | 12 + .../compatibility/avro19/Avro19Adapter.java | 12 + 61 files changed, 2240 insertions(+), 262 deletions(-) create mode 100644 fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesDefined.avsc create mode 100644 fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesTest1.avsc create mode 100644 
fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesUndefined.avsc create mode 100644 fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesWithDefaults.avsc create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/FunctionThrowingIOException.java create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/InMemoryEncoder.java create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesFastSerdeTest.java create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesTestBase.java create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/UndefinedLogicalTypesFastSerdeTest.java create mode 100644 helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/backports/GenericDatumWriterExt.java create mode 100644 helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumReaderExt.java create mode 100644 helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumWriterExt.java create mode 100644 helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumReaderExt.java create mode 100644 helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumWriterExt.java diff --git a/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FastAvroSerdesBenchmark.java b/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FastAvroSerdesBenchmark.java index e95f4acd6..da320c151 100644 --- a/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FastAvroSerdesBenchmark.java +++ 
b/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FastAvroSerdesBenchmark.java @@ -41,7 +41,7 @@ * * To run this benchmark: * - * ./gradlew :avro-fastserde:jmh -PUSE_AVRO_14/17/18 + * ./gradlew :fastserde:avro-fastserde-jmh:jmh -PUSE_AVRO_14/17/18/19/110/111 * * * You also can test by your own AVRO schema by replacing the contents in diff --git a/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FloatArrayBenchmark.java b/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FloatArrayBenchmark.java index 6940f0d15..5ecacd796 100644 --- a/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FloatArrayBenchmark.java +++ b/fastserde/avro-fastserde-jmh/src/jmh/java/com/linkedin/avro/fastserde/FloatArrayBenchmark.java @@ -40,7 +40,7 @@ * * To run this benchmark: * - * ./gradlew :avro-fastserde:jmh -PUSE_AVRO_18 + * ./gradlew :fastserde:avro-fastserde-jmh:jmh -PUSE_AVRO_18 * * */ diff --git a/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesDefined.avsc b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesDefined.avsc new file mode 100644 index 000000000..35384336e --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesDefined.avsc @@ -0,0 +1,38 @@ +{ + "type": "record", + "name": "FastSerdeLogicalTypesDefined", + "namespace": "com.linkedin.avro.fastserde.generated.avro", + "doc": "Used in tests to confirm fast-serde supports logical-types", + "fields": [ + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "arrayOfUnionOfDateAndTimestampMillis", + "type": { + "type": "array", + "items": [ + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "timestamp-millis" + } + ] + } + } + ] +} diff --git 
a/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesTest1.avsc b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesTest1.avsc new file mode 100644 index 000000000..4973a7408 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesTest1.avsc @@ -0,0 +1,187 @@ +{ + "type": "record", + "name": "FastSerdeLogicalTypesTest1", + "namespace": "com.linkedin.avro.fastserde.generated.avro", + "doc": "Used in tests to confirm fast-serde supports logical-types", + "fields": [ + { + "name": "unionOfArrayAndMap", + "type": [ + { + "type": "array", + "items": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "type": "map", + "values": { + "type": "int", + "logicalType": "date" + } + } + ] + }, + { + "name": "mapOfUnionsOfDateAndTimestampMillis", + "type": { + "type": "map", + "values": [ + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "timestamp-millis" + } + ] + } + }, + { + "name": "timestampMillisMap", + "type": { + "type": "map", + "values": { + "type": "long", + "logicalType": "timestamp-millis" + } + } + }, + { + "name": "nullableArrayOfDates", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "int", + "logicalType": "date" + } + } + ], + "default": null + }, + { + "name": "arrayOfDates", + "type": { + "type": "array", + "items": { + "type": "int", + "logicalType": "date" + } + } + }, + { + "name": "unionOfDecimalOrDate", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + { + "type": "int", + "logicalType": "date" + } + ] + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + } + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "timeMillisField", + "type": { + "type": 
"int", + "logicalType": "time-millis" + } + }, + { + "name": "timeMicrosField", + "type": { + "type": "long", + "logicalType": "time-micros" + } + }, + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "nestedLocalTimestampMillis", + "type": { + "name": "LocalTimestampRecord", + "type": "record", + "fields": [ + { + "name": "nestedTimestamp", + "type": { + "type": "long", + "logicalType": "local-timestamp-millis" + } + }, + { + "name": "nullableNestedTimestamp", + "type": [ + "null", + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ] + }, + { + "name": "nullableUnionOfDateAndLocalTimestamp", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ] + }, + { + "name": "unionOfDateAndLocalTimestamp", + "type": [ + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ] + } + ] + } + } + ] +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesUndefined.avsc b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesUndefined.avsc new file mode 100644 index 000000000..f5d4d9fe6 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesUndefined.avsc @@ -0,0 +1,34 @@ +{ + "type": "record", + "name": "FastSerdeLogicalTypesUndefined", + "namespace": "com.linkedin.avro.fastserde.generated.avro", + "doc": "Used in tests to confirm fast-serde supports logical-types", + "fields": [ + { + "name": "timeMillisField", + "type": { + "type": "int" + } + }, + { + "name": "dateField", + "type": { + "type": "int" + } + }, + { + "name": "arrayOfUnionOfDateAndTimestampMillis", + "type": { + "type": "array", + "items": [ + { + "type": "int" + }, + { + "type": "long" + } + ] + } + } + ] +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesWithDefaults.avsc 
b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesWithDefaults.avsc new file mode 100644 index 000000000..31720b433 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/avro/logicalTypesWithDefaults.avsc @@ -0,0 +1,202 @@ +{ + "type": "record", + "name": "FastSerdeLogicalTypesWithDefaults", + "namespace": "com.linkedin.avro.fastserde.generated.avro", + "doc": "Used in tests to confirm fast-serde supports logical-types", + "fields": [ + { + "name": "unionOfArrayAndMap", + "type": [ + { + "type": "array", + "items": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "type": "map", + "values": { + "type": "int", + "logicalType": "date" + } + } + ], + "default": [654321, 7415896] + }, + { + "name": "mapOfUnionsOfDateAndTimestampMillis", + "type": { + "type": "map", + "values": [ + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "timestamp-millis" + } + ] + }, + "default": {"someDay": 12345, "anotherDay": 23456} + }, + { + "name": "timestampMillisMap", + "type": { + "type": "map", + "values": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + "default": {"timestampMillis1": 123456789012, "timestampMillis2": 112233445566} + }, + { + "name": "nullableArrayOfDates", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "int", + "logicalType": "date" + } + } + ], + "default": null + }, + { + "name": "arrayOfDates", + "type": { + "type": "array", + "items": { + "type": "int", + "logicalType": "date" + } + }, + "default": [7777, 8888] + }, + { + "name": "unionOfDecimalOrDate", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 5, + "scale": 2 + }, + { + "type": "int", + "logicalType": "date" + } + ], + "default": "13" + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + }, + "default": "b4ddd079-a024-4cc3-ac6c-a14f174c9922" + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + 
"logicalType": "timestamp-millis" + }, + "default": 120120120120 + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + }, + "default": 123451234512345 + }, + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + }, + "default": 15 + }, + { + "name": "timeMicrosField", + "type": { + "type": "long", + "logicalType": "time-micros" + }, + "default": 16 + }, + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + }, + "default": 223344 + }, + { + "name": "nestedLocalTimestampMillis", + "type": { + "name": "LocalTimestampRecordWithDefaults", + "type": "record", + "fields": [ + { + "name": "nestedTimestamp", + "type": { + "type": "long", + "logicalType": "local-timestamp-millis" + }, + "default": 99 + }, + { + "name": "nullableNestedTimestamp", + "type": [ + "null", + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ], + "default": null + }, + { + "name": "nullableUnionOfDateAndLocalTimestamp", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ], + "default": null + }, + { + "name": "unionOfDateAndLocalTimestamp", + "type": [ + { + "type": "int", + "logicalType": "date" + }, + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ], + "default": 45678 + } + ] + } + } + ] +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumReaderTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumReaderTest.java index 3e2cc2dc4..bcd627a65 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumReaderTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumReaderTest.java @@ -102,7 +102,7 @@ public void 
shouldGetFastGenericDeserializerAndUpdateCachedFastDeserializer() th @Test(groups = {"deserializationTest"}) @SuppressWarnings("unchecked") - public void shouldCreateGenericDatumReader() throws IOException, InterruptedException { + public void shouldCreateGenericDatumReader() throws IOException { Schema recordSchema = createRecord("TestSchema", createPrimitiveUnionFieldSchema("test", Schema.Type.STRING)); FastGenericDatumReader fastGenericDatumReader = new FastGenericDatumReader<>(recordSchema, cache); diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java index 38a9db81a..b9a2f60b7 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java @@ -50,7 +50,7 @@ public void shouldCreateSpecificDatumWriter() throws IOException, InterruptedExc @Test(groups = {"serializationTest"}) @SuppressWarnings("unchecked") - public void shouldCreateGenericDatumWriter() throws IOException, InterruptedException { + public void shouldCreateGenericDatumWriter() throws IOException { Schema recordSchema = createRecord("TestSchema", createPrimitiveUnionFieldSchema("test", Schema.Type.STRING)); FastGenericDatumWriter fastGenericDatumWriter = new FastGenericDatumWriter<>(recordSchema, cache); diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDeserializerDefaultsTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDeserializerDefaultsTest.java index 1a34bde08..1bf3ed44e 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDeserializerDefaultsTest.java +++ 
b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastDeserializerDefaultsTest.java @@ -112,7 +112,7 @@ public void testFastFloatArraySerDes() { Schema oldRecordSchema = Schema.parse(schemaString); GenericDatumWriter writer = new GenericDatumWriter<>(oldRecordSchema); FastDeserializer deserializer = new FastGenericDeserializerGenerator(oldRecordSchema, oldRecordSchema, tempDir, classLoader, - null).generateDeserializer(); + null, null).generateDeserializer(); // warm up cycles for (int i = 0; i < 1000; i++) { @@ -533,11 +533,10 @@ private GenericRecord decodeGenericSlow(Schema readerSchema, Schema writerSchema } } - @SuppressWarnings("unchecked") private T decodeSpecificFast(Schema readerSchema, Schema writerSchema, Decoder decoder) { FastDeserializer deserializer = - new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, - null).generateDeserializer(); + new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, + null, null).generateDeserializer(); try { return deserializer.deserialize(null, decoder); @@ -550,7 +549,7 @@ private T decodeSpecificFast(Schema readerSchema, Schema writerSchema, Decod private GenericRecord decodeGenericFast(Schema readerSchema, Schema writerSchema, Decoder decoder) { FastDeserializer deserializer = new FastGenericDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, - null).generateDeserializer(); + null, null).generateDeserializer(); try { return deserializer.deserialize(null, decoder); diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericDeserializerGeneratorTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericDeserializerGeneratorTest.java index 65a96149d..c1f5e7d8c 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericDeserializerGeneratorTest.java +++ 
b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericDeserializerGeneratorTest.java @@ -1232,7 +1232,7 @@ public void shouldReturnMutableEmptyMap(Implementation implementation) { public void shouldReadSplittedAndAliasedSubRecordFields(Implementation implementation) { // given Schema subRecordSchema = createRecord("subRecord", createPrimitiveUnionFieldSchema("intField", Schema.Type.INT), createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING)); - subRecordSchema = FastSerdeTestsSupport.addAliases(subRecordSchema, "com.adpilot.utils.generated.avro.aliasedSubRecord"); + subRecordSchema = FastSerdeTestsSupport.addAliases(subRecordSchema, "com.linkedin.avro.fastserde.generated.avro.aliasedSubRecord"); Schema recordSchema = createRecord( createField("record1", subRecordSchema), @@ -1240,7 +1240,7 @@ public void shouldReadSplittedAndAliasedSubRecordFields(Implementation implement createArrayFieldSchema("recordArray", subRecordSchema)); Schema aliasedSubRecordSchema = createRecord("aliasedSubRecord", createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING)); - aliasedSubRecordSchema = FastSerdeTestsSupport.addAliases(aliasedSubRecordSchema, "com.adpilot.utils.generated.avro.subRecord"); + aliasedSubRecordSchema = FastSerdeTestsSupport.addAliases(aliasedSubRecordSchema, "com.linkedin.avro.fastserde.generated.avro.subRecord"); Schema splittedAndAliasedRecordSchema = createRecord( createField("record1", aliasedSubRecordSchema), @@ -1592,7 +1592,7 @@ record = new GenericData.Record(recordWithUnionMapOfUnionValuesSchema); private static T decodeRecordColdFast(Schema writerSchema, Schema readerSchema, Decoder decoder) { FastDeserializer deserializer = - new FastSerdeCache.FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema); + new FastSerdeCache.FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema, GenericData.get()); return decodeRecordFast(deserializer, decoder); } @@ -1600,7 +1600,7 @@ private 
static T decodeRecordColdFast(Schema writerSchema, Schema readerSche private static T decodeRecordWarmFast(Schema writerSchema, Schema readerSchema, Decoder decoder) { FastDeserializer deserializer = new FastGenericDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, - null).generateDeserializer(); + null, null).generateDeserializer(); return decodeRecordFast(deserializer, decoder); } diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java index 36e10a243..436d0575e 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java @@ -678,7 +678,7 @@ public Decoder dataAsBinaryDecoder(T data, Schema schema) { try { FastGenericSerializerGenerator fastGenericSerializerGenerator = - new FastGenericSerializerGenerator<>(schema, tempDir, classLoader, null); + new FastGenericSerializerGenerator<>(schema, tempDir, classLoader, null, null); FastSerializer fastSerializer = fastGenericSerializerGenerator.generateSerializer(); fastSerializer.serialize(data, binaryEncoder); binaryEncoder.flush(); diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeCacheTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeCacheTest.java index 0f7a04f4e..3bd86b12f 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeCacheTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeCacheTest.java @@ -42,7 +42,7 @@ public void testIsSupportedForFastDeserializer() { } @Test(groups = "deserializationTest") - 
public void testBuildFastGenericDeserializerSurviveFromWrongClasspath() throws Exception { + public void testBuildFastGenericDeserializerSurviveFromWrongClasspath() { String wrongClasspath = "."; FastSerdeCache cache = new FastSerdeCache(wrongClasspath); Schema testRecord = Schema.parse("{\"type\": \"record\", \"name\": \"test_record\", \"fields\":[]}"); @@ -50,21 +50,21 @@ public void testBuildFastGenericDeserializerSurviveFromWrongClasspath() throws E } @Test(groups = "deserializationTest") - public void testBuildFastGenericDeserializerWithCorrectClasspath() throws Exception { + public void testBuildFastGenericDeserializerWithCorrectClasspath() { FastSerdeCache cache = FastSerdeCache.getDefaultInstance(); Schema testRecord = Schema.parse("{\"type\": \"record\", \"name\": \"test_record\", \"fields\":[]}"); cache.buildFastGenericDeserializer(testRecord, testRecord); } @Test(groups = "deserializationTest") - public void testBuildFastSpecificDeserializerSurviveFromWrongClasspath() throws Exception { + public void testBuildFastSpecificDeserializerSurviveFromWrongClasspath() { String wrongClasspath = "."; FastSerdeCache cache = new FastSerdeCache(wrongClasspath); cache.buildFastSpecificDeserializer(TestRecord.SCHEMA$, TestRecord.SCHEMA$); } @Test(groups = "deserializationTest") - public void testBuildFastSpecificDeserializerWithCorrectClasspath() throws Exception { + public void testBuildFastSpecificDeserializerWithCorrectClasspath() { FastSerdeCache cache = FastSerdeCache.getDefaultInstance(); cache.buildFastSpecificDeserializer(TestRecord.SCHEMA$, TestRecord.SCHEMA$); } diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java index c19d7741a..cfc738f8b 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java +++ 
b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java @@ -59,7 +59,7 @@ public static Schema createRecord(Schema.Field... fields) { } public static Schema createRecord(String name, Schema.Field... fields) { - Schema schema = Schema.createRecord(name, name, "com.adpilot.utils.generated.avro", false); + Schema schema = Schema.createRecord(name, name, "com.linkedin.avro.fastserde.generated.avro", false); schema.setFields(Arrays.asList(fields)); return schema; @@ -130,15 +130,15 @@ public static Schema.Field createMapFieldSchema(String name, Schema valueType, S } public static Schema createFixedSchema(String name, int size) { - return Schema.createFixed(name, "", "com.adpilot.utils.generated.avro", size); + return Schema.createFixed(name, "", "com.linkedin.avro.fastserde.generated.avro", size); } public static Schema createEnumSchema(String name, String[] ordinals) { - return Schema.createEnum(name, "", "com.adpilot.utils.generated.avro", Arrays.asList(ordinals)); + return Schema.createEnum(name, "", "com.linkedin.avro.fastserde.generated.avro", Arrays.asList(ordinals)); } public static Schema createEnumSchema(String name, String[] ordinals, String enumDefault) { - return AvroCompatibilityHelper.newEnumSchema(name, "", "com.adpilot.utils.generated.avro", Arrays.asList(ordinals), + return AvroCompatibilityHelper.newEnumSchema(name, "", "com.linkedin.avro.fastserde.generated.avro", Arrays.asList(ordinals), enumDefault); } diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGeneratorTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGeneratorTest.java index bb6a6bdc7..327ebdbc1 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGeneratorTest.java +++ 
b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGeneratorTest.java @@ -130,17 +130,17 @@ record = decodeRecordSlow(TestRecord.SCHEMA$, TestRecord.SCHEMA$, specificDataAs // then Assert.assertEquals(1, getField(record, "testInt")); - Assert.assertEquals(new Integer(1), getField(record, "testIntUnion")); + Assert.assertEquals(1, getField(record, "testIntUnion")); Assert.assertEquals(new Utf8("aaa"), getField(record, "testString")); Assert.assertEquals(new Utf8("aaa"), getField(record, "testStringUnion")); - Assert.assertEquals(1l, getField(record, "testLong")); - Assert.assertEquals(new Long(1), getField(record, "testLongUnion")); + Assert.assertEquals(1L, getField(record, "testLong")); + Assert.assertEquals(1L, getField(record, "testLongUnion")); Assert.assertEquals(1.0, (Double) getField(record, "testDouble"), 0); - Assert.assertEquals(new Double(1.0), getField(record, "testDoubleUnion")); + Assert.assertEquals(1.0, getField(record, "testDoubleUnion")); Assert.assertEquals(1.0f, (Float) getField(record, "testFloat"), 0); - Assert.assertEquals(new Float(1.0f), getField(record, "testFloatUnion")); + Assert.assertEquals(1.0f, getField(record, "testFloatUnion")); Assert.assertEquals(true, getField(record, "testBoolean")); - Assert.assertEquals(new Boolean(true), getField(record, "testBooleanUnion")); + Assert.assertEquals(true, getField(record, "testBooleanUnion")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), getField(record, "testBytes")); Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), getField(record, "testBytesUnion")); } @@ -838,17 +838,16 @@ public void largeSchemasWithUnionCanBeHandled() { // generateDeserializer should not throw an exception try { - new FastSpecificDeserializerGenerator<>(writerSchema, readerSchema, tempDir, classLoader, null).generateDeserializer(); + new FastSpecificDeserializerGenerator<>(writerSchema, readerSchema, tempDir, classLoader, null, 
null).generateDeserializer(); } catch (Exception e) { Assert.fail("Exception was thrown: ", e); } } - @SuppressWarnings("unchecked") private T decodeRecordFast(Schema readerSchema, Schema writerSchema, Decoder decoder) { FastDeserializer deserializer = - new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, - null).generateDeserializer(); + new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, + null, null).generateDeserializer(); try { return deserializer.deserialize(null, decoder); @@ -859,7 +858,6 @@ private T decodeRecordFast(Schema readerSchema, Schema writerSchema, Decoder } } - @SuppressWarnings("unchecked") private T decodeRecordSlow(Schema readerSchema, Schema writerSchema, Decoder decoder) { org.apache.avro.io.DatumReader datumReader = new SpecificDatumReader<>(writerSchema, readerSchema); try { diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java index d0bcf84db..aca57c28c 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java @@ -371,7 +371,7 @@ public Decoder dataAsDecoder(T data, Schema schema) { try { FastSpecificSerializerGenerator fastSpecificSerializerGenerator = - new FastSpecificSerializerGenerator<>(schema, tempDir, classLoader, null); + new FastSpecificSerializerGenerator<>(schema, tempDir, classLoader, null, null); FastSerializer fastSerializer = fastSpecificSerializerGenerator.generateSerializer(); fastSerializer.serialize(data, binaryEncoder); binaryEncoder.flush(); diff --git 
a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java index 187311394..1eeaa0cf5 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java @@ -330,10 +330,10 @@ public Decoder writeWithFastAvro(T data, Schema schema, boolean specific) { try { FastSerializer fastSerializer; if (specific) { - FastSpecificSerializerGenerator fastSpecificSerializerGenerator = new FastSpecificSerializerGenerator<>(schema, tempDir, classLoader, null); + FastSpecificSerializerGenerator fastSpecificSerializerGenerator = new FastSpecificSerializerGenerator<>(schema, tempDir, classLoader, null, null); fastSerializer = fastSpecificSerializerGenerator.generateSerializer(); } else { - FastGenericSerializerGenerator fastGenericSerializerGenerator = new FastGenericSerializerGenerator<>(schema, tempDir, classLoader, null); + FastGenericSerializerGenerator fastGenericSerializerGenerator = new FastGenericSerializerGenerator<>(schema, tempDir, classLoader, null, null); fastSerializer = fastGenericSerializerGenerator.generateSerializer(); } fastSerializer.serialize(data, binaryEncoder); @@ -348,9 +348,9 @@ public Decoder writeWithFastAvro(T data, Schema schema, boolean specific) { public T readWithFastAvro(Schema writerSchema, Schema readerSchema, Decoder decoder, boolean specific) { FastDeserializer deserializer; if (specific) { - deserializer = new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, null).generateDeserializer(); + deserializer = new FastSpecificDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, null, null).generateDeserializer(); } else { - deserializer = new FastGenericDeserializerGenerator(writerSchema, 
readerSchema, tempDir, classLoader, null).generateDeserializer(); + deserializer = new FastGenericDeserializerGenerator(writerSchema, readerSchema, tempDir, classLoader, null, null).generateDeserializer(); } try { return deserializer.deserialize(decoder); diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java index 1b8122ad7..68cfd70b2 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java @@ -1,6 +1,11 @@ package com.linkedin.avro.fastserde; +import java.util.Arrays; + +import javax.lang.model.SourceVersion; + import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import com.linkedin.avro.fastserde.generated.avro.FloatWithDefaultNaN; @@ -18,4 +23,54 @@ void testFingerprintOfSchemaWithDefaultNaN() { // expect no exception is thrown Utils.getSchemaFingerprint(FloatWithDefaultNaN.SCHEMA$); } + + @DataProvider + static Object[][] createValidJavaIdentifierTestCases() { + String[] validOrAlmostValidJavaIdentifiers = { + "goodOne", + "fine", + "break", + "class", + "field_name", + "var", + "varName", + " will be cleared as weLL %$^$&^*(*)!@#@#${} " + }; + + return Arrays.stream(validOrAlmostValidJavaIdentifiers) + .map(str -> new Object[]{str}) + .toArray(Object[][]::new); + } + + @Test(dataProvider = "createValidJavaIdentifierTestCases") + void shouldGenerateValidJavaIdentifier(String javaIdentifierCandidate) { + // when + String validJavaIdentifier = Utils.toValidJavaIdentifier(javaIdentifierCandidate); + + // then + Assert.assertTrue(SourceVersion.isIdentifier(validJavaIdentifier)); + } + + /*-----------------------------------------*/ + + @DataProvider + static Object[][] createInvalidJavaIdentifierTestCases() { +
String[] invalidJavaIdentifiers = { + null, + "", + " ", + "\n\t\r", + "\n \t \r " + }; + + return Arrays.stream(invalidJavaIdentifiers) + .map(str -> new Object[]{str}) + .toArray(Object[][]::new); + } + + @Test(dataProvider = "createInvalidJavaIdentifierTestCases", expectedExceptions = IllegalArgumentException.class) + void shouldFailGeneratingValidJavaIdentifier(String invalidProposal) { + // IllegalArgumentException expected + Utils.toValidJavaIdentifier(invalidProposal); + } } diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/FunctionThrowingIOException.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/FunctionThrowingIOException.java new file mode 100644 index 000000000..e5cefceff --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/FunctionThrowingIOException.java @@ -0,0 +1,9 @@ +package com.linkedin.avro.fastserde.logical.types; + +import java.io.IOException; + +@FunctionalInterface +public interface FunctionThrowingIOException { + + R apply(T input) throws IOException; +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/InMemoryEncoder.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/InMemoryEncoder.java new file mode 100644 index 000000000..b74d902cb --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/InMemoryEncoder.java @@ -0,0 +1,175 @@ +package com.linkedin.avro.fastserde.logical.types; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.Encoder; +import org.apache.avro.util.Utf8; + +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import
com.linkedin.avroutil1.compatibility.SchemaNormalization; + +/** + * Helper class which wraps {@link BinaryEncoder} and its corresponding {@code OutputStream} + * ({@link ByteArrayOutputStream}) so that we can simplify some repeatable code just to: + * <pre>
+ * InMemoryEncoder encoder = new InMemoryEncoder(data.getSchema());
+ * fastSerializer.serialize(data, encoder);
+ * return encoder.toByteArray();
+ * </pre>
+ * Additionally, it prepends 10-bytes header to the output byte-array so that it can be easily compared with + * built-in serialization method: {@code someSpecificRecord.toByteBuffer().toByteArray()}. + */ +public class InMemoryEncoder extends Encoder { + + private final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + private final BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); + + public InMemoryEncoder(Schema schema) { + final int v1HeaderLength = 10; + byte[] v1Header = ByteBuffer.wrap(new byte[v1HeaderLength]) + .order(ByteOrder.LITTLE_ENDIAN) + .put(new byte[]{(byte) 0xC3, (byte) 0x01}) // BinaryMessageEncoder.V1_HEADER + .putLong(SchemaNormalization.parsingFingerprint64(schema)) + .array(); + + baos.write(v1Header, 0, v1Header.length); + } + + public byte[] toByteArray() { + try { + binaryEncoder.flush(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + return baos.toByteArray(); + } + + // generated delegate methods + + @Override + public void writeNull() throws IOException { + binaryEncoder.writeNull(); + } + + @Override + public void writeString(Utf8 utf8) throws IOException { + binaryEncoder.writeString(utf8); + } + + @Override + public void writeString(String string) throws IOException { + binaryEncoder.writeString(string); + } + + @Override + public void writeBytes(ByteBuffer bytes) throws IOException { + binaryEncoder.writeBytes(bytes); + } + + @Override + public void writeBytes(byte[] bytes, int start, int len) throws IOException { + binaryEncoder.writeBytes(bytes, start, len); + } + + @Override + public void writeEnum(int e) throws IOException { + binaryEncoder.writeEnum(e); + } + + @Override + public void writeArrayStart() throws IOException { + binaryEncoder.writeArrayStart(); + } + + @Override + public void setItemCount(long itemCount) throws IOException { + binaryEncoder.setItemCount(itemCount); + } + + @Override + public void startItem() throws IOException { + 
binaryEncoder.startItem(); + } + + @Override + public void writeArrayEnd() throws IOException { + binaryEncoder.writeArrayEnd(); + } + + @Override + public void writeMapStart() throws IOException { + binaryEncoder.writeMapStart(); + } + + @Override + public void writeMapEnd() throws IOException { + binaryEncoder.writeMapEnd(); + } + + @Override + public void writeIndex(int unionIndex) throws IOException { + binaryEncoder.writeIndex(unionIndex); + } + + @Override + public void writeBoolean(boolean b) throws IOException { + binaryEncoder.writeBoolean(b); + } + + @Override + public void writeInt(int n) throws IOException { + binaryEncoder.writeInt(n); + } + + @Override + public void writeLong(long n) throws IOException { + binaryEncoder.writeLong(n); + } + + @Override + public void writeFloat(float f) throws IOException { + binaryEncoder.writeFloat(f); + } + + @Override + public void writeDouble(double d) throws IOException { + binaryEncoder.writeDouble(d); + } + + @Override + public void writeString(CharSequence charSequence) throws IOException { + binaryEncoder.writeString(charSequence); + } + + @Override + public void writeBytes(byte[] bytes) throws IOException { + binaryEncoder.writeBytes(bytes); + } + + @Override + public void writeFixed(byte[] bytes, int start, int len) throws IOException { + binaryEncoder.writeFixed(bytes, start, len); + } + + @Override + public void writeFixed(byte[] bytes) throws IOException { + binaryEncoder.writeFixed(bytes); + } + + @Override + public void writeFixed(ByteBuffer bytes) throws IOException { + binaryEncoder.writeFixed(bytes); + } + + @Override + public void flush() throws IOException { + binaryEncoder.flush(); + } +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesFastSerdeTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesFastSerdeTest.java new file mode 100644 index 000000000..d72597096 
--- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesFastSerdeTest.java @@ -0,0 +1,239 @@ +package com.linkedin.avro.fastserde.logical.types; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; + +import org.apache.avro.util.Utf8; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import org.testng.collections.Lists; + +import com.linkedin.avro.fastserde.Utils; +import com.linkedin.avro.fastserde.generated.avro.FastSerdeLogicalTypesTest1; +import com.linkedin.avro.fastserde.generated.avro.FastSerdeLogicalTypesWithDefaults; +import com.linkedin.avro.fastserde.generated.avro.LocalTimestampRecord; +import com.linkedin.avro.fastserde.generated.avro.LocalTimestampRecordWithDefaults; +import com.linkedin.avroutil1.compatibility.AvroVersion; + +public class LogicalTypesFastSerdeTest extends LogicalTypesTestBase { + + @DataProvider + public static Object[][] logicalTypesTestCases() { + LocalDate now = LocalDate.now(); + LocalDate localDate = LocalDate.of(2023, 8, 11); + + Map mapOfDates = mapOf( + new String[]{"today", "yesterday", "tomorrow"}, + new LocalDate[]{now, now.minusDays(1), now.plusDays(1)}); + + Map mapOfTimestamps = mapOf( + new String[]{"today", "yesterday", "tomorrow"}, + new Instant[]{toInstant(now), toInstant(now.minusDays(1)), toInstant(now.plusDays(1))}); + + Map mapOfDatesAndTimestamps = mapOf( + new String[]{"today", 
"yesterday", "tomorrow"}, + new Object[]{toInstant(now), now.minusDays(1), now.plusDays(1)}); + + Object[] mapOfUnionsOfDateAndTimestampMillisOptions = {mapOfDates, mapOfTimestamps, mapOfDatesAndTimestamps}; + Object[] nullableUnionOfDateAndLocalTimestampOptions = {null, now.minusDays(12), localDate.atStartOfDay()}; + Object[] unionOfDateAndLocalTimestampOptions = {now.minusDays(12), localDate.atStartOfDay()}; + Object[] unionOfArrayAndMapOptions = { + Lists.newArrayList(LocalTime.now(), LocalTime.now().plusMinutes(1)), mapOfDates}; + Object[] nullableArrayOfDatesOptions = { + null, Lists.newArrayList(localDate, localDate.plusDays(11), localDate.plusDays(22))}; + Object[] decimalOrDateOptions = {new BigDecimal("3.14"), LocalDate.of(2023, 3, 14)}; + + List allOptions = new ArrayList<>(); + + for (Object mapOfUnionsOfDateAndTimestampMillis : mapOfUnionsOfDateAndTimestampMillisOptions) { + for (Object nullableUnionOfDateAndLocalTimestamp : nullableUnionOfDateAndLocalTimestampOptions) { + for (Object unionOfDateAndLocalTimestamp : unionOfDateAndLocalTimestampOptions) { + for (Object unionOfArrayAndMap : unionOfArrayAndMapOptions) { + for (Object nullableArrayOfDates : nullableArrayOfDatesOptions) { + for (Object decimalOrDate : decimalOrDateOptions) { + allOptions.add(new Object[]{mapOfUnionsOfDateAndTimestampMillis, + nullableUnionOfDateAndLocalTimestamp, unionOfDateAndLocalTimestamp, + unionOfArrayAndMap, nullableArrayOfDates, decimalOrDate}); + } + } + } + } + } + } + + return allOptions.toArray(new Object[0][]); + } + + @Test(groups = "serializationTest", dataProvider = "logicalTypesTestCases") + public void shouldWriteAndReadLogicalTypesSuccessfully(Map mapOfUnionsOfDateAndTimestampMillis, + Object nullableUnionOfDateAndLocalTimestamp, Object unionOfDateAndLocalTimestamp, + Object unionOfArrayAndMap, List nullableArrayOfDates, Object decimalOrDate) throws IOException { + // given + LocalDate localDate = LocalDate.of(2023, 8, 11); + Instant instant = 
localDate.atStartOfDay().toInstant(ZoneOffset.UTC); + LocalTimestampRecord localTimestampRecord = createLocalTimestampRecord(nullableUnionOfDateAndLocalTimestamp, unionOfDateAndLocalTimestamp); + + FastSerdeLogicalTypesTest1.Builder builder = FastSerdeLogicalTypesTest1.newBuilder() + .setMapOfUnionsOfDateAndTimestampMillis(mapOfUnionsOfDateAndTimestampMillis) + .setUnionOfArrayAndMap(unionOfArrayAndMap) + .setTimestampMillisMap(createTimestampMillisMap()) + .setNullableArrayOfDates(nullableArrayOfDates) + .setArrayOfDates(Lists.newArrayList(localDate, localDate.plusDays(1), localDate.plusDays(2))) + .setUnionOfDecimalOrDate(decimalOrDate) + .setTimestampMillisField(instant) + .setTimestampMicrosField(instant) + .setTimeMillisField(LocalTime.of(14, 17, 45, 12345)) + .setTimeMicrosField(LocalTime.of(14, 17, 45, 12345)) + .setDateField(localDate) + .setNestedLocalTimestampMillis(localTimestampRecord); + injectUuidField(builder); + FastSerdeLogicalTypesTest1 inputData = builder.build(); + + // all serializers produce the same array of bytes + byte[] bytesWithHeader = verifySerializers(inputData, FastSerdeLogicalTypesTest1::toByteBuffer); + + // all deserializers create (logically) the same data (in generic or specific representation) + verifyDeserializers(bytesWithHeader, FastSerdeLogicalTypesTest1::fromByteBuffer); + } + + @SuppressWarnings("unchecked") + @Test(groups = "serializationTest") + public void shouldCreateLogicalTypesFromDefaults() { + // given + LocalTime localTime0 = LocalTime.ofNanoOfDay(0L); + LocalDate localDate0 = LocalDate.ofYearDay(1970, 1); + LocalDateTime localDateTime0 = LocalDateTime.of(localDate0, localTime0); + + fixConversionsIfAvro19(new FastSerdeLogicalTypesWithDefaults().getSpecificData()); + + // when + FastSerdeLogicalTypesWithDefaults data = FastSerdeLogicalTypesWithDefaults.newBuilder() + .setNestedLocalTimestampMillisBuilder(LocalTimestampRecordWithDefaults.newBuilder()) + .build(); + + // and + List arrayOfLocalTimes = (List) 
data.getUnionOfArrayAndMap(); + Map mapOfLocalDates = data.getMapOfUnionsOfDateAndTimestampMillis(); + Map timestampMillisMap = data.getTimestampMillisMap(); + List arrayOfDates = data.getArrayOfDates(); + BigDecimal decimal = (BigDecimal) data.getUnionOfDecimalOrDate(); + Object uuidField = data.getUuidField(); + Instant timestampMillisField = data.getTimestampMillisField(); + Instant timestampMicrosField = data.getTimestampMicrosField(); + LocalTime timeMillisField = data.getTimeMillisField(); + LocalTime timeMicrosField = data.getTimeMicrosField(); + LocalDate dateField = data.getDateField(); + Object nestedTimestamp = data.getNestedLocalTimestampMillis().getNestedTimestamp(); + LocalDate nestedDate = (LocalDate) data.getNestedLocalTimestampMillis().getUnionOfDateAndLocalTimestamp(); + + // then + Assert.assertEquals(arrayOfLocalTimes, Lists.newArrayList( + localTime0.plus(654321, ChronoUnit.MILLIS), + localTime0.plus(7415896, ChronoUnit.MILLIS))); + + Assert.assertEquals(mapOfLocalDates, mapOf( + new Utf8[] {new Utf8("someDay"), new Utf8("anotherDay")}, + new LocalDate[] {localDate0.plusDays(12345), localDate0.plusDays(23456)})); + + Assert.assertEquals(timestampMillisMap, mapOf( + new Utf8[] {new Utf8("timestampMillis1"), new Utf8("timestampMillis2")}, + new Instant[] {Instant.ofEpochMilli(123456789012L), Instant.ofEpochMilli(112233445566L)})); + + Assert.assertNull(data.getNullableArrayOfDates()); + Assert.assertEquals(arrayOfDates, Lists.newArrayList(localDate0.plusDays(7777), localDate0.plusDays(8888))); + Assert.assertEquals(decimal.scale(), 2); + Assert.assertEquals(decimal.precision(), 5); + Assert.assertEquals(decimal, new BigDecimal(new BigInteger("13".getBytes(StandardCharsets.UTF_8)), 2)); + Assert.assertEquals(uuidField.toString(), "b4ddd079-a024-4cc3-ac6c-a14f174c9922"); + Assert.assertEquals(timestampMillisField, Instant.ofEpochMilli(120120120120L)); + Assert.assertEquals(timestampMicrosField, Instant.ofEpochMilli(0L).plusNanos(123451234512345L * 
1000L)); + Assert.assertEquals(timeMillisField, localTime0.plus(15L, ChronoUnit.MILLIS)); + Assert.assertEquals(timeMicrosField, localTime0.plus(16L, ChronoUnit.MICROS)); + Assert.assertEquals(dateField, localDate0.plusDays(223344)); + if (Utils.getRuntimeAvroVersion() == AvroVersion.AVRO_1_9) { + Assert.assertEquals(nestedTimestamp, 99L); + } else { + Assert.assertEquals(nestedTimestamp, localDateTime0.plus(99, ChronoUnit.MILLIS)); + } + Assert.assertEquals(nestedDate, localDate0.plusDays(45678)); + } + + private Map createTimestampMillisMap() { + return mapOf(new String[]{"one", "two", "three"}, new Instant[]{ + toInstant(LocalDate.of(2023, 8, 18)), + toInstant(LocalDate.of(2023, 8, 19)), + toInstant(LocalDate.of(2023, 8, 20))}); + } + + private LocalTimestampRecord createLocalTimestampRecord( + Object nullableUnionOfDateAndLocalTimestamp, Object unionOfDateAndLocalTimestamp) { + Instant nestedTimestamp = toInstant(LocalDate.of(2023, 8, 21)); + LocalTimestampRecord.Builder builder = LocalTimestampRecord.newBuilder(); + + try { + if (Utils.getRuntimeAvroVersion().laterThan(AvroVersion.AVRO_1_9)) { + builder.getClass().getMethod("setNestedTimestamp", LocalDateTime.class) + .invoke(builder, LocalDateTime.ofInstant(nestedTimestamp, ZoneId.systemDefault())); + builder.getClass().getMethod("setNullableNestedTimestamp", LocalDateTime.class) + .invoke(builder, LocalDateTime.ofInstant(nestedTimestamp.plusSeconds(10), ZoneId.systemDefault())); + } else { + nullableUnionOfDateAndLocalTimestamp = Optional.ofNullable(toInstant(nullableUnionOfDateAndLocalTimestamp)) + .map(Instant::toEpochMilli) + .orElse(null); + unionOfDateAndLocalTimestamp = toInstant(unionOfDateAndLocalTimestamp).toEpochMilli(); + + builder.getClass().getMethod("setNestedTimestamp", Long.TYPE) + .invoke(builder, nestedTimestamp.toEpochMilli()); + builder.getClass().getMethod("setNullableNestedTimestamp", Long.class) + .invoke(builder, nestedTimestamp.toEpochMilli() + 10L); + } + + 
builder.setNullableUnionOfDateAndLocalTimestamp(nullableUnionOfDateAndLocalTimestamp); + builder.setUnionOfDateAndLocalTimestamp(unionOfDateAndLocalTimestamp); + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + + return builder.build(); + } + + private static Map mapOf(K[] keys, V[] values) { + Map map = new LinkedHashMap<>(); + for (int i = 0; i < keys.length; i++) { + map.put(keys[i], values[i]); + } + + return map; + } + + private void injectUuidField(FastSerdeLogicalTypesTest1.Builder builder) { + try { + if (Utils.getRuntimeAvroVersion().laterThan(AvroVersion.AVRO_1_10)) { + builder.getClass().getMethod("setUuidField", UUID.class) + .invoke(builder, UUID.randomUUID()); + } else { + builder.getClass().getMethod("setUuidField", CharSequence.class) + .invoke(builder, UUID.randomUUID().toString()); + } + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesTestBase.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesTestBase.java new file mode 100644 index 000000000..2e156ae48 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/LogicalTypesTestBase.java @@ -0,0 +1,251 @@ +package com.linkedin.avro.fastserde.logical.types; + +import static com.linkedin.avro.fastserde.FastSerdeTestsSupport.getCodeGenDirectory; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.nio.ByteBuffer; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; 
+import java.util.Optional; +import java.util.function.Supplier; + +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; +import org.apache.avro.Schema; +import org.apache.avro.data.TimeConversions; +import org.apache.avro.generic.ColdGenericDatumReader; +import org.apache.avro.generic.ColdSpecificDatumReader; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.avro.specific.SpecificRecordBase; +import org.testng.Assert; +import org.testng.collections.Lists; + +import com.linkedin.avro.fastserde.FastDeserializer; +import com.linkedin.avro.fastserde.FastGenericDeserializerGenerator; +import com.linkedin.avro.fastserde.FastGenericSerializerGenerator; +import com.linkedin.avro.fastserde.FastSerdeCache; +import com.linkedin.avro.fastserde.FastSerializer; +import com.linkedin.avro.fastserde.FastSpecificDeserializerGenerator; +import com.linkedin.avro.fastserde.FastSpecificSerializerGenerator; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelperCommon; +import com.linkedin.avroutil1.compatibility.AvroVersion; + +public abstract class LogicalTypesTestBase { + + private final File classesDir; + private final ClassLoader classLoader; + + LogicalTypesTestBase() { + try { + classesDir = getCodeGenDirectory(); + classLoader = URLClassLoader.newInstance(new URL[]{classesDir.toURI().toURL()}, getClass().getClassLoader()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + protected T copyConversions(SpecificData fromSpecificData, T toModelData) { + 
Optional.ofNullable(fromSpecificData.getConversions()) + .orElse(Collections.emptyList()) + .forEach(toModelData::addLogicalTypeConversion); + + fixConversionsIfAvro19(toModelData); + + return toModelData; + } + + protected void fixConversionsIfAvro19(T modelData) { + if (AvroCompatibilityHelperCommon.getRuntimeAvroVersion() == AvroVersion.AVRO_1_9) { + // 1. These conversions should be injected by default (avro-compiler issue). + // 2. Missing DecimalConversion causes conflict with stringable feature (BigDecimal is considered as string, not bytes) + // details: GenericData.resolveUnion() + SpecificData.getSchemaName(). + // 3. Time-conversions are needed for GenericDatumWriter/Reader which sees logicalType in the schema + // but is unable to find corresponding converter. + // 4. LocalTimestampMillisConversion not available in 1.9. + + List> missingConversions = Lists.newArrayList( + new Conversions.DecimalConversion(), + new TimeConversions.TimeMicrosConversion(), + new TimeConversions.TimestampMicrosConversion()); + + for (Conversion conversion : missingConversions) { + modelData.addLogicalTypeConversion(conversion); + } + } + } + + protected byte[] serialize(FastSerializer fastSerializer, T data) throws IOException { + InMemoryEncoder encoder = new InMemoryEncoder(data.getSchema()); + fastSerializer.serialize(data, encoder); + return encoder.toByteArray(); + } + + protected byte[] serialize(DatumWriter datumWriter, T data) throws IOException { + InMemoryEncoder encoder = new InMemoryEncoder(data.getSchema()); + datumWriter.write(data, encoder); + + return encoder.toByteArray(); + } + + protected static Instant toInstant(Object maybeDate) { + if (maybeDate == null) { + return null; + } else if (maybeDate instanceof LocalDate) { + return ((LocalDate) maybeDate).atStartOfDay(ZoneId.systemDefault()).toInstant(); + } else if (maybeDate instanceof LocalDateTime) { + return ((LocalDateTime) maybeDate).toInstant(ZoneOffset.UTC); + } else { + throw new 
UnsupportedOperationException(maybeDate + " is not supported (yet)"); + } + } + + /** + * Runs various serializers and ensures each one returned the same result. + * @return result from default serialization, normally it's just {@code data.toByteBuffer().array()} + */ + protected byte[] verifySerializers(T data, + FunctionThrowingIOException toByteBuffer) throws IOException { + // given + Schema schema = data.getSchema(); + GenericData genericData = copyConversions(data.getSpecificData(), new GenericData()); + SpecificData specificData = copyConversions(data.getSpecificData(), new SpecificData()); + + FastSerializer fastGenericSerializer = new FastGenericSerializerGenerator( + schema, classesDir, classLoader, null, genericData) + .generateSerializer(); + + FastSerializer fastSpecificSerializer = new FastSpecificSerializerGenerator( + schema, classesDir, classLoader, null, specificData) + .generateSerializer(); + + FastSerdeCache.FastSerializerWithAvroGenericImpl fastSerializerWithAvroGeneric = + new FastSerdeCache.FastSerializerWithAvroGenericImpl<>(schema, genericData); + + FastSerdeCache.FastSerializerWithAvroSpecificImpl fastSerializerWithAvroSpecific = + new FastSerdeCache.FastSerializerWithAvroSpecificImpl<>(schema, specificData); + + GenericDatumWriter genericDatumWriter = new GenericDatumWriter<>( + schema, genericData); + + SpecificDatumWriter specificDatumWriter = new SpecificDatumWriter<>( + schema, specificData); + + fixConversionsIfAvro19(data.getSpecificData()); + + // when + byte[] fastGenericBytes = serialize(fastGenericSerializer, data); + byte[] fastSpecificBytes = serialize(fastSpecificSerializer, data); + byte[] fastGenericWithAvroBytes = serialize(fastSerializerWithAvroGeneric, data); + byte[] fastSpecificWithAvroBytes = serialize(fastSerializerWithAvroSpecific, data); + byte[] genericBytes = serialize(genericDatumWriter, data); + byte[] specificBytes = serialize(specificDatumWriter, data); + byte[] defaultBytes = 
toByteBuffer.apply(data).array(); + + // then all 7 serializing methods should return the same array of bytes + Assert.assertEquals(fastGenericBytes, defaultBytes); + Assert.assertEquals(fastSpecificBytes, defaultBytes); + Assert.assertEquals(fastGenericWithAvroBytes, defaultBytes); + Assert.assertEquals(fastSpecificWithAvroBytes, defaultBytes); + Assert.assertEquals(genericBytes, defaultBytes); + Assert.assertEquals(specificBytes, defaultBytes); + + return defaultBytes; + } + + protected T verifyDeserializers(byte[] bytesWithHeader, + FunctionThrowingIOException fromByteBuffer) throws IOException { + // given + T data = fromByteBuffer.apply(ByteBuffer.wrap(bytesWithHeader)); + byte[] bytes = dropV1Header(bytesWithHeader); + Schema schema = data.getSchema(); + GenericData genericData = copyConversions(data.getSpecificData(), new GenericData()); + SpecificData specificData = copyConversions(data.getSpecificData(), new SpecificData()); + Supplier decoderSupplier = () -> DecoderFactory.get().binaryDecoder(bytes, null); + + FastDeserializer fastGenericDeserializer = new FastGenericDeserializerGenerator( + schema, schema, classesDir, classLoader, null, genericData) + .generateDeserializer(); + + FastDeserializer fastSpecificDeserializer = new FastSpecificDeserializerGenerator( + schema, schema, classesDir, classLoader, null, specificData) + .generateDeserializer(); + + FastSerdeCache.FastDeserializerWithAvroGenericImpl fastDeserializerWithAvroGeneric = + new FastSerdeCache.FastDeserializerWithAvroGenericImpl<>(schema, schema, genericData); + + FastSerdeCache.FastDeserializerWithAvroSpecificImpl fastDeserializerWithAvroSpecific = + new FastSerdeCache.FastDeserializerWithAvroSpecificImpl<>(schema, schema, specificData); + + GenericDatumReader genericDatumReader = new GenericDatumReader<>(schema, schema, genericData); + + SpecificDatumReader specificDatumReader = new SpecificDatumReader<>(schema, schema, specificData); + + ColdGenericDatumReader coldGenericDatumReader = 
ColdGenericDatumReader.of(schema, schema, genericData); + + ColdSpecificDatumReader coldSpecificDatumReader = ColdSpecificDatumReader.of(schema, schema, specificData); + + // when deserializing with different serializers/writers + GenericData.Record deserializedWithFastGeneric = fastGenericDeserializer.deserialize(decoderSupplier.get()); + T deserializedWithFastSpecific = fastSpecificDeserializer.deserialize(decoderSupplier.get()); + + GenericData.Record deserializedWithFastWithAvroGeneric = fastDeserializerWithAvroGeneric.deserialize(decoderSupplier.get()); + T deserializedWithFastWithAvroSpecific = fastDeserializerWithAvroSpecific.deserialize(decoderSupplier.get()); + + GenericData.Record deserializedWithGenericReader = genericDatumReader.read(null, decoderSupplier.get()); + T deserializedWithSpecificReader = specificDatumReader.read(null, decoderSupplier.get()); + + GenericData.Record deserializedWithColdGenericReader = coldGenericDatumReader.read(null, decoderSupplier.get()); + T deserializedWithColdSpecificReader = coldSpecificDatumReader.read(null, decoderSupplier.get()); + + // then + Assert.assertEquals(deserializedWithFastSpecific, data); + Assert.assertEquals(deserializedWithFastWithAvroSpecific, data); + Assert.assertEquals(deserializedWithSpecificReader, data); + Assert.assertEquals(deserializedWithColdSpecificReader, data); + + assertEquals(deserializedWithFastGeneric, data); + assertEquals(deserializedWithFastWithAvroGeneric, data); + assertEquals(deserializedWithGenericReader, data); + assertEquals(deserializedWithColdGenericReader, data); + + return deserializedWithFastSpecific; + } + + protected void assertEquals(GenericData.Record actual, T expected) throws IOException { + Assert.assertEquals(actual.toString(), expected.toString()); + + GenericDatumWriter genericDatumWriter = new GenericDatumWriter<>( + actual.getSchema(), copyConversions(expected.getSpecificData(), new GenericData())); + + SpecificDatumWriter specificDatumWriter = new 
SpecificDatumWriter<>( + expected.getSchema(), copyConversions(expected.getSpecificData(), new SpecificData())); + + byte[] genericBytes = serialize(genericDatumWriter, actual); + byte[] expectedBytes = serialize(specificDatumWriter, expected); + + Assert.assertEquals(genericBytes, expectedBytes); + } + + protected byte[] dropV1Header(byte[] bytesWithHeader) { + final int v1HeaderLength = 10; + return Arrays.copyOfRange(bytesWithHeader, v1HeaderLength, bytesWithHeader.length); + } +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/UndefinedLogicalTypesFastSerdeTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/UndefinedLogicalTypesFastSerdeTest.java new file mode 100644 index 000000000..19c1843e3 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/logical/types/UndefinedLogicalTypesFastSerdeTest.java @@ -0,0 +1,117 @@ +package com.linkedin.avro.fastserde.logical.types; + +import java.io.IOException; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.testng.Assert; +import org.testng.annotations.Test; +import org.testng.collections.Lists; + +import com.linkedin.avro.fastserde.generated.avro.FastSerdeLogicalTypesDefined; +import com.linkedin.avro.fastserde.generated.avro.FastSerdeLogicalTypesUndefined; + +public class UndefinedLogicalTypesFastSerdeTest extends LogicalTypesTestBase { + + private final LocalTime localTime = LocalTime.now(); + private final LocalDate localDate = LocalDate.now(); + + @Test(groups = "serializationTest") + void definedAndUndefinedLogicalTypesShouldSerializeToTheSameBytes() throws IOException { + // given + FastSerdeLogicalTypesDefined logicalTypesDefined = createFastSerdeLogicalTypesDefined(); + FastSerdeLogicalTypesUndefined logicalTypesUndefined = 
createFastSerdeLogicalTypesUndefined(); + + // when + byte[] bytesFromDefinedLogicalTypes = dropV1Header(verifySerializers( + logicalTypesDefined, FastSerdeLogicalTypesDefined::toByteBuffer)); + + // and + byte[] bytesFromUndefinedLogicalTypes = dropV1Header(verifySerializers( + logicalTypesUndefined, FastSerdeLogicalTypesUndefined::toByteBuffer)); + + // then + Assert.assertEquals(bytesFromDefinedLogicalTypes, bytesFromUndefinedLogicalTypes); + } + + @Test(groups = "deserializationTest") + void deserializationToDefinedAndUndefinedLogicalTypesShouldBeEquivalent() throws IOException { + // given + byte[] bytesWithHeaderAndLogicalTypesDefined = verifySerializers(createFastSerdeLogicalTypesDefined(), + FastSerdeLogicalTypesDefined::toByteBuffer); + byte[] bytesWithHeaderAndLogicalTypesUndefined = verifySerializers(createFastSerdeLogicalTypesUndefined(), + FastSerdeLogicalTypesUndefined::toByteBuffer); + + // assume + Assert.assertEquals(dropV1Header(bytesWithHeaderAndLogicalTypesDefined), dropV1Header(bytesWithHeaderAndLogicalTypesUndefined)); + + // when + FastSerdeLogicalTypesDefined logicalTypesDefined = verifyDeserializers(bytesWithHeaderAndLogicalTypesDefined, + FastSerdeLogicalTypesDefined::fromByteBuffer); + + // and + FastSerdeLogicalTypesUndefined logicalTypesUndefined = verifyDeserializers(bytesWithHeaderAndLogicalTypesUndefined, + FastSerdeLogicalTypesUndefined::fromByteBuffer); + + // then + Assert.assertEquals(toInt(logicalTypesDefined.getTimeMillisField()), logicalTypesUndefined.getTimeMillisField()); + Assert.assertEquals(toInt(logicalTypesDefined.getDateField()), logicalTypesUndefined.getDateField()); + assertArraysOfUnionsAreEquivalent(logicalTypesDefined.getArrayOfUnionOfDateAndTimestampMillis(), + logicalTypesUndefined.getArrayOfUnionOfDateAndTimestampMillis()); + } + + private void assertArraysOfUnionsAreEquivalent(List unionsOfLogicalTypes, List unionsOfPrimitiveTypes) { + Assert.assertEquals(unionsOfLogicalTypes.size(), 
unionsOfPrimitiveTypes.size()); + + for (int i = 0; i < unionsOfLogicalTypes.size(); i++) { + Object logical = unionsOfLogicalTypes.get(i); + Object primitive = unionsOfPrimitiveTypes.get(i); + + if (logical instanceof LocalDate) { + Assert.assertEquals(toInt((LocalDate) logical), ((Integer) primitive).intValue()); + } else if (logical instanceof Instant) { + Assert.assertEquals(((Instant) logical).toEpochMilli(), ((Long) primitive).longValue()); + } else { + throw new RuntimeException("Unknown class of logical-type value: " + logical.getClass()); + } + } + } + + private FastSerdeLogicalTypesDefined createFastSerdeLogicalTypesDefined() { + return FastSerdeLogicalTypesDefined.newBuilder() + .setTimeMillisField(localTime) + .setDateField(localDate) + .setArrayOfUnionOfDateAndTimestampMillis(Lists.newArrayList( + LocalDate.of(2023, 8, 21), + LocalDate.of(2023, 8, 22), + toInstant(localDate), + toInstant(localDate.plusDays(7)), + LocalDate.of(2023, 12, 31))) + .build(); + } + + // same as above but without logical types + private FastSerdeLogicalTypesUndefined createFastSerdeLogicalTypesUndefined() { + return FastSerdeLogicalTypesUndefined.newBuilder() + .setTimeMillisField(toInt(localTime)) + .setDateField(toInt(localDate)) + .setArrayOfUnionOfDateAndTimestampMillis(Lists.newArrayList( + toInt(LocalDate.of(2023, 8, 21)), + toInt(LocalDate.of(2023, 8, 22)), + toInstant(localDate).toEpochMilli(), + toInstant(localDate.plusDays(7)).toEpochMilli(), + toInt(LocalDate.of(2023, 12, 31)))) + .build(); + } + + private int toInt(LocalTime localTime) { + return (int) TimeUnit.NANOSECONDS.toMillis(localTime.toNanoOfDay()); + } + + private int toInt(LocalDate localDate) { + return (int) localDate.toEpochDay(); + } +} diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/micro/benchmark/SerDeMicroBenchmark.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/micro/benchmark/SerDeMicroBenchmark.java index 
c1ae5c025..59b9cd686 100644 --- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/micro/benchmark/SerDeMicroBenchmark.java +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/micro/benchmark/SerDeMicroBenchmark.java @@ -36,9 +36,9 @@ public static BenchmarkTestObject newTestObject() { fields.forEach(field -> { Schema.Type type = field.schema().getType(); if (type.equals(Schema.Type.FLOAT)) { - newObj.put(field.pos(), new Float(100)); + newObj.put(field.pos(), 100f); } else if (type.equals(Schema.Type.INT)) { - newObj.put(field.pos(), new Integer(100)); + newObj.put(field.pos(), 100); } else if (type.equals(Schema.Type.STRING)) { newObj.put(field.pos(), "testsetesttestsetestsetesttestsette"); } else { @@ -129,8 +129,7 @@ public Object[][] useFastAvroOptions() { @Test(dataProvider = "useFastAvroOptionsProvider", groups = {"perfTest"}) public void testFastAvroWithMultiThread(boolean useFastAvro) throws Exception { byte[] serializedBytes = serializedTestObjects(1000); - long startInMs = System.currentTimeMillis(); - AvroGenericDeserializer deserializer = null; + AvroGenericDeserializer deserializer; if (useFastAvro) { FastGenericDatumReader fastGenericDatumReader = new FastGenericDatumReader<>(BenchmarkTestObject.SCHEMA$); diff --git a/fastserde/avro-fastserde-tests14/build.gradle b/fastserde/avro-fastserde-tests14/build.gradle index 2cb7606f9..a4ba80963 100644 --- a/fastserde/avro-fastserde-tests14/build.gradle +++ b/fastserde/avro-fastserde-tests14/build.gradle @@ -13,6 +13,7 @@ sourceSets { java { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/java" srcDir "$buildDir/generated/avro" + exclude "com/linkedin/avro/fastserde/logical/types/**" // LogicalType available since version 1.8 } resources { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/resources" @@ -89,4 +90,4 @@ task runVanillaAvroCodegen { } } -compileTestJava.dependsOn 
runVanillaAvroCodegen \ No newline at end of file +compileTestJava.dependsOn runVanillaAvroCodegen diff --git a/fastserde/avro-fastserde-tests15/build.gradle b/fastserde/avro-fastserde-tests15/build.gradle index 09008482e..afd03179e 100644 --- a/fastserde/avro-fastserde-tests15/build.gradle +++ b/fastserde/avro-fastserde-tests15/build.gradle @@ -17,6 +17,7 @@ sourceSets { java { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/java" srcDir "$buildDir/generated/avro" + exclude "com/linkedin/avro/fastserde/logical/types/**" // LogicalType available since version 1.8 } resources { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/resources" @@ -78,4 +79,4 @@ task runVanillaAvroCodegen { } } -compileTestJava.dependsOn runVanillaAvroCodegen \ No newline at end of file +compileTestJava.dependsOn runVanillaAvroCodegen diff --git a/fastserde/avro-fastserde-tests16/build.gradle b/fastserde/avro-fastserde-tests16/build.gradle index 3ba813c26..42930628e 100644 --- a/fastserde/avro-fastserde-tests16/build.gradle +++ b/fastserde/avro-fastserde-tests16/build.gradle @@ -17,6 +17,7 @@ sourceSets { java { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/java" srcDir "$buildDir/generated/avro" + exclude "com/linkedin/avro/fastserde/logical/types/**" // LogicalType available since version 1.8 } resources { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/resources" @@ -78,4 +79,4 @@ task runVanillaAvroCodegen { } } -compileTestJava.dependsOn runVanillaAvroCodegen \ No newline at end of file +compileTestJava.dependsOn runVanillaAvroCodegen diff --git a/fastserde/avro-fastserde-tests17/build.gradle b/fastserde/avro-fastserde-tests17/build.gradle index 2bf21aae2..22805bb3d 100644 --- a/fastserde/avro-fastserde-tests17/build.gradle +++ b/fastserde/avro-fastserde-tests17/build.gradle @@ -17,6 +17,7 @@ sourceSets { java { srcDir 
"$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/java" srcDir "$buildDir/generated/avro" + exclude "com/linkedin/avro/fastserde/logical/types/**" // LogicalType available since version 1.8 } resources { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/resources" @@ -78,4 +79,4 @@ task runVanillaAvroCodegen { } } -compileTestJava.dependsOn runVanillaAvroCodegen \ No newline at end of file +compileTestJava.dependsOn runVanillaAvroCodegen diff --git a/fastserde/avro-fastserde-tests18/build.gradle b/fastserde/avro-fastserde-tests18/build.gradle index 35b7c3572..69b74563a 100644 --- a/fastserde/avro-fastserde-tests18/build.gradle +++ b/fastserde/avro-fastserde-tests18/build.gradle @@ -17,6 +17,7 @@ sourceSets { java { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/java" srcDir "$buildDir/generated/avro" + exclude "com/linkedin/avro/fastserde/logical/types/**" // due to joda.time instead of java.time } resources { srcDir "$rootProject.projectDir/fastserde/avro-fastserde-tests-common/src/test/resources" @@ -30,6 +31,7 @@ dependencies { testImplementation "org.slf4j:slf4j-api:1.7.14" testImplementation "org.apache.commons:commons-lang3:3.4" testImplementation "com.sun.codemodel:codemodel:2.6" + implementation "joda-time:joda-time:2.12.5" // required by generated Avro classes with logical types testImplementation ("org.apache.avro:avro:1.8.2") { exclude group: "org.slf4j" @@ -78,4 +80,4 @@ task runVanillaAvroCodegen { } } -compileTestJava.dependsOn runVanillaAvroCodegen \ No newline at end of file +compileTestJava.dependsOn runVanillaAvroCodegen diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDatumReaderWriterUtil.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDatumReaderWriterUtil.java index 85e7ad50c..d27173807 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDatumReaderWriterUtil.java 
+++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDatumReaderWriterUtil.java @@ -65,12 +65,12 @@ public static FastGenericDatumReader getFastGenericDatumReader(Schema wri } public static FastGenericDatumWriter getFastGenericDatumWriter(Schema writerSchema) { - FastGenericDatumWriter fastDatumWriter = null; + FastGenericDatumWriter fastDatumWriter; // lookup cache and read lock reentrantReadWriteLock.readLock().lock(); try { - fastDatumWriter = (FastGenericDatumWriter)fastGenericDatumWriterCache.get(writerSchema); + fastDatumWriter = (FastGenericDatumWriter) fastGenericDatumWriterCache.get(writerSchema); } finally { reentrantReadWriteLock.readLock().unlock(); } @@ -84,7 +84,7 @@ public static FastGenericDatumWriter getFastGenericDatumWriter(Schema wri reentrantReadWriteLock.writeLock().unlock(); } } - return (FastGenericDatumWriter ) fastDatumWriter; + return fastDatumWriter; } public static FastSpecificDatumReader getFastSpecificDatumReader(Schema schema) { diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java index 139b3bb98..2a0c344db 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java @@ -14,6 +14,7 @@ import com.sun.codemodel.JDoLoop; import com.sun.codemodel.JExpr; import com.sun.codemodel.JExpression; +import com.sun.codemodel.JFieldRef; import com.sun.codemodel.JForLoop; import com.sun.codemodel.JInvocation; import com.sun.codemodel.JMethod; @@ -38,8 +39,11 @@ import java.util.function.BiConsumer; import java.util.function.Supplier; import java.util.stream.Collectors; + import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTypeException; +import org.apache.avro.Conversion; +import 
org.apache.avro.Conversions; import org.apache.avro.Schema; import org.apache.avro.generic.GenericArray; import org.apache.avro.generic.GenericData; @@ -47,14 +51,13 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.io.Decoder; +import org.apache.avro.specific.SpecificData; import org.apache.avro.util.Utf8; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class FastDeserializerGenerator extends FastDeserializerGeneratorBase { - private static final Logger LOGGER = LoggerFactory.getLogger(FastDeserializerGenerator.class); +public class FastDeserializerGenerator extends FastDeserializerGeneratorBase { + private static final String DECODER = "decoder"; private static final String VAR_NAME_FOR_REUSE = "reuse"; private static int FIELDS_PER_POPULATION_METHOD = 20; @@ -67,18 +70,18 @@ public class FastDeserializerGenerator extends FastDeserializerGeneratorBase< * and Avro treats null as a sentinel value indicating that it should * instantiate a new object instead of re-using. 
*/ - private static final Supplier EMPTY_SUPPLIER = () -> JExpr._null(); + private static final Supplier EMPTY_SUPPLIER = JExpr::_null; private JMethod constructor; - private Map schemaMap = new HashMap<>(); - private Map schemaVarMap = new HashMap<>(); - private Map deserializeMethodMap = new HashMap<>(); - private Map skipMethodMap = new HashMap<>(); - private Map>> exceptionFromMethodMap = new HashMap<>(); + private final Map schemaMap = new HashMap<>(); + private final Map schemaVarMap = new HashMap<>(); + private final Map deserializeMethodMap = new HashMap<>(); + private final Map skipMethodMap = new HashMap<>(); + private final Map>> exceptionFromMethodMap = new HashMap<>(); FastDeserializerGenerator(boolean useGenericTypes, Schema writer, Schema reader, File destination, - ClassLoader classLoader, String compileClassPath) { - super(useGenericTypes, writer, reader, destination, classLoader, compileClassPath); + ClassLoader classLoader, String compileClassPath, U modelData) { + super(useGenericTypes, writer, reader, destination, classLoader, compileClassPath, modelData); } public FastDeserializer generateDeserializer() { @@ -92,9 +95,10 @@ public FastDeserializer generateDeserializer() { constructor = generatedClass.constructor(JMod.PUBLIC); JVar constructorParam = constructor.param(Schema.class, "readerSchema"); constructor.body().assign(JExpr.refthis(readerSchemaVar.name()), constructorParam); + injectConversionClasses(); Schema aliasedWriterSchema = writer; - /** + /* * {@link Schema.applyAliases} is not working correctly in avro-1.4 since there is a bug in this function: * {@literal Schema#getFieldAlias}. **/ @@ -109,7 +113,7 @@ public FastDeserializer generateDeserializer() { } JClass readerSchemaClass = schemaAssistant.classFromSchema(reader); - /** + /* * Writer schema could be using a different namespace from the reader schema, so we should always * use the reader schema class for generic type. 
*/ @@ -158,7 +162,9 @@ public FastDeserializer generateDeserializer() { deserializeMethod.param(readerSchemaClass, VAR_NAME_FOR_REUSE); deserializeMethod.param(Decoder.class, DECODER); + @SuppressWarnings("unchecked") Class> clazz = compileClass(className, schemaAssistant.getUsedFullyQualifiedClassNameSet()); + return clazz.getConstructor(Schema.class).newInstance(reader); } catch (JClassAlreadyExistsException e) { throw new FastDeserializerGeneratorException("Class: " + className + " already exists"); @@ -752,8 +758,9 @@ private void processArray(JVar arraySchemaVar, final String name, final Schema a action = FieldAction.fromValues(arraySchema.getElementType().getType(), false, EMPTY_SYMBOL); } - final JVar arrayVar = action.getShouldRead() ? declareValueVar(name, effectiveArrayReaderSchema, parentBody, true, false, true) : null; - /** + final boolean useLogicalType = action.getShouldRead() && logicalTypeEnabled(effectiveArrayReaderSchema.getElementType()); + final JVar arrayVar = action.getShouldRead() ? declareValueVar(name, effectiveArrayReaderSchema, parentBody, true, false, !useLogicalType) : null; + /* * Special optimization for float array by leveraging {@link BufferBackedPrimitiveFloatList}. * * TODO: Handle other primitive element types here. 
@@ -778,12 +785,13 @@ private void processArray(JVar arraySchemaVar, final String name, final Schema a final Supplier finalReuseSupplier = potentiallyCacheInvocation(reuseSupplier, parentBody, "oldArray"); if (finalAction.getShouldRead()) { - JClass arrayClass = schemaAssistant.classFromSchema(effectiveArrayReaderSchema, false, false, true); - JClass abstractErasedArrayClass = schemaAssistant.classFromSchema(effectiveArrayReaderSchema, true, false, true).erasure(); + JClass arrayClass = schemaAssistant.classFromSchema(effectiveArrayReaderSchema, false, false, !useLogicalType); + JClass abstractErasedArrayClass = schemaAssistant.classFromSchema(effectiveArrayReaderSchema, true, false, !useLogicalType).erasure(); JInvocation newArrayExp = JExpr._new(arrayClass).arg(JExpr.cast(codeModel.INT, chunkLen)); - if (useGenericTypes && !SchemaAssistant.isPrimitive(effectiveArrayReaderSchema.getElementType())) { - /** + + if (useGenericTypes && (useLogicalType || !SchemaAssistant.isPrimitive(effectiveArrayReaderSchema.getElementType()))) { + /* * N.B.: The ColdPrimitiveXList implementations do not take the schema as a constructor param, * but the {@link org.apache.avro.generic.GenericData.Array} does. 
*/ @@ -791,7 +799,7 @@ private void processArray(JVar arraySchemaVar, final String name, final Schema a } JInvocation finalNewArrayExp = newArrayExp; - /** N.B.: Need to use the erasure because instanceof does not support generic types */ + /* N.B.: Need to use the erasure because instanceof does not support generic types */ ifCodeGen(parentBody, finalReuseSupplier.get()._instanceof(abstractErasedArrayClass), then2 -> { then2.assign(arrayVar, JExpr.cast(abstractErasedArrayClass, finalReuseSupplier.get())); then2.invoke(arrayVar, "clear"); @@ -813,6 +821,7 @@ private void processArray(JVar arraySchemaVar, final String name, final Schema a BiConsumer putValueInArray = null; if (finalAction.getShouldRead()) { String addMethod = SchemaAssistant.isPrimitive(effectiveArrayReaderSchema.getElementType()) + && !logicalTypeEnabled(effectiveArrayReaderSchema.getElementType()) ? "addPrimitive" : "add"; putValueInArray = (block, expression) -> block.invoke(arrayVar, addMethod).arg(expression); @@ -864,7 +873,7 @@ private JExpression readStringableExpression(JClass stringbleClass) { // More GC-efficient stringableArgExpr = JExpr.direct(DECODER + ".readString()"); } else { - /** + /* * {@link BinaryDecoder#readString()} is not available in Avro 1.4 and 1.5. */ stringableArgExpr = JExpr.direct(DECODER + ".readString(null).toString()"); @@ -876,7 +885,7 @@ private void processMap(JVar mapSchemaVar, final String name, final Schema mapSc JBlock parentBody, FieldAction action, BiConsumer putMapIntoParent, Supplier reuseSupplier) { - /** + /* * Determine the action symbol for Map value. {@link ResolvingGrammarGenerator} generates * resolving grammar symbols with reversed order of production sequence. If this symbol is * a terminal, its production list will be null. 
Otherwise the production list @@ -1033,26 +1042,29 @@ private void processFixed(final Schema schema, JBlock body, FieldAction action, elseBlock -> elseBlock.assign(fixedBuffer, JExpr.direct(" new byte[" + schema.getFixedSize() + "]")) ); } + body.directStatement(DECODER + ".readFixed(" + fixedBuffer.name() + ");"); - JClass fixedClass = schemaAssistant.classFromSchema(schema, false, false, false); + JClass fixedClass = schemaAssistant.classFromSchema(schema, false, false, false, false); + JExpression valueToWrite; + if (useGenericTypes) { - JInvocation newFixedExpr; if (Utils.isAvro14()) { - newFixedExpr = JExpr._new(fixedClass).arg(fixedBuffer); + valueToWrite = JExpr._new(fixedClass).arg(fixedBuffer); } else { - newFixedExpr = JExpr._new(fixedClass).arg(getSchemaExpr(schema)).arg(fixedBuffer); + valueToWrite = JExpr._new(fixedClass).arg(getSchemaExpr(schema)).arg(fixedBuffer); } - putFixedIntoParent.accept(body, newFixedExpr); } else { // fixed implementation in avro-1.4 // The specific fixed type only has a constructor with empty param - JVar fixed = body.decl(fixedClass, getUniqueName(schema.getName())); - JInvocation newFixedExpr = JExpr._new(fixedClass); - body.assign(fixed, newFixedExpr); + JVar fixed = body.decl(fixedClass, getUniqueName(schema.getName()), JExpr._new(fixedClass)); body.directStatement(fixed.name() + ".bytes(" + fixedBuffer.name() + ");"); - putFixedIntoParent.accept(body, fixed); + + valueToWrite = fixed; } + + JExpression convertedValue = generateConversionCallIfLogicalType(valueToWrite, schema, body); + putFixedIntoParent.accept(body, convertedValue); } else { body.directStatement(DECODER + ".skipFixed(" + schema.getFixedSize() + ");"); } @@ -1091,7 +1103,7 @@ private void processEnum(final Schema schema, final JBlock body, FieldAction act newEnum = schemaAssistant.getEnumValueByIndex(schema, enumValueExpr, getSchemaExpr(schema)); } else { - /** + /* * Define a class variable to keep the mapping between the enum index from the writer 
schema and the corresponding * one in the reader schema, and there are some cases: * 1. If the enum index doesn't exist in this map, runtime will throw RuntimeException. @@ -1101,7 +1113,7 @@ private void processEnum(final Schema schema, final JBlock body, FieldAction act JVar tempEnumMappingVar = constructor.body().decl(codeModel.ref(HashMap.class), getUniqueName("tempEnumMapping"), JExpr._new(codeModel.ref(HashMap.class)).arg(JExpr.lit(enumAdjustAction.adjustments.length))); - /** + /* * Populate the global enum mapping based on the enum adjustment. */ for (int i = 0; i < enumAdjustAction.adjustments.length; i++) { @@ -1126,19 +1138,19 @@ private void processEnum(final Schema schema, final JBlock body, FieldAction act JVar lookupResult = body.decl(codeModel._ref(Object.class), getUniqueName("enumIndexLookupResult"), enumMappingVar.invoke("get").arg(enumIndex)); - /** + /* * Found the enum index mapping. */ JConditional ifBlock = body._if(lookupResult._instanceof(codeModel.ref(Integer.class))); JExpression ithValResult = schemaAssistant.getEnumValueByIndex(schema, JExpr.cast(codeModel.ref(Integer.class), lookupResult), getSchemaExpr(schema)); ifBlock._then().assign((JVar) newEnum, ithValResult); - /** + /* * Unknown enum in reader schema. */ JConditional elseIfBlock = ifBlock._elseif(lookupResult._instanceof(codeModel.ref(AvroTypeException.class))); elseIfBlock._then()._throw(JExpr.cast(codeModel.ref(AvroTypeException.class), lookupResult)); - /** + /* * Unknown enum in writer schema. 
*/ elseIfBlock._else()._throw(JExpr._new(codeModel.ref(RuntimeException.class)) @@ -1150,21 +1162,26 @@ private void processEnum(final Schema schema, final JBlock body, FieldAction act } } - private void processBytes(JBlock body, FieldAction action, BiConsumer putValueIntoParent, - Supplier reuseSupplier) { + private void processBytes(Schema schema, JBlock body, FieldAction action, + BiConsumer putValueIntoParent, Supplier reuseSupplier) { if (action.getShouldRead()) { + JVar byteBufferVar = body.decl(codeModel.ref(ByteBuffer.class), getUniqueName("byteBuffer")); + if (reuseSupplier.get().equals(JExpr._null())) { - putValueIntoParent.accept(body, JExpr.invoke(JExpr.direct(DECODER), "readBytes").arg(JExpr.direct("null"))); + byteBufferVar.init(JExpr.invoke(JExpr.direct(DECODER), "readBytes").arg(JExpr.direct("null"))); } else { final Supplier finalReuseSupplier = potentiallyCacheInvocation(reuseSupplier, body, "oldBytes"); ifCodeGen(body, - finalReuseSupplier.get()._instanceof(codeModel.ref("java.nio.ByteBuffer")), - thenBlock -> putValueIntoParent.accept(thenBlock, JExpr.invoke(JExpr.direct(DECODER), "readBytes") + finalReuseSupplier.get()._instanceof(codeModel.ref(ByteBuffer.class)), + thenBlock -> thenBlock.assign(byteBufferVar, JExpr.invoke(JExpr.direct(DECODER), "readBytes") .arg(JExpr.cast(codeModel.ref(ByteBuffer.class), finalReuseSupplier.get()))), - elseBlock -> putValueIntoParent.accept(elseBlock, - JExpr.invoke(JExpr.direct(DECODER), "readBytes").arg(JExpr.direct("null"))) + elseBlock -> elseBlock.assign(byteBufferVar, JExpr.invoke(JExpr.direct(DECODER), "readBytes") + .arg(JExpr.direct("null"))) ); } + + JExpression finalValueVar = generateConversionCallIfLogicalType(byteBufferVar, schema, body); + putValueIntoParent.accept(body, finalValueVar); } else { body.directStatement(DECODER + ".skipBytes();"); } @@ -1174,21 +1191,27 @@ private void processString(Schema schema, JBlock body, FieldAction action, BiConsumer putValueIntoParent, Supplier 
reuseSupplier) { if (action.getShouldRead()) { JClass stringClass = schemaAssistant.findStringClass(schema); + JVar charSequenceVar = body.decl(stringClass, getUniqueName("charSequence")); + if (stringClass.equals(codeModel.ref(Utf8.class))) { if (reuseSupplier.equals(EMPTY_SUPPLIER)) { - putValueIntoParent.accept(body, JExpr.invoke(JExpr.direct(DECODER), "readString").arg(JExpr._null())); + charSequenceVar.init(JExpr.invoke(JExpr.direct(DECODER), "readString").arg(JExpr._null())); } else { final Supplier finalReuseSupplier = potentiallyCacheInvocation(reuseSupplier, body, "oldString"); ifCodeGen(body, finalReuseSupplier.get()._instanceof(codeModel.ref(Utf8.class)), - thenBlock -> putValueIntoParent.accept(thenBlock, JExpr.invoke(JExpr.direct(DECODER), "readString").arg(JExpr.cast(codeModel.ref(Utf8.class), finalReuseSupplier.get()))), - elseBlock -> putValueIntoParent.accept(elseBlock, + thenBlock -> thenBlock.assign(charSequenceVar, + JExpr.invoke(JExpr.direct(DECODER), "readString").arg(JExpr.cast(codeModel.ref(Utf8.class), finalReuseSupplier.get()))), + elseBlock -> elseBlock.assign(charSequenceVar, JExpr.invoke(JExpr.direct(DECODER), "readString").arg(JExpr._null()))); } } else if (stringClass.equals(codeModel.ref(String.class))) { - putValueIntoParent.accept(body, JExpr.invoke(JExpr.direct(DECODER), "readString")); + charSequenceVar.init(JExpr.invoke(JExpr.direct(DECODER), "readString")); } else { - putValueIntoParent.accept(body, readStringableExpression(stringClass)); + charSequenceVar.init(readStringableExpression(stringClass)); } + + JExpression finalValueVar = generateConversionCallIfLogicalType(charSequenceVar, schema, body); + putValueIntoParent.accept(body, finalValueVar); } else { body.directStatement(DECODER + ".skipString();"); } @@ -1203,7 +1226,7 @@ private void processPrimitive(final Schema schema, JBlock body, FieldAction acti processString(schema, body, action, putValueIntoParent, reuseSupplier); return; case BYTES: - processBytes(body, 
action, putValueIntoParent, reuseSupplier); + processBytes(schema, body, action, putValueIntoParent, reuseSupplier); return; case INT: readFunction = "readInt()"; @@ -1224,14 +1247,32 @@ private void processPrimitive(final Schema schema, JBlock body, FieldAction acti throw new FastDeserializerGeneratorException("Unsupported primitive schema of type: " + schema.getType()); } - JExpression primitiveValueExpression = JExpr.direct("decoder." + readFunction); if (action.getShouldRead()) { - putValueIntoParent.accept(body, primitiveValueExpression); + JExpression primitiveValueExpression = JExpr.direct("decoder." + readFunction); + JExpression finalValueVar = generateConversionCallIfLogicalType(primitiveValueExpression, schema, body); + putValueIntoParent.accept(body, finalValueVar); } else { body.directStatement(DECODER + "." + readFunction + ";"); } } + private JExpression generateConversionCallIfLogicalType(JExpression rawValueVar, Schema schema, JBlock body) { + if (logicalTypeEnabled(schema)) { + JFieldRef schemaFieldRef = injectLogicalTypeSchema(schema); + Conversion conversion = (Conversion) schemaAssistant.getConversion(schema.getLogicalType()); + + return body.decl(codeModel.ref(conversion.getConvertedType()), getUniqueName("convertedValue"), + JExpr.cast(codeModel.ref(conversion.getConvertedType()), codeModel.ref(Conversions.class) + .staticInvoke("convertToLogicalType") + .arg(rawValueVar) + .arg(schemaFieldRef) + .arg(schemaFieldRef.invoke("getLogicalType")) + .arg(getConversionRef(schema.getLogicalType())))); + } else { + return rawValueVar; + } + } + private JVar declareSchemaVariableForRecordField(final String name, final Schema schema, JVar schemaVar) { return declareSchemaVar(schema, name + "Field", schemaVar.invoke("getField").arg(name).invoke("schema")); } @@ -1240,12 +1281,15 @@ private JVar declareSchemaVar(Schema valueSchema, String variableName, JInvocati if (!useGenericTypes) { return null; } - /** + + /* * TODO: In theory, we should only need 
Record, Enum and Fixed here since only these types require * schema for the corresponding object initialization in Generic mode. */ - if (SchemaAssistant.isComplexType(valueSchema) || Schema.Type.ENUM.equals(valueSchema.getType()) - || Schema.Type.FIXED.equals(valueSchema.getType())) { + boolean shouldDeclareSchemaVar = SchemaAssistant.isComplexType(valueSchema) || logicalTypeEnabled(valueSchema) + || Schema.Type.ENUM.equals(valueSchema.getType()) || Schema.Type.FIXED.equals(valueSchema.getType()); + + if (shouldDeclareSchemaVar) { int schemaId = Utils.getSchemaFingerprint(valueSchema); JVar schemaVar = schemaVarMap.get(schemaId); if (schemaVar != null) { diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java index c4166c700..c4b301f6d 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java @@ -6,22 +6,23 @@ import java.util.Arrays; import java.util.Collections; import java.util.ListIterator; -import java.util.Optional; import org.apache.avro.Schema; import com.linkedin.avro.fastserde.backport.Symbol; + +import org.apache.avro.generic.GenericData; import org.apache.avro.util.Utf8; /** TODO all of this could be moved to {@link FastDeserializerGenerator} */ -public abstract class FastDeserializerGeneratorBase extends FastSerdeBase { +public abstract class FastDeserializerGeneratorBase extends FastSerdeBase { protected static final Symbol EMPTY_SYMBOL = new Symbol(Symbol.Kind.TERMINAL, new Symbol[]{}) {}; protected static final Symbol END_SYMBOL = new Symbol(Symbol.Kind.TERMINAL, new Symbol[]{}) {}; protected final Schema writer; protected final Schema reader; FastDeserializerGeneratorBase(boolean useGenericTypes, Schema writer, Schema reader, 
File destination, ClassLoader classLoader, - String compileClassPath) { - super("deserialization", useGenericTypes, Utf8.class, destination, classLoader, compileClassPath, false); + String compileClassPath, U modelData) { + super("deserialization", useGenericTypes, Utf8.class, destination, classLoader, compileClassPath, modelData, false); this.writer = writer; this.reader = reader; } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java index c0e868984..05e9666ad 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java @@ -4,6 +4,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicReference; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; import org.apache.avro.io.DatumReader; import org.apache.avro.io.Decoder; import org.slf4j.Logger; @@ -18,7 +19,8 @@ public class FastGenericDatumReader implements DatumReader { private Schema writerSchema; private Schema readerSchema; - private FastSerdeCache cache; + private final FastSerdeCache cache; + private final GenericData modelData; private final AtomicReference> cachedFastDeserializer = new AtomicReference<>(); @@ -30,17 +32,26 @@ public FastGenericDatumReader(Schema writerSchema, Schema readerSchema) { this(writerSchema, readerSchema, FastSerdeCache.getDefaultInstance()); } + public FastGenericDatumReader(Schema writerSchema, Schema readerSchema, GenericData modelData) { + this(writerSchema, readerSchema, FastSerdeCache.getDefaultInstance(), modelData); + } + public FastGenericDatumReader(Schema schema, FastSerdeCache cache) { this(schema, schema, cache); } public FastGenericDatumReader(Schema writerSchema, Schema readerSchema, FastSerdeCache cache) { + 
this(writerSchema, readerSchema, cache, null); + } + + public FastGenericDatumReader(Schema writerSchema, Schema readerSchema, FastSerdeCache cache, GenericData modelData) { this.writerSchema = writerSchema; this.readerSchema = readerSchema; this.cache = cache != null ? cache : FastSerdeCache.getDefaultInstance(); + this.modelData = modelData; if (!Utils.isSupportedAvroVersionsForDeserializer()) { - this.cachedFastDeserializer.set(getRegularAvroImpl(writerSchema, readerSchema)); + this.cachedFastDeserializer.set(getRegularAvroImpl(writerSchema, readerSchema, modelData)); if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Current avro version: " + Utils.getRuntimeAvroVersion() + " is not supported, and only the following" @@ -49,7 +60,7 @@ public FastGenericDatumReader(Schema writerSchema, Schema readerSchema, FastSerd } } else if (!FastSerdeCache.isSupportedForFastDeserializer(readerSchema.getType())) { // For unsupported schema type, we won't try to fetch it from FastSerdeCache since it is inefficient. 
- this.cachedFastDeserializer.set(getRegularAvroImpl(writerSchema, readerSchema)); + this.cachedFastDeserializer.set(getRegularAvroImpl(writerSchema, readerSchema, modelData)); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Skip the FastGenericDeserializer generation since read schema type: " + readerSchema.getType() + " is not supported"); @@ -69,14 +80,13 @@ public void setSchema(Schema schema) { } @Override - @SuppressWarnings("unchecked") public T read(T reuse, Decoder in) throws IOException { - FastDeserializer fastDeserializer = null; + FastDeserializer fastDeserializer; if (cachedFastDeserializer.get() != null) { fastDeserializer = cachedFastDeserializer.get(); } else { - fastDeserializer = getFastDeserializerFromCache(cache, writerSchema, readerSchema); + fastDeserializer = getFastDeserializerFromCache(cache, writerSchema, readerSchema, modelData); if (!FastSerdeCache.isFastDeserializer(fastDeserializer)) { // don't cache } else { @@ -93,25 +103,26 @@ public T read(T reuse, Decoder in) throws IOException { public CompletableFuture> getFastDeserializer() { return cachedFastDeserializer.get() != null ? 
CompletableFuture.completedFuture(cachedFastDeserializer.get()) - : getFastDeserializer(cache, writerSchema, readerSchema).thenApply(d -> { + : getFastDeserializer(cache, writerSchema, readerSchema, modelData).thenApply(d -> { cachedFastDeserializer.compareAndSet(null, d); return d; }); } protected CompletableFuture> getFastDeserializer(FastSerdeCache fastSerdeCache, - Schema writerSchema, Schema readerSchema) { - return fastSerdeCache.getFastGenericDeserializerAsync(writerSchema, readerSchema) + Schema writerSchema, Schema readerSchema, GenericData modelData) { + return fastSerdeCache.getFastGenericDeserializerAsync(writerSchema, readerSchema, modelData) .thenApply(d -> (FastDeserializer) d); } + @SuppressWarnings("unchecked") protected FastDeserializer getFastDeserializerFromCache(FastSerdeCache fastSerdeCache, Schema writerSchema, - Schema readerSchema) { - return (FastDeserializer) fastSerdeCache.getFastGenericDeserializer(writerSchema, readerSchema); + Schema readerSchema, GenericData modelData) { + return (FastDeserializer) fastSerdeCache.getFastGenericDeserializer(writerSchema, readerSchema, modelData); } - protected FastDeserializer getRegularAvroImpl(Schema writerSchema, Schema readerSchema) { - return new FastSerdeCache.FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema); + protected FastDeserializer getRegularAvroImpl(Schema writerSchema, Schema readerSchema, GenericData modelData) { + return new FastSerdeCache.FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema, modelData); } /** diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumWriter.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumWriter.java index 1aa17117d..735fc1f91 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumWriter.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumWriter.java @@ -2,6 +2,7 
@@ import java.io.IOException; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Encoder; import org.slf4j.Logger; @@ -13,19 +14,29 @@ */ public class FastGenericDatumWriter implements DatumWriter { private static final Logger LOGGER = LoggerFactory.getLogger(FastGenericDatumWriter.class); - private final FastSerdeCache cache; private Schema writerSchema; + private final GenericData modelData; + private final FastSerdeCache cache; private FastSerializer cachedFastSerializer; public FastGenericDatumWriter(Schema schema) { - this(schema, FastSerdeCache.getDefaultInstance()); + this(schema, null, null); + } + + public FastGenericDatumWriter(Schema schema, GenericData modelData) { + this(schema, modelData, null); } public FastGenericDatumWriter(Schema schema, FastSerdeCache cache) { + this(schema, null, cache); + } + + public FastGenericDatumWriter(Schema schema, GenericData modelData, FastSerdeCache cache) { this.writerSchema = schema; + this.modelData = modelData; this.cache = cache != null ? cache : FastSerdeCache.getDefaultInstance(); if (!Utils.isSupportedAvroVersionsForSerializer()) { - this.cachedFastSerializer = getRegularAvroImpl(writerSchema); + this.cachedFastSerializer = getRegularAvroImpl(writerSchema, modelData); if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Current avro version: " + Utils.getRuntimeAvroVersion() + " is not supported, and only the following" @@ -34,7 +45,7 @@ public FastGenericDatumWriter(Schema schema, FastSerdeCache cache) { } } else if (!FastSerdeCache.isSupportedForFastSerializer(schema.getType())) { // For unsupported schema type, we won't try to fetch it from FastSerdeCache since it is inefficient. 
- this.cachedFastSerializer = getRegularAvroImpl(writerSchema); + this.cachedFastSerializer = getRegularAvroImpl(writerSchema, modelData); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Skip the FastGenericSerializer generation since read schema type: " + schema.getType() + " is not supported"); @@ -48,16 +59,13 @@ public void setSchema(Schema schema) { } @Override - @SuppressWarnings("unchecked") public void write(T data, Encoder out) throws IOException { FastSerializer fastSerializer; if (cachedFastSerializer != null) { fastSerializer = cachedFastSerializer; } else { - fastSerializer = getFastSerializerFromCache(cache, writerSchema); - if (!isFastSerializer(fastSerializer)) { - // don't cache - } else { + fastSerializer = getFastSerializerFromCache(cache, writerSchema, modelData); + if (isFastSerializer(fastSerializer)) { cachedFastSerializer = fastSerializer; if (LOGGER.isDebugEnabled()) { LOGGER.debug("FastSerializer has been generated and cached for writer schema: [" + writerSchema + "]"); @@ -68,22 +76,22 @@ public void write(T data, Encoder out) throws IOException { fastSerializer.serialize(data, out); } - protected FastSerializer getFastSerializerFromCache(FastSerdeCache fastSerdeCache, Schema schema) { - return (FastSerializer) fastSerdeCache.getFastGenericSerializer(schema); + @SuppressWarnings("unchecked") + protected FastSerializer getFastSerializerFromCache(FastSerdeCache fastSerdeCache, Schema schema, GenericData modelData) { + return (FastSerializer) fastSerdeCache.getFastGenericSerializer(schema, modelData); } - protected FastSerializer getRegularAvroImpl(Schema schema) { - return new FastSerdeCache.FastSerializerWithAvroGenericImpl<>(schema); + protected FastSerializer getRegularAvroImpl(Schema schema, GenericData modelData) { + return new FastSerdeCache.FastSerializerWithAvroGenericImpl<>(schema, modelData); } - private static boolean isFastSerializer(FastSerializer serializer) { + private static boolean isFastSerializer(FastSerializer serializer) { 
return !(serializer instanceof FastSerdeCache.FastSerializerWithAvroSpecificImpl || serializer instanceof FastSerdeCache.FastSerializerWithAvroGenericImpl); } /** - * Return a flag to indicate whether fast serializer is being used or not. - * @return + * @return flag to indicate whether fast serializer is being used or not */ public boolean isFastSerializerUsed() { if (cachedFastSerializer == null) { diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDeserializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDeserializerGenerator.java index fa2661440..dd23e01ef 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDeserializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDeserializerGenerator.java @@ -2,12 +2,13 @@ import java.io.File; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; -public final class FastGenericDeserializerGenerator extends FastDeserializerGenerator { +public final class FastGenericDeserializerGenerator extends FastDeserializerGenerator { - FastGenericDeserializerGenerator(Schema writer, Schema reader, File destination, ClassLoader classLoader, - String compileClassPath) { - super(true, writer, reader, destination, classLoader, compileClassPath); + public FastGenericDeserializerGenerator(Schema writer, Schema reader, File destination, ClassLoader classLoader, + String compileClassPath, GenericData modelData) { + super(true, writer, reader, destination, classLoader, compileClassPath, modelData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericSerializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericSerializerGenerator.java index b99517967..50947ad17 100644 --- 
a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericSerializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericSerializerGenerator.java @@ -2,12 +2,13 @@ import java.io.File; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; -public class FastGenericSerializerGenerator extends FastSerializerGenerator { +public class FastGenericSerializerGenerator extends FastSerializerGenerator { public FastGenericSerializerGenerator(Schema schema, File destination, ClassLoader classLoader, - String compileClassPath) { - super(true, schema, destination, classLoader, compileClassPath); + String compileClassPath, GenericData modelData) { + super(true, schema, destination, classLoader, compileClassPath, modelData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeBase.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeBase.java index 3df28ba46..f1c721048 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeBase.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeBase.java @@ -1,22 +1,34 @@ package com.linkedin.avro.fastserde; -import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelperCommon; import com.sun.codemodel.JBlock; +import com.sun.codemodel.JClass; import com.sun.codemodel.JCodeModel; import com.sun.codemodel.JConditional; import com.sun.codemodel.JDefinedClass; import com.sun.codemodel.JExpr; import com.sun.codemodel.JExpression; +import com.sun.codemodel.JFieldRef; +import com.sun.codemodel.JFieldVar; +import com.sun.codemodel.JMethod; +import com.sun.codemodel.JMod; import com.sun.codemodel.JVar; + import java.io.File; import java.io.IOException; +import java.util.HashSet; import java.util.Set; import java.util.concurrent.ConcurrentMap; import 
java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import javax.tools.JavaCompiler; import javax.tools.ToolProvider; + +import org.apache.avro.Conversion; +import org.apache.avro.LogicalType; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.specific.SpecificData; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,11 +39,14 @@ /** * Utilities used by both serialization and deserialization code. */ -public abstract class FastSerdeBase { +public abstract class FastSerdeBase { private static final Logger LOGGER = LoggerFactory.getLogger(FastSerdeBase.class); protected static final String SEP = "_"; public static final String GENERATED_PACKAGE_NAME_PREFIX = "com.linkedin.avro.fastserde.generated."; + private final Set injectedSchemaFieldNames = new HashSet<>(); + private final Set injectedConversionFieldNames = new HashSet<>(); + /** * A repository of how many times a given name was used. * N.B.: Does not actually need to be threadsafe, but it is made so just for defensive coding reasons. @@ -41,26 +56,32 @@ public abstract class FastSerdeBase { protected final String generatedPackageName; protected final JCodeModel codeModel = new JCodeModel(); protected final boolean useGenericTypes; - protected final SchemaAssistant schemaAssistant; + protected final SchemaAssistant schemaAssistant; protected final File destination; protected final ClassLoader classLoader; protected final String compileClassPath; + /** + * Contains information regarding conversion classes used by logical types feature. + * In case of specific Avro class it is just its MODEL$ field. 
+ */ + protected final T modelData; protected JDefinedClass generatedClass; public FastSerdeBase(String description, boolean useGenericTypes, Class defaultStringClass, File destination, ClassLoader classLoader, - String compileClassPath, boolean isForSerializer) { + String compileClassPath, T modelData, boolean isForSerializer) { this.useGenericTypes = useGenericTypes; - this.schemaAssistant = new SchemaAssistant(codeModel, useGenericTypes, defaultStringClass, isForSerializer); + this.schemaAssistant = new SchemaAssistant<>(codeModel, useGenericTypes, defaultStringClass, modelData, isForSerializer); this.destination = destination; this.classLoader = classLoader; this.compileClassPath = (null == compileClassPath ? "" : compileClassPath); - this.generatedPackageName = GENERATED_PACKAGE_NAME_PREFIX + description + "." + AvroCompatibilityHelper.getRuntimeAvroVersion().name(); + this.modelData = modelData; + this.generatedPackageName = GENERATED_PACKAGE_NAME_PREFIX + description + "." + AvroCompatibilityHelperCommon.getRuntimeAvroVersion().name(); this.generatedSourcesPath = generateSourcePathFromPackageName(generatedPackageName); } /** * A function to generate unique names, such as those of variables and functions, within the scope - * of the this class instance (i.e. per serializer of a given schema or deserializer of a given + * of this class instance (i.e. per serializer of a given schema or deserializer of a given * schema pair). 
* * @param prefix String to serve as a prefix for the unique name @@ -100,7 +121,49 @@ protected JVar declareValueVar(final String name, final Schema schema, JBlock bl } } - @SuppressWarnings("unchecked") + protected void injectConversionClasses() { + if (Utils.isLogicalTypeSupported()) { + if (modelData != null) { + modelData.getConversions().forEach(this::injectConversionFieldIfMissing); + } + } + } + + protected JFieldRef getConversionRef(LogicalType logicalType) { + final Conversion conversion = (Conversion) schemaAssistant.getConversion(logicalType); + return injectConversionFieldIfMissing(conversion); + } + + private JFieldRef injectConversionFieldIfMissing(Conversion conversion) { + final String conversionFieldName = Utils.toValidJavaIdentifier("conversion_" + conversion.getLogicalTypeName()); + + if (injectedConversionFieldNames.add(conversionFieldName)) { + generatedClass.field(JMod.PRIVATE | JMod.FINAL, conversion.getClass(), conversionFieldName, + JExpr._new(codeModel.ref(conversion.getClass()))); + } + + return JExpr.refthis(conversionFieldName); + } + + protected JFieldRef injectLogicalTypeSchema(Schema schema) { + String schemaFieldName = toLogicalTypeSchemaFieldName(schema); + if (injectedSchemaFieldNames.add(schemaFieldName)) { + generatedClass.field(JMod.PRIVATE | JMod.FINAL, Schema.class, schemaFieldName, + codeModel.ref(Schema.class).staticInvoke("parse").arg(schema.toString())); + } + + return JExpr.refthis(schemaFieldName); + } + + protected String toLogicalTypeSchemaFieldName(Schema schema) { + long schemaFingerprint = Utils.getSchemaFingerprint(schema); + return ("logicalTypeSchema_" + schemaFingerprint).replace('-', '_'); + } + + protected boolean logicalTypeEnabled(Schema schema) { + return schemaAssistant.logicalTypeEnabled(schema); + } + protected Class compileClass(final String className, Set knownUsedFullyQualifiedClassNameSet) throws IOException, ClassNotFoundException { codeModel.build(destination); @@ -109,7 +172,7 @@ protected Class 
compileClass(final String className, Set knownUsedFullyQ JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); if (null == compiler) { - /** + /* * If the above function returns null, it is very likely that the env setting: "JAVA_HOME" is not being setup properly. */ throw new FastSerdeGeneratorException("Couldn't locate java compiler at runtime, please double check your env " diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeCache.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeCache.java index 4ac841e63..9041e8dc0 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeCache.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeCache.java @@ -23,17 +23,19 @@ import org.apache.avro.Schema; import org.apache.avro.generic.ColdGenericDatumReader; import org.apache.avro.generic.ColdSpecificDatumReader; +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Decoder; import org.apache.avro.io.Encoder; +import org.apache.avro.specific.SpecificData; import org.apache.avro.specific.SpecificDatumReader; -import org.apache.avro.specific.SpecificDatumWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; + /** * Fast avro serializer/deserializer cache. 
Stores generated and already compiled instances of serializers and @@ -184,6 +186,13 @@ public static boolean isFastDeserializer(FastDeserializer deserializer) { || deserializer instanceof FastDeserializerWithAvroGenericImpl); } + /** + * @see #getFastSpecificDeserializer(Schema, Schema, SpecificData) + */ + public FastDeserializer getFastSpecificDeserializer(Schema writerSchema, Schema readerSchema) { + return getFastSpecificDeserializer(writerSchema, readerSchema, null); + } + /** * Generates if needed and returns specific-class aware avro {@link FastDeserializer}. * @@ -191,23 +200,25 @@ public static boolean isFastDeserializer(FastDeserializer deserializer) { * {@link Schema} of written data * @param readerSchema * {@link Schema} intended to be used during deserialization + * @param modelData + * Provides additional information not available in the schema, e.g. conversion classes * @return specific-class aware avro {@link FastDeserializer} */ - public FastDeserializer getFastSpecificDeserializer(Schema writerSchema, Schema readerSchema) { + public FastDeserializer getFastSpecificDeserializer(Schema writerSchema, Schema readerSchema, SpecificData modelData) { String schemaKey = getSchemaKey(writerSchema, readerSchema); FastDeserializer deserializer = fastSpecificRecordDeserializersCache.get(schemaKey); if (deserializer == null) { - AtomicBoolean status = new AtomicBoolean(false); + AtomicBoolean fastDeserializerMissingInCache = new AtomicBoolean(false); deserializer = fastSpecificRecordDeserializersCache.computeIfAbsent( schemaKey, k -> { - status.set(true); - return new FastDeserializerWithAvroSpecificImpl<>(writerSchema, readerSchema); + fastDeserializerMissingInCache.set(true); + return new FastDeserializerWithAvroSpecificImpl<>(writerSchema, readerSchema, modelData); }); - if (status.get()) { - CompletableFuture.supplyAsync(() -> buildSpecificDeserializer(writerSchema, readerSchema), executor) + if (fastDeserializerMissingInCache.get()) { + 
CompletableFuture.supplyAsync(() -> buildSpecificDeserializer(writerSchema, readerSchema, modelData), executor) .thenAccept(d -> fastSpecificRecordDeserializersCache.put(schemaKey, d)); } } @@ -215,6 +226,13 @@ public FastDeserializer getFastSpecificDeserializer(Schema writerSchema, Sche return deserializer; } + /** + * @see #getFastGenericDeserializer(Schema, Schema, GenericData) + */ + public FastDeserializer getFastGenericDeserializer(Schema writerSchema, Schema readerSchema) { + return getFastGenericDeserializer(writerSchema, readerSchema, null); + } + /** * Generates if needed and returns generic-class aware avro {@link FastDeserializer}. * @@ -222,51 +240,62 @@ public FastDeserializer getFastSpecificDeserializer(Schema writerSchema, Sche * {@link Schema} of written data * @param readerSchema * {@link Schema} intended to be used during deserialization + * @param modelData + * Provides additional information not available in the schema, e.g. conversion classes * @return generic-class aware avro {@link FastDeserializer} */ - public FastDeserializer getFastGenericDeserializer(Schema writerSchema, Schema readerSchema) { + public FastDeserializer getFastGenericDeserializer(Schema writerSchema, Schema readerSchema, GenericData modelData) { String schemaKey = getSchemaKey(writerSchema, readerSchema); FastDeserializer deserializer = fastGenericRecordDeserializersCache.get(schemaKey); if (deserializer == null) { - AtomicBoolean status = new AtomicBoolean(false); + AtomicBoolean fastDeserializerMissingInCache = new AtomicBoolean(false); deserializer = fastGenericRecordDeserializersCache.computeIfAbsent( schemaKey, k -> { - status.set(true); - return new FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema); + fastDeserializerMissingInCache.set(true); + return new FastDeserializerWithAvroGenericImpl<>(writerSchema, readerSchema, modelData); }); - if (status.get()) { - CompletableFuture.supplyAsync(() -> buildGenericDeserializer(writerSchema, readerSchema), 
executor) + if (fastDeserializerMissingInCache.get()) { + CompletableFuture.supplyAsync(() -> buildGenericDeserializer(writerSchema, readerSchema, modelData), executor) .thenAccept(d -> fastGenericRecordDeserializersCache.put(schemaKey, d)); } } return deserializer; } + + /** + * @see #getFastSpecificSerializer(Schema, SpecificData) + */ + public FastSerializer getFastSpecificSerializer(Schema schema) { + return getFastSpecificSerializer(schema, null); + } /** * Generates if needed and returns specific-class aware avro {@link FastSerializer}. * * @param schema * {@link Schema} of data to write + * @param modelData + * Provides additional information not available in the schema, e.g. conversion classes * @return specific-class aware avro {@link FastSerializer} */ - public FastSerializer getFastSpecificSerializer(Schema schema) { + public FastSerializer getFastSpecificSerializer(Schema schema, SpecificData modelData) { String schemaKey = getSchemaKey(schema, schema); FastSerializer serializer = fastSpecificRecordSerializersCache.get(schemaKey); if (serializer == null) { - AtomicBoolean status = new AtomicBoolean(false); + AtomicBoolean fastSerializerMissingInCache = new AtomicBoolean(false); serializer = fastSpecificRecordSerializersCache.computeIfAbsent( schemaKey, k -> { - status.set(true); - return new FastSerializerWithAvroSpecificImpl<>(schema); + fastSerializerMissingInCache.set(true); + return new FastSerializerWithAvroSpecificImpl<>(schema, modelData); }); - if (status.get()) { - CompletableFuture.supplyAsync(() -> buildSpecificSerializer(schema), executor) + if (fastSerializerMissingInCache.get()) { + CompletableFuture.supplyAsync(() -> buildSpecificSerializer(schema, modelData), executor) .thenAccept(s -> fastSpecificRecordSerializersCache.put(schemaKey, s)); } } @@ -274,28 +303,37 @@ public FastSerializer getFastSpecificSerializer(Schema schema) { return serializer; } + /** + * @see #getFastGenericSerializer(Schema, GenericData) + */ + public 
FastSerializer getFastGenericSerializer(Schema schema) { + return getFastGenericSerializer(schema, null); + } + /** * Generates if needed and returns generic-class aware avro {@link FastSerializer}. * * @param schema * {@link Schema} of data to write + * @param modelData + * Passes additional information e.g. conversion classes not available in the schema * @return generic-class aware avro {@link FastSerializer} */ - public FastSerializer getFastGenericSerializer(Schema schema) { + public FastSerializer getFastGenericSerializer(Schema schema, GenericData modelData) { String schemaKey = getSchemaKey(schema, schema); FastSerializer serializer = fastGenericRecordSerializersCache.get(schemaKey); if (serializer == null) { - AtomicBoolean status = new AtomicBoolean(false); + AtomicBoolean fastSerializerMissingInCache = new AtomicBoolean(false); serializer = fastGenericRecordSerializersCache.computeIfAbsent( schemaKey, k -> { - status.set(true); - return new FastSerializerWithAvroGenericImpl<>(schema); + fastSerializerMissingInCache.set(true); + return new FastSerializerWithAvroGenericImpl<>(schema, modelData); }); - if (status.get()) { - CompletableFuture.supplyAsync(() -> buildGenericSerializer(schema), executor) + if (fastSerializerMissingInCache.get()) { + CompletableFuture.supplyAsync(() -> buildGenericSerializer(schema, modelData), executor) .thenAccept(s -> fastGenericRecordSerializersCache.put(schemaKey, s)); } } @@ -303,16 +341,31 @@ public FastSerializer getFastGenericSerializer(Schema schema) { return serializer; } + /** + * @see #getFastSpecificDeserializerAsync(Schema, Schema, SpecificData) + */ + public CompletableFuture> getFastSpecificDeserializerAsync(Schema writerSchema, Schema readerSchema) { + return getFastSpecificDeserializerAsync(writerSchema, readerSchema, null); + } + /** * Asynchronously generates if needed and returns specific-class aware avro {@link FastDeserializer}. 
* * @param writerSchema {@link Schema} of written data * @param readerSchema {@link Schema} intended to be used during deserialization + * @param modelData Passes additional information e.g. conversion classes not available in the schema * @return {@link CompletableFuture} which contains specific-class aware avro {@link FastDeserializer} */ - public CompletableFuture> getFastSpecificDeserializerAsync(Schema writerSchema, Schema readerSchema) { + public CompletableFuture> getFastSpecificDeserializerAsync(Schema writerSchema, Schema readerSchema, SpecificData modelData) { return getFastDeserializerAsync(writerSchema, readerSchema, fastSpecificRecordDeserializersCache, - () -> buildSpecificDeserializer(writerSchema, readerSchema)); + () -> buildSpecificDeserializer(writerSchema, readerSchema, modelData)); + } + + /** + * @see #getFastGenericDeserializerAsync(Schema, Schema, GenericData) + */ + public CompletableFuture> getFastGenericDeserializerAsync(Schema writerSchema, Schema readerSchema) { + return getFastGenericDeserializerAsync(writerSchema, readerSchema, null); } /** @@ -320,11 +373,12 @@ public CompletableFuture> getFastSpecificDeserializerAsync(S * * @param writerSchema {@link Schema} of written data * @param readerSchema {@link Schema} intended to be used during deserialization + * @param modelData Passes additional information e.g.
conversion classes not available in the schema * @return {@link CompletableFuture} which contains generic-class aware avro {@link FastDeserializer} */ - public CompletableFuture> getFastGenericDeserializerAsync(Schema writerSchema, Schema readerSchema) { + public CompletableFuture> getFastGenericDeserializerAsync(Schema writerSchema, Schema readerSchema, GenericData modelData) { return getFastDeserializerAsync(writerSchema, readerSchema, fastGenericRecordDeserializersCache, - () -> buildGenericDeserializer(writerSchema, readerSchema)); + () -> buildGenericDeserializer(writerSchema, readerSchema, modelData)); } private CompletableFuture> getFastDeserializerAsync(Schema writerSchema, Schema readerSchema, @@ -344,18 +398,26 @@ private static String getSchemaKey(Schema writerSchema, Schema readerSchema) { getSchemaFingerprint(readerSchema)); } + /** + * @see #buildFastSpecificDeserializer(Schema, Schema, SpecificData) + */ + public FastDeserializer buildFastSpecificDeserializer(Schema writerSchema, Schema readerSchema) { + return buildFastSpecificDeserializer(writerSchema, readerSchema, null); + } + /** * This function will generate a fast specific deserializer, and it will throw exception if anything wrong happens. * This function can be used to verify whether current {@link FastSerdeCache} could generate proper fast deserializer. * * @param writerSchema writer schema * @param readerSchema reader schema + * @param modelData Passes additional information e.g. 
conversion classes not available in the schema * @return a fast deserializer */ - public FastDeserializer buildFastSpecificDeserializer(Schema writerSchema, Schema readerSchema) { + public FastDeserializer buildFastSpecificDeserializer(Schema writerSchema, Schema readerSchema, SpecificData modelData) { FastSpecificDeserializerGenerator generator = new FastSpecificDeserializerGenerator<>(writerSchema, readerSchema, classesDir, classLoader, - compileClassPath.orElse(null)); + compileClassPath.orElse(null), modelData); FastDeserializer fastDeserializer = generator.generateDeserializer(); if (LOGGER.isDebugEnabled()) { @@ -377,11 +439,13 @@ public FastDeserializer buildFastSpecificDeserializer(Schema writerSchema, Sc * {@link SpecificDatumReader} if anything wrong happens. * @param writerSchema * @param readerSchema + * @param modelData + * Provides additional information not available in the schema, e.g. conversion classes * @return */ - private FastDeserializer buildSpecificDeserializer(Schema writerSchema, Schema readerSchema) { + private FastDeserializer buildSpecificDeserializer(Schema writerSchema, Schema readerSchema, SpecificData modelData) { try { - return buildFastSpecificDeserializer(writerSchema, readerSchema); + return buildFastSpecificDeserializer(writerSchema, readerSchema, modelData); } catch (FastDeserializerGeneratorException e) { LOGGER.warn("Deserializer generation exception when generating specific FastDeserializer for writer schema: " + "[\n{}\n] and reader schema: [\n{}\n]", writerSchema.toString(true), readerSchema.toString(true), e); @@ -399,18 +463,26 @@ public Object deserialize(Object reuse, Decoder d) throws IOException { }; } + /** + * @see #buildFastGenericDeserializer(Schema, Schema, GenericData) + */ + public FastDeserializer buildFastGenericDeserializer(Schema writerSchema, Schema readerSchema) { + return buildFastGenericDeserializer(writerSchema, readerSchema, null); + } + /** * This function will generate a fast generic deserializer, 
and it will throw exception if anything wrong happens. * This function can be used to verify whether current {@link FastSerdeCache} could generate proper fast deserializer. * * @param writerSchema writer schema * @param readerSchema reader schema + * @param modelData Provides additional information not available in the schema, e.g. conversion classes * @return a fast deserializer */ - public FastDeserializer buildFastGenericDeserializer(Schema writerSchema, Schema readerSchema) { + public FastDeserializer buildFastGenericDeserializer(Schema writerSchema, Schema readerSchema, GenericData modelData) { FastGenericDeserializerGenerator generator = new FastGenericDeserializerGenerator<>(writerSchema, readerSchema, classesDir, classLoader, - compileClassPath.orElse(null)); + compileClassPath.orElse(null), modelData); FastDeserializer fastDeserializer = generator.generateDeserializer(); @@ -434,11 +506,12 @@ public FastDeserializer buildFastGenericDeserializer(Schema writerSchema, Sch * * @param writerSchema * @param readerSchema + * @param modelData Provides additional information not available in the schema, e.g. 
conversion classes * @return */ - private FastDeserializer buildGenericDeserializer(Schema writerSchema, Schema readerSchema) { + private FastDeserializer buildGenericDeserializer(Schema writerSchema, Schema readerSchema, GenericData modelData) { try { - return buildFastGenericDeserializer(writerSchema, readerSchema); + return buildFastGenericDeserializer(writerSchema, readerSchema, modelData); } catch (FastDeserializerGeneratorException e) { LOGGER.warn("Deserializer generation exception when generating generic FastDeserializer for writer schema: [\n" + writerSchema.toString(true) + "\n] and reader schema:[\n" + readerSchema.toString(true) + "\n]", e); @@ -447,7 +520,7 @@ private FastDeserializer buildGenericDeserializer(Schema writerSchema, Schema } return new FastDeserializer() { - private DatumReader datumReader = new GenericDatumReader<>(writerSchema, readerSchema); + private DatumReader datumReader = new GenericDatumReader<>(writerSchema, readerSchema, modelData); @Override public Object deserialize(Object reuse, Decoder d) throws IOException { @@ -457,13 +530,17 @@ public Object deserialize(Object reuse, Decoder d) throws IOException { } public FastSerializer buildFastSpecificSerializer(Schema schema) { + return buildFastSpecificSerializer(schema, null); + } + + public FastSerializer buildFastSpecificSerializer(Schema schema, SpecificData modelData) { // Defensive code if (!Utils.isSupportedAvroVersionsForSerializer()) { throw new FastDeserializerGeneratorException("Specific FastSerializer is only supported in following Avro versions: " + Utils.getAvroVersionsSupportedForSerializer()); } FastSpecificSerializerGenerator generator = - new FastSpecificSerializerGenerator<>(schema, classesDir, classLoader, compileClassPath.orElse(null)); + new FastSpecificSerializerGenerator<>(schema, classesDir, classLoader, compileClassPath.orElse(null), modelData); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Generated classes dir: {} and generation of specific FastSerializer 
is done for schema of type: {}" + @@ -477,11 +554,11 @@ public FastSerializer buildFastSpecificSerializer(Schema schema) { return generator.generateSerializer(); } - private FastSerializer buildSpecificSerializer(Schema schema) { + private FastSerializer buildSpecificSerializer(Schema schema, SpecificData modelData) { if (Utils.isSupportedAvroVersionsForSerializer()) { // Only build fast specific serializer for supported Avro versions. try { - return buildFastSpecificSerializer(schema); + return buildFastSpecificSerializer(schema, modelData); } catch (FastDeserializerGeneratorException e) { LOGGER.warn("Serializer generation exception when generating specific FastSerializer for schema: [\n{}\n]", schema.toString(true), e); @@ -491,7 +568,8 @@ private FastSerializer buildSpecificSerializer(Schema schema) { } return new FastSerializer() { - private final DatumWriter datumWriter = new SpecificDatumWriter(schema); + private final DatumWriter datumWriter = AvroCompatibilityHelper.newSpecificDatumWriter(schema, + modelData != null ? 
modelData : SpecificData.get()); @Override public void serialize(Object data, Encoder e) throws IOException { @@ -501,13 +579,17 @@ public void serialize(Object data, Encoder e) throws IOException { } public FastSerializer buildFastGenericSerializer(Schema schema) { + return buildFastGenericSerializer(schema, null); + } + + public FastSerializer buildFastGenericSerializer(Schema schema, GenericData modelData) { // Defensive code if (!Utils.isSupportedAvroVersionsForSerializer()) { throw new FastDeserializerGeneratorException("Generic FastSerializer is only supported in following avro versions:" + Utils.getAvroVersionsSupportedForSerializer()); } FastGenericSerializerGenerator generator = - new FastGenericSerializerGenerator<>(schema, classesDir, classLoader, compileClassPath.orElse(null)); + new FastGenericSerializerGenerator<>(schema, classesDir, classLoader, compileClassPath.orElse(null), modelData); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Generated classes dir: {} and generation of generic FastSerializer is done for schema of type: {}" + @@ -521,11 +603,11 @@ public FastSerializer buildFastGenericSerializer(Schema schema) { return generator.generateSerializer(); } - private FastSerializer buildGenericSerializer(Schema schema) { + private FastSerializer buildGenericSerializer(Schema schema, GenericData modelData) { if (Utils.isSupportedAvroVersionsForSerializer()) { // Only build fast generic serializer for supported Avro versions. 
try { - return buildFastGenericSerializer(schema); + return buildFastGenericSerializer(schema, modelData); } catch (FastDeserializerGeneratorException e) { LOGGER.warn("Serializer generation exception when generating generic FastSerializer for schema: [\n{}\n]", schema.toString(true), e); @@ -535,7 +617,8 @@ private FastSerializer buildGenericSerializer(Schema schema) { } return new FastSerializer() { - private final DatumWriter datumWriter = new GenericDatumWriter(schema); + private final DatumWriter datumWriter = AvroCompatibilityHelper.newGenericDatumWriter(schema, + modelData != null ? modelData : GenericData.get()); @Override public void serialize(Object data, Encoder e) throws IOException { @@ -561,8 +644,8 @@ public Thread newThread(Runnable runnable) { public static class FastDeserializerWithAvroSpecificImpl implements FastDeserializer { private final SpecificDatumReader datumReader; - public FastDeserializerWithAvroSpecificImpl(Schema writerSchema, Schema readerSchema) { - this.datumReader = new ColdSpecificDatumReader<>(writerSchema, readerSchema); + public FastDeserializerWithAvroSpecificImpl(Schema writerSchema, Schema readerSchema, SpecificData modelData) { + this.datumReader = ColdSpecificDatumReader.of(writerSchema, readerSchema, modelData); } @Override @@ -574,8 +657,8 @@ public V deserialize(V reuse, Decoder d) throws IOException { public static class FastDeserializerWithAvroGenericImpl implements FastDeserializer { private final GenericDatumReader datumReader; - public FastDeserializerWithAvroGenericImpl(Schema writerSchema, Schema readerSchema) { - this.datumReader = new ColdGenericDatumReader<>(writerSchema, readerSchema); + public FastDeserializerWithAvroGenericImpl(Schema writerSchema, Schema readerSchema, GenericData modelData) { + this.datumReader = ColdGenericDatumReader.of(writerSchema, readerSchema, modelData); } @Override @@ -585,10 +668,15 @@ public V deserialize(V reuse, Decoder d) throws IOException { } public static class 
FastSerializerWithAvroSpecificImpl implements FastSerializer { - private final SpecificDatumWriter datumWriter; + private final DatumWriter datumWriter; public FastSerializerWithAvroSpecificImpl(Schema schema) { - this.datumWriter = new SpecificDatumWriter<>(schema); + this(schema, null); + } + + public FastSerializerWithAvroSpecificImpl(Schema schema, SpecificData modelData) { + this.datumWriter = AvroCompatibilityHelper.newSpecificDatumWriter(schema, + modelData != null ? modelData : SpecificData.get()); } @Override @@ -600,8 +688,9 @@ public void serialize(V data, Encoder e) throws IOException { public static class FastSerializerWithAvroGenericImpl implements FastSerializer { private final DatumWriter datumWriter; - public FastSerializerWithAvroGenericImpl(Schema schema) { - this.datumWriter = new GenericDatumWriter<>(schema); + public FastSerializerWithAvroGenericImpl(Schema schema, GenericData modelData) { + this.datumWriter = AvroCompatibilityHelper.newGenericDatumWriter(schema, + modelData != null ? 
modelData : GenericData.get()); } @Override diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java index 9be04d6ac..7ff49db98 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java @@ -7,8 +7,10 @@ import com.sun.codemodel.JConditional; import com.sun.codemodel.JExpr; import com.sun.codemodel.JExpression; +import com.sun.codemodel.JFieldRef; import com.sun.codemodel.JForEach; import com.sun.codemodel.JForLoop; +import com.sun.codemodel.JInvocation; import com.sun.codemodel.JMethod; import com.sun.codemodel.JMod; import com.sun.codemodel.JPackage; @@ -19,14 +21,17 @@ import java.util.List; import java.util.Map; +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.io.Encoder; +import org.apache.avro.specific.SpecificData; import org.apache.avro.util.Utf8; import org.apache.commons.lang3.StringUtils; -public class FastSerializerGenerator extends FastSerdeBase { +public class FastSerializerGenerator extends FastSerdeBase { private static int FIELDS_PER_RECORD_SERIALIZATION_METHOD = 20; @@ -44,10 +49,9 @@ static void setFieldsPerRecordSerializationMethod(int fieldCount) { FIELDS_PER_RECORD_SERIALIZATION_METHOD = fieldCount; } - public FastSerializerGenerator(boolean useGenericTypes, Schema schema, File destination, ClassLoader classLoader, - String compileClassPath) { - super("serialization", useGenericTypes, CharSequence.class, destination, classLoader, compileClassPath, true); + String compileClassPath, U modelData) { + super("serialization", useGenericTypes, CharSequence.class, destination, classLoader, compileClassPath, modelData, true); 
this.schema = schema; } @@ -63,6 +67,7 @@ public FastSerializer generateSerializer() { try { generatedClass = classPackage._class(className); + injectConversionClasses(); final JMethod serializeMethod = generatedClass.method(JMod.PUBLIC, void.class, "serialize"); final JVar serializeMethodParam; @@ -90,6 +95,7 @@ public FastSerializer generateSerializer() { @SuppressWarnings("unchecked") final Class> clazz = compileClass(className, schemaAssistant.getUsedFullyQualifiedClassNameSet()); + return clazz.getConstructor().newInstance(); } catch (JClassAlreadyExistsException e) { throw new FastSerdeGeneratorException("Class: " + className + " already exists"); @@ -150,7 +156,7 @@ private void processRecord(final Schema recordSchema, JExpression recordExpr, fi JBlock popMethodBody = methodBody; for (Schema.Field field : recordSchema.getFields()) { - /** + /* * We roll the serialization method for very large records, the initial fields are kept in the outer * method as original to maintain performance for smaller records */ @@ -167,15 +173,14 @@ private void processRecord(final Schema recordSchema, JExpression recordExpr, fi } Schema fieldSchema = field.schema(); + JInvocation fieldValueGetter = recordExpr.invoke("get").arg(JExpr.lit(field.pos())); + if (SchemaAssistant.isComplexType(fieldSchema)) { JClass fieldClass = schemaAssistant.classFromSchema(fieldSchema); - JVar containerVar = declareValueVar(field.name(), fieldSchema, popMethodBody); - JExpression valueExpression = JExpr.invoke(recordExpr, "get").arg(JExpr.lit(field.pos())); - containerVar.init(JExpr.cast(fieldClass, valueExpression)); - + JVar containerVar = popMethodBody.decl(fieldClass, getUniqueName(field.name()), JExpr.cast(fieldClass, fieldValueGetter)); processComplexType(fieldSchema, containerVar, popMethodBody); } else { - processSimpleType(fieldSchema, recordExpr.invoke("get").arg(JExpr.lit(field.pos())), popMethodBody); + processSimpleType(fieldSchema, fieldValueGetter, popMethodBody); } } } @@ -192,11 
+197,14 @@ private void processArray(final Schema arraySchema, JExpression arrayExpr, JBloc else1.invoke(JExpr.direct(ENCODER), "setItemCount").arg(JExpr.invoke(arrayExpr, "size")); if (SchemaAssistant.isPrimitive(arraySchema.getElementType())) { + // Trick added to support logical types, e.g. (List instanceof PrimitiveIntList) - can't compile + JVar arrayVar = else1.decl(codeModel.ref(Object.class), getUniqueName("array"), arrayExpr); JClass primitiveListInterface = schemaAssistant.classFromSchema(arraySchema, true, false, true); - final JExpression primitiveListCondition = arrayExpr._instanceof(primitiveListInterface); + final JExpression primitiveListCondition = arrayVar._instanceof(primitiveListInterface); + ifCodeGen(else1, primitiveListCondition, then2 -> { - final JVar primitiveList = declareValueVar("primitiveList", arraySchema, then2, true, false, true); - then2.assign(primitiveList, JExpr.cast(primitiveListInterface, arrayExpr)); + final JVar primitiveList = declareValueVar("primitiveList", arraySchema, then2, true, false, true) + .init(JExpr.cast(primitiveListInterface, arrayVar)); processArrayElementLoop(arraySchema, arrayClass, primitiveList, then2, "getPrimitive"); }, else2 -> { processArrayElementLoop(arraySchema, arrayClass, arrayExpr, else2, "get"); @@ -290,7 +298,7 @@ private Integer getIndexNamedForUnion(Schema unionSchema, Schema schema) { throw new RuntimeException("Unknown schema: " + schema + " in union schema: " + unionSchema); } - private void processUnion(final Schema unionSchema, JExpression unionExpr, JBlock body) { + private void processUnion(final Schema unionSchema, final JExpression unionExpr, final JBlock body) { JConditional ifBlock = null; for (Schema schemaOption : unionSchema.getTypes()) { @@ -320,21 +328,31 @@ private void processUnion(final Schema unionSchema, JExpression unionExpr, JBloc JClass optionClass = schemaAssistant.classFromSchema(schemaOption); JClass rawOptionClass = schemaAssistant.classFromSchema(schemaOption, 
true, true); + JClass optionLogicalTypeClass = logicalTypeEnabled(schemaOption) + ? codeModel.ref(((Conversion) schemaAssistant.getConversion(schemaOption.getLogicalType())).getConvertedType()) + : null; + JExpression condition; /* - * In Avro-1.4, neither GenericEnumSymbol or GenericFixed has associated schema, so we don't expect to see + * In Avro-1.4, neither GenericEnumSymbol nor GenericFixed has associated schema, so we don't expect to see * two or more Enum types or two or more Fixed types in the same Union in generic mode since the writer couldn't - * differentiate the Enum types or the Fixed types, but those scenarios are well supported in Avro-1.7 or above since + * differentiate the Enum types or the Fixed types, but those scenarios are well-supported in Avro-1.7 or above since * both of them have associated 'Schema', so the serializer could recognize the right type * by checking the associated 'Schema' in generic mode. */ if (useGenericTypes && SchemaAssistant.isNamedTypeWithSchema(schemaOption)) { - condition = unionExpr._instanceof(rawOptionClass).cand(JExpr.invoke(JExpr.lit(AvroCompatibilityHelper.getSchemaFullName(schemaOption)), "equals") - /* TODO: Replace by {@link AvroCompatibilityHelper#getSchemaFullName} */ + if (optionLogicalTypeClass != null && schemaOption.getType() == Schema.Type.FIXED) { + condition = unionExpr._instanceof(optionLogicalTypeClass); + } else { + String schemaFullName = AvroCompatibilityHelper.getSchemaFullName(schemaOption); + condition = unionExpr._instanceof(rawOptionClass).cand(JExpr.invoke(JExpr.lit(schemaFullName), "equals") .arg(JExpr.invoke(JExpr.cast(optionClass, unionExpr), "getSchema").invoke("getFullName"))); + } } else { if (unionExpr instanceof JVar && ((JVar)unionExpr).type().equals(rawOptionClass)) { condition = null; + } else if (optionLogicalTypeClass != null) { + condition = unionExpr._instanceof(optionLogicalTypeClass); } else { condition = unionExpr._instanceof(rawOptionClass); } @@ -346,12 +364,14 @@ 
private void processUnion(final Schema unionSchema, JExpression unionExpr, JBloc ifBlock = ifBlock != null ? ifBlock._elseif(condition) : body._if(condition); unionTypeProcessingBlock = ifBlock._then(); } + unionTypeProcessingBlock.invoke(JExpr.direct(ENCODER), "writeIndex") .arg(JExpr.lit(getIndexNamedForUnion(unionSchema, schemaOption))); if (schemaOption.getType().equals(Schema.Type.UNION) || schemaOption.getType().equals(Schema.Type.NULL)) { throw new FastSerdeGeneratorException("Incorrect union subschema processing: " + schemaOption); } + if (SchemaAssistant.isComplexType(schemaOption)) { processComplexType(schemaOption, JExpr.cast(optionClass, unionExpr), unionTypeProcessingBlock); } else { @@ -361,9 +381,17 @@ private void processUnion(final Schema unionSchema, JExpression unionExpr, JBloc } private void processFixed(Schema fixedSchema, JExpression fixedValueExpression, JBlock body) { - JClass fixedClass = schemaAssistant.classFromSchema(fixedSchema); + JClass fixedClass = schemaAssistant.classFromSchema(fixedSchema, true, false, false, false); + JExpression fixedValueToWrite; + + if (schemaAssistant.logicalTypeEnabled(fixedSchema)) { + fixedValueToWrite = generateConversionToRawType(fixedSchema, fixedValueExpression, body); + } else { + fixedValueToWrite = fixedValueExpression; + } + body.invoke(JExpr.direct(ENCODER), "writeFixed") - .arg(JExpr.invoke(JExpr.cast(fixedClass, fixedValueExpression), "bytes")); + .arg(JExpr.invoke(JExpr.cast(fixedClass, fixedValueToWrite), "bytes")); } private void processEnum(Schema enumSchema, JExpression enumValueExpression, JBlock body) { @@ -385,11 +413,11 @@ private void processEnum(Schema enumSchema, JExpression enumValueExpression, JBl * and maintain a mapping between the schema id and EnumSchema JVar for future use. 
*/ schemaExpression = enumSchemaVarMap.computeIfAbsent(Utils.getSchemaFingerprint(enumSchema), fingerprint -> - generatedClass.field( - JMod.PRIVATE | JMod.FINAL, - Schema.class, - getUniqueName(enumSchema.getName() + "EnumSchema"), - codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString())) + generatedClass.field( + JMod.PRIVATE | JMod.FINAL, + Schema.class, + getUniqueName(enumSchema.getName() + "EnumSchema"), + codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString())) ); enumValueToStringExpr = enumValueCasted.invoke("toString"); } else { @@ -429,14 +457,21 @@ private void processString(final Schema primitiveSchema, JExpression primitiveVa } private void processPrimitive(final Schema primitiveSchema, JExpression primitiveValueExpression, JBlock body, boolean cast) { + JClass primitiveClass = schemaAssistant.classFromSchema(primitiveSchema, true, false, false, false); + String writeFunction; - JClass primitiveClass = schemaAssistant.classFromSchema(primitiveSchema); - JExpression writeFunctionArgument = cast - ? JExpr.cast(primitiveClass, primitiveValueExpression) - : primitiveValueExpression; + JExpression writeFunctionArgument; + + if (logicalTypeEnabled(primitiveSchema)) { + JVar convertedValueVar = generateConversionToRawType(primitiveSchema, primitiveValueExpression, body); + writeFunctionArgument = JExpr.cast(primitiveClass, convertedValueVar); + } else { + writeFunctionArgument = cast ? 
JExpr.cast(primitiveClass, primitiveValueExpression) : primitiveValueExpression; + } + switch (primitiveSchema.getType()) { case STRING: - processString(primitiveSchema, primitiveValueExpression, body); + processString(primitiveSchema, writeFunctionArgument, body); return; case BYTES: writeFunction = "writeBytes"; @@ -464,6 +499,20 @@ private void processPrimitive(final Schema primitiveSchema, JExpression primitiv body.invoke(JExpr.direct(ENCODER), writeFunction).arg(writeFunctionArgument); } + private JVar generateConversionToRawType(Schema schemaWithLogicalType, JExpression initialExpression, JBlock body) { + JVar convertedValue = body.decl(codeModel.ref(Object.class), getUniqueName("convertedValue"), initialExpression); + JFieldRef schemaFieldRef = injectLogicalTypeSchema(schemaWithLogicalType); + + body.assign(convertedValue, codeModel.ref(Conversions.class) + .staticInvoke("convertToRawType") + .arg(convertedValue) + .arg(schemaFieldRef) + .arg(schemaFieldRef.invoke("getLogicalType")) + .arg(getConversionRef(schemaWithLogicalType.getLogicalType()))); + + return convertedValue; + } + private boolean methodAlreadyDefined(final Schema schema) { return !Schema.Type.RECORD.equals(schema.getType()) || serializeMethodMap.containsKey(AvroCompatibilityHelper.getSchemaFullName(schema)); } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumReader.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumReader.java index 7012f0490..d621181f0 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumReader.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumReader.java @@ -2,12 +2,15 @@ import java.util.concurrent.CompletableFuture; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.specific.SpecificData; /** * {@link org.apache.avro.specific.SpecificDatumReader} 
backed by generated deserialization code. */ public class FastSpecificDatumReader extends FastGenericDatumReader { + public FastSpecificDatumReader(Schema schema) { super(schema, schema); } @@ -16,6 +19,10 @@ public FastSpecificDatumReader(Schema writerSchema, Schema readerSchema) { super(writerSchema, readerSchema, FastSerdeCache.getDefaultInstance()); } + public FastSpecificDatumReader(Schema writerSchema, Schema readerSchema, SpecificData modelData) { + super(writerSchema, readerSchema, FastSerdeCache.getDefaultInstance(), modelData); + } + public FastSpecificDatumReader(Schema schema, FastSerdeCache cache) { super(schema, schema, cache); } @@ -24,21 +31,26 @@ public FastSpecificDatumReader(Schema writerSchema, Schema readerSchema, FastSer super(writerSchema, readerSchema, cache); } + public FastSpecificDatumReader(Schema writerSchema, Schema readerSchema, FastSerdeCache cache, SpecificData modelData) { + super(writerSchema, readerSchema, cache, modelData); + } + + @SuppressWarnings("unchecked") @Override protected FastDeserializer getFastDeserializerFromCache(FastSerdeCache fastSerdeCache, Schema writeSchema, - Schema readerSchema) { - return (FastDeserializer) fastSerdeCache.getFastSpecificDeserializer(writeSchema, readerSchema); + Schema readerSchema, GenericData specificData) { + return (FastDeserializer) fastSerdeCache.getFastSpecificDeserializer(writeSchema, readerSchema, (SpecificData) specificData); } @Override protected CompletableFuture> getFastDeserializer(FastSerdeCache fastSerdeCache, - Schema writerSchema, Schema readerSchema) { - return fastSerdeCache.getFastSpecificDeserializerAsync(writerSchema, readerSchema) + Schema writerSchema, Schema readerSchema, GenericData specificData) { + return fastSerdeCache.getFastSpecificDeserializerAsync(writerSchema, readerSchema, (SpecificData) specificData) .thenApply(d -> (FastDeserializer) d); } @Override - protected FastDeserializer getRegularAvroImpl(Schema writerSchema, Schema readerSchema) { - return new 
FastSerdeCache.FastDeserializerWithAvroSpecificImpl<>(writerSchema, readerSchema); + protected FastDeserializer getRegularAvroImpl(Schema writerSchema, Schema readerSchema, GenericData specificData) { + return new FastSerdeCache.FastDeserializerWithAvroSpecificImpl<>(writerSchema, readerSchema, (SpecificData) specificData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumWriter.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumWriter.java index 89c2b2a4d..1bab37e05 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumWriter.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDatumWriter.java @@ -1,27 +1,39 @@ package com.linkedin.avro.fastserde; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.specific.SpecificData; /** * {@link org.apache.avro.specific.SpecificDatumWriter} backed by generated serialization code. 
*/ public class FastSpecificDatumWriter extends FastGenericDatumWriter { + public FastSpecificDatumWriter(Schema schema) { super(schema); } + public FastSpecificDatumWriter(Schema schema, SpecificData modelData) { + super(schema, modelData); + } + public FastSpecificDatumWriter(Schema schema, FastSerdeCache cache) { super(schema, cache); } + public FastSpecificDatumWriter(Schema schema, SpecificData modelData, FastSerdeCache cache) { + super(schema, modelData, cache); + } + + @SuppressWarnings("unchecked") @Override - protected FastSerializer getFastSerializerFromCache(FastSerdeCache fastSerdeCache, Schema schema) { - return (FastSerializer) fastSerdeCache.getFastSpecificSerializer(schema); + protected FastSerializer getFastSerializerFromCache(FastSerdeCache fastSerdeCache, Schema schema, GenericData specificData) { + return (FastSerializer) fastSerdeCache.getFastSpecificSerializer(schema, (SpecificData) specificData); } @Override - protected FastSerializer getRegularAvroImpl(Schema schema) { - return new FastSerdeCache.FastSerializerWithAvroSpecificImpl<>(schema); + protected FastSerializer getRegularAvroImpl(Schema schema, GenericData specificData) { + return new FastSerdeCache.FastSerializerWithAvroSpecificImpl<>(schema, (SpecificData) specificData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGenerator.java index 13d9bf0df..94aa23b48 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificDeserializerGenerator.java @@ -2,12 +2,13 @@ import java.io.File; import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificData; -public final class FastSpecificDeserializerGenerator extends FastDeserializerGenerator { +public final class 
FastSpecificDeserializerGenerator extends FastDeserializerGenerator { - FastSpecificDeserializerGenerator(Schema writer, Schema reader, File destination, ClassLoader classLoader, - String compileClassPath) { - super(false, writer, reader, destination, classLoader, compileClassPath); + public FastSpecificDeserializerGenerator(Schema writer, Schema reader, File destination, ClassLoader classLoader, + String compileClassPath, SpecificData modelData) { + super(false, writer, reader, destination, classLoader, compileClassPath, modelData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificSerializerGenerator.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificSerializerGenerator.java index 09ff98233..78e778296 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificSerializerGenerator.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSpecificSerializerGenerator.java @@ -2,12 +2,13 @@ import java.io.File; import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificData; -public class FastSpecificSerializerGenerator extends FastSerializerGenerator { +public class FastSpecificSerializerGenerator extends FastSerializerGenerator { public FastSpecificSerializerGenerator(Schema schema, File destination, ClassLoader classLoader, - String compileClassPath) { - super(false, schema, destination, classLoader, compileClassPath); + String compileClassPath, SpecificData modelData) { + super(false, schema, destination, classLoader, compileClassPath, modelData); } } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java index 520bf1dc9..41c1b2e3d 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java +++ 
b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java @@ -12,6 +12,8 @@ import com.linkedin.avro.fastserde.primitive.PrimitiveLongArrayList; import com.linkedin.avroutil1.Enums; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelperCommon; +import com.linkedin.avroutil1.compatibility.AvroVersion; import com.sun.codemodel.JBlock; import com.sun.codemodel.JClass; import com.sun.codemodel.JCodeModel; @@ -36,7 +38,13 @@ import java.util.TreeSet; import java.util.function.BiConsumer; import java.util.function.Supplier; + +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; +import org.apache.avro.data.TimeConversions; import org.apache.avro.generic.GenericContainer; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; @@ -45,7 +53,7 @@ import org.apache.avro.util.Utf8; -public class SchemaAssistant { +public class SchemaAssistant { // The following constants are not available in avro-1.4 public static final String CLASS_PROP = "java-class"; public static final String KEY_CLASS_PROP = "java-key-class"; @@ -64,6 +72,9 @@ public class SchemaAssistant { * the libraries, which define those classes, and put them in the compile classpath. */ private final Set fullyQualifiedClassNameSet = new HashSet<>(); + + private final T modelData; + /** * Whether this schema assistant is for serializer generation or not. 
* The reason to differentiate serializer from de-serializer is that for some Avro features, such as java string property, @@ -71,7 +82,8 @@ public class SchemaAssistant { */ private final boolean isForSerializer; - public SchemaAssistant(JCodeModel codeModel, boolean useGenericTypes, Class defaultStringClass, boolean isForSerializer) { + public SchemaAssistant(JCodeModel codeModel, boolean useGenericTypes, Class defaultStringClass, + T modelData, boolean isForSerializer) { this.codeModel = codeModel; this.useGenericTypes = useGenericTypes; /** @@ -79,6 +91,7 @@ public SchemaAssistant(JCodeModel codeModel, boolean useGenericTypes, Class defa */ this.exceptionsFromStringable = new TreeSet<>(Comparator.comparing(Class::getCanonicalName)); this.defaultStringType = codeModel.ref(defaultStringClass); + this.modelData = modelData; this.isForSerializer = isForSerializer; } @@ -277,8 +290,18 @@ public JClass classFromSchema(Schema schema, boolean abstractType, boolean rawTy return classFromSchema(schema, abstractType, rawType, false); } - /* Note that settings abstractType and rawType are not passed to subcalls */ public JClass classFromSchema(Schema schema, boolean abstractType, boolean rawType, boolean primitiveList) { + return classFromSchema(schema, abstractType, rawType, primitiveList, true); + } + + /* Note that settings abstractType and rawType are not passed to subcalls */ + public JClass classFromSchema(Schema schema, boolean abstractType, boolean rawType, boolean primitiveList, + boolean allowLogicalTypes) { + if (allowLogicalTypes && logicalTypeEnabled(schema)) { + Class logicalTypeClass = ((Conversion) getConversion(schema.getLogicalType())).getConvertedType(); + return codeModel.ref(logicalTypeClass); + } + JClass outputClass; switch (schema.getType()) { @@ -344,8 +367,10 @@ public JClass classFromSchema(Schema schema, boolean abstractType, boolean rawTy useGenericTypes ? 
codeModel.ref(GenericEnumSymbol.class) : codeModel.ref(AvroCompatibilityHelper.getSchemaFullName(schema)); break; case FIXED: - if (useGenericTypes) { - outputClass = codeModel.ref(abstractType ? GenericFixed.class : GenericData.Fixed.class); + if (abstractType) { + outputClass = codeModel.ref(GenericFixed.class); + } else if (useGenericTypes) { + outputClass = codeModel.ref(GenericData.Fixed.class); } else { outputClass = codeModel.ref(AvroCompatibilityHelper.getSchemaFullName(schema)); } @@ -439,6 +464,61 @@ public JClass findStringClass(Schema schema) { return outputClass; } + boolean logicalTypeEnabled(Schema schema) { +// return modelData != null && schema != null && Utils.isLogicalTypeSupported() && schema.getLogicalType() != null; + // TODO uuid in 1.10 no supported ?? + + if (modelData != null && schema != null && Utils.isLogicalTypeSupported()) { + if (schema.getLogicalType() == LogicalTypes.uuid()) { + return AvroCompatibilityHelperCommon.getRuntimeAvroVersion() == AvroVersion.AVRO_1_11; + } else { + return schema.getLogicalType() != null; + } + } else { + return false; + } + } + + Object getConversion(LogicalType logicalType) { // returns Objects so that this class can be loaded with older Avro versions + Conversion conversion = modelData.getConversionFor(logicalType); + if (conversion != null) { + return conversion; + } else { + return getDefaultConversion(logicalType); + } + } + + private Object getDefaultConversion(LogicalType logicalType) { + // used as a fallback when no conversion is provided by modelData + if (logicalType == null) { + throw new NullPointerException("Expected not-null logicalType"); + } + + switch (logicalType.getName()) { + case "decimal": + return new Conversions.DecimalConversion(); + case "uuid": + return new Conversions.UUIDConversion(); + case "date": + return new TimeConversions.DateConversion(); + case "time-millis": + return new TimeConversions.TimeMillisConversion(); + case "time-micros": + return new 
TimeConversions.TimeMicrosConversion(); + case "timestamp-millis": + return new TimeConversions.TimestampMillisConversion(); + case "timestamp-micros": + return new TimeConversions.TimestampMicrosConversion(); + case "local-timestamp-millis": + return new TimeConversions.LocalTimestampMillisConversion(); + case "local-timestamp-micros": + return new TimeConversions.LocalTimestampMicrosConversion(); + case "duration": // TODO no default implementation? + default: + throw new UnsupportedOperationException("LogicalType " + logicalType.getName() + " is not supported"); + } + } + protected JClass defaultStringType() { return defaultStringType; } diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java index 36666be91..443461a28 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java +++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java @@ -1,6 +1,7 @@ package com.linkedin.avro.fastserde; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; +import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelperCommon; import com.linkedin.avroutil1.compatibility.AvroVersion; import com.linkedin.avroutil1.compatibility.AvscGenerationConfig; import java.io.BufferedReader; @@ -10,11 +11,22 @@ import java.security.CodeSource; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.StringJoiner; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.stream.Collectors; + +import javax.lang.model.SourceVersion; + import org.apache.avro.Schema; +import org.apache.commons.lang3.StringUtils; public class Utils { 
@@ -71,6 +83,13 @@ public static boolean isSupportedAvroVersionsForSerializer() { return AVRO_VERSIONS_SUPPORTED_FOR_SERIALIZER.contains(AvroCompatibilityHelper.getRuntimeAvroVersion()); } + public static boolean isLogicalTypeSupported() { + // Formally Avro_1.8 supports LogicalTypes however there are significant changes compared to versions >=1.9 + // To see rationale simply compare imports in org.apache.avro.data.TimeConversions class between 1.8 and 1.9+ + // Basically 1.8 uses joda.time but 1.9+ uses java.time + return AvroCompatibilityHelperCommon.getRuntimeAvroVersion().laterThan(AvroVersion.AVRO_1_8); + } + public static boolean isWindows() { return IS_WINDOWS; } @@ -225,4 +244,19 @@ public static String inferCompileDependencies(String existingCompileClasspath, S return pathJoiner.toString(); } } + + public static String toValidJavaIdentifier(String javaIdentifier) { + if (StringUtils.isBlank(javaIdentifier)) { + throw new IllegalArgumentException("Expected not-blank identifier!"); + } + + javaIdentifier = StringUtils.deleteWhitespace(javaIdentifier) + .replaceAll("\\W+", "_"); + + if (!SourceVersion.isIdentifier(javaIdentifier) || SourceVersion.isKeyword(javaIdentifier)) { + javaIdentifier = "a_" + javaIdentifier; + } + + return javaIdentifier; + } } diff --git a/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdGenericDatumReader.java b/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdGenericDatumReader.java index 2ecb60769..a71e1299c 100644 --- a/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdGenericDatumReader.java +++ b/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdGenericDatumReader.java @@ -2,6 +2,8 @@ import org.apache.avro.Schema; +import com.linkedin.avro.fastserde.Utils; + /** * A light-weight extension of {@link GenericDatumReader} which merely ensures that the types of the @@ -10,10 +12,23 @@ * This class needs to be in the org.apache.avro.generic package in order to 
access protected methods. */ public class ColdGenericDatumReader extends GenericDatumReader implements ColdDatumReaderMixIn { + + public ColdGenericDatumReader(Schema writerSchema, Schema readerSchema, GenericData modelData) { + super(writerSchema, readerSchema, modelData != null ? modelData : GenericData.get()); + } + public ColdGenericDatumReader(Schema writerSchema, Schema readerSchema) { super(writerSchema, readerSchema); } + public static ColdGenericDatumReader of(Schema writerSchema, Schema readerSchema, GenericData modelData) { + if (Utils.isLogicalTypeSupported()) { + return new ColdGenericDatumReader<>(writerSchema, readerSchema, modelData); + } else { + return new ColdGenericDatumReader<>(writerSchema, readerSchema); + } + } + @Override protected Object newArray(Object old, int size, Schema schema) { return newArray(old, size, schema, super::newArray); diff --git a/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdSpecificDatumReader.java b/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdSpecificDatumReader.java index 008b97eb7..8f9fd623b 100644 --- a/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdSpecificDatumReader.java +++ b/fastserde/avro-fastserde/src/main/java/org/apache/avro/generic/ColdSpecificDatumReader.java @@ -1,8 +1,11 @@ package org.apache.avro.generic; import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificData; import org.apache.avro.specific.SpecificDatumReader; +import com.linkedin.avro.fastserde.Utils; + /** * A light-weight extension of {@link SpecificDatumReader} which merely ensures that the types of @@ -11,10 +14,23 @@ * This class needs to be in the org.apache.avro.generic package in order to access protected methods. 
*/ public class ColdSpecificDatumReader extends SpecificDatumReader implements ColdDatumReaderMixIn { + + public ColdSpecificDatumReader(Schema writerSchema, Schema readerSchema, SpecificData modelData) { + super(writerSchema, readerSchema, modelData != null ? modelData : SpecificData.get()); + } + public ColdSpecificDatumReader(Schema writerSchema, Schema readerSchema) { super(writerSchema, readerSchema); } + public static ColdSpecificDatumReader of(Schema writerSchema, Schema readerSchema, SpecificData modelData) { + if (Utils.isLogicalTypeSupported()) { + return new ColdSpecificDatumReader<>(writerSchema, readerSchema, modelData); + } else { + return new ColdSpecificDatumReader<>(writerSchema, readerSchema); + } + } + @Override protected Object newArray(Object old, int size, Schema schema) { return newArray(old, size, schema, super::newArray); diff --git a/helper/helper-common/src/main/java/com/linkedin/avroutil1/compatibility/AvroAdapter.java b/helper/helper-common/src/main/java/com/linkedin/avroutil1/compatibility/AvroAdapter.java index ea31c238a..47b7aa159 100644 --- a/helper/helper-common/src/main/java/com/linkedin/avroutil1/compatibility/AvroAdapter.java +++ b/helper/helper-common/src/main/java/com/linkedin/avroutil1/compatibility/AvroAdapter.java @@ -78,6 +78,10 @@ public interface AvroAdapter { SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer, Class readerClass); + DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData); + + DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData); + DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData); DatumReader newSpecificDatumReader(Schema writer, Schema reader, SpecificData specificData); diff --git a/helper/helper/src/main/java/com/linkedin/avroutil1/compatibility/AvroCompatibilityHelper.java b/helper/helper/src/main/java/com/linkedin/avroutil1/compatibility/AvroCompatibilityHelper.java index 54a604f83..49d5b2b70 100644 --- 
a/helper/helper/src/main/java/com/linkedin/avroutil1/compatibility/AvroCompatibilityHelper.java +++ b/helper/helper/src/main/java/com/linkedin/avroutil1/compatibility/AvroCompatibilityHelper.java @@ -352,6 +352,16 @@ public static SpecificDatumReader newAliasAwareSpecificDatumReader(Schema return ADAPTER.newAliasAwareSpecificDatumReader(writerSchema, readerClass); } + public static DatumWriter newGenericDatumWriter(Schema schema, GenericData genericData) { + assertAvroAvailable(); + return ADAPTER.newGenericDatumWriter(schema, genericData); + } + + public static DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + assertAvroAvailable(); + return ADAPTER.newGenericDatumReader(writer, reader, genericData); + } + public static DatumWriter newSpecificDatumWriter(Schema schema, SpecificData specificData) { assertAvroAvailable(); return ADAPTER.newSpecificDatumWriter(schema, specificData); diff --git a/helper/impls/helper-impl-110/src/main/java/com/linkedin/avroutil1/compatibility/avro110/Avro110Adapter.java b/helper/impls/helper-impl-110/src/main/java/com/linkedin/avroutil1/compatibility/avro110/Avro110Adapter.java index bffc9d501..d6ba6e7ba 100644 --- a/helper/impls/helper-impl-110/src/main/java/com/linkedin/avroutil1/compatibility/avro110/Avro110Adapter.java +++ b/helper/impls/helper-impl-110/src/main/java/com/linkedin/avroutil1/compatibility/avro110/Avro110Adapter.java @@ -36,6 +36,8 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.Avro110BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -228,6 +230,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public 
DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriter<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReader<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriter<>(writer, specificData); diff --git a/helper/impls/helper-impl-111/src/main/java/com/linkedin/avroutil1/compatibility/avro111/Avro111Adapter.java b/helper/impls/helper-impl-111/src/main/java/com/linkedin/avroutil1/compatibility/avro111/Avro111Adapter.java index d5947c792..f77b0f9d6 100644 --- a/helper/impls/helper-impl-111/src/main/java/com/linkedin/avroutil1/compatibility/avro111/Avro111Adapter.java +++ b/helper/impls/helper-impl-111/src/main/java/com/linkedin/avroutil1/compatibility/avro111/Avro111Adapter.java @@ -36,6 +36,8 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.Avro111BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -228,6 +230,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriter<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReader<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriter<>(writer, 
specificData); diff --git a/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/Avro14Adapter.java b/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/Avro14Adapter.java index 69d296837..31014302a 100644 --- a/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/Avro14Adapter.java +++ b/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/Avro14Adapter.java @@ -25,6 +25,7 @@ import com.linkedin.avroutil1.compatibility.StringRepresentation; import com.linkedin.avroutil1.compatibility.avro14.backports.Avro14DefaultValuesCache; import com.linkedin.avroutil1.compatibility.avro14.backports.Avro18BufferedBinaryEncoder; +import com.linkedin.avroutil1.compatibility.avro14.backports.GenericDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro14.backports.SpecificDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro14.codec.AliasAwareSpecificDatumReader; import com.linkedin.avroutil1.compatibility.avro14.codec.BoundedMemoryDecoder; @@ -40,6 +41,7 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.io.Avro14BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -217,6 +219,17 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriterExt<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + // genericData not supported here + return new GenericDatumReader<>(writer, reader); + } + @Override public DatumWriter 
newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriterExt<>(writer, specificData); diff --git a/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/backports/GenericDatumWriterExt.java b/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/backports/GenericDatumWriterExt.java new file mode 100644 index 000000000..9664296b7 --- /dev/null +++ b/helper/impls/helper-impl-14/src/main/java/com/linkedin/avroutil1/compatibility/avro14/backports/GenericDatumWriterExt.java @@ -0,0 +1,24 @@ +/* + * Copyright 2023 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.compatibility.avro14.backports; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + + +/** + * this class allows constructing a {@link GenericDatumWriter} with + * a specified {@link GenericData} instance under avro 1.4 + * @param + */ +public class GenericDatumWriterExt extends GenericDatumWriter { + + public GenericDatumWriterExt(Schema root, GenericData genericData) { + super(root, genericData); + } +} diff --git a/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/Avro15Adapter.java b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/Avro15Adapter.java index 551f152fd..cab14dec5 100644 --- a/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/Avro15Adapter.java +++ b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/Avro15Adapter.java @@ -26,6 +26,8 @@ import com.linkedin.avroutil1.compatibility.StringPropertyUtils; import com.linkedin.avroutil1.compatibility.StringRepresentation; import 
com.linkedin.avroutil1.compatibility.avro15.backports.Avro15DefaultValuesCache; +import com.linkedin.avroutil1.compatibility.avro15.backports.GenericDatumReaderExt; +import com.linkedin.avroutil1.compatibility.avro15.backports.GenericDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro15.backports.SpecificDatumReaderExt; import com.linkedin.avroutil1.compatibility.avro15.backports.SpecificDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro15.codec.AliasAwareSpecificDatumReader; @@ -227,6 +229,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriterExt<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReaderExt<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriterExt<>(writer, specificData); diff --git a/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumReaderExt.java b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumReaderExt.java new file mode 100644 index 000000000..cf6b394ba --- /dev/null +++ b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumReaderExt.java @@ -0,0 +1,24 @@ +/* + * Copyright 2023 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +package com.linkedin.avroutil1.compatibility.avro15.backports; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; + + +/** + * this class allows constructing a {@link GenericDatumReader} with + * a specified {@link GenericData} instance under avro 1.5 + * @param + */ +public class GenericDatumReaderExt extends GenericDatumReader { + + public GenericDatumReaderExt(Schema writer, Schema reader, GenericData genericData) { + super(writer, reader, genericData); + } +} diff --git a/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumWriterExt.java b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumWriterExt.java new file mode 100644 index 000000000..5c8d8bbb2 --- /dev/null +++ b/helper/impls/helper-impl-15/src/main/java/com/linkedin/avroutil1/compatibility/avro15/backports/GenericDatumWriterExt.java @@ -0,0 +1,24 @@ +/* + * Copyright 2023 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +package com.linkedin.avroutil1.compatibility.avro15.backports; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + + +/** + * this class allows constructing a {@link GenericDatumWriter} with + * a specified {@link GenericData} instance under avro 1.5 + * @param + */ +public class GenericDatumWriterExt extends GenericDatumWriter { + + public GenericDatumWriterExt(Schema root, GenericData genericData) { + super(root, genericData); + } +} diff --git a/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/Avro16Adapter.java b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/Avro16Adapter.java index 9b08a643d..48cae7e18 100644 --- a/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/Avro16Adapter.java +++ b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/Avro16Adapter.java @@ -25,6 +25,8 @@ import com.linkedin.avroutil1.compatibility.StringPropertyUtils; import com.linkedin.avroutil1.compatibility.StringRepresentation; import com.linkedin.avroutil1.compatibility.avro16.backports.Avro16DefaultValuesCache; +import com.linkedin.avroutil1.compatibility.avro16.backports.GenericDatumReaderExt; +import com.linkedin.avroutil1.compatibility.avro16.backports.GenericDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro16.backports.SpecificDatumWriterExt; import com.linkedin.avroutil1.compatibility.avro16.codec.AliasAwareSpecificDatumReader; import com.linkedin.avroutil1.compatibility.avro16.codec.BoundedMemoryDecoder; @@ -224,6 +226,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriterExt<>(writer, genericData); + } + + 
@Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReaderExt<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriterExt<>(writer, specificData); diff --git a/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumReaderExt.java b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumReaderExt.java new file mode 100644 index 000000000..7d6e494bc --- /dev/null +++ b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumReaderExt.java @@ -0,0 +1,24 @@ +/* + * Copyright 2023 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.compatibility.avro16.backports; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; + + +/** + * this class allows constructing a {@link GenericDatumReader} with + * a specified {@link GenericData} instance under avro 1.6 + * @param + */ +public class GenericDatumReaderExt extends GenericDatumReader { + + public GenericDatumReaderExt(Schema writer, Schema reader, GenericData genericData) { + super(writer, reader, genericData); + } +} diff --git a/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumWriterExt.java b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumWriterExt.java new file mode 100644 index 000000000..9e927667b --- /dev/null +++ b/helper/impls/helper-impl-16/src/main/java/com/linkedin/avroutil1/compatibility/avro16/backports/GenericDatumWriterExt.java @@ -0,0 +1,24 @@ +/* + * 
Copyright 2023 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. + */ + +package com.linkedin.avroutil1.compatibility.avro16.backports; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + + +/** + * this class allows constructing a {@link GenericDatumWriter} with + * a specified {@link GenericData} instance under avro 1.6 + * @param + */ +public class GenericDatumWriterExt extends GenericDatumWriter { + + public GenericDatumWriterExt(Schema root, GenericData genericData) { + super(root, genericData); + } +} diff --git a/helper/impls/helper-impl-17/src/main/java/com/linkedin/avroutil1/compatibility/avro17/Avro17Adapter.java b/helper/impls/helper-impl-17/src/main/java/com/linkedin/avroutil1/compatibility/avro17/Avro17Adapter.java index fca7de289..2352c563d 100644 --- a/helper/impls/helper-impl-17/src/main/java/com/linkedin/avroutil1/compatibility/avro17/Avro17Adapter.java +++ b/helper/impls/helper-impl-17/src/main/java/com/linkedin/avroutil1/compatibility/avro17/Avro17Adapter.java @@ -36,6 +36,8 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.Avro17BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -259,6 +261,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriter<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new 
GenericDatumReader<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriterExt<>(writer, specificData); diff --git a/helper/impls/helper-impl-18/src/main/java/com/linkedin/avroutil1/compatibility/avro18/Avro18Adapter.java b/helper/impls/helper-impl-18/src/main/java/com/linkedin/avroutil1/compatibility/avro18/Avro18Adapter.java index b87668522..b6f0ea106 100644 --- a/helper/impls/helper-impl-18/src/main/java/com/linkedin/avroutil1/compatibility/avro18/Avro18Adapter.java +++ b/helper/impls/helper-impl-18/src/main/java/com/linkedin/avroutil1/compatibility/avro18/Avro18Adapter.java @@ -34,6 +34,8 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.Avro18BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -221,6 +223,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriter<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReader<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriter<>(writer, specificData); diff --git a/helper/impls/helper-impl-19/src/main/java/com/linkedin/avroutil1/compatibility/avro19/Avro19Adapter.java b/helper/impls/helper-impl-19/src/main/java/com/linkedin/avroutil1/compatibility/avro19/Avro19Adapter.java index d3d39de87..1395ad8e4 100644 --- 
a/helper/impls/helper-impl-19/src/main/java/com/linkedin/avroutil1/compatibility/avro19/Avro19Adapter.java +++ b/helper/impls/helper-impl-19/src/main/java/com/linkedin/avroutil1/compatibility/avro19/Avro19Adapter.java @@ -36,6 +36,8 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.Avro19BinaryDecoderAccessUtil; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -231,6 +233,16 @@ public SpecificDatumReader newAliasAwareSpecificDatumReader(Schema writer return new AliasAwareSpecificDatumReader<>(writer, readerSchema); } + @Override + public DatumWriter newGenericDatumWriter(Schema writer, GenericData genericData) { + return new GenericDatumWriter<>(writer, genericData); + } + + @Override + public DatumReader newGenericDatumReader(Schema writer, Schema reader, GenericData genericData) { + return new GenericDatumReader<>(writer, reader, genericData); + } + @Override public DatumWriter newSpecificDatumWriter(Schema writer, SpecificData specificData) { return new SpecificDatumWriter<>(writer, specificData);