From 514d15ed338edcda0ea0719299565670955e93fc Mon Sep 17 00:00:00 2001 From: gaojieliu Date: Tue, 11 Feb 2020 17:34:44 -0800 Subject: [PATCH] Updated fast serializer generator to optimize the enum schema generation (#20) * Updated fast serializer generator to optimize the enum schema generation This code change optimizes the way how to retrieve back the enum schema for avro-1.4, so that it doesn't need to parse a enum schema for every deserialization. Snippet of generated fast serializer: private Map enumSchemaMap = new ConcurrentHashMap(); ... public void serializetestRecord0(IndexedRecord data, Encoder encoder) throws IOException { Schema testEnumEnumSchema1; if (!enumSchemaMap.containsKey(-3346156575824446940L)) { testEnumEnumSchema1 = Schema.parse("{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}"); enumSchemaMap.put(-3346156575824446940L, testEnumEnumSchema1); } else { testEnumEnumSchema1 = enumSchemaMap.get(-3346156575824446940L); } ... * Addressed one minior performance issue The new generated code: ... Schema testEnumEnumSchema1 = enumSchemaMap.get(-3346156575824446940L); if (null == testEnumEnumSchema1) { testEnumEnumSchema1 = Schema.parse("{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}"); enumSchemaMap.put(-3346156575824446940L, testEnumEnumSchema1); } ... --- .../linkedin/avro/fastserde/FastSerializerGenerator.java | 9 ++++++--- .../com/linkedin/avro/fastserde/FastDatumWriterTest.java | 4 ++-- .../fastserde/FastDeserializerGeneratorForReuseTest.java | 2 +- .../fastserde/FastGenericSerializerGeneratorTest.java | 3 ++- .../linkedin/avro/fastserde/FastSerdeTestsSupport.java | 5 +++-- .../fastserde/FastSpecificSerializerGeneratorTest.java | 3 ++- .../com/linkedin/avro/fastserde/FastStringableTest.java | 3 ++- 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java index bbe7f7b32..fd2311192 100644 --- a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java +++ b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerializerGenerator.java @@ -327,9 +327,12 @@ private void processEnum(Schema enumSchema, JExpression enumValueExpression, JBl enumSchemaIdSet.add(enumSchemaFingerprint); JVar enumSchemaVar = body.decl(codeModel.ref(Schema.class), getVariableName(enumSchema.getName() + "EnumSchema"), - codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString()) - ); - body.invoke(enumSchemaMapField, "put").arg(JExpr.lit(enumSchemaFingerprint)).arg(enumSchemaVar); + enumSchemaMapField.invoke("get").arg(JExpr.lit(enumSchemaFingerprint))); + JConditional schemaCheckCond = body._if(JExpr._null().eq(enumSchemaVar)); + JBlock thenBody = schemaCheckCond._then(); + thenBody.assign(enumSchemaVar, codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString())); + thenBody.invoke(enumSchemaMapField, "put").arg(JExpr.lit(enumSchemaFingerprint)).arg(enumSchemaVar); + valueToWrite = JExpr.invoke(enumSchemaVar, "getEnumOrdinal").arg(enumValueCasted.invoke("toString")); } } else { diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java index ac9e3de5d..0d85a48dd 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDatumWriterTest.java @@ -36,7 +36,7 @@ public void shouldCreateSpecificDatumWriter() throws IOException, InterruptedExc // when fastSpecificDatumWriter.write(testRecord, - AvroCompatibilityHelper.newBinaryEncoder(new ByteArrayOutputStream())); + AvroCompatibilityHelper.newBufferedBinaryEncoder(new ByteArrayOutputStream())); // then FastSerializer fastSpecificSerializer = @@ -58,7 +58,7 @@ public void shouldCreateGenericDatumReader() throws IOException, InterruptedExce record.put("test", "test"); // when - fastGenericDatumReader.write(record, AvroCompatibilityHelper.newBinaryEncoder(new ByteArrayOutputStream())); + fastGenericDatumReader.write(record, AvroCompatibilityHelper.newBufferedBinaryEncoder(new ByteArrayOutputStream())); // then FastSerializer fastGenericSerializer = diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorForReuseTest.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorForReuseTest.java index b75044110..15ecad123 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorForReuseTest.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorForReuseTest.java @@ -127,7 +127,7 @@ private static Decoder getDecoder(byte[] bytes) { private static byte[] serialize(GenericRecord record, Schema schema) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Encoder encoder = AvroCompatibilityHelper.newBinaryEncoder(baos); + Encoder encoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); DatumWriter datumWriter = new GenericDatumWriter(schema); datumWriter.write(record, encoder); encoder.flush(); diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java index 65526abd6..ac41cb570 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastGenericSerializerGeneratorTest.java @@ -21,6 +21,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; import org.apache.avro.util.Utf8; import org.testng.Assert; import org.testng.annotations.BeforeTest; @@ -407,7 +408,7 @@ public Decoder dataAsBinaryDecoder(T data) { public Decoder dataAsBinaryDecoder(T data, Schema schema) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); //new BinaryEncoder(baos); + Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); //new BinaryEncoder(baos); try { FastGenericSerializerGenerator fastGenericSerializerGenerator = diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java index 85f66a5b2..58f638133 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSerdeTestsSupport.java @@ -12,6 +12,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.avro.specific.SpecificRecord; @@ -100,7 +101,7 @@ public static Decoder genericDataAsDecoder(T data) public static Decoder genericDataAsDecoder(T data, Schema schema) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); + Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); try { GenericDatumWriter writer = new GenericDatumWriter<>(schema); @@ -119,7 +120,7 @@ public static Decoder specificDataAsDecoder(T record) public static Decoder specificDataAsDecoder(T record, Schema schema) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); // BinaryEncoder(baos); + Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); try { SpecificDatumWriter writer = new SpecificDatumWriter<>(schema); diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java index 955c750d3..1a7d21d8e 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastSpecificSerializerGeneratorTest.java @@ -23,6 +23,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.util.Utf8; import org.testng.Assert; @@ -360,7 +361,7 @@ public Decoder dataAsDecoder(T data) { public Decoder dataAsDecoder(T data, Schema schema) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); + Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); try { FastSpecificSerializerGenerator fastSpecificSerializerGenerator = diff --git a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java index f56e8597d..9c493d197 100644 --- a/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java +++ b/avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastStringableTest.java @@ -21,6 +21,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.util.Utf8; import org.testng.Assert; @@ -188,7 +189,7 @@ public void deserializeStringableFields(Boolean whetherUseFastDeserializer) public Decoder writeWithFastAvro(T data, Schema schema) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); + Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); try { FastSpecificSerializerGenerator fastSpecificSerializerGenerator =