Updated fast serializer generator to optimize the enum schema generation (#20)

* Updated fast serializer generator to optimize the enum schema generation

This code change optimizes how the enum schema is retrieved for avro-1.4, so that
the enum schema doesn't need to be re-parsed for every serialization.

Snippet of generated fast serializer:

    private Map<Long, Schema> enumSchemaMap = new ConcurrentHashMap<Long, Schema>();

...

    public void serializetestRecord0(IndexedRecord data, Encoder encoder)
        throws IOException
    {
        Schema testEnumEnumSchema1;
        if (!enumSchemaMap.containsKey(-3346156575824446940L)) {
            testEnumEnumSchema1 = Schema.parse("{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}");
            enumSchemaMap.put(-3346156575824446940L, testEnumEnumSchema1);
        } else {
            testEnumEnumSchema1 = enumSchemaMap.get(-3346156575824446940L);
        }
...

* Addressed a minor performance issue

The new generated code:
...
        Schema testEnumEnumSchema1 = enumSchemaMap.get(-3346156575824446940L);
        if (null == testEnumEnumSchema1) {
            testEnumEnumSchema1 = Schema.parse("{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}");
            enumSchemaMap.put(-3346156575824446940L, testEnumEnumSchema1);
        }
...
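
For reference, the lookup-or-parse idiom used by the generated code can also be written as a small standalone helper. The sketch below is only illustrative and is not code from this commit: the class name, method name, and main method are invented, while the fingerprint and enum schema string are the ones shown above.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.avro.Schema;

public class EnumSchemaCacheSketch {

    // Cache keyed by schema fingerprint, mirroring the generated enumSchemaMap field.
    private final Map<Long, Schema> enumSchemaMap = new ConcurrentHashMap<Long, Schema>();

    // Returns the cached enum schema, parsing it only the first time a fingerprint is seen.
    public Schema getOrParse(long fingerprint, String schemaJson) {
        Schema schema = enumSchemaMap.get(fingerprint);
        if (null == schema) {
            // Schema.parse(String) is the avro-1.4 API this commit targets.
            // Under contention two threads may both parse, but they put an equivalent schema.
            schema = Schema.parse(schemaJson);
            enumSchemaMap.put(fingerprint, schema);
        }
        return schema;
    }

    public static void main(String[] args) {
        EnumSchemaCacheSketch cache = new EnumSchemaCacheSketch();
        String enumJson = "{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}";
        Schema enumSchema = cache.getOrParse(-3346156575824446940L, enumJson);
        System.out.println(enumSchema.getEnumOrdinal("B")); // prints 1
    }
}
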
gaojieliu authored Feb 12, 2020
1 parent eab8395 commit 514d15e
Showing 7 changed files with 18 additions and 11 deletions.

File 1 of 7

@@ -327,9 +327,12 @@ private void processEnum(Schema enumSchema, JExpression enumValueExpression, JBlock
       enumSchemaIdSet.add(enumSchemaFingerprint);
       JVar enumSchemaVar = body.decl(codeModel.ref(Schema.class),
           getVariableName(enumSchema.getName() + "EnumSchema"),
-          codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString())
-      );
-      body.invoke(enumSchemaMapField, "put").arg(JExpr.lit(enumSchemaFingerprint)).arg(enumSchemaVar);
+          enumSchemaMapField.invoke("get").arg(JExpr.lit(enumSchemaFingerprint)));
+      JConditional schemaCheckCond = body._if(JExpr._null().eq(enumSchemaVar));
+      JBlock thenBody = schemaCheckCond._then();
+      thenBody.assign(enumSchemaVar, codeModel.ref(Schema.class).staticInvoke("parse").arg(enumSchema.toString()));
+      thenBody.invoke(enumSchemaMapField, "put").arg(JExpr.lit(enumSchemaFingerprint)).arg(enumSchemaVar);
+
       valueToWrite = JExpr.invoke(enumSchemaVar, "getEnumOrdinal").arg(enumValueCasted.invoke("toString"));
     }
   } else {
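
For readers unfamiliar with com.sun.codemodel, the standalone sketch below builds the same null-check-then-parse block as the hunk above and writes it out as Java source. It is not code from this repository: the demo package, class, method, and output directory are invented for illustration, while the enum schema string and fingerprint are taken from the commit message.

import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.avro.Schema;

import com.sun.codemodel.JBlock;
import com.sun.codemodel.JClassAlreadyExistsException;
import com.sun.codemodel.JCodeModel;
import com.sun.codemodel.JConditional;
import com.sun.codemodel.JDefinedClass;
import com.sun.codemodel.JExpr;
import com.sun.codemodel.JFieldVar;
import com.sun.codemodel.JMethod;
import com.sun.codemodel.JMod;
import com.sun.codemodel.JVar;

public class CodeModelNullCheckSketch {

    public static void main(String[] args) throws IOException, JClassAlreadyExistsException {
        JCodeModel codeModel = new JCodeModel();
        JDefinedClass clazz = codeModel._class("demo.GeneratedEnumDemo");

        // Field equivalent to the generated enumSchemaMap.
        JFieldVar enumSchemaMapField = clazz.field(JMod.PRIVATE,
                codeModel.ref(Map.class).narrow(codeModel.ref(Long.class), codeModel.ref(Schema.class)),
                "enumSchemaMap",
                JExpr._new(codeModel.ref(ConcurrentHashMap.class).narrow(codeModel.ref(Long.class), codeModel.ref(Schema.class))));

        JMethod method = clazz.method(JMod.PUBLIC, codeModel.ref(Schema.class), "enumSchema");
        JBlock body = method.body();

        long fingerprint = -3346156575824446940L;
        String enumJson = "{\"type\":\"enum\",\"name\":\"testEnum\",\"namespace\":\"com.adpilot.utils.generated.avro\",\"symbols\":[\"A\",\"B\"]}";

        // Declare the variable initialized from the cache, then emit the null check,
        // mirroring the generator change in the hunk above.
        JVar enumSchemaVar = body.decl(codeModel.ref(Schema.class), "testEnumEnumSchema1",
                enumSchemaMapField.invoke("get").arg(JExpr.lit(fingerprint)));
        JConditional schemaCheckCond = body._if(JExpr._null().eq(enumSchemaVar));
        JBlock thenBody = schemaCheckCond._then();
        thenBody.assign(enumSchemaVar, codeModel.ref(Schema.class).staticInvoke("parse").arg(enumJson));
        thenBody.invoke(enumSchemaMapField, "put").arg(JExpr.lit(fingerprint)).arg(enumSchemaVar);
        body._return(enumSchemaVar);

        // Write demo/GeneratedEnumDemo.java under an invented output directory.
        File outDir = new File("target/generated-sources");
        outDir.mkdirs();
        codeModel.build(outDir);
    }
}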

File 2 of 7

@@ -36,7 +36,7 @@ public void shouldCreateSpecificDatumWriter() throws IOException, InterruptedException {
 
     // when
     fastSpecificDatumWriter.write(testRecord,
-        AvroCompatibilityHelper.newBinaryEncoder(new ByteArrayOutputStream()));
+        AvroCompatibilityHelper.newBufferedBinaryEncoder(new ByteArrayOutputStream()));
 
     // then
     FastSerializer<TestRecord> fastSpecificSerializer =

@@ -58,7 +58,7 @@ public void shouldCreateGenericDatumReader() throws IOException, InterruptedException {
     record.put("test", "test");
 
     // when
-    fastGenericDatumReader.write(record, AvroCompatibilityHelper.newBinaryEncoder(new ByteArrayOutputStream()));
+    fastGenericDatumReader.write(record, AvroCompatibilityHelper.newBufferedBinaryEncoder(new ByteArrayOutputStream()));
 
     // then
     FastSerializer<GenericRecord> fastGenericSerializer =

File 3 of 7

@@ -127,7 +127,7 @@ private static Decoder getDecoder(byte[] bytes) {
 
   private static byte[] serialize(GenericRecord record, Schema schema) throws IOException {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    Encoder encoder = AvroCompatibilityHelper.newBinaryEncoder(baos);
+    Encoder encoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);
     DatumWriter datumWriter = new GenericDatumWriter(schema);
     datumWriter.write(record, encoder);
     encoder.flush();
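
The serialize helper in the hunk above is the general usage pattern for the buffered encoder. Below is a minimal end-to-end sketch of that pattern; the record schema and field are made up, and the AvroCompatibilityHelper import path is an assumption (the diff does not show it), so adjust it to whichever compatibility artifact this project uses.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Encoder;

// Assumed import path; not shown in this diff.
import com.linkedin.avro.compatibility.AvroCompatibilityHelper;

public class BufferedEncoderSketch {

    public static void main(String[] args) throws IOException {
        // A made-up record schema, just for the demo.
        Schema schema = Schema.parse(
                "{\"type\":\"record\",\"name\":\"demoRecord\",\"fields\":[{\"name\":\"test\",\"type\":\"string\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("test", "test");

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // The buffered encoder is what the tests now request instead of newBinaryEncoder(...).
        Encoder encoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);

        GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        writer.write(record, encoder);
        encoder.flush(); // a buffered encoder must be flushed before the bytes are read

        byte[] bytes = baos.toByteArray();
        System.out.println("serialized " + bytes.length + " bytes");
    }
}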

File 4 of 7

@@ -21,6 +21,7 @@
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.Encoder;
 import org.apache.avro.util.Utf8;
 import org.testng.Assert;
 import org.testng.annotations.BeforeTest;

@@ -407,7 +408,7 @@ public <T extends GenericContainer> Decoder dataAsBinaryDecoder(T data) {
 
   public <T> Decoder dataAsBinaryDecoder(T data, Schema schema) {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); //new BinaryEncoder(baos);
+    Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos); //new BinaryEncoder(baos);
 
     try {
       FastGenericSerializerGenerator<T> fastGenericSerializerGenerator =

File 5 of 7

@@ -12,6 +12,7 @@
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.Encoder;
 import org.apache.avro.specific.SpecificDatumReader;
 import org.apache.avro.specific.SpecificDatumWriter;
 import org.apache.avro.specific.SpecificRecord;

@@ -100,7 +101,7 @@ public static <T extends GenericContainer> Decoder genericDataAsDecoder(T data)
 
   public static <T> Decoder genericDataAsDecoder(T data, Schema schema) {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos);
+    Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);
 
     try {
       GenericDatumWriter<T> writer = new GenericDatumWriter<>(schema);

@@ -119,7 +120,7 @@ public static <T extends SpecificRecord> Decoder specificDataAsDecoder(T record)
 
   public static <T> Decoder specificDataAsDecoder(T record, Schema schema) {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos); // BinaryEncoder(baos);
+    Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);
 
     try {
       SpecificDatumWriter<T> writer = new SpecificDatumWriter<>(schema);

File 6 of 7

@@ -23,6 +23,7 @@
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.Encoder;
 import org.apache.avro.specific.SpecificDatumReader;
 import org.apache.avro.util.Utf8;
 import org.testng.Assert;

@@ -360,7 +361,7 @@ public <T extends GenericContainer> Decoder dataAsDecoder(T data) {
 
   public <T> Decoder dataAsDecoder(T data, Schema schema) {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos);
+    Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);
 
     try {
       FastSpecificSerializerGenerator<T> fastSpecificSerializerGenerator =

File 7 of 7

@@ -21,6 +21,7 @@
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.Encoder;
 import org.apache.avro.specific.SpecificDatumReader;
 import org.apache.avro.util.Utf8;
 import org.testng.Assert;

@@ -188,7 +189,7 @@ public void deserializeStringableFields(Boolean whetherUseFastDeserializer)
 
   public <T> Decoder writeWithFastAvro(T data, Schema schema) {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos);
+    Encoder binaryEncoder = AvroCompatibilityHelper.newBufferedBinaryEncoder(baos);
 
     try {
       FastSpecificSerializerGenerator<T> fastSpecificSerializerGenerator =
