From ca6496c232253be85e2b685964f537b9956be96f Mon Sep 17 00:00:00 2001 From: Andy Sautins Date: Tue, 18 Apr 2023 14:55:24 -0600 Subject: [PATCH] Nested arrays cause 'item-end when a 'x' was expected" error (#476) * Address issue with nested arrays Some schemas with nested arrays ( e.g., NestedArray.avsc ) generate the following error when deserialized with fastserde: Attempt to process a item-end when a string was expected. This PR adds a test case and addresses the issue * Update test * Fix fastserdetest.avsc * Update fastserdetest.java * Remove code to verify test pipeline fails * Make name consistent * Fix FastDeserializerGeneratorBase * Use helper functions so test builds on all versions * Fix case of ItemName * Remove dependency of getSymbolPrintName * Remove unnecessary whitespace change * Add javadoc --------- Co-authored-by: Andy Sautins --- .../src/test/avro/nestedArrayTest.avsc | 26 +++++++++++++ .../avro/fastserde/FastNestedArrayTest.java | 37 +++++++++++++++++++ .../FastDeserializerGeneratorBase.java | 20 ++++++++++ 3 files changed, 83 insertions(+) create mode 100644 fastserde/avro-fastserde-tests-common/src/test/avro/nestedArrayTest.avsc create mode 100644 fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastNestedArrayTest.java diff --git a/fastserde/avro-fastserde-tests-common/src/test/avro/nestedArrayTest.avsc b/fastserde/avro-fastserde-tests-common/src/test/avro/nestedArrayTest.avsc new file mode 100644 index 000000000..1564e229b --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/avro/nestedArrayTest.avsc @@ -0,0 +1,26 @@ +{ + "namespace": "com.linkedin.avro.fastserde.generated.avro", + "name": "NestedArrayTest", + "type": "record", + "fields": [ + { + "name": "NestedArrayItems", + "type": { + "type": "array", + "items": { + "type": "array", + "items": { + "name": "NestedArrayItem", + "type": "record", + "fields": [ + { + "name": "ItemName", + "type": "string" + } + ] + } + } + } + } + ] +} diff --git 
a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastNestedArrayTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastNestedArrayTest.java new file mode 100644 index 000000000..743161ad5 --- /dev/null +++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/FastNestedArrayTest.java @@ -0,0 +1,37 @@ +package com.linkedin.avro.fastserde; + +import com.linkedin.avro.fastserde.generated.avro.NestedArrayItem; +import com.linkedin.avro.fastserde.generated.avro.NestedArrayTest; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.avro.io.Decoder; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class FastNestedArrayTest { + + @Test + public void testExample() throws Exception { + + NestedArrayTest nestedArrayTest = new NestedArrayTest(); + + List items = new ArrayList<>(); + NestedArrayItem item = new NestedArrayItem(); + FastSerdeTestsSupport.setField(item, "ItemName", "itemName"); + FastSerdeTestsSupport.setField(nestedArrayTest, "NestedArrayItems", Collections.singletonList(items)); + + Decoder decoder = FastSerdeTestsSupport.specificDataAsDecoder(nestedArrayTest, NestedArrayTest.SCHEMA$); + + FastSpecificDatumReader fastSpecificDatumReader = + new FastSpecificDatumReader<>(NestedArrayTest.SCHEMA$); + FastDeserializer fastDeserializer = + fastSpecificDatumReader.getFastDeserializer().get(); + + NestedArrayTest actual = fastDeserializer.deserialize(decoder); + + Assert.assertEquals(actual, nestedArrayTest); + } +} diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java index 37bab1143..6c9388738 100644 --- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java +++ 
b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGeneratorBase.java @@ -6,6 +6,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.ListIterator; +import java.util.Optional; import org.apache.avro.Schema; import com.linkedin.avro.fastserde.backport.Symbol; import org.apache.avro.util.Utf8; @@ -71,6 +72,17 @@ protected static void assignBlockToBody(Object codeContainer, JBlock body) { public abstract FastDeserializer generateDeserializer(); + /** + * Retrieve the symbols associated with the FieldAction necessary + * to generate code to populate the field. + * + * When processing a nested array ( an array directly inside + * another array ) return the symbol.production to navigate the + * nested array properly. + * + * @param action FieldAction being processed + * @return list of symbols associated with the input FieldAction + */ protected ListIterator actionIterator(FieldAction action) { ListIterator actionIterator = null; @@ -85,6 +97,14 @@ protected ListIterator actionIterator(FieldAction action) { while (actionIterator.hasNext()) { Symbol symbol = actionIterator.next(); + if (Symbol.Kind.REPEATER.equals(symbol.kind) && + Symbol.Kind.TERMINAL.equals(((Symbol.Repeater)symbol).end) + && "array-end" + .equals(((Symbol.Repeater)symbol).end.toString())) { + actionIterator = Arrays.asList(reverseSymbolArray(symbol.production)).listIterator(); + break; + } + if (symbol instanceof Symbol.ErrorAction) { throw new FastDeserializerGeneratorException(((Symbol.ErrorAction) symbol).msg); }