diff --git a/src/main/java/org/simdjson/OnDemandJsonIterator.java b/src/main/java/org/simdjson/OnDemandJsonIterator.java
--- a/src/main/java/org/simdjson/OnDemandJsonIterator.java
+++ b/src/main/java/org/simdjson/OnDemandJsonIterator.java
@@ -442,6 +442,53 @@ Float getFloat() {
         }
         return numberParser.parseFloat(buffer, len, idx);
     }
+
+    /**
+     * Returns the raw text of the current scalar value and moves the iterator past it.
+     *
+     * <p>A string is returned without its surrounding quotes and with its escape sequences left
+     * undecoded; any other scalar is returned exactly as it is written in the document.
+     *
+     * @return the text of the value, decoded as UTF-8
+     */
+    String getOrCompressAsString() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        int end = endOfRawToken(idx, indexer.peek());
+        if (buffer[idx] == '"') {
+            // Leave out the opening and the closing quote.
+            return new String(buffer, idx + 1, end - idx - 2, java.nio.charset.StandardCharsets.UTF_8);
+        }
+        return new String(buffer, idx, end - idx, java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Returns the name of the current object field and moves the iterator past it.
+     *
+     * @return the field name without its surrounding quotes, decoded as UTF-8 with escape
+     *         sequences left undecoded
+     */
+    String getObjectKey() {
+        int idx = indexer.getAndAdvance();
+        int end = endOfRawToken(idx, indexer.peek());
+        return new String(buffer, idx + 1, end - idx - 2, java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Finds the end (exclusive) of the token that starts at {@code start}. The next structural
+     * index may be separated from the token by whitespace, which is not part of the token.
+     */
+    private int endOfRawToken(int start, int nextStructural) {
+        int end = nextStructural;
+        while (end > start + 1) {
+            byte last = buffer[end - 1];
+            if (last != ' ' && last != '\t' && last != '\n' && last != '\r') {
+                break;
+            }
+            end--;
+        }
+        return end;
+    }
 
     int getRootString(byte[] stringBuffer) {
         depth--;
diff --git a/src/main/java/org/simdjson/OnDemandJsonValue.java b/src/main/java/org/simdjson/OnDemandJsonValue.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/org/simdjson/OnDemandJsonValue.java
@@ -0,0 +1,73 @@
+package org.simdjson;
+
+import java.lang.reflect.Type;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A node of the path tree built by {@link PathsBasedJsonParser}.
+ *
+ * <p>Children are keyed by field name ({@link String}) for object steps and by element index
+ * ({@link Integer}) for array steps. A node flagged as a leaf is the end of a requested path;
+ * its value is only meaningful when its version equals the version of the latest parse.
+ */
+public class OnDemandJsonValue {
+    private final Map<Object, OnDemandJsonValue> children;
+    private OnDemandJsonValue parent;
+    private ResolvedClass.ResolvedClassCategory type;
+    private Object value;
+    private long version;
+    private boolean isLeaf;
+
+    public OnDemandJsonValue() {
+        this.children = new HashMap<>();
+        this.parent = null;
+        this.value = null;
+        this.version = 0L;
+        this.isLeaf = false;
+    }
+
+    public Map<Object, OnDemandJsonValue> getChildren() {
+        return children;
+    }
+
+    public OnDemandJsonValue getParent() {
+        return parent;
+    }
+
+    public void setParent(OnDemandJsonValue parent) {
+        this.parent = parent;
+    }
+
+    public Object getValue() {
+        return value;
+    }
+
+    public void setValue(Object value) {
+        this.value = value;
+    }
+
+    public long getVersion() {
+        return version;
+    }
+
+    public void setVersion(long version) {
+        this.version = version;
+    }
+
+    public ResolvedClass.ResolvedClassCategory getType() {
+        return type;
+    }
+
+    public void setType(ResolvedClass.ResolvedClassCategory type) {
+        this.type = type;
+    }
+
+    public boolean isLeaf() {
+        return isLeaf;
+    }
+
+    public void setLeaf(boolean leaf) {
+        isLeaf = leaf;
+    }
+}
diff --git a/src/main/java/org/simdjson/PathsBasedJsonParser.java b/src/main/java/org/simdjson/PathsBasedJsonParser.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/org/simdjson/PathsBasedJsonParser.java
@@ -0,0 +1,302 @@
+package org.simdjson;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Extracts a fixed set of values, addressed by paths, from JSON documents.
+ *
+ * <p>Each constructor argument has the form {@code path[:TYPE]}. Path steps are separated by
+ * {@code .}; a step written as {@code [n]} selects the array element at zero-based index n, any
+ * other step selects an object field. A field name that itself starts with {@code [} is written
+ * with doubled brackets, e.g. {@code [[first]]} selects the field {@code "[first]"}. {@code TYPE}
+ * is the name of a {@link ResolvedClass.ResolvedClassCategory} constant and defaults to
+ * {@code STRING}.
+ *
+ * <p>Field names are compared against the raw bytes of the document; escape sequences in keys
+ * are not decoded. Instances keep mutable per-parse state and reuse their buffers, so one
+ * instance must not be used by several threads at the same time.
+ */
+public class PathsBasedJsonParser {
+    private static final int PADDING = 64;
+    private static final int DEFAULT_CAPACITY = 34 * 1024 * 1024; // we should be able to handle jsons <= 34MiB
+    private static final String SINGLE_LEFT_BRACKET = "[";
+    private static final String DOUBLE_LEFT_BRACKET = "[[";
+    private static final String SINGLE_RIGHT_BRACKET = "]";
+    private static final String DOUBLE_RIGHT_BRACKET = "]]";
+    private static final String TYPE_DELIMITER = ":";
+    private static final String PATH_DELIMITER_REGEX = "\\.";
+    private static final ResolvedClass.ResolvedClassCategory DEFAULT_TYPE = ResolvedClass.ResolvedClassCategory.STRING;
+
+    private final Object[] result;
+    private final OnDemandJsonValue[] row;
+    private final StructuralIndexer indexer;
+    private final BitIndexes bitIndexes;
+    private final OnDemandJsonIterator jsonIterator;
+    private final byte[] paddedBuffer;
+    private final OnDemandJsonValue root = new OnDemandJsonValue();
+    private long currentVersion = 0;
+    private OnDemandJsonValue ptr;
+    private byte[] padded;
+
+    /**
+     * Creates a parser that extracts the given paths.
+     *
+     * @param args path specifications of the form {@code path[:TYPE]}; the i-th specification
+     *             determines the i-th element of the array returned by {@link #parse(byte[], int)}
+     * @throws IllegalArgumentException if {@code TYPE} is not a constant of
+     *                                  {@link ResolvedClass.ResolvedClassCategory}
+     * @throws NumberFormatException if an array step does not contain a valid integer index
+     */
+    public PathsBasedJsonParser(String... args) {
+        this.bitIndexes = new BitIndexes(DEFAULT_CAPACITY);
+        this.indexer = new StructuralIndexer(bitIndexes);
+        this.jsonIterator = new OnDemandJsonIterator(bitIndexes, PADDING);
+        this.result = new Object[args.length];
+        // PADDING extra bytes so that an input of exactly DEFAULT_CAPACITY bytes is still padded.
+        this.paddedBuffer = new byte[DEFAULT_CAPACITY + PADDING];
+        this.row = new OnDemandJsonValue[args.length];
+        constructPathTree(args);
+    }
+
+    /** Builds the tree of requested paths and remembers the leaf node of every path in {@code row}. */
+    private void constructPathTree(String... args) {
+        for (int i = 0; i < args.length; i++) {
+            String[] pathAndType = args[i].split(TYPE_DELIMITER);
+            ResolvedClass.ResolvedClassCategory type = DEFAULT_TYPE;
+            if (pathAndType.length >= 2) {
+                type = ResolvedClass.ResolvedClassCategory.valueOf(pathAndType[1]);
+            }
+            String path = pathAndType[0];
+            // construct path tree
+            OnDemandJsonValue cur = root;
+            for (String step : path.split(PATH_DELIMITER_REGEX)) {
+                Object key;
+                if (step.startsWith(SINGLE_LEFT_BRACKET) && !step.startsWith(DOUBLE_LEFT_BRACKET)) {
+                    // "[n]" addresses an array element.
+                    key = Integer.parseInt(step.substring(1, step.length() - 1));
+                } else {
+                    // Doubled brackets escape literal brackets in a field name.
+                    key = step.replace(DOUBLE_LEFT_BRACKET, SINGLE_LEFT_BRACKET)
+                            .replace(DOUBLE_RIGHT_BRACKET, SINGLE_RIGHT_BRACKET);
+                }
+                if (!cur.getChildren().containsKey(key)) {
+                    OnDemandJsonValue child = new OnDemandJsonValue();
+                    child.setParent(cur);
+                    cur.getChildren().put(key, child);
+                }
+                cur = cur.getChildren().get(key);
+            }
+            cur.setLeaf(true);
+            cur.setType(type);
+            row[i] = cur;
+        }
+    }
+
+    /**
+     * Parses a document and returns the values found at the configured paths.
+     *
+     * @param buffer UTF-8 encoded JSON document whose root is an object or an array
+     * @param len number of leading bytes of {@code buffer} that belong to the document
+     * @return one entry per configured path, in constructor order; {@code null} where no value
+     *         was collected for the path. The same array instance is reused by every call.
+     * @throws IllegalArgumentException if {@code len} exceeds {@code buffer.length} or the
+     *                                  parser capacity
+     * @throws RuntimeException if the document does not start with '{' or '['
+     */
+    public Object[] parse(byte[] buffer, int len) {
+        if (buffer == null || buffer.length == 0 || len <= 0) {
+            // Nothing to parse: report every path as absent.
+            Arrays.fill(result, null);
+            return result;
+        }
+        if (len > buffer.length) {
+            throw new IllegalArgumentException(
+                    "len (" + len + ") exceeds the buffer length (" + buffer.length + ")");
+        }
+        padded = padIfNeeded(buffer, len);
+        Utf8Validator.validate(padded, len);
+        indexer.index(padded, len);
+        jsonIterator.init(padded, len);
+        this.currentVersion++;
+        this.ptr = root;
+        switch (padded[bitIndexes.peek()]) {
+            case '{':
+                parseRootObject();
+                break;
+            case '[':
+                parseRootArray();
+                break;
+            default:
+                throw new RuntimeException("invalid json format, must start with { or [");
+        }
+        return getResult();
+    }
+
+    private void parseRootObject() {
+        OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingRootObject();
+        iteratorObjectElements(iteratorResult);
+        jsonIterator.assertNoMoreJsonValues();
+    }
+
+    private void parseObject() {
+        OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingObject();
+        iteratorObjectElements(iteratorResult);
+    }
+
+    private void parseRootArray() {
+        OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingRootArray();
+        iteratorArrayElements(iteratorResult);
+        jsonIterator.assertNoMoreJsonValues();
+    }
+
+    private void parseArray() {
+        OnDemandJsonIterator.IteratorResult iteratorResult = jsonIterator.startIteratingArray();
+        iteratorArrayElements(iteratorResult);
+    }
+
+    /** Reads the current scalar as the type requested for the leaf {@code ptr} points to. */
+    private void parseValue() {
+        Object value = switch (ptr.getType()) {
+            case BOOLEAN_PRIMITIVE -> jsonIterator.getNonNullBoolean();
+            case BOOLEAN -> jsonIterator.getBoolean();
+            case BYTE_PRIMITIVE -> jsonIterator.getNonNullByte();
+            case BYTE -> jsonIterator.getByte();
+            case SHORT_PRIMITIVE -> jsonIterator.getNonNullShort();
+            case SHORT -> jsonIterator.getShort();
+            case INT_PRIMITIVE -> jsonIterator.getNonNullInt();
+            case INT -> jsonIterator.getInt();
+            case LONG_PRIMITIVE -> jsonIterator.getNonNullLong();
+            case LONG -> jsonIterator.getLong();
+            case FLOAT_PRIMITIVE -> jsonIterator.getNonNullFloat();
+            case FLOAT -> jsonIterator.getFloat();
+            case DOUBLE_PRIMITIVE -> jsonIterator.getNonNullDouble();
+            case DOUBLE -> jsonIterator.getDouble();
+            case CHAR_PRIMITIVE -> jsonIterator.getNonNullChar();
+            case CHAR -> jsonIterator.getChar();
+            case STRING -> jsonIterator.getOrCompressAsString();
+            default -> throw new RuntimeException("only support basic type, not support " + ptr.getType().name());
+        };
+        ptr.setValue(value);
+    }
+
+    /**
+     * Walks the fields of the current object and descends into those that are part of a
+     * requested path; stops early once every child of {@code ptr} has been visited.
+     */
+    private void iteratorObjectElements(OnDemandJsonIterator.IteratorResult result) {
+        if (result == OnDemandJsonIterator.IteratorResult.NOT_EMPTY) {
+            int collected = 0;
+            int fieldNum = ptr.getChildren().size();
+            boolean hasFields = true;
+            int parentDepth = jsonIterator.getDepth() - 1;
+            while (collected < fieldNum && hasFields) {
+                String key = jsonIterator.getObjectKey();
+                jsonIterator.moveToFieldValue();
+                if (ptr.getChildren().containsKey(key)) {
+                    ptr = ptr.getChildren().get(key);
+                    parseElement();
+                    collected++;
+                    ptr = ptr.getParent();
+                } else {
+                    jsonIterator.skipChild();
+                }
+                hasFields = jsonIterator.nextObjectField();
+            }
+            jsonIterator.skipChild(parentDepth);
+        }
+    }
+
+    /**
+     * Walks the elements of the current array and descends into those whose index is part of a
+     * requested path; stops early once every child of {@code ptr} has been visited.
+     */
+    private void iteratorArrayElements(OnDemandJsonIterator.IteratorResult result) {
+        if (result == OnDemandJsonIterator.IteratorResult.NOT_EMPTY) {
+            int collected = 0;
+            int fieldNum = ptr.getChildren().size();
+            boolean hasFields = true;
+            int index = 0;
+            int parentDepth = jsonIterator.getDepth() - 2;
+            while (collected < fieldNum && hasFields) {
+                if (ptr.getChildren().containsKey(index)) {
+                    ptr = ptr.getChildren().get(index);
+                    parseElement();
+                    collected++;
+                    ptr = ptr.getParent();
+                } else {
+                    jsonIterator.skipChild();
+                }
+                index++;
+                hasFields = jsonIterator.nextArrayElement();
+            }
+            jsonIterator.skipChild(parentDepth);
+        }
+    }
+
+    /**
+     * Handles the value the iterator is positioned at for the path node {@code ptr}.
+     *
+     * <p>Objects and arrays are traversed to serve deeper paths; if the node is itself a leaf,
+     * the raw text of the container is stored as its value. Scalars are only read for leaves:
+     * a node that merely leads to deeper paths has no type, so a scalar found there is skipped.
+     */
+    private void parseElement() {
+        char currentChar = (char) padded[bitIndexes.peek()];
+        if (currentChar == '{' || currentChar == '[') {
+            int startOffset = bitIndexes.peek();
+            if (currentChar == '{') {
+                parseObject();
+            } else {
+                parseArray();
+            }
+            if (ptr.isLeaf()) {
+                int endOffset = bitIndexes.peek();
+                // The next structural character may be preceded by whitespace; keep only the container.
+                while (endOffset > startOffset && isJsonWhitespace(padded[endOffset - 1])) {
+                    endOffset--;
+                }
+                ptr.setVersion(currentVersion);
+                ptr.setValue(new String(padded, startOffset, endOffset - startOffset, StandardCharsets.UTF_8));
+            }
+        } else if (ptr.isLeaf()) {
+            ptr.setVersion(currentVersion);
+            parseValue();
+        } else {
+            // Non-leaf nodes carry no type; the path cannot continue into a scalar.
+            jsonIterator.skipChild();
+        }
+    }
+
+    private static boolean isJsonWhitespace(byte b) {
+        return b == ' ' || b == '\t' || b == '\n' || b == '\r';
+    }
+
+    /** Copies the value of every leaf visited during the latest parse into {@code result}. */
+    private Object[] getResult() {
+        for (int i = 0; i < result.length; i++) {
+            if (row[i].getVersion() < currentVersion) {
+                result[i] = null;
+                continue;
+            }
+            result[i] = row[i].getValue();
+        }
+        return result;
+    }
+
+    /**
+     * Returns a buffer holding the document followed by at least {@code PADDING} spare bytes,
+     * copying the input into the internal buffer when it is not already padded.
+     */
+    private byte[] padIfNeeded(byte[] buffer, int len) {
+        if (buffer.length - len < PADDING) {
+            if (len > DEFAULT_CAPACITY) {
+                throw new IllegalArgumentException(
+                        "The input size (" + len + ") exceeds the parser capacity (" + DEFAULT_CAPACITY + ")");
+            }
+            System.arraycopy(buffer, 0, paddedBuffer, 0, len);
+            return paddedBuffer;
+        }
+        return buffer;
+    }
+}
diff --git a/src/main/java/org/simdjson/ResolvedClass.java b/src/main/java/org/simdjson/ResolvedClass.java
index 67c6887..0fdc45d 100644
--- a/src/main/java/org/simdjson/ResolvedClass.java
+++ b/src/main/java/org/simdjson/ResolvedClass.java
@@ -10,9 +10,9 @@
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 
-class ResolvedClass {
+public class ResolvedClass {
 
-    enum ResolvedClassCategory {
+    public enum ResolvedClassCategory {
         BOOLEAN_PRIMITIVE(boolean.class, new boolean[0]),
         BOOLEAN(Boolean.class, new Boolean[0]),
         BYTE_PRIMITIVE(byte.class, new byte[0]),
diff --git a/src/test/java/org/simdjson/demand/PathsBasedTest.java b/src/test/java/org/simdjson/demand/PathsBasedTest.java
new file mode 100644
--- /dev/null
+++ b/src/test/java/org/simdjson/demand/PathsBasedTest.java
@@ -0,0 +1,99 @@
+package org.simdjson.demand;
+
+import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
+import static org.simdjson.testutils.TestUtils.toUtf8;
+
+import org.junit.jupiter.api.Test;
+import org.simdjson.PathsBasedJsonParser;
+
+public class PathsBasedTest {
+    @Test
+    public void testParseObjectWithDefaultTypeString() {
+        byte[] bytes = toUtf8("{\"first\": 1, \"field\": 2, \"second\": 3}");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("first", "second", "third");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo("1");
+        assertThat(result[1]).isEqualTo("3");
+        assertThat(result[2]).isEqualTo(null);
+    }
+
+    @Test
+    public void testParseObjectWithType() {
+        byte[] bytes = toUtf8("{\"first\": 1, \"field\": 2, \"second\": 3}");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("first:INT", "second:INT", "third:INT");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo(1);
+        assertThat(result[1]).isEqualTo(3);
+        assertThat(result[2]).isEqualTo(null);
+    }
+
+    @Test
+    public void testParseArrayWithDefaultTypeString() {
+        byte[] bytes = toUtf8("[1, \"a\", {\"[first]\": 1, \"field\": 2, \"second\": 3}]");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("[0]", "[2].[[first]]", "[2].second", "[2]");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo("1");
+        assertThat(result[1]).isEqualTo("1");
+        assertThat(result[2]).isEqualTo("3");
+        assertThat(result[3]).isEqualTo("{\"[first]\": 1, \"field\": 2, \"second\": 3}");
+    }
+
+    @Test
+    public void testParseArrayWithType() {
+        byte[] bytes = toUtf8("[1, \"a\", {\"[first]\": 1, \"field\": 2, \"second\": 3}]");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("[0]:INT", "[2].[[first]]:INT", "[2].second:INT", "[2]:STRING");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo(1);
+        assertThat(result[1]).isEqualTo(1);
+        assertThat(result[2]).isEqualTo(3);
+        assertThat(result[3]).isEqualTo("{\"[first]\": 1, \"field\": 2, \"second\": 3}");
+    }
+
+    @Test
+    public void testFieldNamesWithNonAsciiCharacters() {
+        byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("ąćśńźż:INT", "\\u20A9\\u0E3F:INT", "αβγ:INT", "😀abc😀:INT");
+        Object[] result = parser.parse(json, json.length);
+        assertThat(result[0]).isEqualTo(1);
+        assertThat(result[1]).isEqualTo(2);
+        assertThat(result[2]).isEqualTo(3);
+        assertThat(result[3]).isEqualTo(4);
+    }
+
+    @Test
+    public void testComplexJson() {
+        byte[] json =
+                toUtf8("{\"object1\":{\"field1\":123,\"field2\":\"xyz\",\"field3\":3.14,\"field4\":true,\"field5\":null,\"field6\":{\"field7\":{\"field8\":\"abc\"},\"field9\":[1,2,3,4],\"field10\":[1,\"xyz\",1.1,[1,2,3]]}},\"object2\":[\"xyz\",{},123,null,[[1,2,3],[4,5,6]]]}");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("object1.field1:INT", "object1.field3:DOUBLE", "object1.field6.field9",
+                "object1.field6.field9.[0]:INT", "object1.field6.field9.[0].[0]", "object2.[4]", "object2.[4].[1]", "object2.[4].[1].[1]:INT");
+        Object[] result = parser.parse(json, json.length);
+        assertThat(result[0]).isEqualTo(123);
+        assertThat(result[1]).isEqualTo(3.14);
+        assertThat(result[2]).isEqualTo("[1,2,3,4]");
+        assertThat(result[3]).isEqualTo(1);
+        assertThat(result[4]).isEqualTo(null);
+        assertThat(result[5]).isEqualTo("[[1,2,3],[4,5,6]]");
+        assertThat(result[6]).isEqualTo("[4,5,6]");
+        assertThat(result[7]).isEqualTo(5);
+    }
+
+    @Test
+    public void testWhitespaceAroundKeysAndValues() {
+        byte[] bytes = toUtf8("{ \"first\" : 1 , \"second\" : \"a\" , \"third\" : { \"x\" : true } }");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("first", "second", "third", "third.x:BOOLEAN");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo("1");
+        assertThat(result[1]).isEqualTo("a");
+        assertThat(result[2]).isEqualTo("{ \"x\" : true }");
+        assertThat(result[3]).isEqualTo(true);
+    }
+
+    @Test
+    public void testScalarOnIntermediatePathStepIsSkipped() {
+        byte[] bytes = toUtf8("{\"first\": 1, \"second\": 2}");
+        PathsBasedJsonParser parser = new PathsBasedJsonParser("first.nested:INT", "second:INT");
+        Object[] result = parser.parse(bytes, bytes.length);
+        assertThat(result[0]).isEqualTo(null);
+        assertThat(result[1]).isEqualTo(2);
+    }
+}