From 437f45ab4ccb93314b5b1e2f4ce7bfba48f55fec Mon Sep 17 00:00:00 2001
From: SriramKeerthi
Date: Sat, 26 Nov 2016 21:18:07 -0800
Subject: [PATCH 1/3] Adding support for iterators

---
 README.md                                    |   2 +-
 jaggr/pom.xml                                |   2 +-
 .../com/caffinc/jaggr/core/Aggregation.java  |  95 ++++++--
 .../jaggr/core/AggregationBuilderTest.java   | 206 +++++++++++++++++-
 .../test/resources/collectStringsResult.json |   5 +
 5 files changed, 285 insertions(+), 25 deletions(-)
 create mode 100644 jaggr/src/test/resources/collectStringsResult.json

diff --git a/README.md b/README.md
index a4b0666..d314f57 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ jaggr is on Bintray and Maven Central (Soon):
     <dependency>
         <groupId>com.caffinc</groupId>
         <artifactId>jaggr</artifactId>
-        <version>0.1</version>
+        <version>0.2</version>
     </dependency>
 
 Assume the following JSON documents are stored in a file called `raw.json`:

diff --git a/jaggr/pom.xml b/jaggr/pom.xml
index 030a90e..2f258fd 100644
--- a/jaggr/pom.xml
+++ b/jaggr/pom.xml
@@ -6,7 +6,7 @@
     <groupId>com.caffinc</groupId>
    <artifactId>jaggr</artifactId>
-    <version>0.1</version>
+    <version>0.2</version>
    <name>jaggr</name>
    <description>Simple JSON Aggregator for Java</description>
    <url>https://github.com/caffinc/jaggr</url>

diff --git a/jaggr/src/main/java/com/caffinc/jaggr/core/Aggregation.java b/jaggr/src/main/java/com/caffinc/jaggr/core/Aggregation.java
index cf8e5c2..c2a3011 100644
--- a/jaggr/src/main/java/com/caffinc/jaggr/core/Aggregation.java
+++ b/jaggr/src/main/java/com/caffinc/jaggr/core/Aggregation.java
@@ -3,13 +3,10 @@
 import com.caffinc.jaggr.core.operations.Operation;
 import com.caffinc.jaggr.core.utils.FieldValueExtractor;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 /**
- * Aggregates list of objects based on operations
+ * Aggregates lists or iterators of JSON objects based on aggregation operations
  *
  * @author Sriram
 * @since 11/26/2016
@@ -25,29 +22,83 @@ public class Aggregation {
         this.operationMap = operationMap;
     }
 
-    public List<Map<String, Object>> aggregate(List<Map<String, Object>> objects) {
+    /**
+     * Aggregates over an Iterator of JSON Objects
+     *
+     * @param objectIterator JSON Object iterator
+     * @return aggregation result
+     */
+    public List<Map<String, Object>> aggregate(Iterator<Map<String, Object>> objectIterator) {
         Map<String, Map<String, Object>> workspace = new HashMap<>();
-        for (Map<String, Object> object : objects) {
-            String id = "0";
-            if (_id != null) {
-                id = String.valueOf(FieldValueExtractor.getValue(idSplit, object));
+        if (objectIterator != null) {
+            while (objectIterator.hasNext()) {
+                Map<String, Object> object = objectIterator.next();
+                aggregate(object, workspace);
             }
-            if (!workspace.containsKey(id)) {
-                Map<String, Object> groupWorkspace = new HashMap<>();
-                groupWorkspace.put("_id", id);
-                workspace.put(id, groupWorkspace);
-            }
-            Map<String, Object> groupWorkspace = workspace.get(id);
-            for (Map.Entry<String, Operation> operationEntry : operationMap.entrySet()) {
-                String field = operationEntry.getKey();
-                Operation operation = operationEntry.getValue();
-                Object t0 = groupWorkspace.get(field);
-                Object t1 = operation.aggregate(t0, object);
-                groupWorkspace.put(field, t1);
+        }
+        return computeResults(workspace);
+    }
+
+    /**
+     * Aggregates over a list of JSON Objects
+     *
+     * @param objectList JSON Object list
+     * @return aggregation result
+     */
+    public List<Map<String, Object>> aggregate(List<Map<String, Object>> objectList) {
+        Map<String, Map<String, Object>> workspace = new HashMap<>();
+        if (objectList != null) {
+            for (Map<String, Object> object : objectList) {
+                aggregate(object, workspace);
             }
         }
+        return computeResults(workspace);
+    }
+
+    /**
+     * Aggregates a single object into the workspace
+     *
+     * @param object    Object to perform aggregations on
+     * @param workspace Workspace to hold temporary aggregation results in
+     */
+    private void aggregate(Map<String, Object> object, Map<String, Map<String, Object>> workspace) {
+        // Identify the ID of the document
+        String id = "0";
+        if (_id != null) {
+            id = String.valueOf(FieldValueExtractor.getValue(idSplit, object));
+        }
+        if (!workspace.containsKey(id)) {
+            Map<String, Object> groupWorkspace = new HashMap<>();
+            groupWorkspace.put("_id", id);
+            workspace.put(id, groupWorkspace);
+        }
+        // Get the workspace for the given ID
+        Map<String, Object> groupWorkspace = workspace.get(id);
+        for (Map.Entry<String, Operation> operationEntry : operationMap.entrySet()) {
+            // Get the key in the workspace
+            String field = operationEntry.getKey();
+            // Get the operation
+            Operation operation = operationEntry.getValue();
+            // Get the accumulated value in the workspace
+            Object t0 = groupWorkspace.get(field);
+            // Get the new value after performing the operation
+            Object t1 = operation.aggregate(t0, object);
+            // Write the result back in the workspace
+            groupWorkspace.put(field, t1);
+        }
+    }
+
+    /**
+     * Computes final results from the workspace
+     *
+     * @param workspace Workspace holding intermediate results
+     * @return Final aggregation result
+     */
+    private List<Map<String, Object>> computeResults(Map<String, Map<String, Object>> workspace) {
         List<Map<String, Object>> resultList = new ArrayList<>();
+        // Loop through all the values in the workspace map
         for (Map<String, Object> groupWorkspace : workspace.values()) {
+            // Perform final result computation
             for (Map.Entry<String, Operation> operationEntry : operationMap.entrySet()) {
                 String field = operationEntry.getKey();
                 Operation operation = operationEntry.getValue();

diff --git a/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java b/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
index 6d272f8..87ebc74 100644
--- a/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
+++ b/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
@@ -19,7 +19,7 @@
  */
 public class AggregationBuilderTest {
     private static final Gson gson = new Gson();
-    private static List<Map<String, Object>> jsonList = new ArrayList<>();
+    private static List<Map<String, Object>> jsonList;
 
     private static <T> T roughen(Object o, Class<T> t) {
         return gson.fromJson(gson.toJson(o), t);
     }
@@ -202,4 +202,208 @@ public void testMultiOperation() throws Exception {
         Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList), HashSet.class);
         assertEquals("Multiple aggregations result should be as expected", expected, result);
     }
+
+    @Test
+    public void testCollectStrings() throws Exception {
+        String field = "test.f";
+        String collectField = "f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("collectStringsResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("list", new CollectOperation(collectField))
+                .addOperation("set", new CollectSetOperation(collectField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList), HashSet.class);
+        assertEquals("Collect for Strings should work as expected", expected, result);
+    }
+
+
+    @Test
+    public void testIterativeSimpleGrouping() throws Exception {
+        String field = "f";
+        Set<Object> expectedResult = new HashSet<>();
+        for (Map<String, Object> obj : jsonList) {
+            expectedResult.add(String.valueOf(obj.get(field)));
+        }
+
+        Set<Object> result = new HashSet<>();
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field).getAggregation();
+        List<Map<String, Object>> resultList = aggregation.aggregate(jsonList.iterator());
+        for (Map<String, Object> resultObj : resultList) {
+            result.add(resultObj.get("_id"));
+        }
+
+        assertEquals("Grouping by ID should match", expectedResult, result);
+    }
+
+    @Test
+    public void testIterativeNestedGrouping() throws Exception {
+        String field = "test.f";
+        Set<Object> expectedResult = new HashSet<>();
+        for (Map<String, Object> obj : jsonList) {
+            expectedResult.add(String.valueOf(FieldValueExtractor.getValue(field, obj)));
+        }
+
+        Set<Object> result = new HashSet<>();
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field).getAggregation();
+        List<Map<String, Object>> resultList = aggregation.aggregate(jsonList.iterator());
+        for (Map<String, Object> resultObj : resultList) {
+            result.add(resultObj.get("_id"));
+        }
+
+        assertEquals("Grouping by field should match", expectedResult, result);
+    }
+
+    @Test
+    public void testIterativeCountOperation() throws Exception {
+        String field = "f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("countResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("count", new CountOperation())
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Counts should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeMaxOperation() throws Exception {
+        String field = "f";
+        String maxField = "test.f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("maxResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("max", new MaxOperation(maxField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Max should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeMinOperation() throws Exception {
+        String field = "f";
+        String minField = "test.f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("minResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("min", new MinOperation(minField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Min should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeCollectOperation() throws Exception {
+        String field = "f";
+        String collectField = "_id";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("collectResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("list", new CollectOperation(collectField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Collected lists should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeCollectSetOperation() throws Exception {
+        String field = "f";
+        String collectField = "test.f";
+        Map<String, Object> expectedMap = new HashMap<>();
+        for (Map<String, Object> expectedObject : JsonFileReader.readJsonFromResource("collectSetResult.json")) {
+            expectedMap.put(String.valueOf(expectedObject.get("_id")), new HashSet((List) expectedObject.get("set")));
+        }
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("set", new CollectSetOperation(collectField))
+                .getAggregation();
+        Map<String, Object> resultMap = new HashMap<>();
+        for (Map<String, Object> resultObject : (Set<Map<String, Object>>) roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class)) {
+            resultMap.put(String.valueOf(resultObject.get("_id")), new HashSet((List) resultObject.get("set")));
+        }
+        assertEquals("Collected sets should be as expected", expectedMap, resultMap);
+    }
+
+    @Test
+    public void testIterativeSumOperation() throws Exception {
+        String field = "f";
+        String sumField = "test.f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("sumResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("sum", new SumOperation(sumField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Sum should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeAverageOperation() throws Exception {
+        String field = "f";
+        String avgField = "test.f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("avgResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("avg", new AverageOperation(avgField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Average should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeOperationWithoutGrouping() throws Exception {
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("grouplessResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .addOperation("count", new CountOperation())
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Counts without grouping should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeMultiOperation() throws Exception {
+        String field = "f";
+        String avgField = "test.f";
+        String sumField = "test.f";
+        String minField = "test.f";
+        String maxField = "test.f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("multiResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("avg", new AverageOperation(avgField))
+                .addOperation("sum", new SumOperation(sumField))
+                .addOperation("min", new MinOperation(minField))
+                .addOperation("max", new MaxOperation(maxField))
+                .addOperation("count", new CountOperation())
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Multiple aggregations result should be as expected", expected, result);
+    }
+
+    @Test
+    public void testIterativeCollectStrings() throws Exception {
+        String field = "test.f";
+        String collectField = "f";
+        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("collectStringsResult.json"));
+
+        Aggregation aggregation = new AggregationBuilder()
+                .setGroupBy(field)
+                .addOperation("list", new CollectOperation(collectField))
+                .addOperation("set", new CollectSetOperation(collectField))
+                .getAggregation();
+        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
+        assertEquals("Collect for Strings should work as expected", expected, result);
+    }
 }

diff --git a/jaggr/src/test/resources/collectStringsResult.json b/jaggr/src/test/resources/collectStringsResult.json
new file mode 100644
index 0000000..1ef21e5
--- /dev/null
+++ b/jaggr/src/test/resources/collectStringsResult.json
@@ -0,0 +1,5 @@
+{"_id": "1.0", "set": ["a", "b"], "list": ["a", "b", "b", "b", "b", "b"]}
+{"_id": "-1.0", "set": ["a"], "list": ["a"]}
+{"_id": "2.0", "set": ["a"], "list": ["a"]}
+{"_id": "3.0", "set": ["a"], "list": ["a"]}
+{"_id": "5.0", "set": ["a"], "list": ["a"]}

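The sketch below is an illustration added alongside PATCH 1, not part of it: it shows one way the new `aggregate(Iterator<Map<String, Object>>)` overload could be driven from a file of line-delimited JSON, so that only the per-group workspace, and not the whole input list, is held in memory. The file name `raw.json`, the Gson parsing, the example fields `f` and `test.f`, and the assumption that `CountOperation` and `SumOperation` live in the same `operations` package as `Operation` are taken from the README and tests rather than guaranteed by this patch.

    // Usage sketch only: stream documents through the new Iterator-based
    // aggregate() without first materializing a List. Assumes one JSON
    // document per line in raw.json and Gson on the classpath.
    import com.caffinc.jaggr.core.Aggregation;
    import com.caffinc.jaggr.core.AggregationBuilder;
    import com.caffinc.jaggr.core.operations.CountOperation;
    import com.caffinc.jaggr.core.operations.SumOperation;
    import com.google.gson.Gson;

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;

    public class StreamingAggregationExample {
        private static final Gson gson = new Gson();

        public static void main(String[] args) throws IOException {
            // Same builder API the tests use; "f" and "test.f" are example fields
            Aggregation aggregation = new AggregationBuilder()
                    .setGroupBy("f")
                    .addOperation("count", new CountOperation())
                    .addOperation("sum", new SumOperation("test.f"))
                    .getAggregation();

            try (BufferedReader reader = new BufferedReader(new FileReader("raw.json"))) {
                // Parse lazily, one document per line; the iterator is consumed
                // inside aggregate(), so only group workspaces stay in memory
                Iterator<Map<String, Object>> documents = reader.lines()
                        .map(line -> {
                            @SuppressWarnings("unchecked")
                            Map<String, Object> doc = gson.fromJson(line, Map.class);
                            return doc;
                        })
                        .iterator();

                List<Map<String, Object>> result = aggregation.aggregate(documents);
                result.forEach(System.out::println);
            }
        }
    }

If the documents spanned multiple lines, a streaming JSON reader would be needed in place of the line-by-line parse.
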
From fc8a4c993bd2edf204dafc20f4d10fa68fee1c0b Mon Sep 17 00:00:00 2001
From: SriramKeerthi
Date: Sat, 26 Nov 2016 21:26:39 -0800
Subject: [PATCH 2/3] Adding iterator info to README.md

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index d314f57..71ffbf3 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,10 @@ Aggregation can now be performed using the `aggregate()` method:
 
     List<Map<String, Object>> result = aggregation.aggregate(jsonList);
 
+Aggregation also supports Iterators:
+
+    List<Map<String, Object>> result = aggregation.aggregate(jsonList.iterator());
+
 The result of the above aggregation would look as follows:
 
     {"_id": "a", "avg": 2.0, "sum": 10, "min": -1, "max": 5, "count": 5}

From 5ad7d8f920127a3d2d35155b0de8daa1f408d0a8 Mon Sep 17 00:00:00 2001
From: SriramKeerthi
Date: Sat, 26 Nov 2016 21:36:18 -0800
Subject: [PATCH 3/3] Updating tests

---
 .../jaggr/core/AggregationBuilderTest.java | 36 +++++++++++++++----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java b/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
index 87ebc74..42d8052 100644
--- a/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
+++ b/jaggr/src/test/java/com/caffinc/jaggr/core/AggregationBuilderTest.java
@@ -207,15 +207,27 @@ public void testMultiOperation() throws Exception {
     public void testCollectStrings() throws Exception {
         String field = "test.f";
         String collectField = "f";
-        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("collectStringsResult.json"));
+        Map<String, Object> expectedMap1 = new HashMap<>();
+        Map<String, Object> expectedMap2 = new HashMap<>();
+        for (Map<String, Object> expectedObject : JsonFileReader.readJsonFromResource("collectStringsResult.json")) {
+            expectedMap1.put(String.valueOf(expectedObject.get("_id")), new HashSet((List) expectedObject.get("set")));
+            expectedMap2.put(String.valueOf(expectedObject.get("_id")), expectedObject.get("list"));
+        }
 
         Aggregation aggregation = new AggregationBuilder()
                 .setGroupBy(field)
                 .addOperation("list", new CollectOperation(collectField))
                 .addOperation("set", new CollectSetOperation(collectField))
                 .getAggregation();
-        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList), HashSet.class);
-        assertEquals("Collect for Strings should work as expected", expected, result);
+        Map<String, Object> resultMap1 = new HashMap<>();
+        Map<String, Object> resultMap2 = new HashMap<>();
+        for (Map<String, Object> resultObject : (Set<Map<String, Object>>) roughen(aggregation.aggregate(jsonList), HashSet.class)) {
+            resultMap1.put(String.valueOf(resultObject.get("_id")), new HashSet((List) resultObject.get("set")));
+            resultMap2.put(String.valueOf(resultObject.get("_id")), resultObject.get("list"));
+        }
+
+        assertEquals("CollectSet for Strings should work as expected", expectedMap1, resultMap1);
+        assertEquals("Collect for Strings should work as expected", expectedMap2, resultMap2);
     }
 
 
@@ -396,14 +408,26 @@ public void testIterativeMultiOperation() throws Exception {
     public void testIterativeCollectStrings() throws Exception {
         String field = "test.f";
         String collectField = "f";
-        Set<Map<String, Object>> expected = new HashSet<>(JsonFileReader.readJsonFromResource("collectStringsResult.json"));
+        Map<String, Object> expectedMap1 = new HashMap<>();
+        Map<String, Object> expectedMap2 = new HashMap<>();
+        for (Map<String, Object> expectedObject : JsonFileReader.readJsonFromResource("collectStringsResult.json")) {
+            expectedMap1.put(String.valueOf(expectedObject.get("_id")), new HashSet((List) expectedObject.get("set")));
+            expectedMap2.put(String.valueOf(expectedObject.get("_id")), expectedObject.get("list"));
+        }
 
         Aggregation aggregation = new AggregationBuilder()
                 .setGroupBy(field)
                 .addOperation("list", new CollectOperation(collectField))
                 .addOperation("set", new CollectSetOperation(collectField))
                 .getAggregation();
-        Set<Map<String, Object>> result = roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class);
-        assertEquals("Collect for Strings should work as expected", expected, result);
+        Map<String, Object> resultMap1 = new HashMap<>();
+        Map<String, Object> resultMap2 = new HashMap<>();
+        for (Map<String, Object> resultObject : (Set<Map<String, Object>>) roughen(aggregation.aggregate(jsonList.iterator()), HashSet.class)) {
+            resultMap1.put(String.valueOf(resultObject.get("_id")), new HashSet((List) resultObject.get("set")));
+            resultMap2.put(String.valueOf(resultObject.get("_id")), resultObject.get("list"));
+        }
+
+        assertEquals("CollectSet for Strings should work as expected", expectedMap1, resultMap1);
+        assertEquals("Collect for Strings should work as expected", expectedMap2, resultMap2);
     }
 }
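
One observation on PATCH 3, with an illustrative helper that is not part of the patch: the updated collect tests key every result by its `_id` and wrap the collected `set` values in a `HashSet`, presumably so the assertions no longer depend on the order in which a collected set happens to be serialized. That keying step could be factored into a small test helper along the following lines; the class and method names are hypothetical.

    // Hypothetical test helper in the spirit of the PATCH 3 changes: keys each
    // aggregation result by "_id" and optionally turns a collected List into a
    // HashSet so comparisons ignore element order.
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;

    final class CollectAssertions {
        private CollectAssertions() {
        }

        static Map<String, Object> byId(Iterable<Map<String, Object>> results, String field, boolean asSet) {
            Map<String, Object> keyed = new HashMap<>();
            for (Map<String, Object> result : results) {
                Object value = result.get(field);
                if (asSet && value instanceof List) {
                    // Drop ordering information for set-valued fields
                    value = new HashSet<Object>((List<?>) value);
                }
                keyed.put(String.valueOf(result.get("_id")), value);
            }
            return keyed;
        }
    }

With such a helper each collect test reduces to two assertEquals calls, for example assertEquals(byId(expected, "set", true), byId(actual, "set", true)) for the CollectSetOperation output, and the same call with "list" and false for the CollectOperation output.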