Template-based Query Generation: Generalization and User Configuration #153

Status: Open. Wants to merge 32 commits into base: master.

Commits (32)
64db68e - brought skeleton up to date (Aug 3, 2020)
bb726fb - brought up to date with Victor's progress---it works (Aug 3, 2020)
00122b1 - refactored Pairs and fixed indentation (Aug 5, 2020)
ff8955b - restructured json for generalizability (spoiledhua, Aug 6, 2020)
533dfe7 - made edits to correct queries, added pair class correctly (Aug 7, 2020)
1c11d2b - Merge branch 'query-generation' of https://github.com/AllenWang314/bi… (Aug 7, 2020)
c7acd85 - added random date generation (Aug 7, 2020)
330b6e3 - refactored DataTypeMap parser for generalizability (spoiledhua, Aug 7, 2020)
a9f1f90 - fixed merge conflicts (spoiledhua, Aug 7, 2020)
95c1e0e - generalization refactoring complete (spoiledhua, Aug 9, 2020)
de9689a - Merge branch 'master' into query-generation (spoiledhua, Aug 9, 2020)
6d2a806 - added semicolons at end of queries (spoiledhua, Aug 9, 2020)
e868ee4 - Merge branch 'query-generation' of https://github.com/AllenWang314/bi… (spoiledhua, Aug 9, 2020)
6dcadd6 - manually added data generation (spoiledhua, Aug 11, 2020)
f626435 - merged Victor's progress in (Aug 12, 2020)
3c2e2e3 - added mySQL mappings (spoiledhua, Aug 12, 2020)
e57b53a - began initial user parsing (spoiledhua, Aug 12, 2020)
1437b7e - fix merge conflicts data-generation:master (spoiledhua, Aug 12, 2020)
d16de2b - Merge pull request #16 from AllenWang314/query-generation (spoiledhua, Aug 12, 2020)
1306d84 - updated config file (spoiledhua, Aug 12, 2020)
33551a7 - debugged a few queries (Aug 12, 2020)
d0bbd30 - finished user config parsing, generalization finalized (spoiledhua, Aug 12, 2020)
cf8ec2e - Merge pull request #17 from AllenWang314/user-config (spoiledhua, Aug 12, 2020)
bb94168 - fixed typo and merge conflict (Aug 12, 2020)
ac63da0 - added datset name (Aug 12, 2020)
ae79287 - added bigquery table formatting (spoiledhua, Aug 13, 2020)
06e373e - Merge branch 'master' into user-config (spoiledhua, Aug 13, 2020)
5bb5af7 - Merge pull request #18 from AllenWang314/user-config (spoiledhua, Aug 13, 2020)
d24a5ba - resolved merge conflicts (spoiledhua, Aug 13, 2020)
381fb1f - Merge pull request #19 from AllenWang314/user-config (spoiledhua, Aug 18, 2020)
8a1003b - added javadoc, refactored for consistent use of FeatureType instead o… (spoiledhua, Aug 19, 2020)
0355509 - Merge pull request #20 from AllenWang314/code-cleanup (spoiledhua, Aug 19, 2020)
Files changed (all commits)
QueryGenerator.java
@@ -1,22 +1,17 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import data.Table;
import graph.MarkovChain;
import graph.Node;
import parser.*;
import parser.FeatureType;
import parser.User;
import parser.Utils;
import query.Query;
import query.Skeleton;
import token.Tokenizer;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
* Class that parses config file and creates queries from markov chain
*/
@@ -29,40 +24,39 @@ public class QueryGenerator {
private final String filePathDependenciesDDL = "./src/main/resources/dialect_config/ddl_dependencies.json";
private final String filePathDependenciesDML = "./src/main/resources/dialect_config/dml_dependencies.json";
private final String filePathDependenciesDQL = "./src/main/resources/dialect_config/dql_dependencies.json";
private final String filePathUser = "./src/main/resources/user_config/config.json";

private final MarkovChain<Query> markovChain;
private Random r = new Random();
private Node<Query> source = new Node<>(new Query(FeatureType.FEATURE_ROOT), r);
private final User user = Utils.getUser(Paths.get(filePathUser));
private Node<Query> source = new Node<>(new Query(user.getStartFeature()), r);
private Node<Query> sink = new Node<>(new Query(user.getEndFeature()), r);

/**
*
* @throws Exception
* Query generator that converts query skeletons to real query strings ready for output
* @throws IOException if the IO for user parsing fails
*/
public QueryGenerator() throws Exception {
// TODO (Victor):
// 1. Use parser.Utils to parse user json and create graph.MarkovChain and nodes
// 2. Generate number of queries given in config
// 3. pass to them to Keyword or query.Skeleton

// create nodes
Map<String, Node<Query>> nodeMap = new HashMap<>();
addNodeMap(nodeMap, Paths.get(filePathConfigDDL), r);
addNodeMap(nodeMap, Paths.get(filePathConfigDML), r);
addNodeMap(nodeMap, Paths.get(filePathConfigDQL), r);

// TODO (Victor): Parse these two helper nodes from user config
nodeMap.put("FEATURE_ROOT", source);
nodeMap.put("FEATURE_SINK", new Node<>(new Query(FeatureType.FEATURE_SINK), r));

Map<String, List<String>> neighborMap = new HashMap<>();
addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDDL));
addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDML));
addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDQL));
addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesRoot));

for (String nodeKey : nodeMap.keySet()) {
public QueryGenerator() throws IOException {

// create map of references to nodes
Map<FeatureType, Node<Query>> nodeMap = new HashMap<>();
Utils.addNodeMap(nodeMap, Paths.get(filePathConfigDDL), r);
Utils.addNodeMap(nodeMap, Paths.get(filePathConfigDML), r);
Utils.addNodeMap(nodeMap, Paths.get(filePathConfigDQL), r);
nodeMap.put(user.getStartFeature(), source);
nodeMap.put(user.getEndFeature(), sink);

// create map of nodes to their neighbors
Map<FeatureType, List<FeatureType>> neighborMap = new HashMap<>();
Utils.addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDDL));
Utils.addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDML));
Utils.addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesDQL));
Utils.addNeighborMap(neighborMap, nodeMap.keySet(), Paths.get(filePathDependenciesRoot));

// set neighbors for each node
for (FeatureType nodeKey : nodeMap.keySet()) {
HashSet<Node<Query>> nodeNeighbors = new HashSet<>();
for (String neighbor : neighborMap.get(nodeKey)) {
for (FeatureType neighbor : neighborMap.get(nodeKey)) {
if (nodeMap.keySet().contains(neighbor)) {
nodeNeighbors.add(nodeMap.get(neighbor));
}
@@ -74,76 +68,40 @@ public QueryGenerator() throws Exception {
}

/**
* generates queries from markov chain starting from root
* @return real queries from markov chain starting from root
*/
public void generateQueries(int numberQueries) {
ImmutableList.Builder<String> postgreBuilder = ImmutableList.builder();
ImmutableList.Builder<String> bigQueryBuilder = ImmutableList.builder();
public void generateQueries() throws IOException {
Map<String, List<String>> dialectQueries = new HashMap<>();

for (String dialect : user.getDialectIndicators().keySet()) {
if (user.getDialectIndicators().get(dialect)) {
dialectQueries.put(dialect, new ArrayList<>());
}
}

Tokenizer tokenizer = new Tokenizer(r);

int i = 0;
while (i < numberQueries) {
while (i < user.getNumQueries()) {
List<Query> rawQueries = markovChain.randomWalk(source);

if (rawQueries.get(rawQueries.size()-1).getType() == FeatureType.FEATURE_SINK) {
List<Query> actualQueries = rawQueries.subList(2, rawQueries.size()-1);
List<Query> actualQueries = rawQueries.subList(2, rawQueries.size() - 1);
Skeleton skeleton = new Skeleton(actualQueries, tokenizer);
postgreBuilder.add(String.join(" ", skeleton.getPostgreSkeleton()));
bigQueryBuilder.add(String.join(" ", skeleton.getBigQuerySkeleton()));
bigQueryBuilder.add(";");
i++;
}
}

ImmutableList<String> postgreSyntax = postgreBuilder.build();
ImmutableList<String> bigQuerySyntax = bigQueryBuilder.build();

ImmutableMap.Builder<String, ImmutableList<String>> builder = ImmutableMap.builder();
builder.put("PostgreSQL", postgreSyntax);
builder.put("BigQuery", bigQuerySyntax);
ImmutableMap<String, ImmutableList<String>> outputs = builder.build();

try {
Utils.writeDirectory(outputs);
} catch (IOException exception){
exception.printStackTrace();
}
}

private Map<String, Node<Query>> addNodeMap(Map<String, Node<Query>> nodeMap, Path input, Random r) {
try {
BufferedReader reader = Files.newBufferedReader(input, UTF_8);
Gson gson = new Gson();
FeatureIndicators featureIndicators = gson.fromJson(reader, FeatureIndicators.class);

for (FeatureIndicator featureIndicator : featureIndicators.getFeatureIndicators()) {
if (featureIndicator.getIsIncluded()) {
nodeMap.put(featureIndicator.getFeature().name(), new Node<>(new Query(featureIndicator.getFeature()), r));
for (String dialect : user.getDialectIndicators().keySet()) {
if (user.getDialectIndicators().get(dialect)) {
dialectQueries.get(dialect).add(String.join(" ", skeleton.getDialectSkeletons().get(dialect)) + ";");
}
}
}
} catch (IOException exception) {
exception.printStackTrace();
i++;
}

return nodeMap;
}
Table dataTable = tokenizer.getTable();

private Map<String, List<String>> addNeighborMap(Map<String, List<String>> neighborMap, Set<String> nodes, Path input) {
try {
BufferedReader reader = Files.newBufferedReader(input, UTF_8);
Gson gson = new Gson();
Dependencies dependencies = gson.fromJson(reader, Dependencies.class);

for (Dependency dependency : dependencies.getDependencies()) {
if (nodes.contains(dependency.getNode())) {
neighborMap.put(dependency.getNode(), dependency.getNeighbors());
}
}
} catch (IOException exception) {
Utils.writeDirectory(dialectQueries, dataTable);
} catch (IOException exception){
exception.printStackTrace();
}

return neighborMap;
}

}
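
Note on the user config this diff assumes: the refactored QueryGenerator reads `user_config/config.json` through `Utils.getUser(...)` and then relies on `getStartFeature()`, `getEndFeature()`, `getNumQueries()`, and `getDialectIndicators()`. The config schema itself is not part of this diff, so the sketch below is only a hypothetical shape inferred from those getters (field names, the `MySQL` entry, and the nested `User` class here are guesses, not the repository's real `parser.User`):

```java
// Hypothetical sketch only: field names are inferred from the getters called in
// QueryGenerator (getStartFeature, getEndFeature, getNumQueries, getDialectIndicators).
import com.google.gson.Gson;
import java.util.Map;

public class UserConfigSketch {

  // Minimal stand-in for parser.FeatureType; the real enum has many more values.
  enum FeatureType { FEATURE_ROOT, FEATURE_SINK }

  static class User {
    FeatureType startFeature;
    FeatureType endFeature;
    int numQueries;
    Map<String, Boolean> dialectIndicators;
  }

  public static void main(String[] args) {
    // A config.json shaped like this would satisfy the getters used in the diff.
    String json = "{"
        + "\"startFeature\": \"FEATURE_ROOT\","
        + "\"endFeature\": \"FEATURE_SINK\","
        + "\"numQueries\": 50,"
        + "\"dialectIndicators\": {\"PostgreSQL\": true, \"BigQuery\": true, \"MySQL\": false}"
        + "}";
    User user = new Gson().fromJson(json, User.class);
    System.out.println(user.numQueries + " queries, start=" + user.startFeature);
  }
}
```

If the real config follows this shape, the dialect keys would have to match the keys returned by `skeleton.getDialectSkeletons()`, since `generateQueries` uses the same strings to select which outputs to build.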
29 changes: 15 additions & 14 deletions tools/template_based_query_generation/src/main/java/data/Table.java
@@ -5,6 +5,7 @@

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;


/**
@@ -24,7 +25,7 @@ public class Table {
public Table(String name) {
this.name = name;
this.numRows = 0;
this.schema = new ArrayList<MutablePair<String, DataType>>();
this.schema = new ArrayList<>();
}

/**
@@ -36,7 +37,7 @@ public void addColumn(String columnName, DataType type) {
this.schema.add(new MutablePair(columnName, type));
}

public ArrayList<MutablePair<String, DataType>> getSchema() {
public List<MutablePair<String, DataType>> getSchema() {
return this.schema;
}

@@ -71,7 +72,7 @@ public String getRandomColumn() {
* @return name of random column of given type
*/
public String getRandomColumn(DataType type) {
ArrayList<MutablePair<String, DataType>> columns = new ArrayList<MutablePair<String, DataType>>();
List<MutablePair<String, DataType>> columns = new ArrayList<>();
for (MutablePair<String, DataType> col: this.schema) {
if (col.getRight() == type) columns.add(col);
}
@@ -86,39 +87,39 @@ public String getRandomColumn(DataType type) {
* @return column of data with type dataType and numRows rows
* @throws IllegalArgumentException
*/
public ArrayList<?> generateColumn(int numRows, DataType dataType) throws IllegalArgumentException {
public List<?> generateColumn(int numRows, DataType dataType) throws IllegalArgumentException {
if (dataType.isIntegerType()) {
ArrayList<Integer> data = new ArrayList<Integer>();
List<Integer> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomIntegerData(dataType));
}
return data;
} else if (dataType.isLongType()) {
ArrayList<Long> data = new ArrayList<Long>();
List<Long> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomLongData(dataType));
}
return data;
} else if (dataType.isDoubleType()) {
ArrayList<Double> data = new ArrayList<Double>();
List<Double> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomDoubleData(dataType));
}
return data;
} else if (dataType.isBigDecimalType()) {
ArrayList<BigDecimal> data = new ArrayList<BigDecimal>();
List<BigDecimal> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomBigDecimalData(dataType));
}
return data;
} else if (dataType.isStringType()) {
ArrayList<String> data = new ArrayList<String>();
List<String> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomStringData(dataType));
}
return data;
} else if (dataType.isBooleanType()) {
ArrayList<Boolean> data = new ArrayList<Boolean>();
List<Boolean> data = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
data.add(Utils.generateRandomBooleanData(dataType));
}
@@ -132,7 +133,7 @@ public ArrayList<?> generateColumn(int numRows, DataType dataType) throws IllegalArgumentException {
*
* @return sample data with number of rows being number of rows in table
*/
public ArrayList<ArrayList<?>> generateData() {
public List<List<?>> generateData() {
return generateData(this.numRows);
}

@@ -142,10 +143,10 @@ public ArrayList<ArrayList<?>> generateData() {
* @param numRows number of rows to generate
* @return sample data with number of rows being numRows
*/
public ArrayList<ArrayList<?>> generateData(int numRows) {
ArrayList<ArrayList<?>> data = new ArrayList<ArrayList<?>>();
public List<List<?>> generateData(int numRows) {
List<List<?>> data = new ArrayList<>();
for (int i = 0; i < this.schema.size(); i++) {
ArrayList<?> column = this.generateColumn(numRows, this.schema.get(i).getRight());
List<?> column = this.generateColumn(numRows, this.schema.get(i).getRight());
data.add(column);
}
return data;
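
The Table changes above are mostly "program to the interface": public signatures now return `List` instead of `ArrayList`. One detail worth keeping in mind when consuming them is that `generateData(int)` builds one inner list per schema column, so the result reads as column-major data. A small self-contained sketch of a consumer that prints such data row by row (the sample lists in `main` are stand-ins, not output from the real Table class):

```java
import java.util.Arrays;
import java.util.List;

public class ColumnMajorPrinter {

  // Prints column-major data (one inner list per column, as the loop in
  // Table.generateData(int) appears to build it) one row at a time.
  // Assumes all columns have the same length.
  static void printRows(List<String> columnNames, List<List<?>> columns) {
    System.out.println(String.join(", ", columnNames));
    int numRows = columns.isEmpty() ? 0 : columns.get(0).size();
    for (int row = 0; row < numRows; row++) {
      StringBuilder line = new StringBuilder();
      for (int col = 0; col < columns.size(); col++) {
        if (col > 0) line.append(", ");
        line.append(columns.get(col).get(row));
      }
      System.out.println(line);
    }
  }

  public static void main(String[] args) {
    // Stand-in data; in the real pipeline these lists would come from generateData().
    printRows(Arrays.asList("id", "flag"),
        Arrays.asList(Arrays.asList(1, 2, 3), Arrays.asList(true, false, true)));
  }
}
```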
graph/MarkovChain.java
@@ -17,7 +17,7 @@ public class MarkovChain<E> {
* @param nodes
*/
public MarkovChain(HashMap<Node<E>, HashMap<Node<E>, Double>> nodes) {
this.nodes = new HashSet<Node<E>>();
this.nodes = new HashSet<>();
this.nodes.addAll(nodes.keySet());
for (Node<E> n : nodes.keySet()) {
n.setNeighbors(nodes.get(n));
@@ -38,7 +38,7 @@ public MarkovChain(HashSet<Node<E>> nodes) {
* @return list of nodes for a random walk from start node
*/
public ArrayList<E> randomWalk(Node<E> start) {
ArrayList<E> walk = new ArrayList<E>();
ArrayList<E> walk = new ArrayList<>();
Node<E> current = start;
while (current.hasNextNode()) {
walk.add(current.getObj());
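
For context: `MarkovChain.randomWalk` collects node payloads until a node with no outgoing edge is reached (`hasNextNode()`), and `QueryGenerator.generateQueries` then keeps only walks whose last feature is the sink. The tail of `randomWalk` is collapsed in this diff, so the exact step logic is not visible; the following is a simplified, self-contained sketch of that walk-and-filter pattern using plain maps and uniform transitions rather than the repository's `Node`/`MarkovChain` classes:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

public class RandomWalkSketch {

  // Simplified stand-in for MarkovChain.randomWalk: step through uniformly
  // chosen neighbors until a node with no outgoing edges is reached.
  static List<String> randomWalk(Map<String, List<String>> neighbors, String start, Random r) {
    List<String> walk = new ArrayList<>();
    String current = start;
    while (true) {
      walk.add(current);
      List<String> next = neighbors.getOrDefault(current, Collections.emptyList());
      if (next.isEmpty()) break;
      current = next.get(r.nextInt(next.size()));
    }
    return walk;
  }

  public static void main(String[] args) {
    // Toy feature graph: ROOT -> {DDL, DQL}, DDL -> SINK, DQL -> {DQL, SINK}.
    Map<String, List<String>> neighbors = new HashMap<>();
    neighbors.put("FEATURE_ROOT", Arrays.asList("DDL", "DQL"));
    neighbors.put("DDL", Arrays.asList("FEATURE_SINK"));
    neighbors.put("DQL", Arrays.asList("DQL", "FEATURE_SINK"));

    List<String> walk = randomWalk(neighbors, "FEATURE_ROOT", new Random(42));
    // Like generateQueries, only keep walks that actually end at the sink feature.
    if (walk.get(walk.size() - 1).equals("FEATURE_SINK")) {
      System.out.println(walk);
    }
  }
}
```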
graph/Node.java
@@ -21,7 +21,7 @@ public class Node<E> {
public Node(E obj, int seed) {
this.obj = obj;
this.r = new Random(seed);
this.setNeighbors(new HashMap<Node<E>, Double>());
this.setNeighbors(new HashMap<>());
}

/**
@@ -31,14 +31,14 @@ public Node(E obj, int seed) {
public Node(E obj, Random r) {
this.obj = obj;
this.r = r;
this.setNeighbors(new HashMap<Node<E>, Double>());
this.setNeighbors(new HashMap<>());
}

/**
* updates neighborList and cProbabilities when neighbors is changed
*/
private void updateProbabilities(HashMap<Node<E>, Double> neighbors) {
TreeMap<Double, Node<E>> newCumulativeProbabilities = new TreeMap<Double, Node<E>>();
TreeMap<Double, Node<E>> newCumulativeProbabilities = new TreeMap<>();
if (neighbors.size() != 0) {
double total = 0;
for (Node<E> n: neighbors.keySet()) {
@@ -90,7 +90,7 @@ public void setNeighbors(HashMap<Node<E>, Double> neighbors) {
* @param neighbors
*/
public void setNeighbors(HashSet<Node<E>> neighbors) {
HashMap<Node<E>, Double> edges = new HashMap<Node<E>, Double>();
HashMap<Node<E>, Double> edges = new HashMap<>();
double c = (neighbors.size() == 0) ? 0 : 1.0/neighbors.size();
for (Node<E> n: neighbors) {
edges.put(n, c);
Expand All @@ -102,4 +102,4 @@ public TreeMap<Double, Node<E>> getCumulativeProbabilities() {
return this.cumulativeProbabilities;
}

}
}
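
Background on the data structure touched here: `Node` keeps a `TreeMap<Double, Node<E>>` of cumulative probabilities, which `updateProbabilities` fills by normalizing the neighbor weights into a running total. A common way to sample from such a table is to draw a uniform value and look up the first cumulative key above it. The sketch below is self-contained and illustrative only (it is not the repository's implementation, and the weighted example values are made up):

```java
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;

public class CumulativeSamplingSketch {

  // Build a cumulative-probability table: key = running total, value = item.
  static <T> TreeMap<Double, T> cumulative(Map<T, Double> weights) {
    double total = weights.values().stream().mapToDouble(Double::doubleValue).sum();
    TreeMap<Double, T> cdf = new TreeMap<>();
    double running = 0.0;
    for (Map.Entry<T, Double> e : weights.entrySet()) {
      running += e.getValue() / total;   // normalize so the last key is ~1.0
      cdf.put(running, e.getKey());
    }
    return cdf;
  }

  // Draw u in [0,1) and take the first cumulative key strictly above it.
  static <T> T sample(TreeMap<Double, T> cdf, Random r) {
    double u = r.nextDouble();
    Map.Entry<Double, T> e = cdf.higherEntry(u);
    return (e != null ? e : cdf.lastEntry()).getValue(); // guard against rounding
  }

  public static void main(String[] args) {
    Map<String, Double> weights = new LinkedHashMap<>();
    weights.put("INSERT", 1.0);
    weights.put("SELECT", 3.0);
    System.out.println(sample(cumulative(weights), new Random(7))); // "SELECT" ~75% of the time
  }
}
```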
parser/Dependency.java
@@ -8,24 +8,24 @@
public class Dependency {

/* the current node */
private String node;
private FeatureType node;

/* the possible neighbors to the current node */
private List<String> neighbors;
private List<FeatureType> neighbors;

public String getNode() {
public FeatureType getNode() {
return this.node;
}

public void setNode(String node) {
public void setNode(FeatureType node) {
this.node = node;
}

public List<String> getNeighbors() {
public List<FeatureType> getNeighbors() {
return this.neighbors;
}

public void setNeighbors(List<String> neighbors) {
public void setNeighbors(List<FeatureType> neighbors) {
this.neighbors = neighbors;
}
}
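
The Dependency fields change here from `String` to `FeatureType`, while the dependency JSON files can keep plain feature-name strings: Gson, already used for these classes in the old `addNeighborMap` code above and presumably still used in `parser.Utils`, resolves JSON strings to enum constants by name. A self-contained sketch of that behaviour, using minimal stand-ins rather than the repository's real `parser.FeatureType` and `parser.Dependency`:

```java
import com.google.gson.Gson;
import java.util.List;

public class DependencySketch {

  // Minimal stand-ins; the real parser.FeatureType and parser.Dependency have more members.
  enum FeatureType { FEATURE_ROOT, FEATURE_SINK }

  static class Dependency {
    FeatureType node;
    List<FeatureType> neighbors;
  }

  public static void main(String[] args) {
    // Gson maps "FEATURE_ROOT"/"FEATURE_SINK" to enum constants by name,
    // which is why the newly typed fields still deserialize from plain strings.
    String json = "{\"node\": \"FEATURE_ROOT\", \"neighbors\": [\"FEATURE_SINK\"]}";
    Dependency d = new Gson().fromJson(json, Dependency.class);
    System.out.println(d.node + " -> " + d.neighbors);
  }
}
```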