From 8310d175e51c553c4e1ed3a53e3e16401157c154 Mon Sep 17 00:00:00 2001 From: athawk81 Date: Wed, 27 May 2015 16:48:49 -0500 Subject: [PATCH] made more flexble csv reader --- .../utlities/CSVToMapOfNumericLists.java | 2 +- .../selectors/CSVToMapOfObjectLists.java | 151 ++++++++++++++++++ 2 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 src/main/java/quickml/utlities/selectors/CSVToMapOfObjectLists.java diff --git a/src/main/java/quickml/utlities/CSVToMapOfNumericLists.java b/src/main/java/quickml/utlities/CSVToMapOfNumericLists.java index 7b309ecd..520bb35d 100644 --- a/src/main/java/quickml/utlities/CSVToMapOfNumericLists.java +++ b/src/main/java/quickml/utlities/CSVToMapOfNumericLists.java @@ -84,7 +84,7 @@ public static void main(String[] args) { CSVToMapOfNumericLists csvReader = new CSVToMapOfNumericLists(',', true); try { - Map> mapOfinstances = csvReader.readCsv("wfRes"); + Map> mapOfinstances = csvReader.readCsv("/Users/alexanderhawk/Downloads/20150526-181451.csv"); for (String key : mapOfinstances.keySet()) { System.out.println("list key: " + key + "list Vals" + mapOfinstances.get(key).toString()); } diff --git a/src/main/java/quickml/utlities/selectors/CSVToMapOfObjectLists.java b/src/main/java/quickml/utlities/selectors/CSVToMapOfObjectLists.java new file mode 100644 index 00000000..2f4db77d --- /dev/null +++ b/src/main/java/quickml/utlities/selectors/CSVToMapOfObjectLists.java @@ -0,0 +1,151 @@ +package quickml.utlities.selectors; + +import au.com.bytecode.opencsv.CSVReader; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.joda.time.DateTime; +import org.joda.time.Hours; +import org.joda.time.Seconds; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import quickml.supervised.regressionModel.IsotonicRegression.PoolAdjacentViolatorsModel; +import quickml.utlities.LinePlotter; +import quickml.utlities.LinePlotterBuilder; + +import javax.xml.datatype.Duration; +import java.io.FileReader; +import java.text.DateFormat; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; + +/** + * Created by alexanderhawk on 10/2/14. + */ + + +/* This class converts the contents of a csv file into a map of lists...where the contents of each collumn are + stored in a List. Each list is contained in a map, where the column names (or numbers if names are not + provided in a header) are the keys for their respective lists. + */ + +public class CSVToMapOfObjectLists { + private List header; + private char delimiter = ','; + private boolean headerPresent = true; + private Map> dataLists = Maps.newHashMap(); + + public CSVToMapOfObjectLists() { + } + + public CSVToMapOfObjectLists(char delimiter) { + this.delimiter = delimiter; + } + + public CSVToMapOfObjectLists(char delimiter, boolean noHeaderPresent) { + this.delimiter = delimiter; + this.headerPresent = noHeaderPresent; + } + + public Map> readCsv(String fileName) throws Exception { + + CSVReader reader = new CSVReader(new FileReader(fileName), delimiter, '"'); + List csvLines = reader.readAll(); + + try { + header = Lists.newArrayList(); + int startIndex = 1; + if (!headerPresent) { + startIndex = 0; + for (int i = 0; i < csvLines.get(0).length; i++) { + header.add(String.valueOf(i)); + dataLists.put(header.get(i), Lists.newArrayList()); + } + } else { + for (int i = 0; i < csvLines.get(0).length; i++) { + header.add(csvLines.get(0)[i]); + dataLists.put(header.get(i), Lists.newArrayList()); + } + for (int i = startIndex; i < csvLines.size(); i++) { + appendLineToLists(csvLines.get(i)); + } + } + } catch (Exception e) { + throw new RuntimeException(e.getMessage()); + } + return dataLists; + } + + private void appendLineToLists(String[] dataLine) { + for (int i = 0; i < dataLine.length; i++) { + if (dataLine[i].isEmpty()) { + continue; + } + List listToappendTo = dataLists.get(header.get(i)); + try { + listToappendTo.add(Double.valueOf(dataLine[i])); + } catch (NumberFormatException e) { + listToappendTo.add(dataLine[i]); + } + } + } + + + public static void main(String[] args) { + + CSVToMapOfObjectLists csvReader = new CSVToMapOfObjectLists(',', true); + try { + Map> mapOfinstances = csvReader.readCsv("/Users/alexanderhawk/Downloads/20150526-181451.csv"); + for (String key : mapOfinstances.keySet()) { + System.out.println("list key: " + key + " list Vals" + mapOfinstances.get(key).toString()); + } + } catch (Exception e) { + throw new RuntimeException(); + } + + List observations = Lists.newArrayList(); + List predictions = Lists.newArrayList(); + + + + for (int i = 0; i < csvReader.dataLists.get("target_cpc_customers").size(); i++) { + + final Double target_cpc_customers = (Double)csvReader.dataLists.get("target_cpc_customers").get(i); + Double effective_cust_daily_spend = (Double)csvReader.dataLists.get("actual_spend_customers").get(i)/computeReservationTime( + (String)csvReader.dataLists.get("reserved_at").get(i), (String)csvReader.dataLists.get("actual_spend_time").get(i)); + + PoolAdjacentViolatorsModel.Observation observation = new PoolAdjacentViolatorsModel.Observation + (target_cpc_customers, effective_cust_daily_spend); + observations.add(observation); + } + + PoolAdjacentViolatorsModel pav = new PoolAdjacentViolatorsModel(observations, 16); + PoolAdjacentViolatorsModel.Observation prev = null; + // observations.sort(Comparator.naturalOrder()); + for (PoolAdjacentViolatorsModel.Observation observation : observations) { + + PoolAdjacentViolatorsModel.Observation obsToAdd = new PoolAdjacentViolatorsModel.Observation(observation.input, pav.predict(observation.input)); + predictions.add(obsToAdd); + + if (prev !=null && prev.output < observation.output) + System.out.println("prev out: " + prev.toString() + ". obs.input: " + obsToAdd.toString()); + prev = obsToAdd; + } + + + TreeSet calibrationSet = pav.getCalibrationSet(); + TreeSet preSmoothingSet = pav.getPreSmoothingSet(); + System.out.println("calibration set: " + calibrationSet.toString()); + LinePlotter linePlotter = new LinePlotterBuilder().chartTitle("pav trial").xAxisLabel("cpc").yAxisLabel("rate").buildLinePlotter(); + linePlotter.addSeries(calibrationSet, "unweigted linear interpolation"); + linePlotter.addSeries(predictions, "weighted linear interpolation"); + linePlotter.addSeries(preSmoothingSet, "binned observations with weight 4"); + linePlotter.displayPlot(); + } + private static double computeReservationTime(String startTime, String endTime) { + DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("YYYY-MM-dd'T'HH:mm:ss"); + DateTime startDateTime = dateFormatter.parseDateTime(startTime.substring(0, 19)); + DateTime endDateTime = dateFormatter.parseDateTime(endTime.substring(0,19)); + return ((double) Seconds.secondsBetween(startDateTime, endDateTime).getSeconds())/(24.0*60*60); + } +} \ No newline at end of file