Skip to content
This repository has been archived by the owner on Aug 30, 2022. It is now read-only.

Commit

Permalink
Fix bug in quartiles computation
Browse files Browse the repository at this point in the history
  • Loading branch information
Mihai Budiu committed Apr 15, 2020
1 parent c8d5cac commit 96846c6
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public QuantilesVector create(@Nullable ITable data) {
int bucket = this.buckets.indexOf(bucketCol, current);
if (bucket < 0)
result.outOfBounds();
if (sampledCol.isMissing(current))
else if (sampledCol.isMissing(current))
result.addMissing(bucket);
else
result.add(bucket, sampledCol.asDouble(current));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public class NumericSamples implements IJson {
* Number of missing values.
*/
public long missing;
/**
* Number of elements scanned.
*/
public long count;

// The following 2 fields are used only during construction.
/**
Expand All @@ -64,20 +68,28 @@ public class NumericSamples implements IJson {
*/
@Nullable
private Randomness random;
/**
* Used during construction: insert a row only when skipRows is 0.
*/
private int skipRows;

/**
 * Creates an empty sample set that will be filled by scanning values.
 *
 * @param samplingRate  Rate stored in this.samplingRate and used when sampling.
 * @param seed          Seed for the random number generator used while sampling.
 */
public NumericSamples(double samplingRate, long seed) {
    // Nothing has been scanned yet: all counters start from zero.
    this.count = 0;
    this.missing = 0;
    // A skipRows value of 0 means the next row is inserted.
    this.skipRows = 0;
    this.empty = true;
    this.samplingRate = samplingRate;
    this.samples = new JsonList<Double>();
    this.random = new Randomness(seed);
}

private NumericSamples(List<Double> data) {
private NumericSamples(List<Double> data, long count, double samplingRate) {
this.empty = false;
this.missing = 0;
this.samplingRate = 0;
this.samplingRate = samplingRate;
this.skipRows = 0;
this.samples = new JsonList<Double>(data);
this.count = count;
}

public int size() {
Expand All @@ -92,7 +104,9 @@ public boolean empty() {
* Combine two sets of numeric samples. It just concatenates the lists.
*/
NumericSamples add(NumericSamples other) {
NumericSamples result = new NumericSamples(this.samples);
NumericSamples result = new NumericSamples(this.samples, this.count + other.count,
Utilities.div(this.samplingRate * this.count + other.samplingRate * other.count,
(this.count + other.count)));
if (this.empty()) {
result.min = other.min;
result.max = other.max;
Expand All @@ -113,6 +127,7 @@ NumericSamples add(NumericSamples other) {
* Here is a number from the distribution; sample it.
*/
public void add(double d) {
this.count++;
if (this.empty) {
this.empty = false;
this.min = d;
Expand All @@ -121,9 +136,15 @@ public void add(double d) {
this.min = Math.min(this.min, d);
this.max = Math.max(this.max, d);
}
assert this.random != null;
if (this.random.nextDouble() <= this.samplingRate)
if (this.skipRows == 0) {
this.samples.add(d);
assert this.random != null;
this.skipRows = this.random.nextGeometric(this.samplingRate);
if (this.skipRows > 0)
this.skipRows--;
} else {
this.skipRows--;
}
}

/**
Expand All @@ -143,7 +164,7 @@ public NumericSamples quantiles(int expectedCount) {
return this;
List<Double> small = Utilities.decimate(this.samples, Math.floorDiv(this.samples.size(), expectedCount));
small.remove(0); // skip 0-th quantile
NumericSamples result = new NumericSamples(small);
NumericSamples result = new NumericSamples(small, this.count, this.samplingRate);
result.min = this.min;
result.max = this.max;
return result;
Expand Down
6 changes: 6 additions & 0 deletions platform/src/main/java/org/hillview/utils/Utilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public static <T extends Comparable<T>> void checkSorted(final T[] a) {
") are not in sorted order.");
}

/**
 * Divides a value by a count, tolerating a zero count.
 *
 * @param value  Numerator.
 * @param count  Denominator.
 * @return       value / count; when count is 0 the value itself is
 *               returned unchanged (it is probably also 0 in that case).
 */
public static double div(double value, long count) {
    return (count != 0) ? (value / count) : value;
}

public static long toLong(double value) {
if (value < Long.MIN_VALUE || value > Long.MAX_VALUE)
throw new RuntimeException("Cannot convert to long " + value);
Expand Down
2 changes: 1 addition & 1 deletion web/src/main/java/org/hillview/targets/TableTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ public void getQuantilesVector(RpcRequest request, RpcRequestContext context) {
assert buckets.getBucketCount() == info.nonNullCounts.length;
for (int i = 0; i < info.nonNullCounts.length; i++) {
double ct = info.nonNullCounts[i];
double rate = Math.max(samplesRequired / ct, 1.0);
double rate = Math.min(samplesRequired / ct, 1.0);
samplingRates[i] = rate;
}
QuantilesVectorSketch qvs = new QuantilesVectorSketch(
Expand Down
28 changes: 28 additions & 0 deletions web/src/main/webapp/dataViews/quartilesVectorView.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ export class QuartilesVectorView extends HistogramViewBase {
text: "heatmap",
action: () => { this.doHeatmap(); },
help: "Plot this data as a heatmap view.",
}, {
text: "2D histogram",
action: () => { this.do2DHistogram(); },
help: "Plot this data as a 2D histogram.",
}, {
text: "1D histogram",
action: () => { this.doHistogram(); },
help: "Plot the X column as 1 D histogam.",
}]) },
page.dataset.combineMenu(this, page.pageId),
]);
Expand Down Expand Up @@ -168,6 +176,26 @@ export class QuartilesVectorView extends HistogramViewBase {
}));
}

/**
 * Requests a one-dimensional histogram of the X-axis column.
 * Builds a data-quantiles request for that column and invokes it with a
 * DataRangesReceiver set to chart kind "Histogram", passing the current
 * X-axis bucket count.
 */
public doHistogram(): void {
    const columns = [this.xAxisData.description];
    const request = this.createDataQuantilesRequest(columns, this.page, "Histogram");
    const receiver = new DataRangesReceiver(this, this.page, request, this.schema,
        [this.xAxisData.bucketCount], columns, null, {
            reusePage: false,
            chartKind: "Histogram"
        });
    request.invoke(receiver);
}

/**
 * Requests a two-dimensional histogram of the X-axis column and the
 * quantile column (this.qCol).
 * Builds a data-quantiles request for both columns and invokes it with a
 * DataRangesReceiver set to chart kind "2DHistogram".
 */
public do2DHistogram(): void {
    const columns = [this.xAxisData.description, this.qCol];
    const request = this.createDataQuantilesRequest(columns, this.page, "2DHistogram");
    // The X bucket count is reused; the second count is passed as 0
    // (presumably "let the receiver choose" - TODO confirm).
    const receiver = new DataRangesReceiver(this, this.page, request, this.schema,
        [this.xAxisData.bucketCount, 0], columns, null, {
            reusePage: false,
            chartKind: "2DHistogram"
        });
    request.invoke(receiver);
}

public export(): void {
const lines: string[] = this.asCSV();
const fileName = "quantiles2d.csv";
Expand Down

0 comments on commit 96846c6

Please sign in to comment.