From 49492211012c047c795e44d0c1b13309b1f4bbf4 Mon Sep 17 00:00:00 2001 From: gwlucastrig Date: Sat, 20 Jan 2024 12:46:34 -0500 Subject: [PATCH] Issue 106: SVM improve sample-spacing computation --- .../main/java/org/tinfour/common/Vertex.java | 42 +++- .../tinfour/contour/ContourBuilderForTin.java | 3 + .../org/tinfour/svm/SvmBathymetryData.java | 6 + .../java/org/tinfour/svm/SvmComputation.java | 188 ++++++++++++------ .../svm/SvmSinglePointAnomalyFilter.java | 136 +++++++++++++ .../tinfour/svm/properties/SvmProperties.java | 25 ++- .../org/tinfour/svm/SvmTemplate.properties | 10 - 7 files changed, 321 insertions(+), 89 deletions(-) create mode 100644 svm/src/main/java/org/tinfour/svm/SvmSinglePointAnomalyFilter.java diff --git a/core/src/main/java/org/tinfour/common/Vertex.java b/core/src/main/java/org/tinfour/common/Vertex.java index 5f6ed133..3499784d 100644 --- a/core/src/main/java/org/tinfour/common/Vertex.java +++ b/core/src/main/java/org/tinfour/common/Vertex.java @@ -104,6 +104,12 @@ public class Vertex implements ISamplePoint { */ public static final int BIT_CONSTRAINT = 0x02; + /** + * A bit flag indicating that the vertex is to be treated as "withheld" and + * should not be processed as part of a Delaunay triangulation (TIN). + * This flag is used in support of data filtering and similar operations. + */ + public static final int BIT_WITHHELD = 0x04; /** * An indexing value assigned to the Vertex. In this package, it is used * primary for diagnostic purposes and labeling graphics. @@ -365,6 +371,34 @@ public void setConstraintMember(boolean constraintMember) { } } + + + /** + * Indicates whether a vertex is marked as withheld. This setting is + * typically set by application code or other utilities rather than by Tinfour + * internal operations. + * + * @return true if vertex is withheld; otherwise, false + */ + public boolean isWithheld() { + return (status & BIT_WITHHELD) != 0; + } + + /** + * Sets or clears the is-withheld status of a vertex. + * + * @param synthetic true if vertex is withheld; otherwise, false + */ + public void setWithheld(boolean synthetic) { + if (synthetic) { + status |= BIT_WITHHELD; + } else { + status &= ~BIT_WITHHELD; + } + } + + + /** * Sets the status value of the vertex. This method is intended to * provide an efficient way of setting multiple status flags at once. @@ -387,7 +421,7 @@ public int getStatus() { /** * Indicates whether a vertex is part of a constraint definition or - * lies on the border of an area constraint. + * lies on the border of an area constraint. * * @return true if vertex is a constraint member; otherwise, false */ @@ -396,7 +430,7 @@ public boolean isConstraintMember() { } /** - * Gets the auxiliary index for the vertex. The auxiliary index + * Gets the auxiliary index for the vertex. The auxiliary index * field is one byte in size and supports integer values in the * range 0 to 255. It is used to support graph-coloring algorithms * but is available for other uses as well. @@ -407,10 +441,10 @@ public int getAuxiliaryIndex() { } /** - * Sets the auxiliary index for the vertex. The auxiliary index + * Sets the auxiliary index for the vertex. The auxiliary index * field is one byte in size and supports integer values in the * range 0 to 255. It is used to support graph-coloring algorithms - * but is available for other uses as well. + * but is available for other uses as well. * @param auxiliaryIndex a value in the range 0 to 255 */ public void setAuxiliaryIndex(int auxiliaryIndex) { diff --git a/core/src/main/java/org/tinfour/contour/ContourBuilderForTin.java b/core/src/main/java/org/tinfour/contour/ContourBuilderForTin.java index 4d863167..e566e8dc 100644 --- a/core/src/main/java/org/tinfour/contour/ContourBuilderForTin.java +++ b/core/src/main/java/org/tinfour/contour/ContourBuilderForTin.java @@ -778,6 +778,9 @@ private void buildRegions() { buildRegionsUsingPerimeter(); for (Contour contour : closedContourList) { + if(contour.isEmpty()){ + continue; + } ContourRegion region = new ContourRegion(contour); regionList.add(region); } diff --git a/svm/src/main/java/org/tinfour/svm/SvmBathymetryData.java b/svm/src/main/java/org/tinfour/svm/SvmBathymetryData.java index 964a0c86..170f631d 100644 --- a/svm/src/main/java/org/tinfour/svm/SvmBathymetryData.java +++ b/svm/src/main/java/org/tinfour/svm/SvmBathymetryData.java @@ -655,4 +655,10 @@ public void setSurveyPerimeter(ListperimeterVertices){ public ListgetSurveyPerimeter(){ return surveyPerimeter; } + + void replaceSoundings(Listreplacements){ + soundings.clear(); + supplement.clear(); + soundings.addAll(replacements); + } } diff --git a/svm/src/main/java/org/tinfour/svm/SvmComputation.java b/svm/src/main/java/org/tinfour/svm/SvmComputation.java index cf714763..c522e3af 100644 --- a/svm/src/main/java/org/tinfour/svm/SvmComputation.java +++ b/svm/src/main/java/org/tinfour/svm/SvmComputation.java @@ -38,7 +38,6 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; import java.util.List; import java.util.function.Consumer; import org.tinfour.common.GeometricOperations; @@ -270,6 +269,24 @@ int getFlatTriangleCount() { } } + private static class SampleSpacing{ + final int nSamples; + final double sigma; + final double mean; + final double median; + final double lenMin; + final double lenMax; + + SampleSpacing(int nSamples, double mean, double sigma, double median, double lenMin, double lenMax){ + this.nSamples = nSamples; + this.mean = mean; + this.sigma = sigma; + this.median = median; + this.lenMin = lenMin; + this.lenMax = lenMax; + } + } + /** * Performs the main process, printing the results to the specified print * stream. @@ -335,6 +352,34 @@ public void processVolume( long timeToBuildTin = time1 - time0; long timeToAddConstraints = time2 - time1; + long spTime0 = System.nanoTime(); + SampleSpacing spacing = this.evaluateSampleSpacing(tin); + long spTime1 = System.nanoTime(); + long timeToFindSampleSpacing = spTime1-spTime0; + + // The experimental filter is a non-advertised feature for removing + // anomalous points. + if(properties.isExperimentalFilterEnabled()){ + ps.println("Processing experimental filter"); + spTime0 = System.nanoTime(); + SvmSinglePointAnomalyFilter filter = new SvmSinglePointAnomalyFilter(); + int nFilter = filter.process(ps, tin); + if(nFilter>0){ + // some vertices were marked as withheld + ArrayList filteredSamples = new ArrayList<>(soundings.size()); + for(Vertex v: soundings){ + if(!v.isWithheld()){ + filteredSamples.add(v); + } + } + soundings = filteredSamples; + data.replaceSoundings(filteredSamples); + spTime1 = System.nanoTime(); + long timeToFilter = spTime1-spTime0; + ps.format("Time for experimental filter %9.1f ms%n", timeToFilter/1.0e+6); + } + } + TriangleSurvey trigSurvey = new TriangleSurvey(tin, shoreReferenceElevation); TriangleCollector.visitSimpleTriangles(tin, trigSurvey); @@ -468,46 +513,14 @@ public void processVolume( data.getMaxZ() / lengthFactor, (data.getMaxZ() - data.getMinZ()) / lengthFactor); - if (properties.isSoundingSpacingEnabled()) { - KahanSummation sumLen = new KahanSummation(); - KahanSummation sumLen2 = new KahanSummation(); - int nEdgeMax = tin.getMaximumEdgeAllocationIndex(); - BitSet visited = new BitSet(nEdgeMax); - float[] lenArray = new float[nEdgeMax]; - int nLen = 0; - for(IQuadEdge edge: tin.edges()){ - int eIndex = edge.getBaseIndex(); - if(visited.get(eIndex)){ - continue; - } - visited.set(eIndex); - if(edge.isConstrainedRegionBorder()){ - continue; - } - - if(lakeConsumer.isWater(edge)){ - double len = edge.getLength(); - sumLen.add(len); - sumLen2.add(len*len); - lenArray[nLen++] = (float)len; - } - } - Arrays.sort(lenArray, 0, nLen); - double sLen = sumLen.getSum(); - double sLen2 = sumLen2.getSum(); - double sigma = Double.NaN; - double meanLen = sumLen.getMean(); - double medianLen = lenArray[nLen / 2]; - if(nLen>2){ - sigma = Math.sqrt((sLen2 - (sLen / nLen) * sLen) / (nLen - 1)); - } - ps.format(" Sounding spacing%n"); - ps.format(" mean %12.3f %s%n", meanLen / lengthFactor, lengthUnits); - ps.format(" std dev %12.3f %s%n", sigma/lengthFactor, lengthUnits); - ps.format(" median %12.3f %s%n", medianLen / lengthFactor, lengthUnits); - ps.format("%n"); - } + ps.format(" Sounding spacing%n"); + ps.format(" mean %12.3f %s%n", spacing.mean / lengthFactor, lengthUnits); + ps.format(" std dev %12.3f %s%n", spacing.sigma / lengthFactor, lengthUnits); + ps.format(" median %12.3f %s%n", spacing.median/ lengthFactor, lengthUnits); + ps.format(" maximim %12.3f %s%n", spacing.lenMax/ lengthFactor, lengthUnits); + ps.format(" minimum %14.5f %s%n", spacing.lenMin/ lengthFactor, lengthUnits); + ps.format("%n"); double rawVolume = lakeConsumer.getVolume(); double rawSurfArea = lakeConsumer.getSurfaceArea(); @@ -518,7 +531,6 @@ public void processVolume( double surfArea = lakeConsumer.getSurfaceArea() / areaFactor; double avgDepth = (rawVolume / rawSurfArea) / lengthFactor; double adjMeanDepth = rawAdjMeanDepth / lengthFactor; - double vertexSpacing = estimateInteriorVertexSpacing(tin, lakeConsumer); double rawFlatArea = lakeConsumer.getFlatArea(); double flatArea = lakeConsumer.getFlatArea() / areaFactor; @@ -533,7 +545,7 @@ public void processVolume( flatArea, areaUnits, rawFlatArea, lengthUnits); ps.format(" Avg depth %18.2f %s%n", avgDepth, lengthUnits); ps.format(" Adj mean depth %18.2f %s%n", adjMeanDepth, lengthUnits); - ps.format(" Mean Vertex Spacing %18.2f %s%n", vertexSpacing, lengthUnits); + ps.format(" Mean Vertex Spacing %18.2f %s%n", spacing.mean, lengthUnits); } else { ps.format(" Volume %,18.2f %s %,28.1f %s^3%n", volume, volumeUnits, rawVolume, lengthUnits); @@ -543,7 +555,7 @@ public void processVolume( flatArea, areaUnits, rawFlatArea, lengthUnits); ps.format(" Avg depth %,18.2f %s%n", avgDepth, lengthUnits); ps.format(" Adj mean depth %,18.2f %s%n", adjMeanDepth, lengthUnits); - ps.format(" Mean Vertex Spacing %,18.2f %s%n", vertexSpacing, lengthUnits); + ps.format(" Mean Vertex Spacing %,18.2f %s%n", spacing.mean, lengthUnits); } ps.format(" N Triangles %15d%n", lakeConsumer.nTriangles); ps.format(" N Flat Triangles %15d%n", lakeConsumer.nFlatTriangles); @@ -563,6 +575,7 @@ public void processVolume( ps.format("Time to load data %9.1f ms%n", data.getTimeToLoadData() / 1.0e+6); ps.format("Time to build TIN %9.1f ms%n", timeToBuildTin / 1.0e+6); ps.format("Time to add shore constraint %9.1f ms%n", timeToAddConstraints / 1.0e+6); + ps.format("Time to find sample spacing %9.1f ms%n", timeToFindSampleSpacing/1.0e+6); ps.format("Time to remedy flat triangles %9.1f ms%n", timeToFixFlats / 1.0e+6); ps.format("Time to compute lake volume %9.1f ms%n", (time2 - time1) / 1.0e+6); ps.format("Time for all analysis %9.1f ms%n", (time2 - time0) / 1.0e+6); @@ -678,35 +691,82 @@ private double getPerimeterSum(List constraints) { return perimeterSum.getSum(); } + /** - * Estimates vertex spacing for TIN based exclusively on interior edges. - * Constraint edges and perimeter edges are not included. - * - * @param tin a valid TIN. - * @param lakeData a valid instance of input data - * @return an estimated vertex spacing + * Finds the spacing between samples in the source data, selecting + * neighbors based on the Delaunay triangulation. This method + * applies logic to exclude constraint edges and edges that + * attach samples to constraint vertices. The rationale + * for this exclusion is that the constraints have a different + * level of data collection than the samples and would have + * different statistics. The logic below also excludes + * perimeter edges + *

+ * This logic assumes that the TIN was prepared using the SVM conventions + * of populating the auxiliary index of the sample vertices with + * a flag indicating that they are depth samples. + * @param tin a valid instance + * @return a valid instance */ - private double estimateInteriorVertexSpacing(IIncrementalTin tin, LakeData lakeData) { - KahanSummation sumLength = new KahanSummation(); - int n = 0; - for (IQuadEdge e : tin.edges()) { - if (lakeData.isWater(e)) { - // the edge lies in a water area, but we also need + private SampleSpacing evaluateSampleSpacing(IIncrementalTin tin){ + + List constraintsFromTin = tin.getConstraints(); + boolean[] water = new boolean[constraintsFromTin.size()]; + for (IConstraint con : constraintsFromTin) { + water[con.getConstraintIndex()] = (Boolean) con.getApplicationData(); + } + KahanSummation sumLen = new KahanSummation(); + KahanSummation sumLen2 = new KahanSummation(); + int nEdgeMax = tin.getMaximumEdgeAllocationIndex(); + float[] lenArray = new float[nEdgeMax]; + int nLen = 0; + double lenMin = Double.POSITIVE_INFINITY; + double lenMax = Double.NEGATIVE_INFINITY; + + + for (IQuadEdge edge : tin.edges()) { + if (!edge.isConstrainedRegionInterior()) { + continue; + } + + int conIndex = edge.getConstraintIndex(); + if (water[conIndex]) { + // the edge lies in a water area, but we also need // to exclude any edges that connect a sounding to // a constraint border. - Vertex a = e.getA(); + Vertex a = edge.getA(); + Vertex b = edge.getB(); int aAux = a.getAuxiliaryIndex(); - int bAux = a.getAuxiliaryIndex(); - if (aAux == SvmBathymetryData.BATHYMETRY_SOURCE && bAux == SvmBathymetryData.BATHYMETRY_SOURCE) { - n++; - sumLength.add(e.getLength()); + int bAux = b.getAuxiliaryIndex(); + if (aAux == SvmBathymetryData.BATHYMETRY_SOURCE + && bAux == SvmBathymetryData.BATHYMETRY_SOURCE) { + double len = edge.getLength(); + sumLen.add(len); + sumLen2.add(len * len); + lenArray[nLen++] = (float) len; + if(lenlenMax){ + lenMax = len; + } } } } - if (n == 0) { - return 0; + + Arrays.sort(lenArray, 0, nLen); + double sLen = sumLen.getSum(); + double sLen2 = sumLen2.getSum(); + + double sigma = Double.NaN; + double mean = sumLen.getMean(); + double median = lenArray[nLen / 2]; + if (nLen > 2) { + sigma = Math.sqrt((sLen2 - (sLen / nLen) * sLen) / (nLen - 1)); } - return sumLength.getSum() / n; - } + + return new SampleSpacing(nLen, mean, sigma, median, lenMin, lenMax); + + } } diff --git a/svm/src/main/java/org/tinfour/svm/SvmSinglePointAnomalyFilter.java b/svm/src/main/java/org/tinfour/svm/SvmSinglePointAnomalyFilter.java new file mode 100644 index 00000000..0da1003b --- /dev/null +++ b/svm/src/main/java/org/tinfour/svm/SvmSinglePointAnomalyFilter.java @@ -0,0 +1,136 @@ +/* -------------------------------------------------------------------- + * Copyright (C) 2024 Gary W. Lucas. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * --------------------------------------------------------------------- + */ + + /* + * ----------------------------------------------------------------------- + * + * Revision History: + * Date Name Description + * ------ --------- ------------------------------------------------- + * 01/2024 G. Lucas Created + * + * Notes: + * + * ----------------------------------------------------------------------- + */ +package org.tinfour.svm; + +import java.io.PrintStream; +import java.util.BitSet; +import org.tinfour.common.IIncrementalTin; +import org.tinfour.common.IQuadEdge; +import org.tinfour.common.Vertex; + +/** + * Implements logic for removing single-point anomalies from the + * source data. This class is an experiment and is not yet ready for + * general use. + */ +class SvmSinglePointAnomalyFilter { + + /** + * Process the input sample data in the TIN and remove single-point anomalies. + * @param ps a valid instance to receive results + * @param tin a valid instance + * @return + */ + int process( + PrintStream ps, + IIncrementalTin tin) { + int maxEdgeIndex = tin.getMaximumEdgeAllocationIndex(); + BitSet edgeSet = new BitSet(maxEdgeIndex); + for (IQuadEdge edge : tin.getPerimeter()) { + int baseIndex = edge.getBaseIndex(); + edgeSet.set(baseIndex); + edgeSet.set(baseIndex | 1); + } + + double mCutOff = 0.015; + double mReject = 0.5; + double mMax = 0; + int nNegReject = 0; + int nPosReject = 0; + for (IQuadEdge qEdge : tin.edges()) { + IQuadEdge edge = qEdge; + innerLoop: + for (int iEdge = 0; iEdge < 2; iEdge++, edge = edge.getDual()) { + int eIndex = edge.getIndex(); + if (edgeSet.get(eIndex)) { + continue; + } + edgeSet.set(eIndex); + Vertex a = edge.getA(); + if (a.isConstraintMember()) { + continue; + } + double aZ = a.getZ(); + int nPos = 0; + int nNeg = 0; + int n = 0; + double m = 0; + int nSupport = 0; + for (IQuadEdge e : edge.pinwheel()) { + edgeSet.set(e.getIndex()); + Vertex b = edge.getB(); + double bZ = b.getZ(); + double d = e.getLength(); + double mZ = (bZ - aZ) / d; // slope + if (mZ > 0) { + nPos++; + } else { + nNeg++; + } + + double mAbs = Math.abs(mZ); + m += mAbs; + if (mAbs < mCutOff) { + nSupport++; + } + + n++; + if (mAbs > mMax) { + mMax = mAbs; + } + } + + if (nSupport==0 && nPos > 0 && nNeg == 0) { + m /= n; + if (m > mReject) { + //System.out.format("reject pos %10d %9.3f %9.3f%n", a.getIndex(), m, dSum); + a.setWithheld(true); + nPosReject++; + } + } + if (nSupport==0 && nPos == 0 && nNeg > 0) { + m /= n; + if (m > mReject) { + //System.out.format("reject neg %10d %9.3f %9.3f%n", a.getIndex(), m, dSum); + a.setWithheld(true); + nNegReject++; + } + } + + + } + } + + ps.println("Rejected " + (nPosReject+nNegReject)); + ps.format(" Pos: %8d%n", nPosReject); + ps.format(" Neg: %8d%n", nNegReject); + return nPosReject+nNegReject; + } +} diff --git a/svm/src/main/java/org/tinfour/svm/properties/SvmProperties.java b/svm/src/main/java/org/tinfour/svm/properties/SvmProperties.java index 79af173b..5577f484 100644 --- a/svm/src/main/java/org/tinfour/svm/properties/SvmProperties.java +++ b/svm/src/main/java/org/tinfour/svm/properties/SvmProperties.java @@ -58,7 +58,6 @@ public class SvmProperties { private final static String tableKey = "table"; private final static String tableIntervalKey = "tableInterval"; private final static String flatFixerKey = "remediateFlatTriangles"; - private final static String soundingSpacingKey = "computeSoundingSpacing"; private final static String inputFolderKey = "inputFolder"; private final static String outputFolderKey = "outputFolder"; @@ -87,6 +86,8 @@ public class SvmProperties { private final static String anomalyTableFileKey = "anomalyTableFileName"; + private final static String experimentalFilterKey = "experimentalFilter"; + final Properties properties = new Properties(); final List keyList = new ArrayList<>(); private File specificationFile; @@ -533,16 +534,6 @@ public boolean isFlatFixerEnabled() { return test; } - /** - * Indicates whether the computation of sounding spacing is enabled. - * - * @return true if computation is to be performed; otherwise, false. - */ - public boolean isSoundingSpacingEnabled() { - String s = properties.getProperty(soundingSpacingKey, "false"); - boolean test = Boolean.parseBoolean(s.trim()); - return test; - } private int findMaxNameLength(int m0, List samples) { int m = m0; @@ -1060,4 +1051,16 @@ public boolean isGeoTiffDataCompressionEnabled() { } return false; } + + public boolean isExperimentalFilterEnabled(){ + String s = properties.getProperty(SvmProperties.experimentalFilterKey); + if (s != null && !s.isBlank()) { + try { + return Boolean.parseBoolean(s.trim()); + } catch (NumberFormatException nfe) { + throw new IllegalArgumentException("Invalid boolean specification for experimentalFilter: "+s); + } + } + return false; + } } diff --git a/svm/src/main/resources/org/tinfour/svm/SvmTemplate.properties b/svm/src/main/resources/org/tinfour/svm/SvmTemplate.properties index 2f06de4e..994001bf 100644 --- a/svm/src/main/resources/org/tinfour/svm/SvmTemplate.properties +++ b/svm/src/main/resources/org/tinfour/svm/SvmTemplate.properties @@ -30,7 +30,6 @@ bounds = AH17_Lake10.shp | Elevation shorelineReferenceElevation = 2220 remediateFlatTriangles = true -computeSoundingSpacing = false unitOfDistance = ft unitOfArea = acres | 43560 @@ -260,15 +259,6 @@ capacityGraphTitle = SVM Analysis for Alan Henry Reservoir # value. By default, remediation is not activated. # remediateFlatTriangles = true # -# Compute Sounding Spacing -------------------------------------------- -# If configured to do so, the SVM utility will compute the sounding spacing -# for input data. The mean, standard deviation, and median spacing between soundings in the -# "samples" files will be computed. This calculation is based on the lengths -# of interior edges in the Delaunay triangulation construced from the -# sources and constraints. Constraint-edges (i.e. shoreline edges) are -# not considered. -# By default, computeSoundingSpacing is assigned a value of false. -# # # Parameters for Raster File Processing ---------------------------------- # rasterFileName = SVM_AlanHenryRaster