-
Notifications
You must be signed in to change notification settings - Fork 74
v0.2.52..v0.2.53 changeset DuplicateNodeRemover.cpp
Garret Voltz edited this page Feb 12, 2020
·
1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/ops/DuplicateNodeRemover.cpp b/hoot-core/src/main/cpp/hoot/core/ops/DuplicateNodeRemover.cpp
new file mode 100644
index 0000000..dbbde79
--- /dev/null
+++ b/hoot-core/src/main/cpp/hoot/core/ops/DuplicateNodeRemover.cpp
@@ -0,0 +1,314 @@
+/*
+ * This file is part of Hootenanny.
+ *
+ * Hootenanny is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --------------------------------------------------------------------
+ *
+ * The following copyright notices are generated automatically. If you
+ * have a new notice to add, please use the format:
+ * " * @copyright Copyright ..."
+ * This will properly maintain the copyright information. DigitalGlobe
+ * copyrights will be updated automatically.
+ *
+ * @copyright Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020 DigitalGlobe (http://www.digitalglobe.com/)
+ */
+
+#include "DuplicateNodeRemover.h"
+
+// Hoot
+#include <hoot/core/util/Factory.h>
+#include <hoot/core/util/MapProjector.h>
+#include <hoot/core/index/ClosePointHash.h>
+#include <hoot/core/index/OsmMapIndex.h>
+#include <hoot/core/util/Settings.h>
+#include <hoot/core/util/ConfigOptions.h>
+#include <hoot/core/util/Log.h>
+#include <hoot/core/util/MapProjector.h>
+#include <hoot/core/elements/OsmUtils.h>
+#include <hoot/core/visitors/RemoveDuplicateWayNodesVisitor.h>
+#include <hoot/core/schema/ExactTagDifferencer.h>
+
+// Qt
+#include <QTime>
+
+// Standard
+#include <iostream>
+
+// TGS
+#include <tgs/StreamUtils.h>
+#include <tgs/RStarTree/HilbertRTree.h>
+
+using namespace Tgs;
+
+namespace hoot
+{
+
+HOOT_FACTORY_REGISTER(OsmMapOperation, DuplicateNodeRemover)
+
+double calcDistanceSquared(const NodePtr& n1, const NodePtr& n2)
+{
+ double dx = n1->getX() - n2->getX();
+ double dy = n1->getY() - n2->getY();
+ return dx * dx + dy * dy;
+}
+
+DuplicateNodeRemover::DuplicateNodeRemover(Meters distanceThreshold)
+{
+ _distance = distanceThreshold;
+ if (_distance < 0.0)
+ {
+ // This default value was found experimentally with the building data from #3446 and #3495 and
+ // validated with JOSM validation. Further tweaks may be required to the default value for other
+ // datasets.
+ _distance = ConfigOptions().getDuplicateNodeRemoverDistanceThreshold();
+ if (_distance <= 0.0)
+ {
+ throw IllegalArgumentException(
+ "Nearby node merging distance must be greater than zero. Distance specified: " +
+ QString::number(_distance));
+ }
+ }
+}
+
+void DuplicateNodeRemover::apply(std::shared_ptr<OsmMap>& map)
+{
+ QTime time;
+ time.start();
+
+ std::shared_ptr<OsmMap> wgs84;
+ std::shared_ptr<OsmMap> planar;
+
+ if (MapProjector::isGeographic(map))
+ {
+ wgs84 = map;
+ planar.reset(new OsmMap(map));
+ MapProjector::projectToPlanar(planar);
+ }
+ else
+ {
+ planar = map;
+ // if we need to check for bounds containment
+ if (_bounds.isNull() == false)
+ {
+ wgs84.reset(new OsmMap(map));
+ MapProjector::projectToWgs84(wgs84);
+ }
+ }
+
+ ClosePointHash cph(_distance);
+
+ int startNodeCount = 0;
+ const NodeMap& nodes = planar->getNodes();
+ for (NodeMap::const_iterator it = nodes.begin(); it != nodes.end(); ++it)
+ {
+ const NodePtr& n = it->second;
+ cph.addPoint(n->getX(), n->getY(), n->getId());
+ startNodeCount++;
+
+ if (startNodeCount % 100000 == 0)
+ {
+ PROGRESS_INFO(
+ "\tInitialized " << StringUtils::formatLargeNumber(startNodeCount) << " nodes / " <<
+ StringUtils::formatLargeNumber(nodes.size()) << " for duplicate removal.");
+ }
+ }
+ LOG_VART(cph.size());
+
+ double distanceSquared = _distance * _distance;
+ int processedCount = 0;
+ cph.resetIterator();
+ ExactTagDifferencer tagDiff;
+
+ while (cph.next())
+ {
+ const std::vector<long>& v = cph.getMatch();
+
+ for (size_t i = 0; i < v.size(); i++)
+ {
+ const long matchIdI = v[i];
+
+ if (!map->containsNode(matchIdI)) continue;
+
+ for (size_t j = 0; j < v.size(); j++)
+ {
+ bool replace = false;
+ double calcdDistanceSquared = -1.0;
+ const long matchIdJ = v[j];
+
+ if (matchIdI != matchIdJ && map->containsNode(matchIdJ))
+ {
+ const NodePtr& n1 = planar->getNode(matchIdI);
+ const NodePtr& n2 = planar->getNode(matchIdJ);
+ LOG_VART(n1);
+ LOG_VART(n2);
+
+ if (n1->getStatus() == n2->getStatus())
+ {
+ calcdDistanceSquared = calcDistanceSquared(n1, n2);
+ LOG_VART(distanceSquared);
+ LOG_VART(calcdDistanceSquared);
+ if (distanceSquared > calcdDistanceSquared)
+ {
+ // if the geographic bounds are not specified.
+ if (_bounds.isNull() == true)
+ {
+ replace = true;
+ }
+ // if the geographic bounds are specified, then make sure both points are inside.
+ else
+ {
+ const NodePtr& g1 = wgs84->getNode(matchIdI);
+ const NodePtr& g2 = wgs84->getNode(matchIdJ);
+ if (_bounds.contains(g1->getX(), g1->getY()) &&
+ _bounds.contains(g2->getX(), g2->getY()))
+ {
+ replace = true;
+ }
+ }
+ LOG_VART(replace);
+
+ LOG_VART(tagDiff.diff(map, n1, n2) != 0.0);
+ if (replace && tagDiff.diff(map, n1, n2) != 0.0)
+ {
+ replace = false;
+ }
+
+ if (replace)
+ {
+ LOG_TRACE(
+ "Merging nodes: " << ElementId(ElementType::Node, matchIdJ) << " and " <<
+ ElementId(ElementType::Node, matchIdI) << "...");
+ map->replaceNode(matchIdJ, matchIdI);
+ _numAffected++;
+ }
+ }
+ }
+ }
+
+ // custom logging for debugging purposes
+ if (Log::getInstance().getLevel() <= Log::Trace)
+ {
+ if (calcdDistanceSquared != -1.0)
+ {
+ _logMergeResult(
+ matchIdI, matchIdJ, map, replace, std::sqrt(distanceSquared),
+ std::sqrt(calcdDistanceSquared));
+ }
+ else
+ {
+ _logMergeResult(matchIdI, matchIdJ, map, replace);
+ }
+ }
+ }
+ }
+
+ processedCount++;
+ if (processedCount % 10000 == 0)
+ {
+ PROGRESS_INFO(
+ "\tMerged " << StringUtils::formatLargeNumber(_numAffected) << " node groups from " <<
+ StringUtils::formatLargeNumber(startNodeCount) << " total nodes.");
+ }
+ }
+
+ // Due to how Way::replaceNode is being called, we could end up with some duplicate way nodes, so
+ // let's remove those here.
+ RemoveDuplicateWayNodesVisitor dupeNodesRemover;
+ LOG_INFO("\t" << dupeNodesRemover.getInitStatusMessage());
+ map->visitWaysRw(dupeNodesRemover);
+ LOG_DEBUG("\t" << dupeNodesRemover.getCompletedStatusMessage());
+}
+
+bool DuplicateNodeRemover::_passesLogMergeFilter(const long nodeId1, const long nodeId2,
+ OsmMapPtr& map) const
+{
+ // can add various filtering criteria here for debugging purposes...
+
+ QStringList kvps;
+ kvps.append("OBJECTID=168008");
+ kvps.append("OBJECTID=76174");
+
+ std::set<ElementId> wayIdsOwning1;
+ const std::set<long> waysOwning1 = OsmUtils::getContainingWayIdsByNodeId(nodeId1, map);
+ for (std::set<long>::const_iterator it = waysOwning1.begin(); it != waysOwning1.end(); ++it)
+ {
+ wayIdsOwning1.insert(ElementId(ElementType::Way, *it));
+ }
+ if (OsmUtils::anyElementsHaveAnyKvp(kvps, wayIdsOwning1, map))
+ {
+ return true;
+ }
+
+ std::set<ElementId> wayIdsOwning2;
+ const std::set<long> waysOwning2 = OsmUtils::getContainingWayIdsByNodeId(nodeId2, map);
+ for (std::set<long>::const_iterator it = waysOwning2.begin(); it != waysOwning2.end(); ++it)
+ {
+ wayIdsOwning2.insert(ElementId(ElementType::Way, *it));
+ }
+ if (OsmUtils::anyElementsHaveAnyKvp(kvps, wayIdsOwning2, map))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+void DuplicateNodeRemover::_logMergeResult(const long nodeId1, const long nodeId2, OsmMapPtr& map,
+ const bool replaced, const double distance,
+ const double calcdDistance)
+{
+ if (_passesLogMergeFilter(nodeId1, nodeId2, map))
+ {
+ QString msg = "merging nodes: ";
+ if (!replaced)
+ {
+ msg.prepend("not ");
+ }
+ msg += QString::number(nodeId1) + " and " + QString::number(nodeId2);
+ if (calcdDistance != -1.0)
+ {
+ msg +=
+ " at a distance of: " + QString::number(calcdDistance) +
+ " where the distance threshold is: " + QString::number(distance);
+ }
+ msg += "...";
+ LOG_TRACE(msg);
+ LOG_TRACE(
+ "Node " << nodeId1 << " belongs to ways: " <<
+ OsmUtils::getContainingWayIdsByNodeId(nodeId1, map));
+ LOG_TRACE(
+ "Node " << nodeId2 << " belongs to ways: " <<
+ OsmUtils::getContainingWayIdsByNodeId(nodeId2, map));
+ LOG_VART(OsmUtils::nodesAreContainedInTheSameWay(nodeId1, nodeId2, map));
+ }
+}
+
+void DuplicateNodeRemover::removeNodes(std::shared_ptr<OsmMap> map, Meters distanceThreshold)
+{
+ DuplicateNodeRemover mnn(distanceThreshold);
+ mnn.apply(map);
+}
+
+void DuplicateNodeRemover::readObject(QDataStream& is)
+{
+ is >> _distance;
+}
+
+void DuplicateNodeRemover::writeObject(QDataStream& os) const
+{
+ os << _distance;
+}
+
+}