-
Notifications
You must be signed in to change notification settings - Fork 74
v0.2.49..v0.2.50 changeset ManualMatchValidator.cpp
Garret Voltz edited this page Nov 6, 2019
·
1 revision
diff --git a/hoot-core/src/main/cpp/hoot/core/ops/ManualMatchValidator.cpp b/hoot-core/src/main/cpp/hoot/core/ops/ManualMatchValidator.cpp
new file mode 100644
index 0000000..ee89506
--- /dev/null
+++ b/hoot-core/src/main/cpp/hoot/core/ops/ManualMatchValidator.cpp
@@ -0,0 +1,313 @@
+/*
+ * This file is part of Hootenanny.
+ *
+ * Hootenanny is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * --------------------------------------------------------------------
+ *
+ * The following copyright notices are generated automatically. If you
+ * have a new notice to add, please use the format:
+ * " * @copyright Copyright ..."
+ * This will properly maintain the copyright information. DigitalGlobe
+ * copyrights will be updated automatically.
+ *
+ * @copyright Copyright (C) 2019 DigitalGlobe (http://www.digitalglobe.com/)
+ */
+#include "ManualMatchValidator.h"
+
+// hoot
+#include <hoot/core/util/Factory.h>
+
+namespace hoot
+{
+
+// NOTE(review): presumably registers ManualMatchValidator with the hoot Factory as an
+// OsmMapOperation implementation; confirm against hoot/core/util/Factory.h.
+HOOT_FACTORY_REGISTER(OsmMapOperation, ManualMatchValidator)
+
+/**
+ * Creates a validator that requires REF1 tags, rejects UUID style manual match IDs, and emits
+ * abbreviated issue messages by default.
+ */
+ManualMatchValidator::ManualMatchValidator() :
+_requireRef1(true),
+_allowUuidManualMatchIds(false),
+_fullDebugOutput(false)
+{
+  // Matches a brace enclosed UUID, e.g. {8-4-4-4-12 hex digits}; only consulted when UUID manual
+  // match IDs are allowed.
+  const QString uuidPattern(
+    "\\{[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}\\}");
+  _uuidRegEx.setPattern(uuidPattern);
+}
+
+/**
+ * Validates the manual match tags on every node, way, and relation in the map.
+ *
+ * Any state left over from a previous run is discarded first. Issues found are stored in the
+ * error and warning collections of this validator; the map itself is only read.
+ *
+ * @param map the map whose elements are validated
+ */
+void ManualMatchValidator::apply(const OsmMapPtr& map)
+{
+  // Start from a clean slate.
+  _numAffected = 0;
+  _errors.clear();
+  _warnings.clear();
+  _ref1Mappings.getIdToTagValueMappings().clear();
+  _ref1Mappings.setTagKey(MetadataTags::Ref1());
+
+  // The REF1 values have to be collected up front, since the REF2/REVIEW checks look them up.
+  map->visitRo(_ref1Mappings);
+
+  const NodeMap& allNodes = map->getNodes();
+  for (const NodeMap::value_type& nodeEntry : allNodes)
+  {
+    const ConstNodePtr currentNode = nodeEntry.second;
+    _validate(currentNode);
+  }
+
+  const WayMap& allWays = map->getWays();
+  for (const WayMap::value_type& wayEntry : allWays)
+  {
+    const ConstWayPtr currentWay = wayEntry.second;
+    _validate(currentWay);
+  }
+
+  // It is unclear whether relations are ever manually matched, so they are validated as well.
+  const RelationMap& allRelations = map->getRelations();
+  for (const RelationMap::value_type& relationEntry : allRelations)
+  {
+    const ConstRelationPtr currentRelation = relationEntry.second;
+    _validate(currentRelation);
+  }
+}
+
+/**
+ * Validates the manual match tags (REF1, REF2, and REVIEW) of a single element.
+ *
+ * At most one issue is recorded per element, even if the element has several problems. An
+ * element with an empty REF1, REF2, or REVIEW tag value is reported and the method returns
+ * immediately without counting the element in _numAffected; every other element is counted.
+ *
+ * A REF2 or REVIEW ID without a corresponding REF1 is recorded as an error when _requireRef1 is
+ * set and as a warning otherwise.
+ *
+ * @param element the element to validate
+ */
+void ManualMatchValidator::_validate(const ConstElementPtr& element)
+{
+  LOG_VARD(_requireRef1);
+  LOG_VARD(_allowUuidManualMatchIds);
+
+  // Just recording one issue for each element for performance reasons, even if there are multiple.
+
+  const Tags& tags = element->getTags();
+
+  // if the tag key for the id exists, it can't be empty
+  QString ref1;
+  Tags::const_iterator tagRef1Itr = tags.find(MetadataTags::Ref1());
+  if (tagRef1Itr != tags.end())
+  {
+    ref1 = tagRef1Itr.value().trimmed().toLower();
+    if (ref1.isEmpty())
+    {
+      _recordIssue(element, "Empty REF1 tag");
+      return;
+    }
+  }
+  LOG_VART(ref1);
+
+  QStringList ref2;
+  Tags::const_iterator tagRef2Itr = tags.find(MetadataTags::Ref2());
+  if (tagRef2Itr != tags.end())
+  {
+    // use SkipEmptyParts to get past trailing semicolons
+    ref2 =
+      tagRef2Itr.value().trimmed().toLower().split(";", QString::SplitBehavior::SkipEmptyParts);
+    if (ref2.isEmpty() || (ref2.size() == 1 && ref2.at(0).trimmed().isEmpty()))
+    {
+      _recordIssue(element, "Empty REF2 tag");
+      return;
+    }
+  }
+  LOG_VART(ref2.size());
+
+  QStringList review;
+  Tags::const_iterator tagReviewItr = tags.find(MetadataTags::Review());
+  if (tagReviewItr != tags.end())
+  {
+    review =
+      tagReviewItr.value().trimmed().toLower().split(";", QString::SplitBehavior::SkipEmptyParts);
+    if (review.isEmpty() || (review.size() == 1 && review.at(0).trimmed().isEmpty()))
+    {
+      _recordIssue(element, "Empty REVIEW tag");
+      return;
+    }
+  }
+  LOG_VART(review.size());
+
+  // REF2 and review can be multiple IDs (many to one match), but REF1 is always a single ID.
+  if (ref1.split(";").size() > 1)
+  {
+    _recordIssue(element, "REF1 ID must be singular. REF1=" + ref1);
+  }
+  // validate manual match ids
+  else if (!ref1.isEmpty() && !_isValidRef1Id(ref1))
+  {
+    _recordIssue(element, "Invalid REF1=" + ref1);
+  }
+  // can't have both ref1 and ref2/review on the same element
+  else if (!ref1.isEmpty() && (!ref2.isEmpty() || !review.isEmpty()))
+  {
+    _recordIssue(element, "Element has both REF1 and either a REF2 or REVIEW tag");
+  }
+  // an unknown1 element can't have a ref2 or review tag
+  else if (element->getStatus() == Status::Unknown1 && (!ref2.isEmpty() || !review.isEmpty()))
+  {
+    _recordIssue(element, "Unknown1 element with REF2 or REVIEW tag");
+  }
+  // an unknown2 element can't have a ref1 tag
+  else if (element->getStatus() == Status::Unknown2 && !ref1.isEmpty())
+  {
+    _recordIssue(element, "Unknown2 element with REF1 tag");
+  }
+  // If a ref2 or review has multiple ID's, none of them may be one of the non-unique
+  // placeholder IDs.
+  else if (!ref2.isEmpty() && ref2.size() > 1 &&
+           (ref2.contains("todo", Qt::CaseInsensitive) ||
+            ref2.contains("none", Qt::CaseInsensitive)))
+  {
+    _recordIssue(element, "Invalid many to one REF2=" + ref2.join(";"));
+  }
+  else if (!review.isEmpty() && review.size() > 1 &&
+           (review.contains("todo") || review.contains("none")))
+  {
+    _recordIssue(element, "Invalid many to one REVIEW=" + review.join(";"));
+  }
+  // check for dupes
+  else if (!ref2.isEmpty() && ref2.toSet().size() != ref2.size())
+  {
+    const QStringList duplicates = StringUtils::getDuplicates(ref2).toList();
+    assert(duplicates.size() > 0);
+    _recordIssue(element, "Duplicate IDs found in REF2: " + duplicates.join(";"));
+  }
+  else if (!review.isEmpty() && review.toSet().size() != review.size())
+  {
+    const QStringList duplicates = StringUtils::getDuplicates(review).toList();
+    assert(duplicates.size() > 0);
+    _recordIssue(element, "Duplicate IDs found in REVIEW: " + duplicates.join(";"));
+  }
+  else if (!ref2.isEmpty() || !review.isEmpty())
+  {
+    // Fetch the REF1 values once per element instead of once per examined ID.
+    const auto ref1Ids = _ref1Mappings.getIdToTagValueMappings().values();
+    // Set as soon as an issue has been recorded, so only one issue is reported per element.
+    bool issueRecorded = false;
+
+    for (int i = 0; i < ref2.size() && !issueRecorded; i++)
+    {
+      const QString& ref2Id = ref2.at(i);
+      LOG_VART(ref2Id);
+
+      if (!_isValidRef2OrReviewId(ref2Id))
+      {
+        _recordIssue(element, "Invalid REF2=" + ref2Id);
+        issueRecorded = true;
+      }
+      // make sure a ref1 exists for each ref2
+      else if (!_isValidNonUniqueMatchId(ref2Id) && !ref1Ids.contains(ref2Id))
+      {
+        // an error if REF1s are required, a warning otherwise
+        _recordIssue(element, "No REF1 exists for REF2=" + ref2Id, _requireRef1);
+        issueRecorded = true;
+      }
+      // same id can't be on both ref2 and review for the same element
+      else if (!review.isEmpty() && !_isValidNonUniqueMatchId(ref2Id) && review.contains(ref2Id))
+      {
+        _recordIssue(element, "Invalid repeated ID: REF2=" + ref2Id + ", REVIEW=" + ref2Id);
+        issueRecorded = true;
+      }
+    }
+
+    // Same checks for review. These run even when the element also has a ref2 tag, as long as no
+    // issue was found with the ref2 IDs; otherwise an invalid review ID sitting next to a valid
+    // ref2 would go unnoticed.
+    for (int i = 0; i < review.size() && !issueRecorded; i++)
+    {
+      const QString& reviewId = review.at(i);
+      LOG_VART(reviewId);
+
+      if (!_isValidRef2OrReviewId(reviewId))
+      {
+        _recordIssue(element, "Invalid REVIEW=" + reviewId);
+        issueRecorded = true;
+      }
+      // make sure a ref1 exists for each review
+      else if (!_isValidNonUniqueMatchId(reviewId) && !ref1Ids.contains(reviewId))
+      {
+        // honor _requireRef1 exactly as is done for ref2
+        _recordIssue(element, "No REF1 exists for REVIEW=" + reviewId, _requireRef1);
+        issueRecorded = true;
+      }
+    }
+  }
+
+  _numAffected++;
+}
+
+/**
+ * Stores an issue message for an element, replacing any message already stored for that element
+ * in the same collection.
+ *
+ * The message is extended with identifying information: the complete set of remaining tags when
+ * full debug output is enabled, otherwise just the uuid tag if the element has one.
+ *
+ * @param element the element the issue was found on
+ * @param message description of the issue
+ * @param isError true to record the issue as an error; false to record it as a warning
+ */
+void ManualMatchValidator::_recordIssue(const ConstElementPtr& element, QString message,
+                                        const bool isError)
+{
+  // Problems in elements without unique tags are hard to track down, because the element IDs in
+  // the source file don't necessarily match the element IDs seen here. The manual match tags
+  // themselves are stripped from the copy before it is used.
+  Tags remainingTags = element->getTags();
+  remainingTags.remove(MetadataTags::Ref1());
+  remainingTags.remove(MetadataTags::Ref2());
+  remainingTags.remove(MetadataTags::Review());
+
+  if (_fullDebugOutput)
+  {
+    // Append every remaining tag.
+    message += "; tags: " + remainingTags.toString();
+  }
+  else
+  {
+    // Use the uuid if the element has one. score-matches adds uuids, but only after the source
+    // file has been read, so they are of limited help for finding the problem in the source file.
+    Tags::const_iterator uuidItr = remainingTags.find(MetadataTags::Uuid());
+    if (uuidItr != remainingTags.end())
+    {
+      message += "; uuid=" + uuidItr.value();
+    }
+  }
+
+  LOG_VART(message);
+  if (!isError)
+  {
+    _warnings[element->getElementId()] = message;
+  }
+  else
+  {
+    _errors[element->getElementId()] = message;
+  }
+}
+
+/**
+ * Determines whether an ID is acceptable in a REF2 or REVIEW tag.
+ *
+ * @param matchId the ID to check; it is trimmed and lower cased before being examined
+ * @return true if the ID is non-empty and is either a non-unique ("none"/"todo") or a unique
+ * match ID
+ */
+bool ManualMatchValidator::_isValidRef2OrReviewId(const QString& matchId) const
+{
+  const QString normalizedId = matchId.trimmed().toLower();
+  if (normalizedId.isEmpty())
+  {
+    return false;
+  }
+  return _isValidNonUniqueMatchId(normalizedId) || _isValidUniqueMatchId(normalizedId);
+}
+
+/**
+ * Determines whether an ID is acceptable in a REF1 tag.
+ *
+ * @param matchId the ID to check; it is trimmed and lower cased before being examined
+ * @return true if the ID is non-empty, is not one of the non-unique IDs ("none"/"todo"), and is
+ * a unique match ID
+ */
+bool ManualMatchValidator::_isValidRef1Id(const QString& matchId) const
+{
+  const QString normalizedId = matchId.trimmed().toLower();
+  if (normalizedId.isEmpty() || _isValidNonUniqueMatchId(normalizedId))
+  {
+    return false;
+  }
+  return _isValidUniqueMatchId(normalizedId);
+}
+
+/**
+ * Determines whether an ID has the form of a unique manual match ID.
+ *
+ * @param matchId the ID to check; it is examined as passed in
+ * @return true if UUID IDs are allowed and the ID matches the UUID pattern exactly, or if the ID
+ * has at least six characters and its last six are alphanumeric
+ */
+bool ManualMatchValidator::_isValidUniqueMatchId(const QString& matchId) const
+{
+  // backward compatibility with the original uuid ids
+  if (_allowUuidManualMatchIds && _uuidRegEx.exactMatch(matchId))
+  {
+    return true;
+  }
+
+  // This corresponds with how AddRef1Visitor creates the ids.
+  if (matchId.size() < 6)
+  {
+    return false;
+  }
+  return StringUtils::isAlphaNumeric(matchId.right(6));
+}
+
+/**
+ * Determines whether an ID is one of the non-unique match IDs.
+ *
+ * @param matchId the ID to check; compared case insensitively, without trimming
+ * @return true if the ID is "none" or "todo"
+ */
+bool ManualMatchValidator::_isValidNonUniqueMatchId(const QString& matchId) const
+{
+  const QString loweredId = matchId.toLower();
+  if (loweredId == "none")
+  {
+    return true;
+  }
+  return loweredId == "todo";
+}
+
+}