Skip to content

Commit

Permalink
fix a bug caused by our internal integer coordinates in the inner/out…
Browse files Browse the repository at this point in the history
…er computation, add --use-inner-outer option (this is experimental, so disabled by default)
  • Loading branch information
patrickbr committed May 16, 2024
1 parent ba8366f commit 501d359
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 102 deletions.
24 changes: 12 additions & 12 deletions src/spatialjoin/GeometryCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,6 @@ sj::Area sj::GeometryCache<sj::Area>::getFromDisk(size_t off,
_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.outerArea),
sizeof(double));

// simplified inner
// readPoly(_geomsFReads[tid], ret.inner);

// simplified outer
// readPoly(_geomsFReads[tid], ret.outer);

// boxIds
uint32_t numBoxIds;
_geomsFReads[tid].read(reinterpret_cast<char*>(&numBoxIds), sizeof(uint32_t));
Expand All @@ -215,6 +209,12 @@ sj::Area sj::GeometryCache<sj::Area>::getFromDisk(size_t off,
// OBB
readPoly(_geomsFReads[tid], ret.obb);

// simplified inner
readPoly(_geomsFReads[tid], ret.inner);

// simplified outer
readPoly(_geomsFReads[tid], ret.outer);

return ret;
}

Expand Down Expand Up @@ -352,12 +352,6 @@ size_t sj::GeometryCache<sj::Area>::add(const sj::Area& val) {
_geomsF.write(reinterpret_cast<const char*>(&val.outerArea), sizeof(double));
_geomsOffset += sizeof(double);

// innerGeom
// writePoly(val.inner);

// outerGeom
// writePoly(val.outer);

// boxIds
uint32_t size = val.boxIds.size();
_geomsF.write(reinterpret_cast<const char*>(&size), sizeof(uint32_t));
Expand All @@ -384,6 +378,12 @@ size_t sj::GeometryCache<sj::Area>::add(const sj::Area& val) {
// OBB
writePoly(val.obb);

// innerGeom
writePoly(val.inner);

// outerGeom
writePoly(val.outer);

return ret;
}

Expand Down
12 changes: 6 additions & 6 deletions src/spatialjoin/GeometryCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,6 @@ struct Area {
// outer area
double outerArea;

// inner geom
// util::geo::I32XSortedPolygon inner;

// outer geom
// util::geo::I32XSortedPolygon outer;

// box ids
std::vector<sj::boxids::BoxId> boxIds;

Expand All @@ -57,6 +51,12 @@ struct Area {

// OBB
util::geo::I32XSortedPolygon obb;

// inner geom
util::geo::I32XSortedPolygon inner;

// outer geom
util::geo::I32XSortedPolygon outer;
};

struct SimpleLine {
Expand Down
138 changes: 68 additions & 70 deletions src/spatialjoin/InnerOuter.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,27 @@ namespace innerouter {

enum class Mode { INNER, OUTER };

const static double MIN_GAIN = 0.20;

// ____________________________________________________________________________
template <typename T>
double signedDistanceFromPointToLine(const util::geo::Point<T>& A,
const util::geo::Point<T>& B,
const util::geo::Point<T>& C) {
// Check that the input is OK and not A == B.
if (A == B) return 0;

// The actual computation, see this Wikipedia article for the formula:
// https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
double distAB = sqrt(
(A.getX() * 1.0 - B.getX() * 1.0) * (A.getX() * 1.0 - B.getX() * 1.0) +
(A.getY() * 1.0 - B.getY() * 1.0) * (A.getY() * 1.0 - B.getY() * 1.0));
double areaTriangleTimesTwo =
(B.getY() * 1.0 - A.getY() * 1.0) * (A.getX() * 1.0 - C.getX() * 1.0) -
(B.getX() * 1.0 - A.getX() * 1.0) * (A.getY() * 1.0 - C.getY() * 1.0);
return areaTriangleTimesTwo / distAB;
}

// ____________________________________________________________________________
template <Mode MODE, typename T>
bool innerOuterDouglasPeucker(const util::geo::Ring<T>& inputPoints,
Expand Down Expand Up @@ -107,92 +128,69 @@ bool innerOuterDouglasPeucker(const util::geo::Ring<T>& inputPoints,
return a || b;
}

// ____________________________________________________________________________
template <typename T>
double signedDistanceFromPointToLine(const util::geo::Point<T>& A,
const util::geo::Point<T>& B,
const util::geo::Point<T>& C) {
// Check that the input is OK and not A == B.
if (A == B) return 0;

// The actual computation, see this Wikipedia article for the formula:
// https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
double distAB = sqrt((A.getX() - B.getX()) * (A.getX() - B.getX()) +
(A.getY() - B.getY()) * (A.getY() - B.getY()));
double areaTriangleTimesTwo = (B.getY() - A.getY()) * (A.getX() - C.getX()) -
(B.getX() - A.getX()) * (A.getY() - C.getY());
return areaTriangleTimesTwo / distAB;
}

// ____________________________________________________________________________
template <Mode MODE, typename T>
util::geo::MultiPolygon<T> simplifiedArea(
const util::geo::MultiPolygon<T>& area, double factor) {
// default value: empty area, means no inner geom
util::geo::Polygon<T> ret;

if (area.size() == 0) return ret;
util::geo::Polygon<T> simplifiedPoly(const util::geo::Polygon<T>& poly,
double factor) {
if (poly.getOuter().size() == 0) return {};

size_t numPointsOld = 0;
size_t numPointsNew = 0;

for (const auto& poly : area) {
util::geo::Polygon<T> simplified;
numPointsOld += poly.getOuter().size();

for (const auto& origInner : poly.getInners()) {
numPointsOld += origInner.size();
if (origInner.size() < 4) {
numPointsNew += origInner.size();
simplified.getInners().push_back(origInner);
continue;
}

// inner polygons are given in counter-clockwise order

double eps = sqrt(util::geo::area(origInner) / 3.14) * 3.14 * 2 * factor;

// simplify the inner geometries with outer simplification, because
// inner geometries are given counter-clockwise, it is not
// necessary to swap the simplification mode
util::geo::Ring<T> retDP;
size_t m = floor(origInner.size() / 2);
innerOuterDouglasPeucker<MODE>(origInner, retDP, 0, m, eps);
innerOuterDouglasPeucker<MODE>(origInner, retDP, m + 1,
origInner.size() - 1, eps);
retDP.push_back(retDP.front()); // ensure valid polygon
simplified.getInners().push_back(retDP);
numPointsNew += retDP.size();
}
util::geo::Polygon<T> simplified;
numPointsOld += poly.getOuter().size();

if (poly.getOuter().size() < 4) {
numPointsNew += poly.getOuter().size();
simplified.getOuter() = poly.getOuter();
} else {
double eps =
sqrt(util::geo::area(poly.getOuter()) / 3.14) * 3.14 * 2 * factor;

// simplify the outer geometry with inner simplification
util::geo::Ring<T> retDP;
size_t m = floor(poly.getOuter().size() / 2);
innerOuterDouglasPeucker<MODE>(poly.getOuter(), retDP, 0, m, eps);
innerOuterDouglasPeucker<MODE>(poly.getOuter(), retDP, m + 1,
poly.getOuter().size() - 1, eps);
retDP.push_back(retDP.front()); // ensure valid polygon
numPointsNew += retDP.size();
simplified.getOuter() = retDP;
for (const auto& origInner : poly.getInners()) {
numPointsOld += origInner.size();
if (origInner.size() < 4) {
numPointsNew += origInner.size();
simplified.getInners().push_back(origInner);
continue;
}

ret.push_back(simplified);
// inner polygons are given in counter-clockwise order

double eps =
sqrt(util::geo::ringArea(origInner) / 3.14) * 3.14 * 2 * factor;

// simplify the inner geometries with outer simplification, because
// inner geometries are given counter-clockwise, it is not
// necessary to swap the simplification mode
util::geo::Ring<T> retDP;
size_t m = floor(origInner.size() / 2);
innerOuterDouglasPeucker<MODE>(origInner, retDP, 0, m, eps);
innerOuterDouglasPeucker<MODE>(origInner, retDP, m + 1,
origInner.size() - 1, eps);
retDP.push_back(retDP.front()); // ensure valid polygon
simplified.getInners().push_back(retDP);
numPointsNew += retDP.size();
}

if (poly.getOuter().size() < 4) {
numPointsNew += poly.getOuter().size();
simplified.getOuter() = poly.getOuter();
} else {
double eps =
sqrt(util::geo::ringArea(poly.getOuter()) / 3.14) * 3.14 * 2 * factor;

// simplify the outer geometry with inner simplification
util::geo::Ring<T> retDP;
size_t m = floor(poly.getOuter().size() / 2);
innerOuterDouglasPeucker<MODE>(poly.getOuter(), retDP, 0, m, eps);
innerOuterDouglasPeucker<MODE>(poly.getOuter(), retDP, m + 1,
poly.getOuter().size() - 1, eps);
retDP.push_back(retDP.front()); // ensure valid polygon
numPointsNew += retDP.size();
simplified.getOuter() = retDP;
}

if (numPointsNew >= numPointsOld) {
if ((numPointsNew * 1.0) / (numPointsOld * 1.0) > MIN_GAIN) {
// gain too low, return empty poly to avoid extra space and double-checking
// later on
return {};
}

return ret;
return simplified;
}

} // namespace innerouter
Expand Down
16 changes: 11 additions & 5 deletions src/spatialjoin/SpatialJoinMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ void printHelp(int argc, char** argv) {
<< std::setw(41) << " --no-diag-box"
<< "disable diagonal bounding-box based pre-filter\n"
<< std::setw(41) << " --no-fast-sweep-skip"
<< "disable fast sweep skip using binary search" << std::endl;
<< "disable fast sweep skip using binary search\n"
<< std::setw(41) << " --use-inner-outer"
<< "(experimental) use inner/outer geometries" << std::endl;
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -106,6 +108,7 @@ int main(int argc, char** argv) {
bool useCutouts = true;
bool useDiagBox = true;
bool useFastSweepSkip = true;
bool useInnerOuter = false;

for (int i = 1; i < argc; i++) {
std::string cur = argv[i];
Expand Down Expand Up @@ -152,6 +155,8 @@ int main(int argc, char** argv) {
useDiagBox = false;
} else if (cur == "--no-fast-sweep-skip") {
useFastSweepSkip = false;
} else if (cur == "--use-inner-outer") {
useInnerOuter = true;
} else {
std::cerr << "Unknown option '" << cur << "', see -h" << std::endl;
exit(1);
Expand Down Expand Up @@ -211,10 +216,11 @@ int main(int argc, char** argv) {

size_t NUM_THREADS = std::thread::hardware_concurrency();

Sweeper sweeper({NUM_THREADS, prefix, intersects, contains, covers, touches,
equals, overlaps, crosses, suffix, useBoxIds, useArea,
useOBB, useCutouts, useDiagBox, useFastSweepSkip},
useCache, cache, output);
Sweeper sweeper(
{NUM_THREADS, prefix, intersects, contains, covers, touches, equals,
overlaps, crosses, suffix, useBoxIds, useArea, useOBB, useCutouts,
useDiagBox, useFastSweepSkip, useInnerOuter},
useCache, cache, output);

if (!useCache) {
LOGTO(INFO, std::cerr) << "Parsing input geometries...";
Expand Down
34 changes: 32 additions & 2 deletions src/spatialjoin/Stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ struct Stats {
uint64_t timeCutoutGeoCheckLineLine = 0;
uint64_t timeCutoutGeoCheckLinePoint = 0;

uint64_t timeInnerOuterCheckAreaArea = 0;
uint64_t timeInnerOuterCheckAreaLine = 0;
uint64_t timeInnerOuterCheckAreaPoint = 0;

size_t fullGeoChecksAreaArea = 0;
size_t fullGeoChecksAreaLine = 0;
size_t fullGeoChecksAreaPoint = 0;
Expand All @@ -47,6 +51,10 @@ struct Stats {
size_t cutoutGeoChecksLineLine = 0;
size_t cutoutGeoChecksLinePoint = 0;

size_t innerOuterChecksAreaArea = 0;
size_t innerOuterChecksAreaLine = 0;
size_t innerOuterChecksAreaPoint = 0;

std::string toString();
};

Expand All @@ -62,7 +70,8 @@ inline std::string Stats::toString() {
timeFullGeoCheckAreaPoint + timeFullGeoCheckLineLine +
timeFullGeoCheckLinePoint + timeCutoutGeoCheckAreaArea +
timeCutoutGeoCheckAreaLine + timeCutoutGeoCheckAreaPoint +
timeCutoutGeoCheckLineLine + timeCutoutGeoCheckLinePoint) /
timeCutoutGeoCheckLineLine + timeCutoutGeoCheckLinePoint +
timeInnerOuterCheckAreaArea + timeInnerOuterCheckAreaLine) /
1000000000.0;

std::stringstream ss;
Expand Down Expand Up @@ -165,6 +174,21 @@ inline std::string Stats::toString() {
<< " cutout geom checks LINE/POINT: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";

t = double(timeInnerOuterCheckAreaArea) / 1000000000.0;
ss << "time for " << innerOuterChecksAreaArea
<< " inner/outer checks AREA/AREA: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";

t = double(timeInnerOuterCheckAreaLine) / 1000000000.0;
ss << "time for " << innerOuterChecksAreaLine
<< " inner/outer checks AREA/LINE: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";

t = double(timeInnerOuterCheckAreaPoint) / 1000000000.0;
ss << "time for " << innerOuterChecksAreaPoint
<< " inner/outer checks AREA/POINT: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";

t = double(timeWrite) / 1000000000.0;
ss << "time for output writing: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";
Expand Down Expand Up @@ -197,6 +221,9 @@ inline Stats operator+(const Stats& a, const Stats& b) {
a.timeCutoutGeoCheckAreaPoint + b.timeCutoutGeoCheckAreaPoint,
a.timeCutoutGeoCheckLineLine + b.timeCutoutGeoCheckLineLine,
a.timeCutoutGeoCheckLinePoint + b.timeCutoutGeoCheckLinePoint,
a.timeInnerOuterCheckAreaArea + b.timeInnerOuterCheckAreaArea,
a.timeInnerOuterCheckAreaLine + b.timeInnerOuterCheckAreaLine,
a.timeInnerOuterCheckAreaPoint + b.timeInnerOuterCheckAreaPoint,
a.fullGeoChecksAreaArea + b.fullGeoChecksAreaArea,
a.fullGeoChecksAreaLine + b.fullGeoChecksAreaLine,
a.fullGeoChecksAreaPoint + b.fullGeoChecksAreaPoint,
Expand All @@ -206,7 +233,10 @@ inline Stats operator+(const Stats& a, const Stats& b) {
a.cutoutGeoChecksAreaLine + b.cutoutGeoChecksAreaLine,
a.cutoutGeoChecksAreaPoint + b.cutoutGeoChecksAreaPoint,
a.cutoutGeoChecksLineLine + b.cutoutGeoChecksLineLine,
a.cutoutGeoChecksLinePoint + b.cutoutGeoChecksLinePoint};
a.cutoutGeoChecksLinePoint + b.cutoutGeoChecksLinePoint,
a.innerOuterChecksAreaArea + b.innerOuterChecksAreaArea,
a.innerOuterChecksAreaLine + b.innerOuterChecksAreaLine,
a.innerOuterChecksAreaPoint + b.innerOuterChecksAreaPoint};
}

inline void operator+=(Stats& a, const Stats& b) { a = a + b; }
Expand Down
Loading

0 comments on commit 501d359

Please sign in to comment.