From 33d68ea2aebcd62954bf89ce5c081fbb88ef1f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Grenotton?= Date: Fri, 2 Jun 2023 02:15:14 +0200 Subject: [PATCH] Rely on numpy array to store pending ways Much more efficient memory-wise than massive lists of tuples, allowing to process much bigger dataset with a reasonable amount of memory. Fixes #16 --- .gitignore | 2 +- pyhgtmap/output/__init__.py | 38 +++++++++++++++++++++++++++++++------ pyhgtmap/output/o5mUtil.py | 16 ++++++++-------- pyhgtmap/output/osmUtil.py | 10 +++++----- pyhgtmap/output/pbfUtil.py | 23 +++++++++++++--------- pyproject.toml | 1 + 6 files changed, 61 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index e08625f..45e3d91 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -venv/ +venv*/ .vscode/ .pytest_cache/ __pycache__/ diff --git a/pyhgtmap/output/__init__.py b/pyhgtmap/output/__init__.py index 0d7288e..4cf24d9 100644 --- a/pyhgtmap/output/__init__.py +++ b/pyhgtmap/output/__init__.py @@ -1,7 +1,8 @@ import logging -from typing import Callable, List, NamedTuple, Tuple +from typing import Any, Callable, List, NamedTuple, Tuple import numpy +from nptyping import NDArray, Structure from pyhgtmap.hgt.tile import TileContours @@ -18,6 +19,12 @@ ], ) +# Efficient representation of many ways (array of 4-tuple, similar to a list of WayType) +WaysType = NDArray[ + Any, + Structure["first_node_id: Int, nb_nodes: Int, closed_loop: Bool, elevation: Int"], +] + NodeType = Tuple[int, int] @@ -42,7 +49,7 @@ class Output: def __init__(self) -> None: self.timestampString: str - self.ways_pending_write: List[Tuple[List[WayType], int]] = [] + self.ways_pending_write: List[Tuple[WaysType, int]] = [] def write_nodes( self, @@ -50,29 +57,33 @@ def write_nodes( timestamp_string: str, start_node_id: int, osm_version: float, - ) -> Tuple[int, List[WayType]]: + ) -> Tuple[int, WaysType]: """ Write nodes and prepare associated ways. Return (latest_node_id, [ways]) tuple. 
""" raise NotImplementedError - def write_ways(self, ways: List[WayType], start_way_id: int) -> None: + def write_ways(self, ways: WaysType, start_way_id: int) -> None: """ Add ways previously prepared by write_nodes to be written later (as ways should ideally be written after all nodes). """ self.ways_pending_write.append((ways, start_way_id)) - def _write_ways(self, ways: List[WayType], start_way_id: int) -> None: + def _write_ways(self, ways: WaysType, start_way_id: int) -> None: """Actually write ways, upon output finalization via done().""" raise NotImplementedError def done(self) -> None: """Finalize and close file.""" - logger.debug("done() - Writing pending ways") + logger.debug( + "done() - Writing %s pending ways", + sum([len(x[0]) for x in self.ways_pending_write]), + ) for ways, start_way_id in self.ways_pending_write: self._write_ways(ways, start_way_id) + logger.debug("done() - done!") def flush(self) -> None: """Flush file to disk.""" @@ -121,3 +132,18 @@ def make_nodes_ways( else: ways.append(WayType(nodeRefs[0], len(nodeRefs), False, elevation)) return nodes, ways + + +def build_efficient_ways(ways: List[WayType]) -> WaysType: + """Convert a list of ways (tuples) into a more efficient numpy array.""" + return numpy.array( + ways, + dtype=numpy.dtype( + [ + ("first_node_id", int), + ("nb_nodes", int), + ("closed_loop", bool), + ("elevation", int), + ] + ), + ) # type: ignore # not supported by pylance diff --git a/pyhgtmap/output/o5mUtil.py b/pyhgtmap/output/o5mUtil.py index b23fa0a..ad6d82e 100644 --- a/pyhgtmap/output/o5mUtil.py +++ b/pyhgtmap/output/o5mUtil.py @@ -2,7 +2,7 @@ import time -from typing import Callable, List, Tuple +from typing import Callable, Tuple import pyhgtmap.output from pyhgtmap import output @@ -173,7 +173,7 @@ def makeNodeData(self, node, lastNode, idDelta): # no tags, so data is complete now return join(data) - def _write_ways(self, ways, startWayId): + def _write_ways(self, ways: pyhgtmap.output.WaysType, startWayId): 
"""writes ways to self.outf. ways shall be a list of (, , , ) tuples. """ @@ -187,7 +187,7 @@ def _write_ways(self, ways, startWayId): for way in ways[1:]: self.writeWay(way, idDelta=1) - def writeWay(self, way, idDelta, first=False): + def writeWay(self, way: pyhgtmap.output.WayType, idDelta, first=False): wayDataset = [] # 0x11 means way wayDataset.append(writableInt(0x11)) @@ -197,7 +197,7 @@ def writeWay(self, way, idDelta, first=False): wayDataset.append(wayData) self.outf.write(join(wayDataset)) - def makeWayData(self, way, idDelta, first): + def makeWayData(self, way: pyhgtmap.output.WayType, idDelta, first): startNodeId, length, isCycle, elevation = way data = [] data.append(sint2str(idDelta)) @@ -263,16 +263,16 @@ def write_nodes( timestamp_string: str, start_node_id: int, osm_version: float, - ) -> Tuple[int, List[output.WayType]]: + ) -> Tuple[int, output.WaysType]: return writeNodes(self, tile_contours, timestamp_string, start_node_id) def writeNodes( output: Output, tile_contours: TileContours, - timestampString, + timestampString, # dummy option start_node_id, -): # dummy option +) -> Tuple[int, output.WaysType]: IDCounter = pyhgtmap.output.Id(start_node_id) ways = [] nodes = [] @@ -294,4 +294,4 @@ def writeNodes( if len(nodes) > 0: output.write(str((startId, nodes)) + "\n") output.flush() - return newId, ways + return newId, pyhgtmap.output.build_efficient_ways(ways) diff --git a/pyhgtmap/output/osmUtil.py b/pyhgtmap/output/osmUtil.py index 9d0fdb8..82ede0a 100644 --- a/pyhgtmap/output/osmUtil.py +++ b/pyhgtmap/output/osmUtil.py @@ -1,7 +1,7 @@ import datetime import time from io import IOBase -from typing import Callable, List, Tuple +from typing import Callable, Tuple import numpy @@ -80,7 +80,7 @@ def write(self, output): def flush(self) -> None: self.outF.flush() - def _write_ways(self, ways, startWayId): + def _write_ways(self, ways: pyhgtmap.output.WaysType, startWayId): IDCounter = pyhgtmap.output.Id(startWayId) for startNodeId, length, 
isCycle, elevation in ways: IDCounter.curId += 1 @@ -109,7 +109,7 @@ def write_nodes( timestamp_string: str, start_node_id: int, osm_version: float, - ) -> Tuple[int, List[pyhgtmap.output.WayType]]: + ) -> Tuple[int, pyhgtmap.output.WaysType]: return writeXML( self, tile_contours, timestamp_string, start_node_id, osm_version ) @@ -163,7 +163,7 @@ def _writeContourNodes( def writeXML( output, tile_contours: TileContours, timestampString, start_node_id, osm_version -): +) -> Tuple[int, pyhgtmap.output.WaysType]: """emits node OSM XML to <output> and collects path information. <output> may be anything having a write method. For now, its used with @@ -195,4 +195,4 @@ def writeXML( ) # output.flush() newId = IDCounter.getId() - return newId, ways + return newId, pyhgtmap.output.build_efficient_ways(ways) diff --git a/pyhgtmap/output/pbfUtil.py b/pyhgtmap/output/pbfUtil.py index cc08b31..8bad03f 100644 --- a/pyhgtmap/output/pbfUtil.py +++ b/pyhgtmap/output/pbfUtil.py @@ -5,12 +5,12 @@ import time from typing import Callable, List, Tuple -import numpy -import numpy.typing import npyosmium import npyosmium.io import npyosmium.osm import npyosmium.osm.mutable +import numpy +import numpy.typing import pyhgtmap.output from pyhgtmap.hgt.tile import TileContours @@ -69,22 +69,26 @@ def makeHeader(self, pyhgtmap_version) -> npyosmium.io.Header: return osm_header - def _write_ways(self, ways: List[pyhgtmap.output.WayType], startWayId) -> None: + def _write_ways(self, ways: pyhgtmap.output.WaysType, startWayId) -> None: """writes ways to self.outf. ways shall be a list of (<startNodeId>, <length>, <isCycle>, <elevation>) tuples. The waylist is split up to make sure the pbf blobs will not be too big. 
""" for ind, way in enumerate(ways): - closed_loop_id: list[int] = [way.first_node_id] if way.closed_loop else [] + closed_loop_id: list[int] = ( + [way["first_node_id"]] if way["closed_loop"] else [] + ) osm_way = npyosmium.osm.mutable.Way( id=startWayId + ind, tags=( - ("ele", str(way.elevation)), + ("ele", str(way["elevation"])), ("contour", "elevation"), - ("contour_ext", self.elevClassifier(way.elevation)), + ("contour_ext", self.elevClassifier(way["elevation"])), ), - nodes=list(range(way.first_node_id, way.first_node_id + way.nb_nodes)) + nodes=list( + range(way["first_node_id"], way["first_node_id"] + way["nb_nodes"]) + ) + closed_loop_id, ) self.osm_writer.add_way(osm_way) @@ -102,7 +106,7 @@ def write_nodes( timestamp_string: str, start_node_id: int, osm_version: float, - ) -> Tuple[int, List[pyhgtmap.output.WayType]]: + ) -> Tuple[int, pyhgtmap.output.WaysType]: logger.debug(f"writeNodes - startId: {start_node_id}") ways: List[pyhgtmap.output.WayType] = [] @@ -130,4 +134,5 @@ def write_nodes( next_node_id += len(contour) logger.debug(f"writeNodes - next_node_id: {next_node_id}") - return next_node_id, ways + + return next_node_id, pyhgtmap.output.build_efficient_ways(ways) diff --git a/pyproject.toml b/pyproject.toml index e77de58..d8e8dae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "contourpy>=1.0.7", "matplotlib>=3.4.3", "numpy>=1.24.2", + "nptyping>=2.5.0", "npyosmium>=3.6.1", "pybind11-rdp>=0.1.3", "scipy>=1.8.0",