From 6dace61fc044d99388119bd1bfa0e376296177b4 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 30 May 2024 14:24:26 -0600 Subject: [PATCH 01/30] fix: Set self.file to None so we don't get duplicate footers --- osm_fieldwork/osmfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/osm_fieldwork/osmfile.py b/osm_fieldwork/osmfile.py index 29604c21..73ea39c0 100755 --- a/osm_fieldwork/osmfile.py +++ b/osm_fieldwork/osmfile.py @@ -108,6 +108,7 @@ def footer(self): self.file.flush() if self.file is False: self.file.close() + self.file = None def write( self, From 969d7ea736fb5d1567573312ac9098fa2daea2b5 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 11:40:19 -0600 Subject: [PATCH 02/30] fix: Add leisure fields, ignore cellular --- osm_fieldwork/xforms.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/osm_fieldwork/xforms.yaml b/osm_fieldwork/xforms.yaml index da5a87d9..a80ea9eb 100644 --- a/osm_fieldwork/xforms.yaml +++ b/osm_fieldwork/xforms.yaml @@ -12,6 +12,8 @@ convert: - camptype: tourism - openfire: leisure=firepit + - fire_pit: leisure=firepit + - picnic_table: leisure=picnic_table - latitude: lat - longitude: lon - altitude: ele @@ -20,7 +22,6 @@ convert: - submissiondate: timestamp - comment: note - view: viewpoint - - cell: cellular - Monday: Mo - Tuesday: Tu - Wednesday: We @@ -113,6 +114,8 @@ private: - lateral_system - access_roof - updatedat + - cell + - cellular # All of these tags are in the CSV file, and can be ignored ignore: @@ -120,7 +123,6 @@ ignore: - __id - model - type - - features - accuracy - meta - __system @@ -160,8 +162,12 @@ ignore: - government_menu - note - instanceid + - begin_group + - end_group + - image multiple: - healthcare - amenity_type - specialty + - features From 86251979131a69e1bec335b1e5d595bc6c4cc4a9 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 11:41:52 -0600 Subject: [PATCH 03/30] fix: Add function to parse select_multiple --- osm_fieldwork/convert.py | 54 +++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index ceae62df..b9b21638 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -28,7 +28,6 @@ # Instantiate logger log = logging.getLogger(__name__) - def escape(value: str): """Escape characters like embedded quotes in text fields. @@ -92,7 +91,7 @@ def __init__( def privateData( self, keyword: str, - ): + ) -> bool: """See is a keyword is in the private data category. Args: @@ -106,7 +105,7 @@ def privateData( def convertData( self, keyword: str, - ): + ) -> bool: """See is a keyword is in the convert data category. Args: @@ -120,7 +119,7 @@ def convertData( def ignoreData( self, keyword: str, - ): + ) -> bool: """See is a keyword is in the convert data category. Args: @@ -134,7 +133,7 @@ def ignoreData( def getKeyword( self, value: str, - ): + ) -> str: """Get the keyword for a value from the yaml file. Args: @@ -152,7 +151,7 @@ def getKeyword( def getValues( self, keyword: str = None, - ): + ) -> str: """Get the values for a primary key. Args: @@ -171,7 +170,7 @@ def convertEntry( self, tag: str, value: str, - ): + ) -> list: """Convert a tag and value from the ODK represention to an OSM one. Args: @@ -188,6 +187,9 @@ def convertEntry( # logging.debug(f"FIXME: Ignoring {tag}") return None low = tag.lower() + if value is None: + return low + if low not in self.convert and low not in self.ignore and low not in self.private: return {tag: value} @@ -220,7 +222,7 @@ def convertValue( self, tag: str, value: str, - ): + ) -> list: """Convert a single tag value. Args: @@ -256,14 +258,14 @@ def convertValue( entry[tag] = vals[value] else: entry[tmp[0]] = tmp[1] - logging.debug("\tValue %s converted to %s" % (value, entry)) + logging.debug("\tValue %s converted value to %s" % (value, entry)) all.append(entry) return all def convertTag( self, tag: str, - ): + ) -> str: """Convert a single tag. Args: @@ -276,21 +278,49 @@ def convertTag( if low in self.convert: newtag = self.convert[low] if type(newtag) is str: - logging.debug("\tTag '%s' converted to '%s'" % (tag, newtag)) + logging.debug("\tTag '%s' converted tag to '%s'" % (tag, newtag)) tmp = newtag.split("=") if len(tmp) > 1: newtag = tmp[0] elif type(newtag) is list: logging.error("FIXME: list()") # epdb.st() - return low + return low, value elif type(newtag) is dict: # logging.error("FIXME: dict()") return low return newtag.lower() else: + logging.debug(f"Not in convert!: {low}") return low + def convertMultiple( + self, + value: str, + ) -> list: + """ + Convert a single tag from a select_multiple question.. + + Args: + value (str): The tags from the ODK XML file + + Returns: + (list): The new tags + """ + tags = list() + for tag in value.split(' '): + low = tag.lower() + if self.convertData(low): + newtag = self.convert[low] + # tags.append({newtag}: {value}) + if newtag.find('=') > 0: + tmp = newtag.split('=') + tags.append({tmp[0]: tmp[1]}) + else: + tags.append({low: "yes"}) + logging.debug(f"\tConverted multiple to {tags}") + return tags + def dump(self): """Dump internal data structures, for debugging purposes only.""" print("YAML file: %s" % self.filespec) From 904445c8762595a744a6c6df035ab234211bf389 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 11:43:14 -0600 Subject: [PATCH 04/30] fix: Use new convertMultiple() to support select_multiple in XForms --- osm_fieldwork/CSVDump.py | 97 ++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/CSVDump.py index 83cc256f..094c74bd 100755 --- a/osm_fieldwork/CSVDump.py +++ b/osm_fieldwork/CSVDump.py @@ -58,6 +58,8 @@ def __init__( self.config = super().__init__(yaml) self.saved = dict() self.defaults = dict() + self.entries = dict() + self.types = dict() def lastSaved( self, @@ -81,10 +83,17 @@ def parseXLS( ): """Parse the source XLSFile if available to look for details we need.""" if xlsfile is not None and len(xlsfile) > 0: - entries = pd.read_excel(xlsfile, sheet_name=[0]) + self.entries = pd.read_excel(xlsfile, sheet_name=[0])[0] # There will only be a single sheet - names = entries[0]["name"] - defaults = entries[0]["default"] + names = self.entries["name"] + defaults = self.entries["default"] + i = 0 + while i < len(self.entries): + if type(self.entries['type'][i]) == float: + self.types[self.entries['name'][i]] = None + else: + self.types[self.entries['name'][i]] = self.entries['type'][i].split(' ')[0] + i += 1 total = len(names) i = 0 while i < total: @@ -167,8 +176,16 @@ def parse( self, filespec: str, data: str = None, - ): - """Parse the CSV file from ODK Central and convert it to a data structure.""" + ) -> list: + """ + Parse the CSV file from ODK Central and convert it to a data structure. + + Args: + filespec (str): The file to parse. + data (str): Or the data to parse. + Returns: + (list): The list of features with tags + """ all_tags = list() if not data: f = open(filespec, newline="") @@ -179,32 +196,34 @@ def parse( tags = dict() # log.info(f"ROW: {row}") for keyword, value in row.items(): - if keyword is None or len(keyword) == 0: + if keyword is None or len(value) == 0: continue - base = self.basename(keyword).lower() # There's many extraneous fields in the input file which we don't need. if base is None or base in self.ignore or value is None: continue - # if base in self.multiple: - # epdb.st() - # entry = reader[keyword] - # for key, val in entry.items(): - # print(key)75.66.108.181 - # if key == "name": - # tags['name'] = val - # continue else: + # log.info(f"ITEM: {keyword} = {value}") + if base in self.types: + if self.types[base] == "select_multiple": + vals = self.convertMultiple(value) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue # When using geopoint warmup, once the display changes to the map + # location, there is not always a value if the accuracy is way # off. In this case use the warmup value, which is where we are - # standing anyway. + # hopefully standing anyway. if base == "latitude" and len(value) == 0: if "warmup-Latitude" in row: value = row["warmup-Latitude"] if base == "longitude" and len(value) == 0: value = row["warmup-Longitude"] items = self.convertEntry(base, value) + # log.info(f"ROW: {base} {value}") if len(items) > 0: if base in self.saved: @@ -224,6 +243,7 @@ def parse( tags[k] = v else: tags[base] = value + # log.debug(f"\tFIXME1: {tags}") all_tags.append(tags) return all_tags @@ -231,7 +251,7 @@ def parse( def basename( self, line: str, - ): + ) -> str: """Extract the basename of a path after the last -.""" tmp = line.split("-") if len(tmp) == 0: @@ -242,7 +262,7 @@ def basename( def createEntry( self, entry: dict, - ): + ) -> list: """Create the feature data structure.""" # print(line) feature = dict() @@ -277,37 +297,27 @@ def createEntry( attrs["lon"] = geometry[1] continue - if len(attrs["lat"]) == 0: + if 'lat' in attrs and len(attrs["lat"]) == 0: continue + if key is not None and len(key) > 0 and key in attributes: attrs[key] = value log.debug("Adding attribute %s with value %s" % (key, value)) - else: - if key in self.multiple: - for item in value: - if key in item: - for entry in item[key].split(): - vals = self.getValues(key) - if entry in vals: - if vals[entry].find("="): - tmp = vals[entry].split("=") - tags[tmp[0]] = tmp[1] - else: - tags[entry] = "yes" - continue + continue - if value is not None and value != "no" and value != "unknown": - if key == "track" or key == "geoline": - # refs.append(tags) - # log.debug("Adding reference %s" % tags) - refs = value.split(";") - elif len(value) > 0: - if self.privateData(key): - priv[key] = value - else: - tags[key] = value + if value is not None and value != "no" and value != "unknown": + if key == "track" or key == "geoline": + # refs.append(tags) + # log.debug("Adding reference %s" % tags) + refs = value.split(";") + elif len(value) > 0: + if self.privateData(key): + priv[key] = value + else: + tags[key] = value + feature["attrs"] = attrs if len(tags) > 0: - feature["attrs"] = attrs + logging.debug(f"TAGS: {tags}") feature["tags"] = tags if len(refs) > 1: feature["refs"] = refs @@ -367,6 +377,7 @@ def main(): csvin.writeOSM(node) refs.append(nodeid) nodeid -= 1 + feature["refs"] = refs csvin.writeOSM(feature) else: From edea8ba7500d638b50291ed8d38c38e93e1e0c8f Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 11:44:02 -0600 Subject: [PATCH 05/30] fix: Refactor test case for select_multiple, now it actually works --- tests/test_convert.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/test_convert.py b/tests/test_convert.py index f322c29a..687514d9 100755 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2021, 2022, 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of Osm-Fieldwork. # @@ -20,10 +20,15 @@ import argparse import os +import logging +import sys from osm_fieldwork.convert import Convert from osm_fieldwork.xlsforms import xlsforms_path +# Instantiate logger +log = logging.getLogger(__name__) + # find the path of root tests dir rootdir = os.path.dirname(os.path.abspath(__file__)) path = xlsforms_path.replace("/xlsforms", "") @@ -74,9 +79,9 @@ def test_sub_value(): def test_multiple_value(): """Test tag value conversion.""" hits = 0 - # Test a value that gets converted - vals = csv.convertValue("amenity", "coffee") - if len(vals) == 2 and vals[0]["amenity"] == "cafe" and vals[1]["cuisine"] == "coffee_shop": + vals = csv.convertMultiple("picnic_table fire_pit parking") + print(vals) + if len(vals) > 0 and vals[0]["leisure"] == "picnic_table" and vals[1]["leisure"] == "firepit": hits += 1 assert hits == 1 @@ -84,9 +89,19 @@ def test_multiple_value(): # Run standalone for easier debugging when not under pytest if __name__ == "__main__": parser = argparse.ArgumentParser(description="Read and convert a JSON file from ODK Central") + parser.add_argument("-v", "--verbose", nargs="?", const="0", help="verbose output") parser.add_argument("--infile", default=f"{rootdir}/testdata/testcamps.json", help="The JSON input file") args = parser.parse_args() + # if verbose, dump to the terminal + if args.verbose is not None: + logging.basicConfig( + level=logging.DEBUG, + format=("%(threadName)10s - %(name)s - %(levelname)s - %(message)s"), + datefmt="%y-%m-%d %H:%M:%S", + stream=sys.stdout, + ) + test_keywords() test_convert_tag() test_single_value() From 77ac3d92dfb817b89d7ac9a5ec6183fe2094e9aa Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 11:49:33 -0600 Subject: [PATCH 06/30] fix: Minor reformatting and updating of code comment blocks, also add return data types --- osm_fieldwork/convert.py | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index b9b21638..55b46620 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2020, 2021, 2022, 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2020, 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of OSM-Fieldwork. # @@ -28,8 +28,9 @@ # Instantiate logger log = logging.getLogger(__name__) -def escape(value: str): - """Escape characters like embedded quotes in text fields. +def escape(value: str) -> str: + """ + Escape characters like embedded quotes in text fields. Args: value (str):The string to modify @@ -41,9 +42,9 @@ def escape(value: str): tmp = value.replace("&", " and ") return tmp.replace("'", "'") - class Convert(YamlFile): - """A class to apply a YAML config file and convert ODK to OSM. + """ + A class to apply a YAML config file and convert ODK to OSM. Returns: (Convert): An instance of this object @@ -92,13 +93,14 @@ def privateData( self, keyword: str, ) -> bool: - """See is a keyword is in the private data category. + """ + Search he private data category for a keyword. Args: keyword (str): The keyword to search for Returns: - (bool): Check to see if the keyword is in the private data section + (bool): =If the keyword is in the private data section """ return keyword.lower() in self.private @@ -106,7 +108,8 @@ def convertData( self, keyword: str, ) -> bool: - """See is a keyword is in the convert data category. + """ + Search the convert data category for a keyword. Args: keyword (str): The keyword to search for @@ -120,7 +123,8 @@ def ignoreData( self, keyword: str, ) -> bool: - """See is a keyword is in the convert data category. + """ + Search the convert data category for a ketyword. Args: keyword (str): The keyword to search for @@ -134,10 +138,12 @@ def getKeyword( self, value: str, ) -> str: - """Get the keyword for a value from the yaml file. + """ + Get the keyword for a value from the yaml file. Args: value (str): The value to find the keyword for + Returns: (str): The keyword if found, or None """ @@ -152,7 +158,8 @@ def getValues( self, keyword: str = None, ) -> str: - """Get the values for a primary key. + """ + Get the values for a primary key. Args: keyword (str): The keyword to get the value of @@ -171,7 +178,8 @@ def convertEntry( tag: str, value: str, ) -> list: - """Convert a tag and value from the ODK represention to an OSM one. + """ + Convert a tag and value from the ODK represention to an OSM one. Args: tag (str): The tag from the ODK XML file @@ -223,7 +231,8 @@ def convertValue( tag: str, value: str, ) -> list: - """Convert a single tag value. + """ + Convert a single tag value. Args: tag (str): The tag from the ODK XML file @@ -266,7 +275,8 @@ def convertTag( self, tag: str, ) -> str: - """Convert a single tag. + """ + Convert a single tag. Args: tag (str): The tag from the ODK XML file @@ -299,7 +309,7 @@ def convertMultiple( value: str, ) -> list: """ - Convert a single tag from a select_multiple question.. + Convert a multiple tags from a select_multiple question.. Args: value (str): The tags from the ODK XML file @@ -322,7 +332,9 @@ def convertMultiple( return tags def dump(self): - """Dump internal data structures, for debugging purposes only.""" + """ + Dump internal data structures, for debugging purposes only. + """ print("YAML file: %s" % self.filespec) print("Convert section") for key, val in self.convert.items(): @@ -343,7 +355,9 @@ def dump(self): # this way than using pytest, # def main(): - """This main function lets this class be run standalone by a bash script.""" + """ + This main function lets this class be run standalone by a bash script. + """ parser = argparse.ArgumentParser(description="Read and parse a YAML file") parser.add_argument("-v", "--verbose", action="store_true", help="verbose output") From d8b004c36a0283e8c4e6e24e498bdbfde7b68154 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 12:12:07 -0600 Subject: [PATCH 07/30] fix: Update and reformat all code comment blocks --- osm_fieldwork/CSVDump.py | 108 ++++++++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 18 deletions(-) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/CSVDump.py index 094c74bd..8df119a9 100755 --- a/osm_fieldwork/CSVDump.py +++ b/osm_fieldwork/CSVDump.py @@ -38,7 +38,9 @@ class CSVDump(Convert): - """A class to parse the CSV files from ODK Central.""" + """ + A class to parse the CSV files from ODK Central. + """ def __init__( self, @@ -64,7 +66,17 @@ def __init__( def lastSaved( self, keyword: str, - ): + ) -> str: + """ + Get the last saved value for a question. + + Args: + keyword (str): The keyword to search for + + Returns: + (str): The last saved value for the question + + """ if keyword is not None and len(keyword) > 0: return self.saved[keyword] return None @@ -73,15 +85,37 @@ def updateSaved( self, keyword: str, value: str, - ): + ) -> bool: + """ + Update the last saved value for a question. + + Args: + keyword (str): The keyword to search for + value (str): The new value + + Returns: + (bool): If the new value got saved + + """ if keyword is not None and value is not None and len(value) > 0: self.saved[keyword] = value + return True + else: + return False def parseXLS( self, xlsfile: str, - ): - """Parse the source XLSFile if available to look for details we need.""" + ) -> bool: + """ + Parse the source XLSFile if available to look for details we need. + + Args: + xlsfile (str): + + Returns: + (bool): whether the file was parsed without error + """ if xlsfile is not None and len(xlsfile) > 0: self.entries = pd.read_excel(xlsfile, sheet_name=[0])[0] # There will only be a single sheet @@ -112,7 +146,12 @@ def createOSM( self, filespec: str, ): - """Create an OSM XML output files.""" + """ + Create an OSM XML output files. + + Args: + filespec (str): The output file name + """ log.debug("Creating OSM XML file: %s" % filespec) self.osm = OsmFile(filespec) # self.osm.header() @@ -121,7 +160,12 @@ def writeOSM( self, feature: dict, ): - """Write a feature to an OSM XML output file.""" + """ + Write a feature to an OSM XML output file. + + Args: + feature (dict): The OSM feature to write to + """ out = "" if "id" in feature["tags"]: feature["id"] = feature["tags"]["id"] @@ -140,24 +184,36 @@ def finishOSM(self): def createGeoJson( self, - file: str = "tmp.geojson", + filespec: str = "tmp.geojson", ): - """Create a GeoJson output file.""" - log.debug("Creating GeoJson file: %s" % file) - self.json = open(file, "w") + """ + Create a GeoJson output file. + + Args: + filespec (str): The output file name + """ + log.debug("Creating GeoJson file: %s" % filespec) + self.json = open(filespec, "w") def writeGeoJson( self, feature: dict, ): - """Write a feature to a GeoJson output file.""" + """ + Write a feature to a GeoJson output file. + + Args: + feature (dict): The OSM feature to write to + """ # These get written later when finishing , since we have to create a FeatureCollection if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: return None self.features.append(feature) def finishGeoJson(self): - """Write the GeoJson FeatureCollection to the output file and close it.""" + """ + Write the GeoJson FeatureCollection to the output file and close it. + """ features = list() for item in self.features: if len(item["attrs"]["lon"]) == 0 or len(item["attrs"]["lat"]) == 0: @@ -183,6 +239,7 @@ def parse( Args: filespec (str): The file to parse. data (str): Or the data to parse. + Returns: (list): The list of features with tags """ @@ -252,7 +309,15 @@ def basename( self, line: str, ) -> str: - """Extract the basename of a path after the last -.""" + """ + Extract the basename of a path after the last -. + + Args: + line (str): The path from the json file entry + + Returns: + (str): The last node of the path + """ tmp = line.split("-") if len(tmp) == 0: return line @@ -262,8 +327,16 @@ def basename( def createEntry( self, entry: dict, - ) -> list: - """Create the feature data structure.""" + ) -> dict: + """ + Create the feature data structure. + + Args: + entry (dict): The feature data + + Returns: + (dict): The OSM data structure for this entry from the json file + """ # print(line) feature = dict() attrs = dict() @@ -326,9 +399,8 @@ def createEntry( return feature - def main(): - """ """ + """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert CSV from ODK Central to OSM XML") parser.add_argument("-v", "--verbose", action="store_true", help="verbose output") parser.add_argument("-y", "--yaml", help="Alternate YAML file") From 2adbfa928207de3b81c0599a39f2ea4e2054ebce Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 12:19:35 -0600 Subject: [PATCH 08/30] fix: Move pareseXLS to the Convert class so it can be shared --- osm_fieldwork/CSVDump.py | 39 --------------------------------------- osm_fieldwork/convert.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/CSVDump.py index 8df119a9..191c6ff5 100755 --- a/osm_fieldwork/CSVDump.py +++ b/osm_fieldwork/CSVDump.py @@ -103,45 +103,6 @@ def updateSaved( else: return False - def parseXLS( - self, - xlsfile: str, - ) -> bool: - """ - Parse the source XLSFile if available to look for details we need. - - Args: - xlsfile (str): - - Returns: - (bool): whether the file was parsed without error - """ - if xlsfile is not None and len(xlsfile) > 0: - self.entries = pd.read_excel(xlsfile, sheet_name=[0])[0] - # There will only be a single sheet - names = self.entries["name"] - defaults = self.entries["default"] - i = 0 - while i < len(self.entries): - if type(self.entries['type'][i]) == float: - self.types[self.entries['name'][i]] = None - else: - self.types[self.entries['name'][i]] = self.entries['type'][i].split(' ')[0] - i += 1 - total = len(names) - i = 0 - while i < total: - entry = defaults[i] - if str(entry) != "nan": - pat = re.compile("..last-saved.*") - if pat.match(entry): - name = entry.split("#")[1][:-1] - self.saved[name] = None - else: - self.defaults[names[i]] = entry - i += 1 - return True - def createOSM( self, filespec: str, diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index 55b46620..cc6becf7 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -21,6 +21,8 @@ import argparse import logging import sys +import pandas as pd +import re from osm_fieldwork.xlsforms import xlsforms_path from osm_fieldwork.yamlfile import YamlFile @@ -66,6 +68,9 @@ def __init__( self.convert = dict() self.ignore = list() self.private = list() + self.defaults = dict() + self.entries = dict() + self.types = dict() for item in self.yaml.yaml["convert"]: key = list(item.keys())[0] value = item[key] @@ -331,6 +336,37 @@ def convertMultiple( logging.debug(f"\tConverted multiple to {tags}") return tags + def parseXLS( + self, + xlsfile: str, + ): + """Parse the source XLSFile if available to look for details we need.""" + if xlsfile is not None and len(xlsfile) > 0: + self.entries = pd.read_excel(xlsfile, sheet_name=[0])[0] + # There will only be a single sheet + names = self.entries["name"] + defaults = self.entries["default"] + i = 0 + while i < len(self.entries): + if type(self.entries['type'][i]) == float: + self.types[self.entries['name'][i]] = None + else: + self.types[self.entries['name'][i]] = self.entries['type'][i].split(' ')[0] + i += 1 + total = len(names) + i = 0 + while i < total: + entry = defaults[i] + if str(entry) != "nan": + pat = re.compile("..last-saved.*") + if pat.match(entry): + name = entry.split("#")[1][:-1] + self.saved[name] = None + else: + self.defaults[names[i]] = entry + i += 1 + return True + def dump(self): """ Dump internal data structures, for debugging purposes only. @@ -419,7 +455,6 @@ def main(): for i in entry: print("XX: %r" % i) - if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() From ca88b6b8c501debea80f576a338496681f1a5e5e Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 2 Jun 2024 12:45:50 -0600 Subject: [PATCH 09/30] fix: Move createEntry() to Convert class so it can be shared --- osm_fieldwork/CSVDump.py | 76 +--------------------------------------- osm_fieldwork/convert.py | 75 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 75 deletions(-) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/CSVDump.py index 191c6ff5..eab7ffc6 100755 --- a/osm_fieldwork/CSVDump.py +++ b/osm_fieldwork/CSVDump.py @@ -285,81 +285,6 @@ def basename( base = tmp[len(tmp) - 1] return base - def createEntry( - self, - entry: dict, - ) -> dict: - """ - Create the feature data structure. - - Args: - entry (dict): The feature data - - Returns: - (dict): The OSM data structure for this entry from the json file - """ - # print(line) - feature = dict() - attrs = dict() - tags = dict() - priv = dict() - refs = list() - - # log.debug("Creating entry") - # First convert the tag to the approved OSM equivalent - if "lat" in entry and "lon" in entry: - attrs["lat"] = entry["lat"] - attrs["lon"] = entry["lon"] - for key, value in entry.items(): - attributes = ( - "id", - "timestamp", - "lat", - "lon", - "uid", - "user", - "version", - "action", - ) - - # When using existing OSM data, there's a special geometry field. - # Otherwise use the GPS coordinates where you are. - if key == "geometry" and len(value) > 0: - geometry = value.split(" ") - if len(geometry) == 4: - attrs["lat"] = geometry[0] - attrs["lon"] = geometry[1] - continue - - if 'lat' in attrs and len(attrs["lat"]) == 0: - continue - - if key is not None and len(key) > 0 and key in attributes: - attrs[key] = value - log.debug("Adding attribute %s with value %s" % (key, value)) - continue - - if value is not None and value != "no" and value != "unknown": - if key == "track" or key == "geoline": - # refs.append(tags) - # log.debug("Adding reference %s" % tags) - refs = value.split(";") - elif len(value) > 0: - if self.privateData(key): - priv[key] = value - else: - tags[key] = value - feature["attrs"] = attrs - if len(tags) > 0: - logging.debug(f"TAGS: {tags}") - feature["tags"] = tags - if len(refs) > 1: - feature["refs"] = refs - if len(priv) > 0: - feature["private"] = priv - - return feature - def main(): """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert CSV from ODK Central to OSM XML") @@ -383,6 +308,7 @@ def main(): csvin = CSVDump(args.yaml) else: csvin = CSVDump() + csvin.parseXLS(args.xlsfile) osmoutfile = os.path.basename(args.infile.replace(".csv", ".osm")) csvin.createOSM(osmoutfile) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index cc6becf7..bf31e59c 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -367,6 +367,81 @@ def parseXLS( i += 1 return True + def createEntry( + self, + entry: dict, + ) -> dict: + """ + Create the feature data structure. + + Args: + entry (dict): The feature data + + Returns: + (dict): The OSM data structure for this entry from the json file + """ + # print(line) + feature = dict() + attrs = dict() + tags = dict() + priv = dict() + refs = list() + + # log.debug("Creating entry") + # First convert the tag to the approved OSM equivalent + if "lat" in entry and "lon" in entry: + attrs["lat"] = entry["lat"] + attrs["lon"] = entry["lon"] + for key, value in entry.items(): + attributes = ( + "id", + "timestamp", + "lat", + "lon", + "uid", + "user", + "version", + "action", + ) + + # When using existing OSM data, there's a special geometry field. + # Otherwise use the GPS coordinates where you are. + if key == "geometry" and len(value) > 0: + geometry = value.split(" ") + if len(geometry) == 4: + attrs["lat"] = geometry[0] + attrs["lon"] = geometry[1] + continue + + if 'lat' in attrs and len(attrs["lat"]) == 0: + continue + + if key is not None and len(key) > 0 and key in attributes: + attrs[key] = value + log.debug("Adding attribute %s with value %s" % (key, value)) + continue + + if value is not None and value != "no" and value != "unknown": + if key == "track" or key == "geoline": + # refs.append(tags) + # log.debug("Adding reference %s" % tags) + refs = value.split(";") + elif len(value) > 0: + if self.privateData(key): + priv[key] = value + else: + tags[key] = value + feature["attrs"] = attrs + if len(tags) > 0: + # logging.debug(f"TAGS: {tags}") + feature["tags"] = tags + if len(refs) > 1: + feature["refs"] = refs + if len(priv) > 0: + feature["private"] = priv + + return feature + def dump(self): """ Dump internal data structures, for debugging purposes only. From 864feedd4a80a7c20be51a192ca39d8cac19d7b6 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Mon, 3 Jun 2024 17:35:09 -0600 Subject: [PATCH 10/30] fix: refactor converting a JSON file from Central to OSM XML and add select_multiple support --- osm_fieldwork/convert.py | 12 +- osm_fieldwork/json2osm.py | 410 +++++++++++++++++--------------------- 2 files changed, 192 insertions(+), 230 deletions(-) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index bf31e59c..4b3c19cb 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -71,6 +71,7 @@ def __init__( self.defaults = dict() self.entries = dict() self.types = dict() + self.saved = dict() for item in self.yaml.yaml["convert"]: key = list(item.keys())[0] value = item[key] @@ -413,12 +414,12 @@ def createEntry( attrs["lon"] = geometry[1] continue - if 'lat' in attrs and len(attrs["lat"]) == 0: - continue + # if 'lat' in attrs and len(attrs["lat"]) == 0: + # continue if key is not None and len(key) > 0 and key in attributes: attrs[key] = value - log.debug("Adding attribute %s with value %s" % (key, value)) + # log.debug("Adding attribute %s with value %s" % (key, value)) continue if value is not None and value != "no" and value != "unknown": @@ -426,6 +427,11 @@ def createEntry( # refs.append(tags) # log.debug("Adding reference %s" % tags) refs = value.split(";") + elif type(value) != str: + if self.privateData(key): + priv[key] = str(value) + else: + tags[key] = str(value) elif len(value) > 0: if self.privateData(key): priv[key] = value diff --git a/osm_fieldwork/json2osm.py b/osm_fieldwork/json2osm.py index 5d9c4009..944dedaf 100755 --- a/osm_fieldwork/json2osm.py +++ b/osm_fieldwork/json2osm.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of OSM-Fieldwork. # @@ -45,7 +45,8 @@ def __init__( self, yaml: str = None, ): - """A class to convert the JSON file from ODK Central, or the GeoJson + """ + A class to convert the JSON file from ODK Central, or the GeoJson file created by the odk2geojson utility. Args: @@ -62,33 +63,12 @@ def __init__( self.features = list() self.config = super().__init__(yaml) - # FIXME: a work in progress - # def parseXLS(self, xlsfile: str): - # """Parse the source XLSFile if available to look for details we need""" - # if xlsfile is not None and len(xlsfile) > 0: - # entries = pd.read_excel(xlsfile, sheet_name=[0]) - # # There will only be a single sheet - # names = entries[0]['name'] - # defaults = entries[0]['default'] - # total = len(names) - # i = 0 - # while i < total: - # entry = defaults[i] - # if str(entry) != 'nan': - # pat = re.compile("..last-saved.*") - # if pat.match(entry): - # name = entry.split('#')[1][:-1] - # self.saved[name] = None - # else: - # self.defaults[names[i]] = entry - # i += 1 - # return True - def createOSM( self, filespec: str = "tmp.osm", - ): - """Create an OSM XML output files. + ) -> OsmFile: + """ + Create an OSM XML output files. Args: filespec (str): The filespec for the output OSM XML file @@ -104,7 +84,8 @@ def writeOSM( self, feature: dict, ): - """Write a feature to an OSM XML output file. + """ + Write a feature to an OSM XML output file. Args: feature (dict): The feature to write to the OSM XML output file @@ -127,7 +108,8 @@ def writeOSM( self.osm.write(out) def finishOSM(self): - """Write the OSM XML file footer and close it. The destructor in the + """ + Write the OSM XML file footer and close it. The destructor in the OsmFile class should do this, but this is the manual way. """ self.osm.footer() @@ -136,10 +118,11 @@ def createGeoJson( self, file="tmp.geojson", ): - """Create a GeoJson output file. + """ + Create a GeoJson output file. Args: - file (str): The filespec of the output GeoJson file + file (str): The filespec of the output GeoJson file """ log.debug("Creating GeoJson file: %s" % file) self.json = open(file, "w") @@ -148,7 +131,8 @@ def writeGeoJson( self, feature: dict, ): - """Write a feature to a GeoJson output file. + """ + Write a feature to a GeoJson output file. Args: feature (dict): The feature to write to the GeoJson output file @@ -159,7 +143,9 @@ def writeGeoJson( self.features.append(feature) def finishGeoJson(self): - """Write the GeoJson FeatureCollection to the output file and close it.""" + """ + Write the GeoJson FeatureCollection to the output file and close it. + """ features = list() for item in self.features: # poi = Point() @@ -176,8 +162,9 @@ def parse( self, filespec: str = None, data: str = None, - ): - """Parse the JSON file from ODK Central and convert it to a data structure. + ) -> list: + """ + Parse the JSON file from ODK Central and convert it to a data structure. The input is either a filespec to open, or the data itself. Args: @@ -188,7 +175,7 @@ def parse( (list): A list of all the features in the input file """ log.debug(f"Parsing JSON file {filespec}") - all_tags = list() + total = list() if not data: file = open(filespec, "r") infile = Path(filespec) @@ -198,13 +185,12 @@ def parse( reader = json.load(file) else: log.error("Need to specify a JSON or GeoJson file!") - return all_tags + return total elif isinstance(data, str): reader = geojson.loads(data) elif isinstance(data, list): reader = data - total = list() # JSON files from Central use value as the keyword, whereas # GeoJSON uses features for the same thing. if "value" in reader: @@ -214,216 +200,137 @@ def parse( else: data = reader for row in data: - # log.info(f"ROW: {row}") + # log.debug(f"ROW: {row}\n") tags = dict() - if "geometry" in row: - # If geom not point, convert to centroid - if row["geometry"]["type"] != "Point": - log.debug(f"Converting {row['geometry']['type']} geometry to centroid point") - geom = shapely.from_geojson(str(row)) - centroid = shapely.to_geojson(geom.centroid) - row["geometry"] = centroid - tags["geometry"] = row["geometry"] - else: - pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") - gps = re.findall(pat, str(row)) - # If geopoint warmup is used, there will be two matches, we only - # want the second one, which is the location. - for coords in gps: - tags["geometry"] = coords + # Extract the location regardless of what the tag is + # called. + # pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") + # gps = re.findall(pat, str(row)) + # tmp = list() + # if len(gps) == 0: + # log.error(f"No location data in: {row}") + # continue + # elif len(gps) == 1: + # # Only the warmup has any coordinates. + # tmp = gps[0].split(" ") + # elif len(gps) == 2: + # # both the warmup and the coordinates have values + # tmp = gps[1].split(" ") + + # if len(tmp) > 0: + # lat = float(tmp[0][:-1]) + # lon = float(tmp[1][:-1]) + # geom = Point([lon, lat]) + # row["geometry"] = geom + # # tags["geometry"] = row["geometry"] + if "properties" in row: row["properties"] # A GeoJson formatted file else: pass # A JOSM file from ODK Central - # flatten all the groups into a single data structure + # flatten all the groups into a sodk2geojson.pyingle data structure flattened = flatdict.FlatDict(row) for k, v in flattened.items(): last = k.rfind(":") + 1 key = k[last:] - # log.debug(f"Processing tag {key} = {v}") - # names and comments may have spaces, otherwise - # it's from a select_multiple - pat = re.compile("name[:a-z]*") - names = re.findall(pat, key) - if len(names) > 0: - for name in names: - tags[name] = v - continue - if key == "comment": - tags[key] = v # a JSON file from ODK Central always uses coordinates as # the keyword + if key is None or key in self.ignore or v is None: + continue + log.debug(f"Processing tag {key} = {v}") if key == "coordinates": if isinstance(v, list): - lat = v[1] - lon = v[0] - tags["geometry"] = f"{lat} {lon}" + tags["lat"] = v[1] + tags["lon"] = v[0] + # poi = Point(float(lon), float(lat)) + # tags["geometry"] = poi continue - if key == "xlocation": - tags["geometry"] = v - continue - tags[key] = v - total.append(tags) - # log.debug(f"Finished parsing JSON file {filespec}") - return total - - def createEntry( - self, - entry: dict, - ): - """Create the feature data structure for this entry. - - Args: - entry (dict): The feature to convert to the output format + if key in self.types: + if self.types[key] == "select_multiple": + # log.debug(f"Found key '{self.types[key]}'") + if v is None: + continue + vals = self.convertMultiple(v) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue + + items = self.convertEntry(key, v) + if items is None or len(items) == 0: + continue - Returns: - (dict): The new entry for the output file - """ - # print(line) - feature = dict() - attrs = dict() - tags = dict() - priv = dict() - refs = list() - - # log.debug("Creating entry") - # First convert the tag to the approved OSM equivalent - for key, value in entry.items(): - # When using existing OSM data, there's a special geometry field. - # Otherwise use the GPS coordinates where you are. - lat = None - lon = None - if isinstance(value, float): - continue - # log.debug(f"FIXME: {key} = {value} {type(value)}") - if key == "xid" and value is not None: - attrs["id"] = int(value) - if key == "geometry": - # The GeoJson file has the geometry field. Usually it's a list - # but on occasion it's a string instead, so turn it into a list - if isinstance(value, str) and len(coords := value.split(" ")) >= 2: - lat = coords[0] - lon = coords[1] - - # Parse as geojson + if type(items) == str: + log.debug(f"string Item {items}") else: - geom = shapely.from_geojson(str(value)) - - if geom.geom_type != "Point": - # Use centroid if polygon - geom = geom.centroid - - # Get coords from point - lat = geom.y - lon = geom.x - - attrs["lat"] = lat - attrs["lon"] = lon - # log.debug(f"ATTRS: {attrs}") - - # Some tags are actually attributes - # print(f"FIXME: {key} {key in attributes}") - # if key in self.multiple: - # for item in value: - # if key in item: - # for entry in item[key].split(): - # vals = self.getValues(key) - # if entry in vals: - # if vals[entry].find("="): - # tmp = vals[entry].split("=") - # tags[tmp[0]] = tmp[1] - # else: - # tags[entry] = "yes" - # continue - - if isinstance(value, str) and (value == "no" or value == "unknown"): - pass - elif value is not None: - if key == "track" or key == "geoline": - refs.append(tag) - log.debug("Adding reference %s" % tag) - elif len(str(value)) > 0: - if self.privateData(key): - priv[key] = value - else: - item = self.convertEntry(key, value) - if item is not None and isinstance(item, dict): - tags.update(item) - elif isinstance(item, list): - for entry in item: - tags.update(entry) - + log.debug(f"dict Item {items}") + if len(items) == 0: + tags.update(items[0]) + # log.debug(f"TAGS: {tags}") if len(tags) > 0: - if "geometry" in tags: - del tags["geometry"] - feature["attrs"] = attrs - feature["tags"] = tags - if len(refs) > 0: - feature["refs"] = refs - if len(priv) > 0: - feature["private"] = priv - - return feature - - -def json2osm(input_file, yaml_file=None): - """Process the JSON file from ODK Central or the GeoJSON file to OSM XML format. - - Args: - input_file (str): The path to the input JSON or GeoJSON file. - yaml_file (str): The path to the YAML config file (optional). - - Returns: - osmoutfile (str): Path to the converted OSM XML file. - """ - log.info(f"Converting JSON file to OSM: {input_file}") - if yaml_file: - jsonin = JsonDump(yaml_file) - else: - jsonin = JsonDump() - - # jsonin.parseXLS(args.xlsfile) + total.append(tags) - # Modify the input file name for the 2 output files, which will get written - # to the current directory. - - base = Path(input_file).stem - osmoutfile = f"{base}-out.osm" - jsonin.createOSM(osmoutfile) - - data = jsonin.parse(input_file) - # This OSM XML file only has OSM appropriate tags and values - - for entry in data: - feature = jsonin.createEntry(entry) - - # Sometimes bad entries, usually from debugging XForm design, sneak in - if len(feature) == 0: - continue - - if len(feature) > 0: - if "lat" not in feature["attrs"]: - if "geometry" in feature["tags"]: - if isinstance(feature["tags"]["geometry"], str): - coords = list(feature["tags"]["geometry"]) - # del feature['tags']['geometry'] - elif "coordinates" in feature["tags"]: - coords = feature["tags"]["coordinates"] - feature["attrs"] = {"lat": coords[1], "lon": coords[0]} - else: - log.warning(f"Bad record! {feature}") - continue # Skip bad records - - jsonin.writeOSM(feature) - # log.debug("Writing final OSM XML file...") - - # jsonin.finishOSM() - log.info(f"Wrote OSM XML file: {osmoutfile}") - - return osmoutfile + # log.debug(f"Finished parsing JSON file {filespec}") + return total +# def json2osm( +# cmdln: dict, +# ) -> str: +# """ +# Process the JSON file from ODK Central or the GeoJSON file to OSM XML format. + +# Args: +# cmdln (dict): The data from the command line + +# Returns: +# osmoutfile (str): Path to the converted OSM XML file. +# """ +# log.info(f"Converting JSON file to OSM: {cmdln['infile']}") +# if yaml_file: +# jsonin = JsonDump({cmd['yaml']}) +# else: +# jsonin = JsonDump() + +# # Modify the input file name for the 2 output files, which will get written +# # to the current directory. + +# base = Path(input_file).stem +# osmoutfile = f"{base}-out.osm" +# jsonin.createOSM(osmoutfile) + +# data = jsonin.parse(input_file) +# # This OSM XML file only has OSM appropriate tags and values + +# for entry in data: +# feature = jsonin.createEntry(entry) + +# # Sometimes bad entries, usually from debugging XForm design, sneak in +# if len(feature) == 0: +# continue + +# if len(feature) > 0: +# if "lat" not in feature["attrs"]: +# if "geometry" in feature["tags"]: +# if isinstance(feature["tags"]["geometry"], str): +# coords = list(feature["tags"]["geometry"]) +# # del feature['tags']['geometry'] +# elif "coordinates" in feature["tags"]: +# coords = feature["tags"]["coordinates"] +# feature["attrs"] = {"lat": coords[1], "lon": coords[0]} +# else: +# log.warning(f"Bad record! {feature}") +# continue # Skip bad records + +# jsonin.writeOSM(feature) +# # log.debug("Writing final OSM XML file...") + +# # jsonin.finishOSM() +# log.info(f"Wrote OSM XML file: {osmoutfile}") + +# return osmoutfile def main(): """Run conversion directly from the terminal.""" @@ -444,8 +351,57 @@ def main(): ) logging.getLogger("urllib3").setLevel(logging.DEBUG) - json2osm(args.infile, args.yaml) + if args.yaml: + jsonvin = JsonDump(args.yaml) + else: + jsonin = JsonDump() + jsonin.parseXLS(args.xlsfile) + + base = Path(args.infile).stem + osmoutfile = f"{base}.osm" + jsonin.createOSM(osmoutfile) + + jsonoutfile = f"{base}.geojson" + jsonin.createGeoJson(jsonoutfile) + + log.debug("Parsing json files %r" % args.infile) + data = jsonin.parse(args.infile) + # This OSM XML file only has OSM appropriate tags and values + nodeid = -1000 + for entry in data: + feature = jsonin.createEntry(entry) + if len(feature) == 0: + continue + if "refs" in feature: + refs = list() + for ref in feature["refs"]: + now = datetime.now().strftime("%Y-%m-%dT%TZ") + if len(ref) == 0: + continue + coords = ref.split(" ") + print(coords) + node = {"attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, "tags": dict()} + jsonin.writeOSM(node) + refs.append(nodeid) + nodeid -= 1 + + feature["refs"] = refs + jsonin.writeOSM(feature) + else: + # Sometimes bad entries, usually from debugging XForm design, sneak in + if "lat" not in feature["attrs"]: + log.warning("Bad record! %r" % feature) + continue + jsonin.writeOSM(feature) + # This GeoJson file has all the data values + jsonin.writeGeoJson(feature) + # print("TAGS: %r" % feature['tags']) + + jsonin.finishOSM() + jsonin.finishGeoJson() + log.info("Wrote OSM XML file: %r" % osmoutfile) + log.info("Wrote GeoJson file: %r" % jsonoutfile) if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" From 9ec1ec6f3ba0df49bcafcbe1209273fae9e5bef2 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 6 Jun 2024 07:48:41 -0600 Subject: [PATCH 11/30] fix: Move code for writing to output files to it's own class --- osm_fieldwork/support.py | 142 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 osm_fieldwork/support.py diff --git a/osm_fieldwork/support.py b/osm_fieldwork/support.py new file mode 100644 index 00000000..81eed219 --- /dev/null +++ b/osm_fieldwork/support.py @@ -0,0 +1,142 @@ +#!/usr/bin/python3 + +# Copyright (c) 2020, 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team +# +# This file is part of OSM-Fieldwork. +# +# OSM-Fieldwork is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# OSM-Fieldwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with OSM-Fieldwork. If not, see . +# + +import logging +from datetime import datetime +from geojson import Feature, FeatureCollection, Point, dump +from osm_fieldwork.osmfile import OsmFile +from osm_fieldwork.xlsforms import xlsforms_path +from pathlib import Path + +# Instantiate logger +log = logging.getLogger(__name__) + +class OutSupport(object): + def __init__(self, + filespec: str = None, + ): + self.osm = None + self.filespec = filespec + self.features = list() + if filespec: + path = Path(filespec) + if path.suffix == ".osm": + self.createOSM(filespec) + elif path.suffix == ".geojson": + self.createGeoJson(filespec) + else: + log.error(f"{filespec} is not a valid file!") + + def createOSM( + self, + filespec: str = None, + ) -> bool: + """ + Create an OSM XML output files. + + Args: + filespec (str): The output file name + """ + if filespec is not None: + log.debug("Creating OSM XML file: %s" % filespec) + self.osm = OsmFile(filespec) + elif self.filespec is not None: + log.debug("Creating OSM XML file: %s" % self.filespec) + self.osm = OsmFile(self.filespec) + + return True + + def writeOSM( + self, + feature: dict, + ) -> bool: + """ + Write a feature to an OSM XML output file. + + Args: + feature (dict): The OSM feature to write to + """ + out = "" + if "id" in feature["tags"]: + feature["id"] = feature["tags"]["id"] + if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: + return None + if "refs" not in feature: + out += self.osm.createNode(feature) + else: + out += self.osm.createWay(feature) + self.osm.write(out) + + return True + + def finishOSM(self): + """Write the OSM XML file footer and close it.""" + # This is now handled by a destructor in the OsmFile class + # self.osm.footer() + + def createGeoJson( + self, + filespec: str = "tmp.geojson", + ) -> bool: + """ + Create a GeoJson output file. + + Args: + filespec (str): The output file name + """ + log.debug("Creating GeoJson file: %s" % filespec) + self.json = open(filespec, "w") + + return True + + def writeGeoJson( + self, + feature: dict, + ) -> bool: + """ + Write a feature to a GeoJson output file. + + Args: + feature (dict): The OSM feature to write to + """ + # These get written later when finishing , since we have to create a FeatureCollection + if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: + return None + self.features.append(feature) + + return True + + def finishGeoJson(self): + """ + Write the GeoJson FeatureCollection to the output file and close it. + """ + features = list() + for item in self.features: + if len(item["attrs"]["lon"]) == 0 or len(item["attrs"]["lat"]) == 0: + log.warning("Bad location data in entry! %r", item["attrs"]) + continue + poi = Point((float(item["attrs"]["lon"]), float(item["attrs"]["lat"]))) + if "private" in item: + props = {**item["tags"], **item["private"]} + else: + props = item["tags"] + features.append(Feature(geometry=poi, properties=props)) + collection = FeatureCollection(features) + dump(collection, self.json) From 6eda12a4661fbf7978519bfd61293c574ee42eb5 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 6 Jun 2024 07:49:24 -0600 Subject: [PATCH 12/30] fix: Correctly parse an instanxe file from ODK Collect, and make a dict --- osm_fieldwork/ODKInstance.py | 73 ++++++++++++++---------------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/osm_fieldwork/ODKInstance.py b/osm_fieldwork/ODKInstance.py index 7ad8317c..77d3238d 100755 --- a/osm_fieldwork/ODKInstance.py +++ b/osm_fieldwork/ODKInstance.py @@ -22,24 +22,23 @@ import os import re import sys - -# from shapely.geometry import Point, LineString, Polygon +import flatdict +import json from collections import OrderedDict - import xmltodict # Instantiate logger log = logging.getLogger(__name__) - class ODKInstance(object): def __init__( self, filespec: str = None, data: str = None, ): - """This class imports a ODK Instance file, which is in XML into a data - structure. + """ + This class imports a ODK Instance file, which is in XML into a + data structure. Args: filespec (str): The filespec to the ODK XML Instance file @@ -50,6 +49,7 @@ def __init__( """ self.data = data self.filespec = filespec + self.ignore = ["today", "start", "deviceid", "nodel", "instanceID"] if filespec: self.data = self.parse(filespec=filespec) elif data: @@ -59,8 +59,9 @@ def parse( self, filespec: str, data: str = None, - ): - """Import an ODK XML Instance file ito a data structure. The input is + ) -> dict: + """ + Import an ODK XML Instance file ito a data structure. The input is either a filespec to the Instance file copied off your phone, or the XML that has been read in elsewhere. @@ -69,9 +70,9 @@ def parse( data (str): The XML data Returns: - (list): All the entries in the IOPDK XML Instance file + (dict): All the entries in the OSM XML Instance file """ - rows = list() + row = dict() if filespec: logging.info("Processing instance file: %s" % filespec) file = open(filespec, "rb") @@ -80,48 +81,29 @@ def parse( elif data: xml = data doc = xmltodict.parse(xml) - import json json.dumps(doc) tags = dict() data = doc["data"] - for i, j in data.items(): - if j is None or i == "meta": + flattened = flatdict.FlatDict(data) + rows = list() + pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") + for key, value in flattened.items(): + if key[0] == '@' or value is None: continue - print(f"tag: {i} == {j}") - pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") - if pat.match(str(j)): - if i == "warmup": - continue - gps = j.split(" ") - tags["lat"] = gps[0] - tags["lon"] = gps[1] + if re.search(pat, value): + gps = value.split(" ") + row["lat"] = gps[0] + row["lon"] = gps[1] + continue + + # print(key, value) + tmp = key.split(':') + if tmp[len(tmp) - 1] in self.ignore: continue - if type(j) == OrderedDict or type(j) == dict: - for ii, jj in j.items(): - pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") - if pat.match(str(jj)): - gps = jj.split(" ") - tags["lat"] = gps[0] - tags["lon"] = gps[1] - continue - if jj is None: - continue - print(f"tag: {i} == {j}") - if type(jj) == OrderedDict or type(jj) == dict: - for iii, jjj in jj.items(): - if jjj is not None: - tags[iii] = jjj - # print(iii, jjj) - else: - print(ii, jj) - tags[ii] = jj - else: - if i[0:1] != "@": - tags[i] = j - rows.append(tags) - return rows + row[tmp[len(tmp) - 1]] = value + return row if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" @@ -147,3 +129,4 @@ def parse( inst = ODKInstance(args.infile) data = inst.parse(args.infile) + # print(data) From 98a3430fdb2ffa3d168d05ae651e8ab5bee0f992 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 6 Jun 2024 12:27:02 -0600 Subject: [PATCH 13/30] fix: Major refactoring, it now works like the other conversion classes --- osm_fieldwork/odk2osm.py | 111 +++++++++++++-------------------------- 1 file changed, 37 insertions(+), 74 deletions(-) diff --git a/osm_fieldwork/odk2osm.py b/osm_fieldwork/odk2osm.py index 19d1bf3d..429179df 100755 --- a/osm_fieldwork/odk2osm.py +++ b/osm_fieldwork/odk2osm.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # -# Copyright (C) 2020, 2021, 2022, 2023 Humanitarian OpenstreetMap Team +# Copyright (C) 2020, 2021, 2022, 2023, 2024 Humanitarian OpenstreetMap Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,12 +26,20 @@ from collections import OrderedDict from datetime import datetime from pathlib import Path - import xmltodict +from osm_fieldwork.convert import Convert +from osm_fieldwork.ODKInstance import ODKInstance +from osm_fieldwork.support import OutSupport # Instantiate logger log = logging.getLogger(__name__) +class OdkDump(Convert): + def __init__( + self, + yaml: str = None, + ): + self.config = super().__init__(yaml) def main(): """This is a program that reads in the ODK Instance file, which is in XML, @@ -39,6 +47,8 @@ def main(): """ parser = argparse.ArgumentParser(description="Convert ODK XML instance file to OSM XML format") parser.add_argument("-v", "--verbose", nargs="?", const="0", help="verbose output") + parser.add_argument("-y", "--yaml", help="Alternate YAML file") + parser.add_argument("-x", "--xlsfile", help="Source XLSFile") parser.add_argument("-i", "--instance", required=True, help="The instance file(s) from ODK Collect") # parser.add_argument("-o","--outfile", default='tmp.csv', help='The output file for JOSM') args = parser.parse_args() @@ -69,82 +79,35 @@ def main(): xmlfiles.append(full + ".xml") # print(xmlfiles) + if args.yaml: + xlsin = OdkDump(args.yaml) + else: + xlsin = OdkDump() + xlsin.parseXLS(args.xlsfile) - # These are all generated by Collect, and can be ignored - rows = list() - for instance in xmlfiles: - logging.info("Processing instance file: %s" % instance) - with open(instance, "rb") as file: - # Instances are small, read the whole file - xml = file.read(os.path.getsize(instance)) - doc = xmltodict.parse(xml) - fields = list() - tags = dict() - data = doc["data"] - for i, j in data.items(): - if j is None or i == "meta": - continue - # print(f"tag: {i} == {j}") - pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") - if pat.match(str(j)): - if i == "warmup": - continue - gps = j.split(" ") - tags["lat"] = gps[0] - tags["lon"] = gps[1] - continue - if type(j) == OrderedDict or type(j) == dict: - for ii, jj in j.items(): - pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") - if pat.match(str(jj)): - gps = jj.split(" ") - tags["lat"] = gps[0] - tags["lon"] = gps[1] - continue - if jj is None: - continue - print(f"tag2: {i} == {j}") - if type(jj) == OrderedDict or type(jj) == dict: - for iii, jjj in jj.items(): - if jjj is not None: - pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") - if pat.match(str(jjj)): - gps = jjj.split(" ") - tags["lat"] = gps[0] - tags["lon"] = gps[1] - continue - else: - tags[iii] = jjj - # print(f"FOO {iii}, {jjj}") - else: - # print(f"WHERE {ii}, {jj}") - fields.append(ii) - tags[ii] = jj - else: - if i[0:1] != "@": - tags[i] = j - rows.append(tags) - - xml = os.path.basename(xmlfiles[0]) - tmp = xml.replace(" ", "").split("_") now = datetime.now() - timestamp = f"_{now.year}_{now.hour}_{now.minute}" + timestamp = f"-{now.year}_{now.month}_{now.day}_{now.hour}_{now.minute}" + result = re.search("[*0-9-_]*$", args.instance) + base = args.instance[:result.start()] + jsonoutfile = f"{base}{timestamp}.geojson" + jsonout = OutSupport(jsonoutfile) - outfile = tmp[0] + timestamp + ".csv" - - with open(outfile, "w", newline="") as csvfile: - fields = list() - for row in rows: - for key in row.keys(): - if key not in fields: - fields.append(key) - out = csv.DictWriter(csvfile, dialect="excel", fieldnames=fields) - out.writeheader() - for row in rows: - out.writerow(row) - - print("Wrote: %s" % outfile) + osmoutfile = f"{base}{timestamp}.osm" + osmout = OutSupport(osmoutfile) + # These are all generated by Collect, and can be ignored + files = list() + convert = Convert() + for instance in xmlfiles: + logging.info(f"Processing instance {instance}") + odk = ODKInstance(instance) + entry = convert.createEntry(odk.data) + print(entry) + osmout.writeOSM(entry) + jsonout.writeGeoJson(entry) + + print("Wrote: %s" % osmoutfile) + print("Wrote: %s" % jsonoutfile) if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" From 9bf510a5330377efecb8b0ccc0f737c47d2ed6f5 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 6 Jun 2024 14:21:44 -0600 Subject: [PATCH 14/30] fix: Parse the XLS file so conversion is better --- osm_fieldwork/odk2osm.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/osm_fieldwork/odk2osm.py b/osm_fieldwork/odk2osm.py index 429179df..61daab58 100755 --- a/osm_fieldwork/odk2osm.py +++ b/osm_fieldwork/odk2osm.py @@ -78,13 +78,6 @@ def main(): full = os.path.join(toplevel, os.path.basename(toplevel)) xmlfiles.append(full + ".xml") - # print(xmlfiles) - if args.yaml: - xlsin = OdkDump(args.yaml) - else: - xlsin = OdkDump() - xlsin.parseXLS(args.xlsfile) - now = datetime.now() timestamp = f"-{now.year}_{now.month}_{now.day}_{now.hour}_{now.minute}" result = re.search("[*0-9-_]*$", args.instance) @@ -98,13 +91,13 @@ def main(): # These are all generated by Collect, and can be ignored files = list() convert = Convert() + convert.parseXLS(args.xlsfile) for instance in xmlfiles: logging.info(f"Processing instance {instance}") odk = ODKInstance(instance) entry = convert.createEntry(odk.data) - print(entry) - osmout.writeOSM(entry) jsonout.writeGeoJson(entry) + osmout.writeOSM(entry) print("Wrote: %s" % osmoutfile) print("Wrote: %s" % jsonoutfile) From 34992318a3111d552e98c80966e0ac007a89a295 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Thu, 6 Jun 2024 14:22:18 -0600 Subject: [PATCH 15/30] fix: Convert ODK XML to OSM XML --- osm_fieldwork/convert.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index 4b3c19cb..1437311a 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -405,6 +405,8 @@ def createEntry( "action", ) + if key in self.ignore: + continue # When using existing OSM data, there's a special geometry field. # Otherwise use the GPS coordinates where you are. if key == "geometry" and len(value) > 0: @@ -421,8 +423,18 @@ def createEntry( attrs[key] = value # log.debug("Adding attribute %s with value %s" % (key, value)) continue - if value is not None and value != "no" and value != "unknown": + if key == 'username': + tags['user'] = value + continue + items = self.convertEntry(key, value) + if key in self.types: + if self.types[key] == "select_multiple": + vals = self.convertMultiple(value) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + continue if key == "track" or key == "geoline": # refs.append(tags) # log.debug("Adding reference %s" % tags) From eda2ada1c03dbed4d65d668b3b6da8c877dfc8ef Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 9 Jun 2024 19:37:48 -0600 Subject: [PATCH 16/30] fix: move basename from covert and make a standalone function --- osm_fieldwork/{CSVDump.py => csvdump.py} | 0 osm_fieldwork/{json2osm.py => jsondump.py} | 0 osm_fieldwork/support.py | 72 ++++++++++++++++++++++ 3 files changed, 72 insertions(+) rename osm_fieldwork/{CSVDump.py => csvdump.py} (100%) rename osm_fieldwork/{json2osm.py => jsondump.py} (100%) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/csvdump.py similarity index 100% rename from osm_fieldwork/CSVDump.py rename to osm_fieldwork/csvdump.py diff --git a/osm_fieldwork/json2osm.py b/osm_fieldwork/jsondump.py similarity index 100% rename from osm_fieldwork/json2osm.py rename to osm_fieldwork/jsondump.py diff --git a/osm_fieldwork/support.py b/osm_fieldwork/support.py index 81eed219..7602d1f7 100644 --- a/osm_fieldwork/support.py +++ b/osm_fieldwork/support.py @@ -28,6 +28,24 @@ # Instantiate logger log = logging.getLogger(__name__) +def basename( + line: str, +) -> str: + """ + Extract the basename of a path after the last -. + + Args: + line (str): The path from the json file entry + + Returns: + (str): The last node of the path + """ + tmp = line.split("-") + if len(tmp) == 0: + return line + base = tmp[len(tmp) - 1] + return base + class OutSupport(object): def __init__(self, filespec: str = None, @@ -140,3 +158,57 @@ def finishGeoJson(self): features.append(Feature(geometry=poi, properties=props)) collection = FeatureCollection(features) dump(collection, self.json) + + + def WriteData(self, + base: str, + data: dict(), + ) -> bool: + """ + Write the data to the output files. + + Args: + base (str): The base of the input file name + data (dict): The data to write + + Returns: + (bool): Whether the data got written + """ + osmoutfile = f"{base}.osm" + self.createOSM(osmoutfile) + + jsonoutfile = f"{base}.geojson" + self.createGeoJson(jsonoutfile) + + nodeid = -1000 + for feature in data: + if len(feature) == 0: + continue + if "refs" in feature: + # it's a way + refs = list() + for ref in feature["refs"]: + now = datetime.now().strftime("%Y-%m-%dT%TZ") + if len(ref) == 0: + continue + coords = ref.split(" ") + node = {"attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, "tags": dict()} + self.writeOSM(node) + self.writeGeoJson(node) + refs.append(nodeid) + nodeid -= 1 + feature["refs"] = refs + else: + # it's a node + if "lat" not in feature["attrs"]: + # Sometimes bad entries, usually from debugging XForm design, sneak in + log.warning("Bad record! %r" % feature) + continue + self.writeOSM(feature) + + self.finishOSM() + log.info("Wrote OSM XML file: %r" % osmoutfile) + self.finishGeoJson() + log.info("Wrote GeoJson file: %r" % jsonoutfile) + + return True From 1763ce546ecc4277353b22166153be6ea6b96703 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 9 Jun 2024 19:38:58 -0600 Subject: [PATCH 17/30] fix: Move all file output and other code to a shareable class --- osm_fieldwork/csvdump.py | 151 +-------------------------------------- 1 file changed, 2 insertions(+), 149 deletions(-) diff --git a/osm_fieldwork/csvdump.py b/osm_fieldwork/csvdump.py index eab7ffc6..16340141 100755 --- a/osm_fieldwork/csvdump.py +++ b/osm_fieldwork/csvdump.py @@ -32,6 +32,7 @@ from osm_fieldwork.convert import Convert from osm_fieldwork.osmfile import OsmFile from osm_fieldwork.xlsforms import xlsforms_path +from osm_fieldwork.support import basename # Instantiate logger log = logging.getLogger(__name__) @@ -41,7 +42,6 @@ class CSVDump(Convert): """ A class to parse the CSV files from ODK Central. """ - def __init__( self, yaml: str = None, @@ -63,132 +63,6 @@ def __init__( self.entries = dict() self.types = dict() - def lastSaved( - self, - keyword: str, - ) -> str: - """ - Get the last saved value for a question. - - Args: - keyword (str): The keyword to search for - - Returns: - (str): The last saved value for the question - - """ - if keyword is not None and len(keyword) > 0: - return self.saved[keyword] - return None - - def updateSaved( - self, - keyword: str, - value: str, - ) -> bool: - """ - Update the last saved value for a question. - - Args: - keyword (str): The keyword to search for - value (str): The new value - - Returns: - (bool): If the new value got saved - - """ - if keyword is not None and value is not None and len(value) > 0: - self.saved[keyword] = value - return True - else: - return False - - def createOSM( - self, - filespec: str, - ): - """ - Create an OSM XML output files. - - Args: - filespec (str): The output file name - """ - log.debug("Creating OSM XML file: %s" % filespec) - self.osm = OsmFile(filespec) - # self.osm.header() - - def writeOSM( - self, - feature: dict, - ): - """ - Write a feature to an OSM XML output file. - - Args: - feature (dict): The OSM feature to write to - """ - out = "" - if "id" in feature["tags"]: - feature["id"] = feature["tags"]["id"] - if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: - return None - if "refs" not in feature: - out += self.osm.createNode(feature) - else: - out += self.osm.createWay(feature) - self.osm.write(out) - - def finishOSM(self): - """Write the OSM XML file footer and close it.""" - # This is now handled by a destructor in the OsmFile class - # self.osm.footer() - - def createGeoJson( - self, - filespec: str = "tmp.geojson", - ): - """ - Create a GeoJson output file. - - Args: - filespec (str): The output file name - """ - log.debug("Creating GeoJson file: %s" % filespec) - self.json = open(filespec, "w") - - def writeGeoJson( - self, - feature: dict, - ): - """ - Write a feature to a GeoJson output file. - - Args: - feature (dict): The OSM feature to write to - """ - # These get written later when finishing , since we have to create a FeatureCollection - if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: - return None - self.features.append(feature) - - def finishGeoJson(self): - """ - Write the GeoJson FeatureCollection to the output file and close it. - """ - features = list() - for item in self.features: - if len(item["attrs"]["lon"]) == 0 or len(item["attrs"]["lat"]) == 0: - log.warning("Bad location data in entry! %r", item["attrs"]) - continue - poi = Point((float(item["attrs"]["lon"]), float(item["attrs"]["lat"]))) - if "private" in item: - props = {**item["tags"], **item["private"]} - else: - props = item["tags"] - features.append(Feature(geometry=poi, properties=props)) - collection = FeatureCollection(features) - dump(collection, self.json) - def parse( self, filespec: str, @@ -216,7 +90,7 @@ def parse( for keyword, value in row.items(): if keyword is None or len(value) == 0: continue - base = self.basename(keyword).lower() + base = basename(keyword).lower() # There's many extraneous fields in the input file which we don't need. if base is None or base in self.ignore or value is None: continue @@ -241,7 +115,6 @@ def parse( if base == "longitude" and len(value) == 0: value = row["warmup-Longitude"] items = self.convertEntry(base, value) - # log.info(f"ROW: {base} {value}") if len(items) > 0: if base in self.saved: @@ -266,25 +139,6 @@ def parse( all_tags.append(tags) return all_tags - def basename( - self, - line: str, - ) -> str: - """ - Extract the basename of a path after the last -. - - Args: - line (str): The path from the json file entry - - Returns: - (str): The last node of the path - """ - tmp = line.split("-") - if len(tmp) == 0: - return line - base = tmp[len(tmp) - 1] - return base - def main(): """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert CSV from ODK Central to OSM XML") @@ -354,7 +208,6 @@ def main(): log.info("Wrote OSM XML file: %r" % osmoutfile) log.info("Wrote GeoJson file: %r" % jsonoutfile) - if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() From 18f4601529386b43ef8354a76d966f1558aad2c5 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 9 Jun 2024 19:40:12 -0600 Subject: [PATCH 18/30] fix: Move all file output and other code to a shareable class --- osm_fieldwork/jsondump.py | 151 -------------------------------------- 1 file changed, 151 deletions(-) diff --git a/osm_fieldwork/jsondump.py b/osm_fieldwork/jsondump.py index 944dedaf..e0e1602c 100755 --- a/osm_fieldwork/jsondump.py +++ b/osm_fieldwork/jsondump.py @@ -63,101 +63,6 @@ def __init__( self.features = list() self.config = super().__init__(yaml) - def createOSM( - self, - filespec: str = "tmp.osm", - ) -> OsmFile: - """ - Create an OSM XML output files. - - Args: - filespec (str): The filespec for the output OSM XML file - - Returns: - (OsmFile): An instance of the OSM XML output file - """ - log.debug(f"Creating OSM XML file: {filespec}") - self.osm = OsmFile(filespec) - return self.osm - - def writeOSM( - self, - feature: dict, - ): - """ - Write a feature to an OSM XML output file. - - Args: - feature (dict): The feature to write to the OSM XML output file - """ - out = "" - if "id" in feature["tags"]: - feature["id"] = feature["tags"]["id"] - if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: - return None - if "user" in feature["tags"] and "user" not in feature["attrs"]: - feature["attrs"]["user"] = feature["tags"]["user"] - del feature["tags"]["user"] - if "uid" in feature["tags"] and "uid" not in ["attrs"]: - feature["attrs"]["uid"] = feature["tags"]["uid"] - del feature["tags"]["uid"] - if "refs" not in feature: - out += self.osm.createNode(feature, True) - else: - out += self.osm.createWay(feature, True) - self.osm.write(out) - - def finishOSM(self): - """ - Write the OSM XML file footer and close it. The destructor in the - OsmFile class should do this, but this is the manual way. - """ - self.osm.footer() - - def createGeoJson( - self, - file="tmp.geojson", - ): - """ - Create a GeoJson output file. - - Args: - file (str): The filespec of the output GeoJson file - """ - log.debug("Creating GeoJson file: %s" % file) - self.json = open(file, "w") - - def writeGeoJson( - self, - feature: dict, - ): - """ - Write a feature to a GeoJson output file. - - Args: - feature (dict): The feature to write to the GeoJson output file - """ - # These get written later when finishing , since we have to create a FeatureCollection - if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: - return None - self.features.append(feature) - - def finishGeoJson(self): - """ - Write the GeoJson FeatureCollection to the output file and close it. - """ - features = list() - for item in self.features: - # poi = Point() - poi = Point((float(item["attrs"]["lon"]), float(item["attrs"]["lat"]))) - if "private" in item: - props = {**item["tags"], **item["private"]} - else: - props = item["tags"] - features.append(Feature(geometry=poi, properties=props)) - collection = FeatureCollection(features) - dump(collection, self.json) - def parse( self, filespec: str = None, @@ -276,62 +181,6 @@ def parse( # log.debug(f"Finished parsing JSON file {filespec}") return total -# def json2osm( -# cmdln: dict, -# ) -> str: -# """ -# Process the JSON file from ODK Central or the GeoJSON file to OSM XML format. - -# Args: -# cmdln (dict): The data from the command line - -# Returns: -# osmoutfile (str): Path to the converted OSM XML file. -# """ -# log.info(f"Converting JSON file to OSM: {cmdln['infile']}") -# if yaml_file: -# jsonin = JsonDump({cmd['yaml']}) -# else: -# jsonin = JsonDump() - -# # Modify the input file name for the 2 output files, which will get written -# # to the current directory. - -# base = Path(input_file).stem -# osmoutfile = f"{base}-out.osm" -# jsonin.createOSM(osmoutfile) - -# data = jsonin.parse(input_file) -# # This OSM XML file only has OSM appropriate tags and values - -# for entry in data: -# feature = jsonin.createEntry(entry) - -# # Sometimes bad entries, usually from debugging XForm design, sneak in -# if len(feature) == 0: -# continue - -# if len(feature) > 0: -# if "lat" not in feature["attrs"]: -# if "geometry" in feature["tags"]: -# if isinstance(feature["tags"]["geometry"], str): -# coords = list(feature["tags"]["geometry"]) -# # del feature['tags']['geometry'] -# elif "coordinates" in feature["tags"]: -# coords = feature["tags"]["coordinates"] -# feature["attrs"] = {"lat": coords[1], "lon": coords[0]} -# else: -# log.warning(f"Bad record! {feature}") -# continue # Skip bad records - -# jsonin.writeOSM(feature) -# # log.debug("Writing final OSM XML file...") - -# # jsonin.finishOSM() -# log.info(f"Wrote OSM XML file: {osmoutfile}") - -# return osmoutfile - def main(): """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert JSON from ODK Central to OSM XML") From 86cd1ce0843f9b4a44fdfdbd6f73cad4b02b83af Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 9 Jun 2024 19:41:19 -0600 Subject: [PATCH 19/30] fix: Now that more code is shareable, merge all parsers programs into a single one --- osm_fieldwork/odk2osm.py | 95 +++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/osm_fieldwork/odk2osm.py b/osm_fieldwork/odk2osm.py index 61daab58..1bfdad88 100755 --- a/osm_fieldwork/odk2osm.py +++ b/osm_fieldwork/odk2osm.py @@ -27,29 +27,26 @@ from datetime import datetime from pathlib import Path import xmltodict +import glob from osm_fieldwork.convert import Convert from osm_fieldwork.ODKInstance import ODKInstance from osm_fieldwork.support import OutSupport +from osm_fieldwork.jsondump import JsonDump +from osm_fieldwork.csvdump import CSVDump # Instantiate logger log = logging.getLogger(__name__) -class OdkDump(Convert): - def __init__( - self, - yaml: str = None, - ): - self.config = super().__init__(yaml) - def main(): - """This is a program that reads in the ODK Instance file, which is in XML, + """ + This is a program that reads in the ODK Instance file, which is in XML, and converts it to an OSM XML file so it can be viewed in an editor. """ parser = argparse.ArgumentParser(description="Convert ODK XML instance file to OSM XML format") parser.add_argument("-v", "--verbose", nargs="?", const="0", help="verbose output") parser.add_argument("-y", "--yaml", help="Alternate YAML file") parser.add_argument("-x", "--xlsfile", help="Source XLSFile") - parser.add_argument("-i", "--instance", required=True, help="The instance file(s) from ODK Collect") + parser.add_argument("-i", "--infile", required=True, help="The input file") # parser.add_argument("-o","--outfile", default='tmp.csv', help='The output file for JOSM') args = parser.parse_args() @@ -62,45 +59,55 @@ def main(): stream=sys.stdout, ) + toplevel = Path(args.infile) + out = OutSupport() xmlfiles = list() - if args.instance.find("*") >= 0: - toplevel = Path() - for dir in toplevel.glob(args.instance): - if dir.is_dir(): - xml = os.listdir(dir) - # There is always only one XML file per instance - full = os.path.join(dir, xml[0]) - xmlfiles.append(full) - else: - toplevel = Path(args.instance) - if toplevel.is_dir(): - # There is always only one XML file per instance - full = os.path.join(toplevel, os.path.basename(toplevel)) - xmlfiles.append(full + ".xml") - - now = datetime.now() - timestamp = f"-{now.year}_{now.month}_{now.day}_{now.hour}_{now.minute}" - result = re.search("[*0-9-_]*$", args.instance) - base = args.instance[:result.start()] - jsonoutfile = f"{base}{timestamp}.geojson" - jsonout = OutSupport(jsonoutfile) - - osmoutfile = f"{base}{timestamp}.osm" - osmout = OutSupport(osmoutfile) - - # These are all generated by Collect, and can be ignored - files = list() convert = Convert() - convert.parseXLS(args.xlsfile) - for instance in xmlfiles: - logging.info(f"Processing instance {instance}") - odk = ODKInstance(instance) + data = list() + # It's a wildcard, used for XML instance files + if args.infile.find("*") >= 0: + log.debug(f"Parsing multiple ODK XML files {args.infile}") + toplevel = Path(args.infile[:-1]) + for dirs in glob.glob(args.infile): + xml = os.listdir(dirs) + # There ilineagelineages always only one XML file per infile + full = os.path.join(dirs, xml[0]) + xmlfiles.append(full) + for infile in xmlfiles: + logging.info(f"Processing infile {infile}") + odk = ODKInstance(infile) + entry = convert.createEntry(odk.data) + data.append(entry) + elif toplevel.suffix == '.xml': + # It's an instance file from ODK Collect + log.debug(f"Parsing ODK XML files {args.infile}") + # There is always only one XML file per infile + full = os.path.join(toplevel, os.path.basename(toplevel)) + xmlfiles.append(full + ".xml") + odk = ODKInstance(args.infile) entry = convert.createEntry(odk.data) - jsonout.writeGeoJson(entry) - osmout.writeOSM(entry) + data.append(entry) + elif toplevel.suffix == ".csv": + log.debug(f"Parsing csv files {args.infile}") + if args.yaml: + csvin = CSVDump(args.yaml) + else: + csvin = CSVDump() + csvin.parseXLS(args.xlsfile) + for entry in csvin.parse(args.infile): + data.append(convert.createEntry(entry)) + elif toplevel.suffix == ".json": + log.debug(f"Parsing json files {args.infile}") + if args.yaml: + jsonin = JsonDump(args.yaml) + else: + jsonin = JsonDump() + jsonin.parseXLS(args.xlsfile) + for entry in jsonin.parse(args.infile): + data.append(convert.createEntry(entry)) - print("Wrote: %s" % osmoutfile) - print("Wrote: %s" % jsonoutfile) + # Write the data + out.WriteData(toplevel.stem, data) if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" From 7be8ce32854834629ad42a98a0d5630bde18ccde Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 9 Jun 2024 19:45:13 -0600 Subject: [PATCH 20/30] fix: Start moving all parsers to this file --- osm_fieldwork/parsers.py | 261 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 osm_fieldwork/parsers.py diff --git a/osm_fieldwork/parsers.py b/osm_fieldwork/parsers.py new file mode 100644 index 00000000..834cfae4 --- /dev/null +++ b/osm_fieldwork/parsers.py @@ -0,0 +1,261 @@ +#!/usr/bin/python3 + +# Copyright (c) 2024 Humanitarian OpenStreetMap Team +# +# This file is part of OSM-Fieldwork. +# +# OSM-Fieldwork is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# OSM-Fieldwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with OSM-Fieldwork. If not, see . +# + +import argparse +import csv +import logging +import os +import re +import sys +from datetime import datetime +from collections import OrderedDict +from pathlib import Path +import xmltodict +from geojson import Feature, FeatureCollection, dump +from osm_fieldwork.osmfile import OsmFile +from osm_fieldwork.xlsforms import xlsforms_path +from osm_fieldwork.ODKInstance import ODKInstance +from osm_fieldwork.support import basename + +# Instantiate logger +log = logging.getLogger(__name__) + + +class ODKParsers(Convert): + """ + A class to parse the CSV files from ODK Central. + """ + def __init__( + self, + yaml: str = None, + ): + self.fields = dict() + self.nodesets = dict() + self.data = list() + self.osm = None + self.json = None + self.features = list() + xlsforms_path.replace("xlsforms", "") + if yaml: + pass + else: + pass + self.config = super().__init__(yaml) + self.saved = dict() + self.defaults = dict() + self.entries = dict() + self.types = dict() + + def CSVparser( + self, + filespec: str, + data: str = None, + ) -> list: + """ + Parse the CSV file from ODK Central and convert it to a data structure. + + Args: + filespec (str): The file to parse. + data (str): Or the data to parse. + + Returns: + (list): The list of features with tags + """ + all_tags = list() + if not data: + f = open(filespec, newline="") + reader = csv.DictReader(f, delimiter=",") + else: + reader = csv.DictReader(data, delimiter=",") + for row in reader: + tags = dict() + # log.info(f"ROW: {row}") + for keyword, value in row.items(): + if keyword is None or len(value) == 0: + continue + base = basename(keyword).lower() + # There's many extraneous fields in the input file which we don't need. + if base is None or base in self.ignore or value is None: + continue + else: + # log.info(f"ITEM: {keyword} = {value}") + if base in self.types: + if self.types[base] == "select_multiple": + vals = self.convertMultiple(value) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue + # When using geopoint warmup, once the display changes to the map + + # location, there is not always a value if the accuracy is way + # off. In this case use the warmup value, which is where we are + # hopefully standing anyway. + if base == "latitude" and len(value) == 0: + if "warmup-Latitude" in row: + value = row["warmup-Latitude"] + if base == "longitude" and len(value) == 0: + value = row["warmup-Longitude"] + items = self.convertEntry(base, value) + # log.info(f"ROW: {base} {value}") + if len(items) > 0: + if base in self.saved: + if str(value) == "nan" or len(value) == 0: + # log.debug(f"FIXME: {base} {value}") + val = self.saved[base] + if val and len(value) == 0: + log.warning(f'Using last saved value for "{base}"! Now "{val}"') + value = val + else: + self.saved[base] = value + log.debug(f'Updating last saved value for "{base}" with "{value}"') + # Handle nested dict in list + if isinstance(items, list): + items = items[0] + for k, v in items.items(): + tags[k] = v + else: + tags[base] = value + + # log.debug(f"\tFIXME1: {tags}") + all_tags.append(tags) + return all_tags + + def JSONparser( + self, + filespec: str = None, + data: str = None, + ) -> list: + """ + Parse the JSON file from ODK Central and convert it to a data structure. + The input is either a filespec to open, or the data itself. + + Args: + filespec (str): The JSON or GeoJson input file to convert + data (str): The data to convert + + Returns: + (list): A list of all the features in the input file + """ + log.debug(f"Parsing JSON file {filespec}") + total = list() + if not data: + file = open(filespec, "r") + infile = Path(filespec) + if infile.suffix == ".geojson": + reader = geojson.load(file) + elif infile.suffix == ".json": + reader = json.load(file) + else: + log.error("Need to specify a JSON or GeoJson file!") + return total + elif isinstance(data, str): + reader = geojson.loads(data) + elif isinstance(data, list): + reader = data + + # JSON files from Central use value as the keyword, whereas + # GeoJSON uses features for the same thing. + if "value" in reader: + data = reader["value"] + elif "features" in reader: + data = reader["features"] + else: + data = reader + for row in data: + # log.debug(f"ROW: {row}\n") + tags = dict() + # Extract the location regardless of what the tag is + # called. + # pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") + # gps = re.findall(pat, str(row)) + # tmp = list() + # if len(gps) == 0: + # log.error(f"No location data in: {row}") + # continue + # elif len(gps) == 1: + # # Only the warmup has any coordinates. + # tmp = gps[0].split(" ") + # elif len(gps) == 2: + # # both the warmup and the coordinates have values + # tmp = gps[1].split(" ") + + # if len(tmp) > 0: + # lat = float(tmp[0][:-1]) + # lon = float(tmp[1][:-1]) + # geom = Point([lon, lat]) + # row["geometry"] = geom + # # tags["geometry"] = row["geometry"] + + if "properties" in row: + row["properties"] # A GeoJson formatted file + else: + pass # A JOSM file from ODK Central + + # flatten all the groups into a sodk2geojson.pyingle data structure + flattened = flatdict.FlatDict(row) + for k, v in flattened.items(): + last = k.rfind(":") + 1 + key = k[last:] + # a JSON file from ODK Central always uses coordinates as + # the keyword + if key is None or key in self.ignore or v is None: + continue + log.debug(f"Processing tag {key} = {v}") + if key == "coordinates": + if isinstance(v, list): + tags["lat"] = v[1] + tags["lon"] = v[0] + # poi = Point(float(lon), float(lat)) + # tags["geometry"] = poi + continue + + if key in self.types: + if self.types[key] == "select_multiple": + # log.debug(f"Found key '{self.types[key]}'") + if v is None: + continue + vals = self.convertMultiple(v) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue + + items = self.convertEntry(key, v) + if items is None or len(items) == 0: + continue + + if type(items) == str: + log.debug(f"string Item {items}") + else: + log.debug(f"dict Item {items}") + if len(items) == 0: + tags.update(items[0]) + # log.debug(f"TAGS: {tags}") + if len(tags) > 0: + total.append(tags) + + # log.debug(f"Finished parsing JSON file {filespec}") + return total + +from osm_fieldwork.ODKInstance import ODKInstance +from osm_fieldwork.ODKInstance import ODKInstance From 29bde7ceb21cb0f747cba0c9081d6a755f749f7b Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Fri, 14 Jun 2024 09:59:14 -0600 Subject: [PATCH 21/30] fix: Drop now unused command line utilities --- osm_fieldwork/json2osm.py | 262 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 - 2 files changed, 262 insertions(+), 5 deletions(-) create mode 100644 osm_fieldwork/json2osm.py diff --git a/osm_fieldwork/json2osm.py b/osm_fieldwork/json2osm.py new file mode 100644 index 00000000..f95597ec --- /dev/null +++ b/osm_fieldwork/json2osm.py @@ -0,0 +1,262 @@ +#!/usr/bin/python3 + +# Copyright (c) 2023, 2024 Humanitarian OpenStreetMap Team +# +# This file is part of OSM-Fieldwork. +# +# OSM-Fieldwork is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# OSM-Fieldwork is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with OSM-Fieldwork. If not, see . +# + +import argparse +import json +import logging + +# import pandas as pd +import re +import sys +from pathlib import Path + +import flatdict +import geojson +import shapely +from geojson import Feature, FeatureCollection, Point, dump + +from osm_fieldwork.convert import Convert +from osm_fieldwork.osmfile import OsmFile + +log = logging.getLogger(__name__) + + +class JsonDump(Convert): + """A class to parse the JSON files from ODK Central or odk2geojson.""" + + def __init__( + self, + yaml: str = None, + ): + """ + A class to convert the JSON file from ODK Central, or the GeoJson + file created by the odk2geojson utility. + + Args: + yaml (str): The filespec of the YAML config file + + Returns: + (JsonDump): An instance of this object + """ + self.fields = dict() + self.nodesets = dict() + self.data = list() + self.osm = None + self.json = None + self.features = list() + self.config = super().__init__(yaml) + self.saved = dict() + self.defaults = dict() + self.entries = dict() + self.types = dict() + + def parse( + self, + filespec: str = None, + data: str = None, + ) -> list: + """ + Parse the JSON file from ODK Central and convert it to a data structure. + The input is either a filespec to open, or the data itself. + + Args: + filespec (str): The JSON or GeoJson input file to convert + data (str): The data to convert + + Returns: + (list): A list of all the features in the input file + """ + log.debug(f"Parsing JSON file {filespec}") + total = list() + if not data: + file = open(filespec, "r") + infile = Path(filespec) + if infile.suffix == ".geojson": + reader = geojson.load(file) + elif infile.suffix == ".json": + reader = json.load(file) + else: + log.error("Need to specify a JSON or GeoJson file!") + return total + elif isinstance(data, str): + reader = geojson.loads(data) + elif isinstance(data, list): + reader = data + + # JSON files from Central use value as the keyword, whereas + # GeoJSON uses features for the same thing. + if "value" in reader: + data = reader["value"] + elif "features" in reader: + data = reader["features"] + else: + data = reader + for row in data: + # log.debug(f"ROW: {row}\n") + tags = dict() + # Extract the location regardless of what the tag is + # called. + # pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") + # gps = re.findall(pat, str(row)) + # tmp = list() + # if len(gps) == 0: + # log.error(f"No location data in: {row}") + # continue + # elif len(gps) == 1: + # # Only the warmup has any coordinates. + # tmp = gps[0].split(" ") + # elif len(gps) == 2: + # # both the warmup and the coordinates have values + # tmp = gps[1].split(" ") + + # if len(tmp) > 0: + # lat = float(tmp[0][:-1]) + # lon = float(tmp[1][:-1]) + # geom = Point([lon, lat]) + # row["geometry"] = geom + # # tags["geometry"] = row["geometry"] + + if "properties" in row: + row["properties"] # A GeoJson formatted file + else: + pass # A JOSM file from ODK Central + + # flatten all the groups into a sodk2geojson.pyingle data structure + flattened = flatdict.FlatDict(row) + for k, v in flattened.items(): + last = k.rfind(":") + 1 + key = k[last:] + # a JSON file from ODK Central always uses coordinates as + # the keyword + if key is None or key in self.ignore or v is None: + continue + log.debug(f"Processing tag {key} = {v}") + if key == "coordinates": + if isinstance(v, list): + tags["lat"] = v[1] + tags["lon"] = v[0] + # poi = Point(float(lon), float(lat)) + # tags["geometry"] = poi + continue + + if key in self.types: + if self.types[key] == "select_multiple": + # log.debug(f"Found key '{self.types[key]}'") + if v is None: + continue + vals = self.convertMultiple(v) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue + + items = self.convertEntry(key, v) + if items is None or len(items) == 0: + continue + + if type(items) == str: + log.debug(f"string Item {items}") + else: + log.debug(f"dict Item {items}") + if len(items) == 0: + tags.update(items[0]) + # log.debug(f"TAGS: {tags}") + if len(tags) > 0: + total.append(tags) + + # log.debug(f"Finished parsing JSON file {filespec}") + return total + +def main(): + """Run conversion directly from the terminal.""" + parser = argparse.ArgumentParser(description="convert JSON from ODK Central to OSM XML") + parser.add_argument("-v", "--verbose", action="store_true", help="verbose output") + parser.add_argument("-y", "--yaml", help="Alternate YAML file") + parser.add_argument("-x", "--xlsfile", help="Source XLSFile") + parser.add_argument("-i", "--infile", required=True, help="The input file downloaded from ODK Central") + args = parser.parse_args() + + # if verbose, dump to the terminal. + if args.verbose is not None: + logging.basicConfig( + level=logging.DEBUG, + format=("%(threadName)10s - %(name)s - %(levelname)s - %(message)s"), + datefmt="%y-%m-%d %H:%M:%S", + stream=sys.stdout, + ) + logging.getLogger("urllib3").setLevel(logging.DEBUG) + + if args.yaml: + jsonvin = JsonDump(args.yaml) + else: + jsonin = JsonDump() + + jsonin.parseXLS(args.xlsfile) + + base = Path(args.infile).stem + osmoutfile = f"{base}.osm" + jsonin.createOSM(osmoutfile) + + jsonoutfile = f"{base}.geojson" + jsonin.createGeoJson(jsonoutfile) + + log.debug("Parsing json files %r" % args.infile) + data = jsonin.parse(args.infile) + + # This OSM XML file only has OSM appropriate tags and values + nodeid = -1000 + for entry in data: + feature = jsonin.createEntry(entry) + if len(feature) == 0: + continue + if "refs" in feature: + refs = list() + for ref in feature["refs"]: + now = datetime.now().strftime("%Y-%m-%dT%TZ") + if len(ref) == 0: + continue + coords = ref.split(" ") + print(coords) + node = {"attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, "tags": dict()} + jsonin.writeOSM(node) + refs.append(nodeid) + nodeid -= 1 + + feature["refs"] = refs + jsonin.writeOSM(feature) + else: + # Sometimes bad entries, usually from debugging XForm design, sneak in + if "lat" not in feature["attrs"]: + log.warning("Bad record! %r" % feature) + continue + jsonin.writeOSM(feature) + # This GeoJson file has all the data values + jsonin.writeGeoJson(feature) + # print("TAGS: %r" % feature['tags']) + + jsonin.finishOSM() + jsonin.finishGeoJson() + log.info("Wrote OSM XML file: %r" % osmoutfile) + log.info("Wrote GeoJson file: %r" % jsonoutfile) + +if __name__ == "__main__": + """This is just a hook so this file can be run standlone during development.""" + main() diff --git a/pyproject.toml b/pyproject.toml index d1d77a93..48f628b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,13 +128,8 @@ convention = "google" "osm_fieldwork/basemapper.py" = ["N802"] [project.scripts] -json2osm = "osm_fieldwork.json2osm:main" basemapper = "osm_fieldwork.basemapper:main" osm2favorites = "osm_fieldwork.osm2favorities:main" -csv2osm = "osm_fieldwork.CSVDump:main" -odk2csv = "osm_fieldwork.odk2csv:main" odk2osm = "osm_fieldwork.odk2osm:main" -odk2geojson = "osm_fieldwork.odk2geojson:main" -odk_merge = "osm_fieldwork.odk_merge:main" odk_client = "osm_fieldwork.odk_client:main" make_data_extract = "osm_fieldwork.make_data_extract:main" From 653b43fbd1a58705c9d1bba33180208e63a5e780 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:24:28 -0600 Subject: [PATCH 22/30] fix: Add a comment about the files about to be deleted sincee they've been replaced --- osm_fieldwork/odk2csv.py | 4 +++- osm_fieldwork/odk2geojson.py | 4 +++- osm_fieldwork/odk_merge.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/osm_fieldwork/odk2csv.py b/osm_fieldwork/odk2csv.py index 25d97dda..91d451ef 100755 --- a/osm_fieldwork/odk2csv.py +++ b/osm_fieldwork/odk2csv.py @@ -1,7 +1,9 @@ #!/usr/bin/python3 +# This file has been replaced by ODKParsers(), and will be delete in the next release. + # -# Copyright (C) 2020, 2021, 2022, 2023 Humanitarian OpenstreetMap Team +# Copyright (C) 2020, 2021, 2022, 2023, 2024 Humanitarian OpenstreetMap Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/osm_fieldwork/odk2geojson.py b/osm_fieldwork/odk2geojson.py index bb916386..2c3f75ef 100755 --- a/osm_fieldwork/odk2geojson.py +++ b/osm_fieldwork/odk2geojson.py @@ -1,7 +1,9 @@ #!/usr/bin/python3 +# This file has been replaced by ODKParsers(), and will be delete in the next release. + # -# Copyright (C) 2023 Humanitarian OpenstreetMap Team +# Copyright (C) 2023, 2024 Humanitarian OpenstreetMap Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/osm_fieldwork/odk_merge.py b/osm_fieldwork/odk_merge.py index 5b4a6e8c..b75f4451 100755 --- a/osm_fieldwork/odk_merge.py +++ b/osm_fieldwork/odk_merge.py @@ -1,6 +1,8 @@ #!/usr/bin/python3 -# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team +# This file has been replaced by ODKParsers(), and will be delete in the next release. + +# Copyright (c) 2022, 2023, 2024 Humanitarian OpenStreetMap Team # # This program is free software: you can redistribute it and/or # modify it under the terms of the GNU General Public License as From 97dc7fe4ed9dd30fd7f16edb28b1cda51b910cf6 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:25:24 -0600 Subject: [PATCH 23/30] fix: Improve basemap() to split on : as well as - --- osm_fieldwork/support.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/osm_fieldwork/support.py b/osm_fieldwork/support.py index 7602d1f7..cd91a9a8 100644 --- a/osm_fieldwork/support.py +++ b/osm_fieldwork/support.py @@ -40,11 +40,17 @@ def basename( Returns: (str): The last node of the path """ - tmp = line.split("-") - if len(tmp) == 0: + if line.find("-") > 0: + tmp = line.split("-") + if len(tmp) > 0: + return tmp[len(tmp) - 1] + elif line.find(":") > 0: + tmp = line.split(":") + if len(tmp) > 0: + return tmp[len(tmp) - 1] + else: + # return tmp[len(tmp) - 1] return line - base = tmp[len(tmp) - 1] - return base class OutSupport(object): def __init__(self, @@ -92,8 +98,11 @@ def writeOSM( feature (dict): The OSM feature to write to """ out = "" - if "id" in feature["tags"]: - feature["id"] = feature["tags"]["id"] + if "tags" in feature: + if "id" in feature["tags"]: + feature["id"] = feature["tags"]["id"] + else: + return True if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: return None if "refs" not in feature: From 1bacb971b69e93a868648c27a8aa9f09f9d9be0d Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:26:01 -0600 Subject: [PATCH 24/30] fix: Add XMLParser, derived from ODKInstance --- osm_fieldwork/parsers.py | 124 +++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 38 deletions(-) diff --git a/osm_fieldwork/parsers.py b/osm_fieldwork/parsers.py index 834cfae4..27b6c8bd 100644 --- a/osm_fieldwork/parsers.py +++ b/osm_fieldwork/parsers.py @@ -24,11 +24,15 @@ import os import re import sys +import json +import flatdict + from datetime import datetime from collections import OrderedDict from pathlib import Path import xmltodict from geojson import Feature, FeatureCollection, dump +from osm_fieldwork.convert import Convert from osm_fieldwork.osmfile import OsmFile from osm_fieldwork.xlsforms import xlsforms_path from osm_fieldwork.ODKInstance import ODKInstance @@ -100,9 +104,7 @@ def CSVparser( if self.types[base] == "select_multiple": vals = self.convertMultiple(value) if len(vals) > 0: - for tag in vals: - tags.update(tag) - # print(f"BASE {tags}") + tags.update(vals) continue # When using geopoint warmup, once the display changes to the map @@ -134,8 +136,7 @@ def CSVparser( tags[k] = v else: tags[base] = value - - # log.debug(f"\tFIXME1: {tags}") + # log.debug(f"\tFIXME1: {tags}") all_tags.append(tags) return all_tags @@ -183,28 +184,6 @@ def JSONparser( for row in data: # log.debug(f"ROW: {row}\n") tags = dict() - # Extract the location regardless of what the tag is - # called. - # pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") - # gps = re.findall(pat, str(row)) - # tmp = list() - # if len(gps) == 0: - # log.error(f"No location data in: {row}") - # continue - # elif len(gps) == 1: - # # Only the warmup has any coordinates. - # tmp = gps[0].split(" ") - # elif len(gps) == 2: - # # both the warmup and the coordinates have values - # tmp = gps[1].split(" ") - - # if len(tmp) > 0: - # lat = float(tmp[0][:-1]) - # lon = float(tmp[1][:-1]) - # geom = Point([lon, lat]) - # row["geometry"] = geom - # # tags["geometry"] = row["geometry"] - if "properties" in row: row["properties"] # A GeoJson formatted file else: @@ -212,6 +191,7 @@ def JSONparser( # flatten all the groups into a sodk2geojson.pyingle data structure flattened = flatdict.FlatDict(row) + # log.debug(f"FLAT: {flattened}\n") for k, v in flattened.items(): last = k.rfind(":") + 1 key = k[last:] @@ -219,7 +199,7 @@ def JSONparser( # the keyword if key is None or key in self.ignore or v is None: continue - log.debug(f"Processing tag {key} = {v}") + # log.debug(f"Processing tag {key} = {v}") if key == "coordinates": if isinstance(v, list): tags["lat"] = v[1] @@ -235,21 +215,20 @@ def JSONparser( continue vals = self.convertMultiple(v) if len(vals) > 0: - for tag in vals: - tags.update(tag) - # print(f"BASE {tags}") + tags.update(vals) continue - items = self.convertEntry(key, v) if items is None or len(items) == 0: continue if type(items) == str: log.debug(f"string Item {items}") - else: - log.debug(f"dict Item {items}") - if len(items) == 0: - tags.update(items[0]) + elif type(items) == list: + # log.debug(f"list Item {items}") + tags.update(items[0]) + elif type(items) == dict: + # log.debug(f"dict Item {items}") + tags.update(items) # log.debug(f"TAGS: {tags}") if len(tags) > 0: total.append(tags) @@ -257,5 +236,74 @@ def JSONparser( # log.debug(f"Finished parsing JSON file {filespec}") return total -from osm_fieldwork.ODKInstance import ODKInstance -from osm_fieldwork.ODKInstance import ODKInstance + def XMLparser( + self, + filespec: str, + data: str = None, + ) -> list: + """ + Import an ODK XML Instance file ito a data structure. The input is + either a filespec to the Instance file copied off your phone, or + the XML that has been read in elsewhere. + + Args: + filespec (str): The filespec to the ODK XML Instance file + data (str): The XML data + + Returns: + (list): All the entries in the OSM XML Instance file + """ + row = dict() + if filespec: + logging.info("Processing instance file: %s" % filespec) + file = open(filespec, "rb") + # Instances are small, read the whole file + xml = file.read(os.path.getsize(filespec)) + elif data: + xml = data + doc = xmltodict.parse(xml) + + json.dumps(doc) + tags = dict() + data = doc["data"] + flattened = flatdict.FlatDict(data) + # total = list() + # log.debug(f"FLAT: {flattened}") + pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") + for key, value in flattened.items(): + if key[0] == '@' or value is None: + continue + # Get the last element deliminated by a dash + # for CSV & JSON, or a colon for ODK XML. + base = basename(key) + log.debug(f"FLAT: {base} = {value}") + if base in self.ignore: + continue + if re.search(pat, value): + gps = value.split(" ") + row["lat"] = gps[0] + row["lon"] = gps[1] + continue + + if base in self.types: + if self.types[base] == "select_multiple": + # log.debug(f"Found key '{self.types[base]}'") + vals = self.convertMultiple(value) + if len(vals) > 0: + tags.update(vals) + continue + else: + item = self.convertEntry(base, value) + if item is None or len(item) == 0: + continue + if len(tags) == 0: + tags = item[0] + else: + if type(item) == list: + # log.debug(f"list Item {item}") + tags.update(item[0]) + elif type(item) == dict: + # log.debug(f"dict Item {item}") + tags.update(item) + row.update(tags) + return [row] From b253a9d68aef3b147f625babadc8b0361bbbd338 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:26:29 -0600 Subject: [PATCH 25/30] fix: be less verbose --- osm_fieldwork/osmfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/osm_fieldwork/osmfile.py b/osm_fieldwork/osmfile.py index 73ea39c0..2fe5f486 100755 --- a/osm_fieldwork/osmfile.py +++ b/osm_fieldwork/osmfile.py @@ -81,7 +81,7 @@ def __init__( def __del__(self): """Close the OSM XML file automatically.""" - log.debug("Closing output file") + # log.debug("Closing output file") self.footer() def isclosed(self): @@ -112,7 +112,7 @@ def footer(self): def write( self, - data=None, + data = None, ): """Write the data to the OSM XML file.""" if type(data) == list: From 3647f12f3bd06c9f61f2389e08b1e4561f083c76 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:27:31 -0600 Subject: [PATCH 26/30] fix: Use new XMLParser() instead of ODKInstance --- osm_fieldwork/convert.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index 1437311a..e0add921 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -212,14 +212,14 @@ def convertEntry( # If the tag is in the config file, convert it. if self.convertData(newtag): newtag = self.convertTag(newtag) - if newtag != tag: - logging.debug(f"Converted Tag for entry {tag} to {newtag}") + #if newtag != tag: + # logging.debug(f"Converted Tag for entry {tag} to {newtag}") # Truncate the elevation, as it's really long if newtag == "ele": value = value[:7] newval = self.convertValue(newtag, value) - logging.debug("Converted Value for entry '%s' to '%s'" % (value, newval)) + # logging.debug("Converted Value for entry '%s' to '%s'" % (value, newval)) # there can be multiple new tag/value pairs for some values from ODK if type(newval) == str: all.append({newtag: newval}) @@ -294,7 +294,7 @@ def convertTag( if low in self.convert: newtag = self.convert[low] if type(newtag) is str: - logging.debug("\tTag '%s' converted tag to '%s'" % (tag, newtag)) + # logging.debug("\tTag '%s' converted tag to '%s'" % (tag, newtag)) tmp = newtag.split("=") if len(tmp) > 1: newtag = tmp[0] @@ -323,18 +323,20 @@ def convertMultiple( Returns: (list): The new tags """ - tags = list() + tags = dict() for tag in value.split(' '): low = tag.lower() if self.convertData(low): newtag = self.convert[low] - # tags.append({newtag}: {value}) if newtag.find('=') > 0: tmp = newtag.split('=') - tags.append({tmp[0]: tmp[1]}) + if tmp[0] in tags: + tags[tmp[0]] = f"{tags[tmp[0]]};{tmp[1]}" + else: + tags.update({tmp[0]: tmp[1]}) else: - tags.append({low: "yes"}) - logging.debug(f"\tConverted multiple to {tags}") + tags.update({low: "yes"}) + # logging.debug(f"\tConverted multiple to {tags}") return tags def parseXLS( From 3f7b94d1c9e88420e1565a46169fdbb1228c0ec5 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 13:28:09 -0600 Subject: [PATCH 27/30] fix: Use new Parsers() class instead of the old files --- osm_fieldwork/odk2osm.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/osm_fieldwork/odk2osm.py b/osm_fieldwork/odk2osm.py index 1bfdad88..c977749e 100755 --- a/osm_fieldwork/odk2osm.py +++ b/osm_fieldwork/odk2osm.py @@ -31,8 +31,7 @@ from osm_fieldwork.convert import Convert from osm_fieldwork.ODKInstance import ODKInstance from osm_fieldwork.support import OutSupport -from osm_fieldwork.jsondump import JsonDump -from osm_fieldwork.csvdump import CSVDump +from osm_fieldwork.parsers import ODKParsers # Instantiate logger log = logging.getLogger(__name__) @@ -60,9 +59,10 @@ def main(): ) toplevel = Path(args.infile) + odk = ODKParsers(args.yaml) + odk.parseXLS(args.xlsfile) out = OutSupport() xmlfiles = list() - convert = Convert() data = list() # It's a wildcard, used for XML instance files if args.infile.find("*") >= 0: @@ -70,13 +70,11 @@ def main(): toplevel = Path(args.infile[:-1]) for dirs in glob.glob(args.infile): xml = os.listdir(dirs) - # There ilineagelineages always only one XML file per infile full = os.path.join(dirs, xml[0]) xmlfiles.append(full) for infile in xmlfiles: - logging.info(f"Processing infile {infile}") - odk = ODKInstance(infile) - entry = convert.createEntry(odk.data) + tmp = odk.XMLparser(infile) + entry = odk.createEntry(tmp[0]) data.append(entry) elif toplevel.suffix == '.xml': # It's an instance file from ODK Collect @@ -84,27 +82,18 @@ def main(): # There is always only one XML file per infile full = os.path.join(toplevel, os.path.basename(toplevel)) xmlfiles.append(full + ".xml") - odk = ODKInstance(args.infile) - entry = convert.createEntry(odk.data) + tmp = odk.XMLparser(args.infile) + # odki = ODKInstance(filespec=args.infile, yaml=args.yaml) + entry = odk.createEntry(tmp) data.append(entry) elif toplevel.suffix == ".csv": log.debug(f"Parsing csv files {args.infile}") - if args.yaml: - csvin = CSVDump(args.yaml) - else: - csvin = CSVDump() - csvin.parseXLS(args.xlsfile) - for entry in csvin.parse(args.infile): - data.append(convert.createEntry(entry)) + for entry in odk.CSVparser(args.infile): + data.append(odk.createEntry(entry)) elif toplevel.suffix == ".json": log.debug(f"Parsing json files {args.infile}") - if args.yaml: - jsonin = JsonDump(args.yaml) - else: - jsonin = JsonDump() - jsonin.parseXLS(args.xlsfile) - for entry in jsonin.parse(args.infile): - data.append(convert.createEntry(entry)) + for entry in odk.JSONparser(args.infile): + data.append(odk.createEntry(entry)) # Write the data out.WriteData(toplevel.stem, data) From 995355510402419f0bb40cbc007f17fb7c7ea3ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jun 2024 19:45:48 +0000 Subject: [PATCH 28/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- osm_fieldwork/ODKInstance.py | 16 ++++----- osm_fieldwork/convert.py | 67 ++++++++++++++---------------------- osm_fieldwork/csvdump.py | 17 ++++----- osm_fieldwork/jsondump.py | 12 +++---- osm_fieldwork/odk2osm.py | 19 ++++------ osm_fieldwork/osmfile.py | 2 +- osm_fieldwork/parsers.py | 32 ++++++----------- osm_fieldwork/support.py | 51 ++++++++++++++------------- tests/test_convert.py | 1 - 9 files changed, 88 insertions(+), 129 deletions(-) diff --git a/osm_fieldwork/ODKInstance.py b/osm_fieldwork/ODKInstance.py index 77d3238d..626d3134 100755 --- a/osm_fieldwork/ODKInstance.py +++ b/osm_fieldwork/ODKInstance.py @@ -18,26 +18,26 @@ # import argparse +import json import logging import os import re import sys + import flatdict -import json -from collections import OrderedDict import xmltodict # Instantiate logger log = logging.getLogger(__name__) + class ODKInstance(object): def __init__( self, filespec: str = None, data: str = None, ): - """ - This class imports a ODK Instance file, which is in XML into a + """This class imports a ODK Instance file, which is in XML into a data structure. Args: @@ -60,8 +60,7 @@ def parse( filespec: str, data: str = None, ) -> dict: - """ - Import an ODK XML Instance file ito a data structure. The input is + """Import an ODK XML Instance file ito a data structure. The input is either a filespec to the Instance file copied off your phone, or the XML that has been read in elsewhere. @@ -89,7 +88,7 @@ def parse( rows = list() pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") for key, value in flattened.items(): - if key[0] == '@' or value is None: + if key[0] == "@" or value is None: continue if re.search(pat, value): gps = value.split(" ") @@ -98,13 +97,14 @@ def parse( continue # print(key, value) - tmp = key.split(':') + tmp = key.split(":") if tmp[len(tmp) - 1] in self.ignore: continue row[tmp[len(tmp) - 1]] = value return row + if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" parser = argparse.ArgumentParser() diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index 26d9ce6c..cd55b7b7 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -22,8 +22,6 @@ import logging import re import sys -import pandas as pd -import re import pandas as pd @@ -33,9 +31,9 @@ # Instantiate logger log = logging.getLogger(__name__) + def escape(value: str) -> str: - """ - Escape characters like embedded quotes in text fields. + """Escape characters like embedded quotes in text fields. Args: value (str):The string to modify @@ -47,9 +45,9 @@ def escape(value: str) -> str: tmp = value.replace("&", " and ") return tmp.replace("'", "'") + class Convert(YamlFile): - """ - A class to apply a YAML config file and convert ODK to OSM. + """A class to apply a YAML config file and convert ODK to OSM. Returns: (Convert): An instance of this object @@ -102,8 +100,7 @@ def privateData( self, keyword: str, ) -> bool: - """ - Search the private data category for a keyword. + """Search the private data category for a keyword. Args: keyword (str): The keyword to search for @@ -117,8 +114,7 @@ def convertData( self, keyword: str, ) -> bool: - """ - Search the convert data category for a keyword. + """Search the convert data category for a keyword. Args: keyword (str): The keyword to search for @@ -132,8 +128,7 @@ def ignoreData( self, keyword: str, ) -> bool: - """ - Search the convert data category for a ketyword. + """Search the convert data category for a ketyword. Args: keyword (str): The keyword to search for @@ -147,8 +142,7 @@ def getKeyword( self, value: str, ) -> str: - """ - Get the keyword for a value from the yaml file. + """Get the keyword for a value from the yaml file. Args: value (str): The value to find the keyword for @@ -167,8 +161,7 @@ def getValues( self, keyword: str = None, ) -> str: - """ - Get the values for a primary key. + """Get the values for a primary key. Args: keyword (str): The keyword to get the value of @@ -187,8 +180,7 @@ def convertEntry( tag: str, value: str, ) -> list: - """ - Convert a tag and value from the ODK represention to an OSM one. + """Convert a tag and value from the ODK represention to an OSM one. Args: tag (str): The tag from the ODK XML file @@ -215,7 +207,7 @@ def convertEntry( # If the tag is in the config file, convert it. if self.convertData(newtag): newtag = self.convertTag(newtag) - #if newtag != tag: + # if newtag != tag: # logging.debug(f"Converted Tag for entry {tag} to {newtag}") # Truncate the elevation, as it's really long @@ -240,8 +232,7 @@ def convertValue( tag: str, value: str, ) -> list: - """ - Convert a single tag value. + """Convert a single tag value. Args: tag (str): The tag from the ODK XML file @@ -284,8 +275,7 @@ def convertTag( self, tag: str, ) -> str: - """ - Convert a single tag. + """Convert a single tag. Args: tag (str): The tag from the ODK XML file @@ -317,8 +307,7 @@ def convertMultiple( self, value: str, ) -> list: - """ - Convert a multiple tags from a select_multiple question.. + """Convert a multiple tags from a select_multiple question.. Args: value (str): The tags from the ODK XML file @@ -327,12 +316,12 @@ def convertMultiple( (list): The new tags """ tags = dict() - for tag in value.split(' '): + for tag in value.split(" "): low = tag.lower() if self.convertData(low): newtag = self.convert[low] - if newtag.find('=') > 0: - tmp = newtag.split('=') + if newtag.find("=") > 0: + tmp = newtag.split("=") if tmp[0] in tags: tags[tmp[0]] = f"{tags[tmp[0]]};{tmp[1]}" else: @@ -354,10 +343,10 @@ def parseXLS( defaults = self.entries["default"] i = 0 while i < len(self.entries): - if type(self.entries['type'][i]) == float: - self.types[self.entries['name'][i]] = None + if type(self.entries["type"][i]) == float: + self.types[self.entries["name"][i]] = None else: - self.types[self.entries['name'][i]] = self.entries['type'][i].split(' ')[0] + self.types[self.entries["name"][i]] = self.entries["type"][i].split(" ")[0] i += 1 total = len(names) i = 0 @@ -377,8 +366,7 @@ def createEntry( self, entry: dict, ) -> dict: - """ - Create the feature data structure. + """Create the feature data structure. Args: entry (dict): The feature data @@ -429,8 +417,8 @@ def createEntry( # log.debug("Adding attribute %s with value %s" % (key, value)) continue if value is not None and value != "no" and value != "unknown": - if key == 'username': - tags['user'] = value + if key == "username": + tags["user"] = value continue items = self.convertEntry(key, value) if key in self.types: @@ -466,9 +454,7 @@ def createEntry( return feature def dump(self): - """ - Dump internal data structures, for debugging purposes only. - """ + """Dump internal data structures, for debugging purposes only.""" print("YAML file: %s" % self.filespec) print("Convert section") for key, val in self.convert.items(): @@ -489,9 +475,7 @@ def dump(self): # this way than using pytest, # def main(): - """ - This main function lets this class be run standalone by a bash script. - """ + """This main function lets this class be run standalone by a bash script.""" parser = argparse.ArgumentParser(description="Read and parse a YAML file") parser.add_argument("-v", "--verbose", action="store_true", help="verbose output") @@ -553,6 +537,7 @@ def main(): for i in entry: print("XX: %r" % i) + if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() diff --git a/osm_fieldwork/csvdump.py b/osm_fieldwork/csvdump.py index 16340141..d68e3695 100755 --- a/osm_fieldwork/csvdump.py +++ b/osm_fieldwork/csvdump.py @@ -22,26 +22,20 @@ import csv import logging import os -import re import sys from datetime import datetime -import pandas as pd -from geojson import Feature, FeatureCollection, Point, dump - from osm_fieldwork.convert import Convert -from osm_fieldwork.osmfile import OsmFile -from osm_fieldwork.xlsforms import xlsforms_path from osm_fieldwork.support import basename +from osm_fieldwork.xlsforms import xlsforms_path # Instantiate logger log = logging.getLogger(__name__) class CSVDump(Convert): - """ - A class to parse the CSV files from ODK Central. - """ + """A class to parse the CSV files from ODK Central.""" + def __init__( self, yaml: str = None, @@ -68,8 +62,7 @@ def parse( filespec: str, data: str = None, ) -> list: - """ - Parse the CSV file from ODK Central and convert it to a data structure. + """Parse the CSV file from ODK Central and convert it to a data structure. Args: filespec (str): The file to parse. @@ -139,6 +132,7 @@ def parse( all_tags.append(tags) return all_tags + def main(): """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert CSV from ODK Central to OSM XML") @@ -208,6 +202,7 @@ def main(): log.info("Wrote OSM XML file: %r" % osmoutfile) log.info("Wrote GeoJson file: %r" % jsonoutfile) + if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() diff --git a/osm_fieldwork/jsondump.py b/osm_fieldwork/jsondump.py index e0e1602c..2bde04a3 100755 --- a/osm_fieldwork/jsondump.py +++ b/osm_fieldwork/jsondump.py @@ -23,17 +23,13 @@ import logging # import pandas as pd -import re import sys from pathlib import Path import flatdict import geojson -import shapely -from geojson import Feature, FeatureCollection, Point, dump from osm_fieldwork.convert import Convert -from osm_fieldwork.osmfile import OsmFile log = logging.getLogger(__name__) @@ -45,8 +41,7 @@ def __init__( self, yaml: str = None, ): - """ - A class to convert the JSON file from ODK Central, or the GeoJson + """A class to convert the JSON file from ODK Central, or the GeoJson file created by the odk2geojson utility. Args: @@ -68,8 +63,7 @@ def parse( filespec: str = None, data: str = None, ) -> list: - """ - Parse the JSON file from ODK Central and convert it to a data structure. + """Parse the JSON file from ODK Central and convert it to a data structure. The input is either a filespec to open, or the data itself. Args: @@ -181,6 +175,7 @@ def parse( # log.debug(f"Finished parsing JSON file {filespec}") return total + def main(): """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert JSON from ODK Central to OSM XML") @@ -252,6 +247,7 @@ def main(): log.info("Wrote OSM XML file: %r" % osmoutfile) log.info("Wrote GeoJson file: %r" % jsonoutfile) + if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() diff --git a/osm_fieldwork/odk2osm.py b/osm_fieldwork/odk2osm.py index c977749e..663bd3fc 100755 --- a/osm_fieldwork/odk2osm.py +++ b/osm_fieldwork/odk2osm.py @@ -18,27 +18,21 @@ # import argparse -import csv +import glob import logging import os -import re import sys -from collections import OrderedDict -from datetime import datetime from pathlib import Path -import xmltodict -import glob -from osm_fieldwork.convert import Convert -from osm_fieldwork.ODKInstance import ODKInstance -from osm_fieldwork.support import OutSupport + from osm_fieldwork.parsers import ODKParsers +from osm_fieldwork.support import OutSupport # Instantiate logger log = logging.getLogger(__name__) + def main(): - """ - This is a program that reads in the ODK Instance file, which is in XML, + """This is a program that reads in the ODK Instance file, which is in XML, and converts it to an OSM XML file so it can be viewed in an editor. """ parser = argparse.ArgumentParser(description="Convert ODK XML instance file to OSM XML format") @@ -76,7 +70,7 @@ def main(): tmp = odk.XMLparser(infile) entry = odk.createEntry(tmp[0]) data.append(entry) - elif toplevel.suffix == '.xml': + elif toplevel.suffix == ".xml": # It's an instance file from ODK Collect log.debug(f"Parsing ODK XML files {args.infile}") # There is always only one XML file per infile @@ -98,6 +92,7 @@ def main(): # Write the data out.WriteData(toplevel.stem, data) + if __name__ == "__main__": """This is just a hook so this file can be run standlone during development.""" main() diff --git a/osm_fieldwork/osmfile.py b/osm_fieldwork/osmfile.py index 2fe5f486..ac1eec96 100755 --- a/osm_fieldwork/osmfile.py +++ b/osm_fieldwork/osmfile.py @@ -112,7 +112,7 @@ def footer(self): def write( self, - data = None, + data=None, ): """Write the data to the OSM XML file.""" if type(data) == list: diff --git a/osm_fieldwork/parsers.py b/osm_fieldwork/parsers.py index 27b6c8bd..3d8e718a 100644 --- a/osm_fieldwork/parsers.py +++ b/osm_fieldwork/parsers.py @@ -18,34 +18,27 @@ # along with OSM-Fieldwork. If not, see . # -import argparse import csv +import json import logging import os import re -import sys -import json -import flatdict - -from datetime import datetime -from collections import OrderedDict from pathlib import Path + +import flatdict import xmltodict -from geojson import Feature, FeatureCollection, dump + from osm_fieldwork.convert import Convert -from osm_fieldwork.osmfile import OsmFile -from osm_fieldwork.xlsforms import xlsforms_path -from osm_fieldwork.ODKInstance import ODKInstance from osm_fieldwork.support import basename +from osm_fieldwork.xlsforms import xlsforms_path # Instantiate logger log = logging.getLogger(__name__) class ODKParsers(Convert): - """ - A class to parse the CSV files from ODK Central. - """ + """A class to parse the CSV files from ODK Central.""" + def __init__( self, yaml: str = None, @@ -72,8 +65,7 @@ def CSVparser( filespec: str, data: str = None, ) -> list: - """ - Parse the CSV file from ODK Central and convert it to a data structure. + """Parse the CSV file from ODK Central and convert it to a data structure. Args: filespec (str): The file to parse. @@ -145,8 +137,7 @@ def JSONparser( filespec: str = None, data: str = None, ) -> list: - """ - Parse the JSON file from ODK Central and convert it to a data structure. + """Parse the JSON file from ODK Central and convert it to a data structure. The input is either a filespec to open, or the data itself. Args: @@ -241,8 +232,7 @@ def XMLparser( filespec: str, data: str = None, ) -> list: - """ - Import an ODK XML Instance file ito a data structure. The input is + """Import an ODK XML Instance file ito a data structure. The input is either a filespec to the Instance file copied off your phone, or the XML that has been read in elsewhere. @@ -271,7 +261,7 @@ def XMLparser( # log.debug(f"FLAT: {flattened}") pat = re.compile("[0-9.]* [0-9.-]* [0-9.]* [0-9.]*") for key, value in flattened.items(): - if key[0] == '@' or value is None: + if key[0] == "@" or value is None: continue # Get the last element deliminated by a dash # for CSV & JSON, or a colon for ODK XML. diff --git a/osm_fieldwork/support.py b/osm_fieldwork/support.py index cd91a9a8..6fa50379 100644 --- a/osm_fieldwork/support.py +++ b/osm_fieldwork/support.py @@ -20,19 +20,20 @@ import logging from datetime import datetime +from pathlib import Path + from geojson import Feature, FeatureCollection, Point, dump + from osm_fieldwork.osmfile import OsmFile -from osm_fieldwork.xlsforms import xlsforms_path -from pathlib import Path # Instantiate logger log = logging.getLogger(__name__) + def basename( line: str, ) -> str: - """ - Extract the basename of a path after the last -. + """Extract the basename of a path after the last -. Args: line (str): The path from the json file entry @@ -52,10 +53,12 @@ def basename( # return tmp[len(tmp) - 1] return line + class OutSupport(object): - def __init__(self, - filespec: str = None, - ): + def __init__( + self, + filespec: str = None, + ): self.osm = None self.filespec = filespec self.features = list() @@ -72,8 +75,7 @@ def createOSM( self, filespec: str = None, ) -> bool: - """ - Create an OSM XML output files. + """Create an OSM XML output files. Args: filespec (str): The output file name @@ -91,8 +93,7 @@ def writeOSM( self, feature: dict, ) -> bool: - """ - Write a feature to an OSM XML output file. + """Write a feature to an OSM XML output file. Args: feature (dict): The OSM feature to write to @@ -122,8 +123,7 @@ def createGeoJson( self, filespec: str = "tmp.geojson", ) -> bool: - """ - Create a GeoJson output file. + """Create a GeoJson output file. Args: filespec (str): The output file name @@ -137,8 +137,7 @@ def writeGeoJson( self, feature: dict, ) -> bool: - """ - Write a feature to a GeoJson output file. + """Write a feature to a GeoJson output file. Args: feature (dict): The OSM feature to write to @@ -151,9 +150,7 @@ def writeGeoJson( return True def finishGeoJson(self): - """ - Write the GeoJson FeatureCollection to the output file and close it. - """ + """Write the GeoJson FeatureCollection to the output file and close it.""" features = list() for item in self.features: if len(item["attrs"]["lon"]) == 0 or len(item["attrs"]["lat"]) == 0: @@ -168,13 +165,12 @@ def finishGeoJson(self): collection = FeatureCollection(features) dump(collection, self.json) - - def WriteData(self, - base: str, - data: dict(), - ) -> bool: - """ - Write the data to the output files. + def WriteData( + self, + base: str, + data: dict(), + ) -> bool: + """Write the data to the output files. Args: base (str): The base of the input file name @@ -201,7 +197,10 @@ def WriteData(self, if len(ref) == 0: continue coords = ref.split(" ") - node = {"attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, "tags": dict()} + node = { + "attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, + "tags": dict(), + } self.writeOSM(node) self.writeGeoJson(node) refs.append(nodeid) diff --git a/tests/test_convert.py b/tests/test_convert.py index 2b81e9d5..5adb0046 100755 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -21,7 +21,6 @@ import argparse import logging import os -import logging import sys from osm_fieldwork.convert import Convert From cb0fe7ab8e508ea110186be6baa99a2faff8da5e Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 14:02:56 -0600 Subject: [PATCH 29/30] fix: Fix tests to work with new ODKParsers() class --- tests/test_central.py | 0 tests/test_convert.py | 4 ++-- tests/test_csv.py | 14 ++++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) mode change 100644 => 100755 tests/test_central.py diff --git a/tests/test_central.py b/tests/test_central.py old mode 100644 new mode 100755 diff --git a/tests/test_convert.py b/tests/test_convert.py index 2b81e9d5..12419514 100755 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -81,8 +81,8 @@ def test_multiple_value(): """Test tag value conversion.""" hits = 0 vals = csv.convertMultiple("picnic_table fire_pit parking") - print(vals) - if len(vals) > 0 and vals[0]["leisure"] == "picnic_table" and vals[1]["leisure"] == "firepit": + # print(vals) + if len(vals) > 0 and vals["leisure"] == "picnic_table;firepit": hits += 1 assert hits == 1 diff --git a/tests/test_csv.py b/tests/test_csv.py index 7509cbad..8ee2125f 100755 --- a/tests/test_csv.py +++ b/tests/test_csv.py @@ -21,7 +21,8 @@ import argparse import os -from osm_fieldwork.CSVDump import CSVDump +from osm_fieldwork.parsers import ODKParsers +from osm_fieldwork.support import OutSupport # find the path of root tests dir rootdir = os.path.dirname(os.path.abspath(__file__)) @@ -30,20 +31,21 @@ def test_csv(): """Make sure the CSV file got loaded and parsed.""" # FIXME use fixture - csv = CSVDump() - data = csv.parse(f"{rootdir}/testdata/test.csv") + csv = ODKParsers() + data = csv.CSVparser(f"{rootdir}/testdata/test.csv") assert len(data) > 0 def test_init(): """Make sure the YAML file got loaded.""" - csv = CSVDump() + csv = ODKParsers() assert len(csv.yaml.yaml) > 0 def test_osm_entry(infile=f"{rootdir}/testdata/test.csv"): - csv = CSVDump() - csv.createOSM(infile) + csv = ODKParsers() + out = OutSupport() + out.createOSM(infile) line = { "timestamp": "2021-09-25T14:27:43.862Z", "end": "2021-09-24T17:55:26.194-06:00", From 0e42198fdcfa34668049a3293cc65d38fae70e91 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sat, 15 Jun 2024 14:03:37 -0600 Subject: [PATCH 30/30] fix: Make sure value also isn't NULL --- osm_fieldwork/parsers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/osm_fieldwork/parsers.py b/osm_fieldwork/parsers.py index 27b6c8bd..f3627f38 100644 --- a/osm_fieldwork/parsers.py +++ b/osm_fieldwork/parsers.py @@ -92,7 +92,9 @@ def CSVparser( tags = dict() # log.info(f"ROW: {row}") for keyword, value in row.items(): - if keyword is None or len(value) == 0: + if keyword is None or value is None: + continue + if len(value) == 0: continue base = basename(keyword).lower() # There's many extraneous fields in the input file which we don't need.