From a21dfc2ae0abf32d808c9b0973f681d3bafb5884 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Wed, 5 Jun 2024 05:05:56 -0600 Subject: [PATCH] feat: add support for select_multiple in XLSForms (#257) * fix: Set self.file to None so we don't get duplicate footers * fix: Add leisure fields, ignore cellular * fix: Add function to parse select_multiple * fix: Use new convertMultiple() to support select_multiple in XForms * fix: Refactor test case for select_multiple, now it actually works * fix: Minor reformatting and updating of code comment blocks, also add return data types * fix: Update and reformat all code comment blocks * fix: Move pareseXLS to the Convert class so it can be shared * fix: Move createEntry() to Convert class so it can be shared * fix: refactor converting a JSON file from Central to OSM XML and add select_multiple support * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- osm_fieldwork/CSVDump.py | 213 +++++++++------------- osm_fieldwork/convert.py | 182 +++++++++++++++++-- osm_fieldwork/json2osm.py | 366 ++++++++++++++++---------------------- osm_fieldwork/osmfile.py | 1 + osm_fieldwork/xforms.yaml | 10 +- tests/test_convert.py | 23 ++- 6 files changed, 435 insertions(+), 360 deletions(-) diff --git a/osm_fieldwork/CSVDump.py b/osm_fieldwork/CSVDump.py index 83cc256f..1f608faf 100755 --- a/osm_fieldwork/CSVDump.py +++ b/osm_fieldwork/CSVDump.py @@ -22,11 +22,9 @@ import csv import logging import os -import re import sys from datetime import datetime -import pandas as pd from geojson import Feature, FeatureCollection, Point, dump from osm_fieldwork.convert import Convert @@ -58,11 +56,22 @@ def __init__( self.config = super().__init__(yaml) self.saved = dict() self.defaults = dict() + self.entries = dict() + self.types = dict() def lastSaved( self, keyword: str, - ): + ) -> str: + """Get the last saved value for a question. + + Args: + keyword (str): The keyword to search for + + Returns: + (str): The last saved value for the question + + """ if keyword is not None and len(keyword) > 0: return self.saved[keyword] return None @@ -71,39 +80,32 @@ def updateSaved( self, keyword: str, value: str, - ): + ) -> bool: + """Update the last saved value for a question. + + Args: + keyword (str): The keyword to search for + value (str): The new value + + Returns: + (bool): If the new value got saved + + """ if keyword is not None and value is not None and len(value) > 0: self.saved[keyword] = value - - def parseXLS( - self, - xlsfile: str, - ): - """Parse the source XLSFile if available to look for details we need.""" - if xlsfile is not None and len(xlsfile) > 0: - entries = pd.read_excel(xlsfile, sheet_name=[0]) - # There will only be a single sheet - names = entries[0]["name"] - defaults = entries[0]["default"] - total = len(names) - i = 0 - while i < total: - entry = defaults[i] - if str(entry) != "nan": - pat = re.compile("..last-saved.*") - if pat.match(entry): - name = entry.split("#")[1][:-1] - self.saved[name] = None - else: - self.defaults[names[i]] = entry - i += 1 - return True + return True + else: + return False def createOSM( self, filespec: str, ): - """Create an OSM XML output files.""" + """Create an OSM XML output files. + + Args: + filespec (str): The output file name + """ log.debug("Creating OSM XML file: %s" % filespec) self.osm = OsmFile(filespec) # self.osm.header() @@ -112,7 +114,11 @@ def writeOSM( self, feature: dict, ): - """Write a feature to an OSM XML output file.""" + """Write a feature to an OSM XML output file. + + Args: + feature (dict): The OSM feature to write to + """ out = "" if "id" in feature["tags"]: feature["id"] = feature["tags"]["id"] @@ -131,17 +137,25 @@ def finishOSM(self): def createGeoJson( self, - file: str = "tmp.geojson", + filespec: str = "tmp.geojson", ): - """Create a GeoJson output file.""" - log.debug("Creating GeoJson file: %s" % file) - self.json = open(file, "w") + """Create a GeoJson output file. + + Args: + filespec (str): The output file name + """ + log.debug("Creating GeoJson file: %s" % filespec) + self.json = open(filespec, "w") def writeGeoJson( self, feature: dict, ): - """Write a feature to a GeoJson output file.""" + """Write a feature to a GeoJson output file. + + Args: + feature (dict): The OSM feature to write to + """ # These get written later when finishing , since we have to create a FeatureCollection if "lat" not in feature["attrs"] or "lon" not in feature["attrs"]: return None @@ -167,8 +181,16 @@ def parse( self, filespec: str, data: str = None, - ): - """Parse the CSV file from ODK Central and convert it to a data structure.""" + ) -> list: + """Parse the CSV file from ODK Central and convert it to a data structure. + + Args: + filespec (str): The file to parse. + data (str): Or the data to parse. + + Returns: + (list): The list of features with tags + """ all_tags = list() if not data: f = open(filespec, newline="") @@ -179,32 +201,34 @@ def parse( tags = dict() # log.info(f"ROW: {row}") for keyword, value in row.items(): - if keyword is None or len(keyword) == 0: + if keyword is None or len(value) == 0: continue - base = self.basename(keyword).lower() # There's many extraneous fields in the input file which we don't need. if base is None or base in self.ignore or value is None: continue - # if base in self.multiple: - # epdb.st() - # entry = reader[keyword] - # for key, val in entry.items(): - # print(key)75.66.108.181 - # if key == "name": - # tags['name'] = val - # continue else: + # log.info(f"ITEM: {keyword} = {value}") + if base in self.types: + if self.types[base] == "select_multiple": + vals = self.convertMultiple(value) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue # When using geopoint warmup, once the display changes to the map + # location, there is not always a value if the accuracy is way # off. In this case use the warmup value, which is where we are - # standing anyway. + # hopefully standing anyway. if base == "latitude" and len(value) == 0: if "warmup-Latitude" in row: value = row["warmup-Latitude"] if base == "longitude" and len(value) == 0: value = row["warmup-Longitude"] items = self.convertEntry(base, value) + # log.info(f"ROW: {base} {value}") if len(items) > 0: if base in self.saved: @@ -224,6 +248,7 @@ def parse( tags[k] = v else: tags[base] = value + # log.debug(f"\tFIXME1: {tags}") all_tags.append(tags) return all_tags @@ -231,94 +256,24 @@ def parse( def basename( self, line: str, - ): - """Extract the basename of a path after the last -.""" + ) -> str: + """Extract the basename of a path after the last -. + + Args: + line (str): The path from the json file entry + + Returns: + (str): The last node of the path + """ tmp = line.split("-") if len(tmp) == 0: return line base = tmp[len(tmp) - 1] return base - def createEntry( - self, - entry: dict, - ): - """Create the feature data structure.""" - # print(line) - feature = dict() - attrs = dict() - tags = dict() - priv = dict() - refs = list() - - # log.debug("Creating entry") - # First convert the tag to the approved OSM equivalent - if "lat" in entry and "lon" in entry: - attrs["lat"] = entry["lat"] - attrs["lon"] = entry["lon"] - for key, value in entry.items(): - attributes = ( - "id", - "timestamp", - "lat", - "lon", - "uid", - "user", - "version", - "action", - ) - - # When using existing OSM data, there's a special geometry field. - # Otherwise use the GPS coordinates where you are. - if key == "geometry" and len(value) > 0: - geometry = value.split(" ") - if len(geometry) == 4: - attrs["lat"] = geometry[0] - attrs["lon"] = geometry[1] - continue - - if len(attrs["lat"]) == 0: - continue - if key is not None and len(key) > 0 and key in attributes: - attrs[key] = value - log.debug("Adding attribute %s with value %s" % (key, value)) - else: - if key in self.multiple: - for item in value: - if key in item: - for entry in item[key].split(): - vals = self.getValues(key) - if entry in vals: - if vals[entry].find("="): - tmp = vals[entry].split("=") - tags[tmp[0]] = tmp[1] - else: - tags[entry] = "yes" - continue - - if value is not None and value != "no" and value != "unknown": - if key == "track" or key == "geoline": - # refs.append(tags) - # log.debug("Adding reference %s" % tags) - refs = value.split(";") - elif len(value) > 0: - if self.privateData(key): - priv[key] = value - else: - tags[key] = value - if len(tags) > 0: - feature["attrs"] = attrs - feature["tags"] = tags - if len(refs) > 1: - feature["refs"] = refs - if len(priv) > 0: - feature["private"] = priv - - return feature - def main(): - """ """ + """Run conversion directly from the terminal.""" parser = argparse.ArgumentParser(description="convert CSV from ODK Central to OSM XML") parser.add_argument("-v", "--verbose", action="store_true", help="verbose output") parser.add_argument("-y", "--yaml", help="Alternate YAML file") @@ -340,6 +295,7 @@ def main(): csvin = CSVDump(args.yaml) else: csvin = CSVDump() + csvin.parseXLS(args.xlsfile) osmoutfile = os.path.basename(args.infile.replace(".csv", ".osm")) csvin.createOSM(osmoutfile) @@ -367,6 +323,7 @@ def main(): csvin.writeOSM(node) refs.append(nodeid) nodeid -= 1 + feature["refs"] = refs csvin.writeOSM(feature) else: diff --git a/osm_fieldwork/convert.py b/osm_fieldwork/convert.py index ceae62df..42fa9991 100755 --- a/osm_fieldwork/convert.py +++ b/osm_fieldwork/convert.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2020, 2021, 2022, 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2020, 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of OSM-Fieldwork. # @@ -20,8 +20,11 @@ import argparse import logging +import re import sys +import pandas as pd + from osm_fieldwork.xlsforms import xlsforms_path from osm_fieldwork.yamlfile import YamlFile @@ -29,7 +32,7 @@ log = logging.getLogger(__name__) -def escape(value: str): +def escape(value: str) -> str: """Escape characters like embedded quotes in text fields. Args: @@ -66,6 +69,10 @@ def __init__( self.convert = dict() self.ignore = list() self.private = list() + self.defaults = dict() + self.entries = dict() + self.types = dict() + self.saved = dict() for item in self.yaml.yaml["convert"]: key = list(item.keys())[0] value = item[key] @@ -92,22 +99,22 @@ def __init__( def privateData( self, keyword: str, - ): - """See is a keyword is in the private data category. + ) -> bool: + """Search he private data category for a keyword. Args: keyword (str): The keyword to search for Returns: - (bool): Check to see if the keyword is in the private data section + (bool): =If the keyword is in the private data section """ return keyword.lower() in self.private def convertData( self, keyword: str, - ): - """See is a keyword is in the convert data category. + ) -> bool: + """Search the convert data category for a keyword. Args: keyword (str): The keyword to search for @@ -120,8 +127,8 @@ def convertData( def ignoreData( self, keyword: str, - ): - """See is a keyword is in the convert data category. + ) -> bool: + """Search the convert data category for a ketyword. Args: keyword (str): The keyword to search for @@ -134,11 +141,12 @@ def ignoreData( def getKeyword( self, value: str, - ): + ) -> str: """Get the keyword for a value from the yaml file. Args: value (str): The value to find the keyword for + Returns: (str): The keyword if found, or None """ @@ -152,7 +160,7 @@ def getKeyword( def getValues( self, keyword: str = None, - ): + ) -> str: """Get the values for a primary key. Args: @@ -171,7 +179,7 @@ def convertEntry( self, tag: str, value: str, - ): + ) -> list: """Convert a tag and value from the ODK represention to an OSM one. Args: @@ -188,6 +196,9 @@ def convertEntry( # logging.debug(f"FIXME: Ignoring {tag}") return None low = tag.lower() + if value is None: + return low + if low not in self.convert and low not in self.ignore and low not in self.private: return {tag: value} @@ -220,7 +231,7 @@ def convertValue( self, tag: str, value: str, - ): + ) -> list: """Convert a single tag value. Args: @@ -256,14 +267,14 @@ def convertValue( entry[tag] = vals[value] else: entry[tmp[0]] = tmp[1] - logging.debug("\tValue %s converted to %s" % (value, entry)) + logging.debug("\tValue %s converted value to %s" % (value, entry)) all.append(entry) return all def convertTag( self, tag: str, - ): + ) -> str: """Convert a single tag. Args: @@ -276,21 +287,158 @@ def convertTag( if low in self.convert: newtag = self.convert[low] if type(newtag) is str: - logging.debug("\tTag '%s' converted to '%s'" % (tag, newtag)) + logging.debug("\tTag '%s' converted tag to '%s'" % (tag, newtag)) tmp = newtag.split("=") if len(tmp) > 1: newtag = tmp[0] elif type(newtag) is list: logging.error("FIXME: list()") # epdb.st() - return low + return low, value elif type(newtag) is dict: # logging.error("FIXME: dict()") return low return newtag.lower() else: + logging.debug(f"Not in convert!: {low}") return low + def convertMultiple( + self, + value: str, + ) -> list: + """Convert a multiple tags from a select_multiple question.. + + Args: + value (str): The tags from the ODK XML file + + Returns: + (list): The new tags + """ + tags = list() + for tag in value.split(" "): + low = tag.lower() + if self.convertData(low): + newtag = self.convert[low] + # tags.append({newtag}: {value}) + if newtag.find("=") > 0: + tmp = newtag.split("=") + tags.append({tmp[0]: tmp[1]}) + else: + tags.append({low: "yes"}) + logging.debug(f"\tConverted multiple to {tags}") + return tags + + def parseXLS( + self, + xlsfile: str, + ): + """Parse the source XLSFile if available to look for details we need.""" + if xlsfile is not None and len(xlsfile) > 0: + self.entries = pd.read_excel(xlsfile, sheet_name=[0])[0] + # There will only be a single sheet + names = self.entries["name"] + defaults = self.entries["default"] + i = 0 + while i < len(self.entries): + if type(self.entries["type"][i]) == float: + self.types[self.entries["name"][i]] = None + else: + self.types[self.entries["name"][i]] = self.entries["type"][i].split(" ")[0] + i += 1 + total = len(names) + i = 0 + while i < total: + entry = defaults[i] + if str(entry) != "nan": + pat = re.compile("..last-saved.*") + if pat.match(entry): + name = entry.split("#")[1][:-1] + self.saved[name] = None + else: + self.defaults[names[i]] = entry + i += 1 + return True + + def createEntry( + self, + entry: dict, + ) -> dict: + """Create the feature data structure. + + Args: + entry (dict): The feature data + + Returns: + (dict): The OSM data structure for this entry from the json file + """ + # print(line) + feature = dict() + attrs = dict() + tags = dict() + priv = dict() + refs = list() + + # log.debug("Creating entry") + # First convert the tag to the approved OSM equivalent + if "lat" in entry and "lon" in entry: + attrs["lat"] = entry["lat"] + attrs["lon"] = entry["lon"] + for key, value in entry.items(): + attributes = ( + "id", + "timestamp", + "lat", + "lon", + "uid", + "user", + "version", + "action", + ) + + # When using existing OSM data, there's a special geometry field. + # Otherwise use the GPS coordinates where you are. + if key == "geometry" and len(value) > 0: + geometry = value.split(" ") + if len(geometry) == 4: + attrs["lat"] = geometry[0] + attrs["lon"] = geometry[1] + continue + + # if 'lat' in attrs and len(attrs["lat"]) == 0: + # continue + + if key is not None and len(key) > 0 and key in attributes: + attrs[key] = value + # log.debug("Adding attribute %s with value %s" % (key, value)) + continue + + if value is not None and value != "no" and value != "unknown": + if key == "track" or key == "geoline": + # refs.append(tags) + # log.debug("Adding reference %s" % tags) + refs = value.split(";") + elif type(value) != str: + if self.privateData(key): + priv[key] = str(value) + else: + tags[key] = str(value) + elif len(value) > 0: + if self.privateData(key): + priv[key] = value + else: + tags[key] = value + feature["attrs"] = attrs + if len(tags) > 0: + # logging.debug(f"TAGS: {tags}") + feature["tags"] = tags + if len(refs) > 1: + feature["refs"] = refs + if len(priv) > 0: + feature["private"] = priv + + return feature + def dump(self): """Dump internal data structures, for debugging purposes only.""" print("YAML file: %s" % self.filespec) diff --git a/osm_fieldwork/json2osm.py b/osm_fieldwork/json2osm.py index 5d9c4009..03bb2757 100755 --- a/osm_fieldwork/json2osm.py +++ b/osm_fieldwork/json2osm.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of OSM-Fieldwork. # @@ -23,13 +23,11 @@ import logging # import pandas as pd -import re import sys from pathlib import Path import flatdict import geojson -import shapely from geojson import Feature, FeatureCollection, Point, dump from osm_fieldwork.convert import Convert @@ -62,32 +60,10 @@ def __init__( self.features = list() self.config = super().__init__(yaml) - # FIXME: a work in progress - # def parseXLS(self, xlsfile: str): - # """Parse the source XLSFile if available to look for details we need""" - # if xlsfile is not None and len(xlsfile) > 0: - # entries = pd.read_excel(xlsfile, sheet_name=[0]) - # # There will only be a single sheet - # names = entries[0]['name'] - # defaults = entries[0]['default'] - # total = len(names) - # i = 0 - # while i < total: - # entry = defaults[i] - # if str(entry) != 'nan': - # pat = re.compile("..last-saved.*") - # if pat.match(entry): - # name = entry.split('#')[1][:-1] - # self.saved[name] = None - # else: - # self.defaults[names[i]] = entry - # i += 1 - # return True - def createOSM( self, filespec: str = "tmp.osm", - ): + ) -> OsmFile: """Create an OSM XML output files. Args: @@ -139,7 +115,7 @@ def createGeoJson( """Create a GeoJson output file. Args: - file (str): The filespec of the output GeoJson file + file (str): The filespec of the output GeoJson file """ log.debug("Creating GeoJson file: %s" % file) self.json = open(file, "w") @@ -176,7 +152,7 @@ def parse( self, filespec: str = None, data: str = None, - ): + ) -> list: """Parse the JSON file from ODK Central and convert it to a data structure. The input is either a filespec to open, or the data itself. @@ -188,7 +164,7 @@ def parse( (list): A list of all the features in the input file """ log.debug(f"Parsing JSON file {filespec}") - all_tags = list() + total = list() if not data: file = open(filespec, "r") infile = Path(filespec) @@ -198,13 +174,12 @@ def parse( reader = json.load(file) else: log.error("Need to specify a JSON or GeoJson file!") - return all_tags + return total elif isinstance(data, str): reader = geojson.loads(data) elif isinstance(data, list): reader = data - total = list() # JSON files from Central use value as the keyword, whereas # GeoJSON uses features for the same thing. if "value" in reader: @@ -214,215 +189,138 @@ def parse( else: data = reader for row in data: - # log.info(f"ROW: {row}") + # log.debug(f"ROW: {row}\n") tags = dict() - if "geometry" in row: - # If geom not point, convert to centroid - if row["geometry"]["type"] != "Point": - log.debug(f"Converting {row['geometry']['type']} geometry to centroid point") - geom = shapely.from_geojson(str(row)) - centroid = shapely.to_geojson(geom.centroid) - row["geometry"] = centroid - tags["geometry"] = row["geometry"] - else: - pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") - gps = re.findall(pat, str(row)) - # If geopoint warmup is used, there will be two matches, we only - # want the second one, which is the location. - for coords in gps: - tags["geometry"] = coords + # Extract the location regardless of what the tag is + # called. + # pat = re.compile("[-0-9.]*, [0-9.-]*, [0-9.]*") + # gps = re.findall(pat, str(row)) + # tmp = list() + # if len(gps) == 0: + # log.error(f"No location data in: {row}") + # continue + # elif len(gps) == 1: + # # Only the warmup has any coordinates. + # tmp = gps[0].split(" ") + # elif len(gps) == 2: + # # both the warmup and the coordinates have values + # tmp = gps[1].split(" ") + + # if len(tmp) > 0: + # lat = float(tmp[0][:-1]) + # lon = float(tmp[1][:-1]) + # geom = Point([lon, lat]) + # row["geometry"] = geom + # # tags["geometry"] = row["geometry"] + if "properties" in row: row["properties"] # A GeoJson formatted file else: pass # A JOSM file from ODK Central - # flatten all the groups into a single data structure + # flatten all the groups into a sodk2geojson.pyingle data structure flattened = flatdict.FlatDict(row) for k, v in flattened.items(): last = k.rfind(":") + 1 key = k[last:] - # log.debug(f"Processing tag {key} = {v}") - # names and comments may have spaces, otherwise - # it's from a select_multiple - pat = re.compile("name[:a-z]*") - names = re.findall(pat, key) - if len(names) > 0: - for name in names: - tags[name] = v - continue - if key == "comment": - tags[key] = v # a JSON file from ODK Central always uses coordinates as # the keyword + if key is None or key in self.ignore or v is None: + continue + log.debug(f"Processing tag {key} = {v}") if key == "coordinates": if isinstance(v, list): - lat = v[1] - lon = v[0] - tags["geometry"] = f"{lat} {lon}" + tags["lat"] = v[1] + tags["lon"] = v[0] + # poi = Point(float(lon), float(lat)) + # tags["geometry"] = poi continue - if key == "xlocation": - tags["geometry"] = v + + if key in self.types: + if self.types[key] == "select_multiple": + # log.debug(f"Found key '{self.types[key]}'") + if v is None: + continue + vals = self.convertMultiple(v) + if len(vals) > 0: + for tag in vals: + tags.update(tag) + # print(f"BASE {tags}") + continue + + items = self.convertEntry(key, v) + if items is None or len(items) == 0: continue - tags[key] = v - total.append(tags) + + if type(items) == str: + log.debug(f"string Item {items}") + else: + log.debug(f"dict Item {items}") + if len(items) == 0: + tags.update(items[0]) + # log.debug(f"TAGS: {tags}") + if len(tags) > 0: + total.append(tags) # log.debug(f"Finished parsing JSON file {filespec}") return total - def createEntry( - self, - entry: dict, - ): - """Create the feature data structure for this entry. - Args: - entry (dict): The feature to convert to the output format +# def json2osm( +# cmdln: dict, +# ) -> str: +# """ +# Process the JSON file from ODK Central or the GeoJSON file to OSM XML format. - Returns: - (dict): The new entry for the output file - """ - # print(line) - feature = dict() - attrs = dict() - tags = dict() - priv = dict() - refs = list() - - # log.debug("Creating entry") - # First convert the tag to the approved OSM equivalent - for key, value in entry.items(): - # When using existing OSM data, there's a special geometry field. - # Otherwise use the GPS coordinates where you are. - lat = None - lon = None - if isinstance(value, float): - continue - # log.debug(f"FIXME: {key} = {value} {type(value)}") - if key == "xid" and value is not None: - attrs["id"] = int(value) - if key == "geometry": - # The GeoJson file has the geometry field. Usually it's a list - # but on occasion it's a string instead, so turn it into a list - if isinstance(value, str) and len(coords := value.split(" ")) >= 2: - lat = coords[0] - lon = coords[1] - - # Parse as geojson - else: - geom = shapely.from_geojson(str(value)) - - if geom.geom_type != "Point": - # Use centroid if polygon - geom = geom.centroid - - # Get coords from point - lat = geom.y - lon = geom.x - - attrs["lat"] = lat - attrs["lon"] = lon - # log.debug(f"ATTRS: {attrs}") - - # Some tags are actually attributes - # print(f"FIXME: {key} {key in attributes}") - # if key in self.multiple: - # for item in value: - # if key in item: - # for entry in item[key].split(): - # vals = self.getValues(key) - # if entry in vals: - # if vals[entry].find("="): - # tmp = vals[entry].split("=") - # tags[tmp[0]] = tmp[1] - # else: - # tags[entry] = "yes" - # continue - - if isinstance(value, str) and (value == "no" or value == "unknown"): - pass - elif value is not None: - if key == "track" or key == "geoline": - refs.append(tag) - log.debug("Adding reference %s" % tag) - elif len(str(value)) > 0: - if self.privateData(key): - priv[key] = value - else: - item = self.convertEntry(key, value) - if item is not None and isinstance(item, dict): - tags.update(item) - elif isinstance(item, list): - for entry in item: - tags.update(entry) - - if len(tags) > 0: - if "geometry" in tags: - del tags["geometry"] - feature["attrs"] = attrs - feature["tags"] = tags - if len(refs) > 0: - feature["refs"] = refs - if len(priv) > 0: - feature["private"] = priv - - return feature - - -def json2osm(input_file, yaml_file=None): - """Process the JSON file from ODK Central or the GeoJSON file to OSM XML format. - - Args: - input_file (str): The path to the input JSON or GeoJSON file. - yaml_file (str): The path to the YAML config file (optional). - - Returns: - osmoutfile (str): Path to the converted OSM XML file. - """ - log.info(f"Converting JSON file to OSM: {input_file}") - if yaml_file: - jsonin = JsonDump(yaml_file) - else: - jsonin = JsonDump() +# Args: +# cmdln (dict): The data from the command line - # jsonin.parseXLS(args.xlsfile) +# Returns: +# osmoutfile (str): Path to the converted OSM XML file. +# """ +# log.info(f"Converting JSON file to OSM: {cmdln['infile']}") +# if yaml_file: +# jsonin = JsonDump({cmd['yaml']}) +# else: +# jsonin = JsonDump() - # Modify the input file name for the 2 output files, which will get written - # to the current directory. +# # Modify the input file name for the 2 output files, which will get written +# # to the current directory. - base = Path(input_file).stem - osmoutfile = f"{base}-out.osm" - jsonin.createOSM(osmoutfile) +# base = Path(input_file).stem +# osmoutfile = f"{base}-out.osm" +# jsonin.createOSM(osmoutfile) - data = jsonin.parse(input_file) - # This OSM XML file only has OSM appropriate tags and values +# data = jsonin.parse(input_file) +# # This OSM XML file only has OSM appropriate tags and values - for entry in data: - feature = jsonin.createEntry(entry) +# for entry in data: +# feature = jsonin.createEntry(entry) - # Sometimes bad entries, usually from debugging XForm design, sneak in - if len(feature) == 0: - continue +# # Sometimes bad entries, usually from debugging XForm design, sneak in +# if len(feature) == 0: +# continue - if len(feature) > 0: - if "lat" not in feature["attrs"]: - if "geometry" in feature["tags"]: - if isinstance(feature["tags"]["geometry"], str): - coords = list(feature["tags"]["geometry"]) - # del feature['tags']['geometry'] - elif "coordinates" in feature["tags"]: - coords = feature["tags"]["coordinates"] - feature["attrs"] = {"lat": coords[1], "lon": coords[0]} - else: - log.warning(f"Bad record! {feature}") - continue # Skip bad records +# if len(feature) > 0: +# if "lat" not in feature["attrs"]: +# if "geometry" in feature["tags"]: +# if isinstance(feature["tags"]["geometry"], str): +# coords = list(feature["tags"]["geometry"]) +# # del feature['tags']['geometry'] +# elif "coordinates" in feature["tags"]: +# coords = feature["tags"]["coordinates"] +# feature["attrs"] = {"lat": coords[1], "lon": coords[0]} +# else: +# log.warning(f"Bad record! {feature}") +# continue # Skip bad records - jsonin.writeOSM(feature) - # log.debug("Writing final OSM XML file...") +# jsonin.writeOSM(feature) +# # log.debug("Writing final OSM XML file...") - # jsonin.finishOSM() - log.info(f"Wrote OSM XML file: {osmoutfile}") +# # jsonin.finishOSM() +# log.info(f"Wrote OSM XML file: {osmoutfile}") - return osmoutfile +# return osmoutfile def main(): @@ -444,7 +342,57 @@ def main(): ) logging.getLogger("urllib3").setLevel(logging.DEBUG) - json2osm(args.infile, args.yaml) + if args.yaml: + jsonvin = JsonDump(args.yaml) + else: + jsonin = JsonDump() + + jsonin.parseXLS(args.xlsfile) + + base = Path(args.infile).stem + osmoutfile = f"{base}.osm" + jsonin.createOSM(osmoutfile) + + jsonoutfile = f"{base}.geojson" + jsonin.createGeoJson(jsonoutfile) + + log.debug("Parsing json files %r" % args.infile) + data = jsonin.parse(args.infile) + # This OSM XML file only has OSM appropriate tags and values + nodeid = -1000 + for entry in data: + feature = jsonin.createEntry(entry) + if len(feature) == 0: + continue + if "refs" in feature: + refs = list() + for ref in feature["refs"]: + now = datetime.now().strftime("%Y-%m-%dT%TZ") + if len(ref) == 0: + continue + coords = ref.split(" ") + print(coords) + node = {"attrs": {"id": nodeid, "version": 1, "timestamp": now, "lat": coords[0], "lon": coords[1]}, "tags": dict()} + jsonin.writeOSM(node) + refs.append(nodeid) + nodeid -= 1 + + feature["refs"] = refs + jsonin.writeOSM(feature) + else: + # Sometimes bad entries, usually from debugging XForm design, sneak in + if "lat" not in feature["attrs"]: + log.warning("Bad record! %r" % feature) + continue + jsonin.writeOSM(feature) + # This GeoJson file has all the data values + jsonin.writeGeoJson(feature) + # print("TAGS: %r" % feature['tags']) + + jsonin.finishOSM() + jsonin.finishGeoJson() + log.info("Wrote OSM XML file: %r" % osmoutfile) + log.info("Wrote GeoJson file: %r" % jsonoutfile) if __name__ == "__main__": diff --git a/osm_fieldwork/osmfile.py b/osm_fieldwork/osmfile.py index 29604c21..73ea39c0 100755 --- a/osm_fieldwork/osmfile.py +++ b/osm_fieldwork/osmfile.py @@ -108,6 +108,7 @@ def footer(self): self.file.flush() if self.file is False: self.file.close() + self.file = None def write( self, diff --git a/osm_fieldwork/xforms.yaml b/osm_fieldwork/xforms.yaml index da5a87d9..a80ea9eb 100644 --- a/osm_fieldwork/xforms.yaml +++ b/osm_fieldwork/xforms.yaml @@ -12,6 +12,8 @@ convert: - camptype: tourism - openfire: leisure=firepit + - fire_pit: leisure=firepit + - picnic_table: leisure=picnic_table - latitude: lat - longitude: lon - altitude: ele @@ -20,7 +22,6 @@ convert: - submissiondate: timestamp - comment: note - view: viewpoint - - cell: cellular - Monday: Mo - Tuesday: Tu - Wednesday: We @@ -113,6 +114,8 @@ private: - lateral_system - access_roof - updatedat + - cell + - cellular # All of these tags are in the CSV file, and can be ignored ignore: @@ -120,7 +123,6 @@ ignore: - __id - model - type - - features - accuracy - meta - __system @@ -160,8 +162,12 @@ ignore: - government_menu - note - instanceid + - begin_group + - end_group + - image multiple: - healthcare - amenity_type - specialty + - features diff --git a/tests/test_convert.py b/tests/test_convert.py index f322c29a..5adb0046 100755 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -# Copyright (c) 2021, 2022, 2023 Humanitarian OpenStreetMap Team +# Copyright (c) 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team # # This file is part of Osm-Fieldwork. # @@ -19,11 +19,16 @@ # import argparse +import logging import os +import sys from osm_fieldwork.convert import Convert from osm_fieldwork.xlsforms import xlsforms_path +# Instantiate logger +log = logging.getLogger(__name__) + # find the path of root tests dir rootdir = os.path.dirname(os.path.abspath(__file__)) path = xlsforms_path.replace("/xlsforms", "") @@ -74,9 +79,9 @@ def test_sub_value(): def test_multiple_value(): """Test tag value conversion.""" hits = 0 - # Test a value that gets converted - vals = csv.convertValue("amenity", "coffee") - if len(vals) == 2 and vals[0]["amenity"] == "cafe" and vals[1]["cuisine"] == "coffee_shop": + vals = csv.convertMultiple("picnic_table fire_pit parking") + print(vals) + if len(vals) > 0 and vals[0]["leisure"] == "picnic_table" and vals[1]["leisure"] == "firepit": hits += 1 assert hits == 1 @@ -84,9 +89,19 @@ def test_multiple_value(): # Run standalone for easier debugging when not under pytest if __name__ == "__main__": parser = argparse.ArgumentParser(description="Read and convert a JSON file from ODK Central") + parser.add_argument("-v", "--verbose", nargs="?", const="0", help="verbose output") parser.add_argument("--infile", default=f"{rootdir}/testdata/testcamps.json", help="The JSON input file") args = parser.parse_args() + # if verbose, dump to the terminal + if args.verbose is not None: + logging.basicConfig( + level=logging.DEBUG, + format=("%(threadName)10s - %(name)s - %(levelname)s - %(message)s"), + datefmt="%y-%m-%d %H:%M:%S", + stream=sys.stdout, + ) + test_keywords() test_convert_tag() test_single_value()