fix: Major refactoring to use the new osm-rawdata module

hotosm · Sep 11, 2023 · 403151b · 403151b
1 parent 06f926f
commit 403151b
Show file tree

Hide file tree

Showing 2 changed files with 146 additions and 615 deletions.
diff --git a/osm_fieldwork/filter_data.py b/osm_fieldwork/filter_data.py
@@ -28,6 +28,9 @@
 from osm_fieldwork.xlsforms import xlsforms_path
 import yaml
 
+# Find the other files for this project
+import osm_fieldwork as of
+rootdir = of.__path__[0]
 
 # Instantiate logger
 log = logging.getLogger(__name__)
@@ -62,6 +65,9 @@ def parse(self,
             title (str): The title from the XLSForm Setting sheet
             extract (str): The data extract filename from the XLSForm Survey sheet
         """
+        excel_object = pd.ExcelFile(filespec)
+        entries = excel_object.parse(sheet_name=[0, 1, 2], index_col=0,
+                                     usercols=[0, 1, 2])
         entries = pd.read_excel(filespec, sheet_name=[0, 1, 2])
         title = entries[2]['form_title'].to_list()[0]
         extract = ""
@@ -80,10 +86,7 @@ def parse(self,
             if key == 'model' or str(key) == "nan":
                 index += 1
                 continue
-            if 'name' in entries:
-                value = entries[1]['name'][index]
-            else:
-                value = None
+            value = entries[1]['name'][index]
             if value == "<text>" or str(value) == "null":
                 index += 1
                 continue
@@ -92,29 +95,29 @@ def parse(self,
             self.tags[key].append(value)
             index += 1
 
-        # The yaml config file for the query has a list of columns
-        # to keep in addition to this default set.
-        path = xlsforms_path.replace("xlsforms", "data_models")
-        category = os.path.basename(filespec).replace(".xls", "")
-        file = open(f"{path}/{category}.yaml", "r").read()
-        self.yaml = yaml.load(file, Loader=yaml.Loader)
-        keep = ("name",
-                "name:en",
-                "id",
-                "operator",
-                "addr:street",
-                "addr:housenumber",
-                "osm_id",
-                "title",
-                "tags",
-                "label",
-                "landuse",
-                "opening_hours",
-                "tourism",
-                )
-        self.keep = list(keep)
-        if 'keep' in self.yaml:
-            self.keep.extend(self.yaml['keep'])
+        # # The yaml config file for the query has a list of columns
+        # # to keep in addition to this default set.
+        # path = xlsforms_path.replace("xlsforms", "data_models")
+        # category = os.path.basename(filespec).replace(".xls", "")
+        # file = open(f"{path}/{category}.yaml", "r").read()
+        # self.yaml = yaml.load(file, Loader=yaml.Loader)
+        # keep = ("name",
+        #         "name:en",
+        #         "id",
+        #         "operator",
+        #         "addr:street",
+        #         "addr:housenumber",
+        #         "osm_id",
+        #         "title",
+        #         "tags",
+        #         "label",
+        #         "landuse",
+        #         "opening_hours",
+        #         "tourism",
+        #         )
+        # self.keep = list(keep)
+        # if 'keep' in self.yaml:
+        #     self.keep.extend(self.yaml['keep'])
 
         return title, extract
 
@@ -131,7 +134,6 @@ def cleanData(self,
             (FeatureCollection): The modified data
         
         """
-        tmpfile = data
         if type(data) == str:
             outfile = open(f"new-{data}", "x")
             infile = open(tmpfile, "r")
@@ -146,12 +148,14 @@ def cleanData(self,
             "version",
             "changeset",
             )
+        keep = ('osm_id', 'id', 'version')
         collection = list()
         for feature in indata['features']:
             properties = dict()
             for key, value in feature['properties'].items():
-                # log.debug(f"{key} = {value}")
-                if key in self.keep:
+                log.debug(f"{key} = {value}")
+                # if key in self.keep:
+                if False:
                     if key == 'tags':
                         for k, v in value.items():
                             if k[:4] == "name":
@@ -165,8 +169,11 @@ def cleanData(self,
                         else:
                             properties[key] = value
                 else:
-                    if key in self.tags.keys():
-                        if key == "name":
+                    if key in keep:
+                        properties[key] = value
+                        continue
+                    if key in self.tags:
+                        if key == "name" or key == 'name:en':
                             properties['title'] = self.tags[key]
                             properties['label'] = self.tags[key]
                         if value in self.tags[key]: