Skip to content

Commit

Permalink
fix: Major refactoring to use the new osm-rawdata module
Browse files Browse the repository at this point in the history
  • Loading branch information
robsavoye committed Sep 11, 2023
1 parent 06f926f commit 403151b
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 615 deletions.
71 changes: 39 additions & 32 deletions osm_fieldwork/filter_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from osm_fieldwork.xlsforms import xlsforms_path
import yaml

# Find the other files for this project
import osm_fieldwork as of
rootdir = of.__path__[0]

# Instantiate logger
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -62,6 +65,9 @@ def parse(self,
title (str): The title from the XLSForm Setting sheet
extract (str): The data extract filename from the XLSForm Survey sheet
"""
excel_object = pd.ExcelFile(filespec)
entries = excel_object.parse(sheet_name=[0, 1, 2], index_col=0,
usercols=[0, 1, 2])
entries = pd.read_excel(filespec, sheet_name=[0, 1, 2])
title = entries[2]['form_title'].to_list()[0]
extract = ""
Expand All @@ -80,10 +86,7 @@ def parse(self,
if key == 'model' or str(key) == "nan":
index += 1
continue
if 'name' in entries:
value = entries[1]['name'][index]
else:
value = None
value = entries[1]['name'][index]
if value == "<text>" or str(value) == "null":
index += 1
continue
Expand All @@ -92,29 +95,29 @@ def parse(self,
self.tags[key].append(value)
index += 1

# The yaml config file for the query has a list of columns
# to keep in addition to this default set.
path = xlsforms_path.replace("xlsforms", "data_models")
category = os.path.basename(filespec).replace(".xls", "")
file = open(f"{path}/{category}.yaml", "r").read()
self.yaml = yaml.load(file, Loader=yaml.Loader)
keep = ("name",
"name:en",
"id",
"operator",
"addr:street",
"addr:housenumber",
"osm_id",
"title",
"tags",
"label",
"landuse",
"opening_hours",
"tourism",
)
self.keep = list(keep)
if 'keep' in self.yaml:
self.keep.extend(self.yaml['keep'])
# # The yaml config file for the query has a list of columns
# # to keep in addition to this default set.
# path = xlsforms_path.replace("xlsforms", "data_models")
# category = os.path.basename(filespec).replace(".xls", "")
# file = open(f"{path}/{category}.yaml", "r").read()
# self.yaml = yaml.load(file, Loader=yaml.Loader)
# keep = ("name",
# "name:en",
# "id",
# "operator",
# "addr:street",
# "addr:housenumber",
# "osm_id",
# "title",
# "tags",
# "label",
# "landuse",
# "opening_hours",
# "tourism",
# )
# self.keep = list(keep)
# if 'keep' in self.yaml:
# self.keep.extend(self.yaml['keep'])

return title, extract

Expand All @@ -131,7 +134,6 @@ def cleanData(self,
(FeatureCollection): The modified data
"""
tmpfile = data
if type(data) == str:
outfile = open(f"new-{data}", "x")
infile = open(tmpfile, "r")
Expand All @@ -146,12 +148,14 @@ def cleanData(self,
"version",
"changeset",
)
keep = ('osm_id', 'id', 'version')
collection = list()
for feature in indata['features']:
properties = dict()
for key, value in feature['properties'].items():
# log.debug(f"{key} = {value}")
if key in self.keep:
log.debug(f"{key} = {value}")
# if key in self.keep:
if False:
if key == 'tags':
for k, v in value.items():
if k[:4] == "name":
Expand All @@ -165,8 +169,11 @@ def cleanData(self,
else:
properties[key] = value
else:
if key in self.tags.keys():
if key == "name":
if key in keep:
properties[key] = value
continue
if key in self.tags:
if key == "name" or key == 'name:en':
properties['title'] = self.tags[key]
properties['label'] = self.tags[key]
if value in self.tags[key]:
Expand Down
Loading

0 comments on commit 403151b

Please sign in to comment.