Skip to content

Commit

Permalink
OSM
Browse files Browse the repository at this point in the history
  • Loading branch information
docuracy committed Jan 25, 2025
1 parent 4530704 commit c8bb681
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 4 deletions.
3 changes: 3 additions & 0 deletions vespa/repository/api/ingestion/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@
{
'url': 'https://planet.openstreetmap.org/planet/planet-latest.osm.bz2',
'file_type': 'xml',
'filters': [
lambda node: 'place' in node, # Filter to only include nodes tagged as places
]
}
],
},
Expand Down
5 changes: 5 additions & 0 deletions vespa/repository/api/ingestion/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,11 @@ async def process_batch(batch):

# Process the batch when it reaches the batch_size or limit
if len(current_batch) >= batch_size or (limit is not None and count >= limit):

# Temporarily skip processing TODO: REMOVE
logger.info(document)
continue

batch_results = await process_batch(current_batch)
results.extend(batch_results) # Collect results
current_batch = []
Expand Down
14 changes: 12 additions & 2 deletions vespa/repository/api/ingestion/streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,18 @@ async def async_generator():
def _parse_xml_stream(self, stream):
# Parse XML incrementally from stream
for event, elem in xml.etree.ElementTree.iterparse(stream, events=('end',)):
if elem.tag == 'place': # Assuming the root element of interest is <item>
yield elem
if elem.tag == 'node':
# Create a dictionary from element attributes
elem_data = dict(elem.attrib)

# Add child tag attributes (k, v) to the dictionary
for tag in elem.findall('tag'):
key = tag.attrib.get('k')
value = tag.attrib.get('v')
if key and value:
elem_data[key] = value

yield elem_data
elem.clear() # Free memory

def _split_triple(self, line):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ def process(self) -> dict:
return self.output

years = {
**({'year_start': year_start} if (year_start := self.name.get('from')) else {}),
**({'year_end': year_end} if (year_end := self.name.get('to')) else {}),
# **({'year_start': year_start} if (year_start := self.name.get('from')) else {}),
# **({'year_end': year_end} if (year_end := self.name.get('to')) else {}),
'year_start': self.name.get('from', 2025),
'year_end': self.name.get('to', 2025),
}

self.output['names'].append({
Expand Down

0 comments on commit c8bb681

Please sign in to comment.