Skip to content

Commit

Permalink
OSM
Browse files Browse the repository at this point in the history
  • Loading branch information
docuracy committed Jan 25, 2025
1 parent c8bb681 commit 6af0a25
Showing 1 changed file with 23 additions and 15 deletions.
38 changes: 23 additions & 15 deletions vespa/repository/api/ingestion/streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,21 +176,29 @@ async def async_generator():
return async_generator()

def _parse_xml_stream(self, stream):
# Parse XML incrementally from stream
for event, elem in xml.etree.ElementTree.iterparse(stream, events=('end',)):
if elem.tag == 'node':
# Create a dictionary from element attributes
elem_data = dict(elem.attrib)

# Add child tag attributes (k, v) to the dictionary
for tag in elem.findall('tag'):
key = tag.attrib.get('k')
value = tag.attrib.get('v')
if key and value:
elem_data[key] = value

yield elem_data
elem.clear() # Free memory
"""
Asynchronous parser for XML streams.
"""
loop = asyncio.get_event_loop()
wrapper = io.TextIOWrapper(stream, encoding="utf-8", errors="replace")

async def async_generator():
for event, elem in xml.etree.ElementTree.iterparse(wrapper, events=("end",)):
if elem.tag == "node":
# Create a dictionary from element attributes
elem_data = dict(elem.attrib)

# Add child tag attributes (k, v) to the dictionary
for tag in elem.findall("tag"):
key = tag.attrib.get("k")
value = tag.attrib.get("v")
if key and value:
elem_data[key] = value

yield elem_data
elem.clear() # Free memory

return async_generator()

def _split_triple(self, line):
parts = line.rstrip(' .').split(' ', 2)
Expand Down

0 comments on commit 6af0a25

Please sign in to comment.