Skip to content

Commit

Permalink
OSM
Browse files (browse the repository at this point in the history)
  • Loading branch information
docuracy committed Jan 25, 2025
1 parent 882a2f9 commit 79d4c69
Showing 1 changed file with 15 additions and 17 deletions.
32 changes: 15 additions & 17 deletions vespa/repository/api/ingestion/streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import logging
import os
import urllib.parse
import xml.etree.ElementTree
import zipfile

import ijson
import lxml
import requests
from lxml.etree import iterparse

Expand Down Expand Up @@ -185,22 +185,20 @@ async def _parse_xml_stream(self, stream):
"""
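Asynchronous generator that parses an XML stream and yields one dict per "node" element: the element's attributes plus the k/v pairs of its child "tag" elements.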
"""
# Ensure the stream is read synchronously
with io.TextIOWrapper(stream, encoding="utf-8", errors="replace") as wrapper:
for event, elem in iterparse(wrapper, events=("end",)):
if elem.tag == "node":
# Create a dictionary from element attributes
elem_data = dict(elem.attrib)

# Add child tag attributes (k, v) to the dictionary
for tag in elem.findall("tag"):
key = tag.attrib.get("k")
value = tag.attrib.get("v")
if key and value:
elem_data[key] = value

yield elem_data
elem.clear() # Free memory
for event, elem in lxml.etree.iterparse(stream, events=("end",)):
if elem.tag == "node":
# Create a dictionary from element attributes
elem_data = dict(elem.attrib)

# Add child tag attributes (k, v) to the dictionary
for tag in elem.findall("tag"):
key = tag.attrib.get("k")
value = tag.attrib.get("v")
if key and value:
elem_data[key] = value

yield elem_data
elem.clear() # Free memory

def _split_triple(self, line):
parts = line.rstrip(' .').split(' ', 2)
Expand Down

0 comments on commit 79d4c69

Please sign in to comment.