Skip to content

Commit

Permalink
iop: fix for ElementTree parsing
Browse files (browse the repository at this point in the history)
Signed-off-by: pamfilos <[email protected]>
  • Loading branch information
pamfilos committed Dec 12, 2024
1 parent 4557afc commit 1b7e6f7
Show file tree
Hide file tree
Showing 3 changed files with 4,333 additions and 11 deletions.
20 changes: 16 additions & 4 deletions dags/iop/iop_process_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,26 @@
logger = get_logger()


def process_xml(input, italics=True):
    """Run the pre-parse text rewrites over ``input`` and return the result.

    The rewrites are applied in this order:

    1. ``convert_html_subscripts_to_latex``
    2. ``convert_html_italics_to_latex`` (only when ``italics`` is true)
    3. ``replace_cdata_format``

    Args:
        input: The XML document text to rewrite.
        italics: When false, the italics conversion step is skipped. The
            default (``True``) keeps the behaviour of the original
            one-argument signature.

    Returns:
        The rewritten document text.
    """
    input = convert_html_subscripts_to_latex(input)
    # The italics step is optional so callers can retry without it when the
    # converted text turns out not to be parseable.
    if italics:
        input = convert_html_italics_to_latex(input)
    input = replace_cdata_format(input)
    return input


def convert_xml_to_et_tree(input):
    """Preprocess ``input`` with ``process_xml`` and parse it with ``ET``.

    The first attempt uses the default preprocessing. If that attempt raises
    ``ET.ParseError``, the text is preprocessed again with ``italics=False``
    and parsed a second time; an error from the second attempt propagates to
    the caller.

    Args:
        input: The XML document text.

    Returns:
        The element produced by ``ET.fromstring``.
    """
    try:
        return ET.fromstring(process_xml(input))
    except ET.ParseError:
        # Second attempt: same pipeline with the italics conversion disabled.
        return ET.fromstring(process_xml(input, italics=False))


def iop_parse_file(**kwargs):
if "params" not in kwargs or "file" not in kwargs["params"]:
raise Exception("There was no 'file' parameter. Exiting run.")
Expand All @@ -36,8 +49,7 @@ def iop_parse_file(**kwargs):
xml_bytes = base64.b64decode(encoded_xml)
if isinstance(xml_bytes, bytes):
xml_bytes = xml_bytes.decode("utf-8")
xml_bytes = process_xml(xml_bytes)
xml = ET.fromstring(xml_bytes)
xml = convert_xml_to_et_tree(input)

parser = IOPParser(file_path=file_name)
parsed = parser.parse(xml)
Expand Down
Loading

0 comments on commit 1b7e6f7

Please sign in to comment.