Skip to content

Commit

Permalink
Handle errors in script
Browse files Browse the repository at this point in the history
- No title or abstract in frontmatter
- Print issue number when JSON fails
  • Loading branch information
mjpost committed Feb 9, 2025
1 parent 60de8a7 commit a855af8
Showing 1 changed file with 27 additions and 25 deletions.
52 changes: 27 additions & 25 deletions bin/process_bulk_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,10 @@ def _parse_metadata_changes(self, issue_body):
# For some reason, the issue body has \r\n line endings
issue_body = issue_body.replace("\r", "")

try:
if (
match := re.search(r"```json\n(.*?)\n```", issue_body, re.DOTALL)
) is not None:
return json.loads(match[1])
except Exception as e:
print(f"Error parsing metadata changes: {e}", file=sys.stderr)
if (
match := re.search(r"```json\n(.*?)\n```", issue_body, re.DOTALL)
) is not None:
return json.loads(match[1])

return None

Expand All @@ -119,19 +116,20 @@ def _apply_changes_to_xml(self, xml_repo_path, anthology_id, changes):
raise Exception(f"-> Paper not found in XML file: {xml_repo_path}")

# Apply changes to XML
for key in ["title", "abstract"]:
if key in changes:
node = paper_node.find(key)
if node is None:
node = make_simple_element(key, parent=paper_node)
# set the node to the structure of the new string
try:
new_node = ET.fromstring(f"<{key}>{changes[key]}</{key}>")
except ET.XMLSyntaxError as e:
print(f"Error parsing XML for key {key}: {e}", file=sys.stderr)
raise e
# replace the current node with the new node in the tree
paper_node.replace(node, new_node)
if paper_id != "0":
# frontmatter has no title or abstract
for key in ["title", "abstract"]:
if key in changes:
node = paper_node.find(key)
if node is None:
node = make_simple_element(key, parent=paper_node)
# set the node to the structure of the new string
try:
new_node = ET.fromstring(f"<{key}>{changes[key]}</{key}>")
except ET.XMLSyntaxError as e:
raise e
# replace the current node with the new node in the tree
paper_node.replace(node, new_node)

if "authors" in changes:
"""
Expand Down Expand Up @@ -234,7 +232,12 @@ def process_metadata_issues(
)

# Parse metadata changes from issue
json_block = self._parse_metadata_changes(issue.body)
try:
json_block = self._parse_metadata_changes(issue.body)
except json.decoder.JSONDecodeError as e
print(f"Failed to parse JSON block in #{issue.number}: {e}", file=sys.stderr)
json_block = None

if not json_block:
if close_old_issues:
# for old issues, filed without a JSON block, we append a comment
Expand Down Expand Up @@ -267,7 +270,7 @@ def process_metadata_issues(
continue
else:
if verbose:
print("-> Skipping (no JSON block)", file=sys.stderr)
print(f"-> Skipping #{issue.number} (no JSON block)", file=sys.stderr)
continue

self.stats["relevant_issues"] += 1
Expand All @@ -294,8 +297,7 @@ def process_metadata_issues(
xml_repo_path, anthology_id, json_block
)
except Exception as e:
if verbose:
print(e, file=sys.stderr)
print(f"Failed to apply changes to #{issue.number}: {e}", file=sys.stderr)
continue

if tree:
Expand All @@ -312,7 +314,7 @@ def process_metadata_issues(
# Commit changes
self.local_repo.index.add([xml_repo_path])
self.local_repo.index.commit(
f"Processed metadata corrections (closes #{issue.number})"
f"Process metadata corrections for {anthology_id} (closes #{issue.number})"
)

closed_issues.append(issue)
Expand Down

0 comments on commit a855af8

Please sign in to comment.