From a855af8d1feecbc9592be04c4738e72740fbcecc Mon Sep 17 00:00:00 2001 From: Matt Post Date: Sat, 8 Feb 2025 19:56:38 -0500 Subject: [PATCH] Handle errors in script - No title or abstract in frontmatter - Print issue number when JSON fails --- bin/process_bulk_metadata.py | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/bin/process_bulk_metadata.py b/bin/process_bulk_metadata.py index 032bab6c84..6bf1b008f9 100755 --- a/bin/process_bulk_metadata.py +++ b/bin/process_bulk_metadata.py @@ -90,13 +90,10 @@ def _parse_metadata_changes(self, issue_body): # For some reason, the issue body has \r\n line endings issue_body = issue_body.replace("\r", "") - try: - if ( - match := re.search(r"```json\n(.*?)\n```", issue_body, re.DOTALL) - ) is not None: - return json.loads(match[1]) - except Exception as e: - print(f"Error parsing metadata changes: {e}", file=sys.stderr) + if ( + match := re.search(r"```json\n(.*?)\n```", issue_body, re.DOTALL) + ) is not None: + return json.loads(match[1]) return None @@ -119,19 +116,20 @@ def _apply_changes_to_xml(self, xml_repo_path, anthology_id, changes): raise Exception(f"-> Paper not found in XML file: {xml_repo_path}") # Apply changes to XML - for key in ["title", "abstract"]: - if key in changes: - node = paper_node.find(key) - if node is None: - node = make_simple_element(key, parent=paper_node) - # set the node to the structure of the new string - try: - new_node = ET.fromstring(f"<{key}>{changes[key]}") - except ET.XMLSyntaxError as e: - print(f"Error parsing XML for key {key}: {e}", file=sys.stderr) - raise e - # replace the current node with the new node in the tree - paper_node.replace(node, new_node) + if paper_id != "0": + # frontmatter has no title or abstract + for key in ["title", "abstract"]: + if key in changes: + node = paper_node.find(key) + if node is None: + node = make_simple_element(key, parent=paper_node) + # set the node to the structure of the new string + try: + new_node = ET.fromstring(f"<{key}>{changes[key]}") + except ET.XMLSyntaxError as e: + raise e + # replace the current node with the new node in the tree + paper_node.replace(node, new_node) if "authors" in changes: """ @@ -234,7 +232,12 @@ def process_metadata_issues( ) # Parse metadata changes from issue - json_block = self._parse_metadata_changes(issue.body) + try: + json_block = self._parse_metadata_changes(issue.body) + except json.decoder.JSONDecodeError as e + print(f"Failed to parse JSON block in #{issue.number}: {e}", file=sys.stderr) + json_block = None + if not json_block: if close_old_issues: # for old issues, filed without a JSON block, we append a comment @@ -267,7 +270,7 @@ def process_metadata_issues( continue else: if verbose: - print("-> Skipping (no JSON block)", file=sys.stderr) + print(f"-> Skipping #{issue.number} (no JSON block)", file=sys.stderr) continue self.stats["relevant_issues"] += 1 @@ -294,8 +297,7 @@ def process_metadata_issues( xml_repo_path, anthology_id, json_block ) except Exception as e: - if verbose: - print(e, file=sys.stderr) + print(f"Failed to apply changes to #{issue.number}: {e}", file=sys.stderr) continue if tree: @@ -312,7 +314,7 @@ def process_metadata_issues( # Commit changes self.local_repo.index.add([xml_repo_path]) self.local_repo.index.commit( - f"Processed metadata corrections (closes #{issue.number})" + f"Process metadata corrections for {anthology_id} (closes #{issue.number})" ) closed_issues.append(issue)