Skip to content

Commit

Permalink
Merge pull request #106 from MJedr/fix-refextract
Browse files (browse the repository at this point in the history)
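Fix extract_texkeys_and_urls_from_pdf: decode ByteStringObject destination keys as UTF-8 before matching them against re_reference_in_dest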
  • Loading branch information
MJedr authored Aug 7, 2023
2 parents 16e0a8e + fb6919e commit ca54f04
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions refextract/references/pdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,6 +24,7 @@
import logging

from PyPDF2 import PdfFileReader
from PyPDF2.generic import ByteStringObject

from .regexs import re_reference_in_dest

Expand Down Expand Up @@ -57,9 +58,12 @@ def extract_texkeys_and_urls_from_pdf(pdf_file):
LOGGER.debug(u"PDF: Internal PyPDF2 error, no TeXkeys returned.")
return []
# not all named destinations point to references
refs = [
dest for dest in destinations.items() if re_reference_in_dest.match(dest[0])
]
refs = []
for destination in destinations.items():
destination_key = destination[0].decode("utf-8") if isinstance(destination[0], ByteStringObject) else destination[0]
match = re_reference_in_dest.match(destination_key)
if match:
refs.append(destination)
try:
if _destinations_in_two_columns(pdf, refs):
LOGGER.debug(u"PDF: Using two-column layout")
Expand Down

0 comments on commit ca54f04

Please sign in to comment.