diff --git a/scoap3/articles/tasks.py b/scoap3/articles/tasks.py index faa14566..097011ac 100644 --- a/scoap3/articles/tasks.py +++ b/scoap3/articles/tasks.py @@ -163,15 +163,14 @@ def check_contains_funded_by_scoap3(article): return False, "No files found for the given article." for article_file in article_files: - file_path = article_file.file.path try: - if is_string_in_pdf(file_path, "Funded by SCOAP3"): + if is_string_in_pdf(article_file, "Funded by SCOAP3"): return ( True, - f"Files contain the required text: 'Funded by SCOAP3'. File: {file_path}", + f"Files contain the required text: 'Funded by SCOAP3'. File: {article_file.file.path}", ) except FileNotFoundError: - return False, f"File not found: {file_path}" + return False, f"File not found: {article_file.file.path}" return False, "Files do not contain the required text: 'Funded by SCOAP3'" except Exception as e: diff --git a/scoap3/articles/util.py b/scoap3/articles/util.py index d0f2cb64..703e5738 100644 --- a/scoap3/articles/util.py +++ b/scoap3/articles/util.py @@ -29,9 +29,10 @@ def parse_string_to_date_object(date_string): return datetime.fromisoformat(date_string.replace("Z", "+00:00")) -def is_string_in_pdf(pdf_path, search_string): +def is_string_in_pdf(article_file, search_string): try: - document = fitz.open(pdf_path) + pdf_file = article_file.file.read() + document = fitz.open(stream=pdf_file) search_string_lower = search_string.lower() for page_num in range(document.page_count): @@ -44,6 +45,6 @@ def is_string_in_pdf(pdf_path, search_string): document.close() return False except FileNotFoundError: - raise FileNotFoundError(f"File not found: {pdf_path}") + raise FileNotFoundError(f"File not found: {article_file}") except Exception as e: raise Exception(f"An error occurred while reading the PDF: {str(e)}")