diff --git a/dags/elsevier/repository.py b/dags/elsevier/repository.py
index 44a2fbef..21cc4ecb 100644
--- a/dags/elsevier/repository.py
+++ b/dags/elsevier/repository.py
@@ -5,7 +5,8 @@
 from common.repository import IRepository
 from common.s3_service import S3Service
 from common.utils import find_extension
+from common.exceptions import UnknownFileExtension
 
 
 class ElsevierRepository(IRepository):
     ZIPED_DIR: str = "raw/"
@@ -28,13 +29,19 @@ def find_all(self, filenames_to_process=None):
             if filenames_to_process
             else self.__find_all_extracted_files()
         )
         for file in filenames:
             last_part = os.path.basename(file)
             filename_without_extension = last_part.split(".")[0]
+            try:
+                extension = find_extension(last_part)
+            except (IndexError, UnknownFileExtension):
+                # Skip only the unrecognised file so the rest of the batch is
+                # still grouped, and record which file was dropped.
+                self.logger.error(f"Skipping file with unknown extension: {file}")
+                continue
             if filename_without_extension not in grouped_files:
                 grouped_files[filename_without_extension] = {}
-            extension = find_extension(last_part)
             grouped_files[filename_without_extension][extension] = file
         return list(grouped_files.values())
 
     def get_by_id(self, id: str):