From f105282180df89272a5e1bf3f7189a0bfc1c7b10 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 14 Aug 2024 16:52:14 -0400 Subject: [PATCH 1/3] [FIX] Find non-pdf tech notes (without grabbing non-notes...) --- datman/utils.py | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/datman/utils.py b/datman/utils.py index b7de2c1e..84364a15 100644 --- a/datman/utils.py +++ b/datman/utils.py @@ -1334,34 +1334,35 @@ def make_zip(source_dir, dest_zip): def find_tech_notes(folder): - """Find any technotes located within a folder. - - If only one PDF is found it is assumed to be the tech notes. If multiple - are found, unless one contains the string 'TechNotes', the first pdf is - guessed to be the tech notes. - - Args: - folder (str): A full path to a folder to search. - - Returns: - path (str): The full path to the tech notes or an empty string if - none have been found. - """ - pdf_list = [] - for root, dirs, files in os.walk(folder): + exts = ["pdf", "png", "jpg"] + notes = [] + for root, _, files in os.walk(folder): for fname in files: - if ".pdf" in fname: - pdf_list.append(os.path.join(root, fname)) + if any([fname.endswith(ext) for ext in exts]): + notes.append(os.path.join(root, fname)) + + if not notes: + return "" - if not pdf_list: + # find the file most likely to be the tech notes + scored = [] + for item in notes: + score = 0 + if "tech" in item.lower(): + score += 3 + if "note" in item.lower(): + score += 2 + if item.endswith("pdf"): + score += 1 + scored.append((item, score)) + + result = sorted(scored, key=lambda x: x[1], reverse=True) + + if result[0][1] == 0: + # No files scored as likely to be the notes return "" - elif len(pdf_list) > 1: - for pdf in pdf_list: - file_name = os.path.basename(pdf) - if 'technotes' in file_name.lower(): - return pdf - return pdf_list[0] + return result[0][0] def read_json(path): From ea8f5025ba9d92b49f44245140bb246bce7914c9 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 14 Aug 2024 20:19:44 -0400 Subject: [PATCH 2/3] [DOC] Readd doc string to find_tech_notes --- datman/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datman/utils.py b/datman/utils.py index 84364a15..7d225b86 100644 --- a/datman/utils.py +++ b/datman/utils.py @@ -1334,6 +1334,15 @@ def make_zip(source_dir, dest_zip): def find_tech_notes(folder): + """Find any technotes located within a given folder. + + Args: + folder (str): A full path to a folder to search. + + Returns: + path (str): The full path to the tech notes or an empty string if + none have been found. + """ exts = ["pdf", "png", "jpg"] notes = [] for root, _, files in os.walk(folder): From e4de283cfb3cc2ecd21650ce387b553f1fd0745a Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 14 Aug 2024 20:20:47 -0400 Subject: [PATCH 3/3] [TEST] Add tests to catch issue with missed tech notes --- tests/test_utils.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e49560b6..ba7c64af 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -125,6 +125,7 @@ def test_catches_invalid_site_in_kcni_id(self, dm_config): class FindTechNotes(unittest.TestCase): notes = "TechNotes.pdf" + jpg_notes = "TechNotes.jpg" other_pdf1 = "SomeFile.pdf" other_pdf2 = "otherFile.pdf" path = "./resources" @@ -136,7 +137,7 @@ def test_doesnt_crash_with_broken_path(self): @patch('os.walk', autospec=True) def test_doesnt_crash_when_no_tech_notes_exist(self, mock_walk): mock_walk.return_value = self.__mock_file_system( - randint(1, 10), add_notes=False) + randint(1, 10), add_pdf_notes=False) found_file = utils.find_tech_notes(self.path) @@ -163,20 +164,55 @@ def test_returns_tech_notes_when_multiple_pdfs_present(self, mock_walk): def test_first_file_returned_when_multiple_pdfs_but_no_tech_notes( self, mock_walk): mock_walk.return_value = self.__mock_file_system( - randint(1, 10), add_notes=False, add_pdf=True) + randint(1, 10), add_pdf_notes=False, add_pdf=True) found_file = utils.find_tech_notes(self.path) assert os.path.basename(found_file) == self.other_pdf1 - def __mock_file_system(self, depth, add_notes=True, add_pdf=False): + @patch('os.walk', autospec=True) + def test_finds_non_pdf_tech_notes(self, mock_walk): + mock_walk.return_value = self.__mock_file_system( + randint(1, 10), add_pdf_notes=False, add_pdf=True, + add_jpg_notes=True) + + found_file = utils.find_tech_notes(self.path) + + assert os.path.basename(found_file) == self.jpg_notes + + @patch('os.walk', autospec=True) + def test_doesnt_pick_similarly_named_file(self, mock_walk): + mock_walk.return_value = self.__mock_file_system( + randint(1, 10), add_pdf_notes=False, add_pdf=True, + add_jpg_notes=True, add_jpgs=True) + + found_file = utils.find_tech_notes(self.path) + + assert os.path.basename(found_file) == self.jpg_notes + + @patch('os.walk', autospec=True) + def test_prefers_pdf_notes_over_other_formats(self, mock_walk): + mock_walk.return_value = self.__mock_file_system( + randint(1, 10), add_pdf_notes=True, add_jpg_notes=True, + add_pdf=True, add_jpgs=True) + + found_file = utils.find_tech_notes(self.path) + + assert os.path.basename(found_file) == self.notes + + def __mock_file_system(self, depth, add_pdf_notes=True, add_jpgs=False, + add_jpg_notes=False, add_pdf=False): walk_list = [] cur_path = self.path file_list = ["file1.txt", "file2"] if add_pdf: file_list.extend([self.other_pdf1, self.other_pdf2]) - if add_notes: + if add_jpg_notes: + file_list.extend([self.jpg_notes]) + if add_pdf_notes: file_list.append(self.notes) + if add_jpgs: + file_list.extend(['SpiralView.jpg', 'RANotes.jpg']) for num in range(1, depth + 1): cur_path = cur_path + "/dir{}".format(num) dirs = ("dir{}".format(num + 1), )