diff --git a/delivery/__init__.py b/delivery/__init__.py index 24cc3c7..54f1093 100644 --- a/delivery/__init__.py +++ b/delivery/__init__.py @@ -1 +1 @@ -__version__ = "3.2.2-rc1" +__version__ = "3.2.2-rc2" diff --git a/delivery/repositories/project_repository.py b/delivery/repositories/project_repository.py index 01118c0..1ede6d3 100644 --- a/delivery/repositories/project_repository.py +++ b/delivery/repositories/project_repository.py @@ -140,19 +140,18 @@ def project_from_dir(d): project_name = os.path.basename(d) project_files = [] - # gather report files for the project from the runfolder try: project_files.extend( self.get_report_files( project_path, project_name, runfolder, + checksums=runfolder.checksums ) ) except ProjectReportNotFoundException as ex: log.warning(ex) - # gather the README to include with the project try: project_files.extend( self.get_project_readme( @@ -164,17 +163,6 @@ def project_from_dir(d): except ProjectReportNotFoundException as ex: log.warning(ex) - # gather metadata files for the project from the runfolder - try: - project_files.extend( - self.get_metadata_files( - project_name=project_name, - runfolder=runfolder - ) - ) - except ProjectReportNotFoundException as ex: - log.warning(ex) - samples = self.sample_repository.get_samples( project_path, project_name, @@ -210,12 +198,7 @@ def project_from_dir(d): f"Did not find {self.PROJECTS_DIR} folder for: {runfolder.name}" ) - def get_report_files( - self, - project_path, - project_name, - runfolder - ): + def get_report_files(self, project_path, project_name, runfolder, checksums=None): """ Gets the paths to files associated with the supplied project's MultiQC report. This report is fetched from seqreports unless there is a MultiQC report directly under the project's @@ -225,6 +208,8 @@ def get_report_files( :param project_path: the path to the project folder :param project_name: the name of the project :param runfolder: a Runfolder instance representing the runfolder containing the project + :param checksums: a dict with pre-calculated checksums for files. paths are keys and the + corresponding checksum is the value :return: a list of RunfolderFile objects representing project report files :raises ProjectReportNotFoundException: if no MultiQC report was found for the project """ @@ -264,7 +249,7 @@ def get_report_files( filesystem_service=self.filesystem_service, metadata_service=self.metadata_service, base_path=report_path, - checksums=runfolder.checksums + checksums=checksums ) ) except FileNotFoundError: @@ -291,6 +276,7 @@ def get_project_readme( self, project_name, runfolder, + checksums=None, with_undetermined=False ): """ @@ -298,9 +284,11 @@ def get_project_readme( :param project_name: the name of the project :param runfolder: a Runfolder instance representing the runfolder containing the project + :param checksums: a dict with pre-calculated checksums for files. paths are keys and the + corresponding checksum is the value :param with_undetermined: if True, the README should refer to data that includes undetermined reads - :return: a list containing a RunfolderFile object representing the README + :return: the path to the README file wrapped in a list :raises ProjectReportNotFoundException: if the README was not found """ log.info(f"Organising README for {project_name}") @@ -318,7 +306,7 @@ def get_project_readme( filesystem_service=self.filesystem_service, metadata_service=self.metadata_service, base_path=self.filesystem_service.dirname(readme_file), - checksums=runfolder.checksums + checksums=checksums ) ] except FileNotFoundError: @@ -327,42 +315,6 @@ def get_project_readme( f"{project_name}" ) - def get_metadata_files( - self, - project_name, - runfolder - ): - """ - Gather the metadata files to be included with the project on delivery - - :param project_name: the name of the project - :param runfolder: a Runfolder instance representing the runfolder containing the project - :return: a list of RunfolderFile objects representing the gathered metadata files - :raises ProjectReportNotFoundException: if the README was not found - """ - log.info(f"Fetching metadata files for {project_name}") - metadata_files = [ - RunfolderFile.create_object_from_path( - file_path=metafile, - runfolder_path=runfolder.path, - filesystem_service=self.filesystem_service, - metadata_service=self.metadata_service, - base_path=runfolder.path, - checksums=runfolder.checksums - ) - for metafile in self.filesystem_service.list_files_recursively( - os.path.join( - runfolder.path, - "metadata" - ) - ) if os.path.basename(metafile).startswith(project_name) - ] - if not metadata_files: - raise ProjectReportNotFoundException( - f"metadata files could not be found for {project_name}" - ) - return metadata_files - def is_sample_in_project(self, project, sample_project, sample_id, sample_lane): """ Checks if a matching sample is present in the project. diff --git a/delivery/repositories/runfolder_repository.py b/delivery/repositories/runfolder_repository.py index a89616d..98d30f2 100644 --- a/delivery/repositories/runfolder_repository.py +++ b/delivery/repositories/runfolder_repository.py @@ -249,8 +249,4 @@ def get_project_report_files(self, runfolder, project): :param project: an instance of Project :return: a tuple with the path to the directory containing the report and a list of paths to the report files """ - return self.project_repository.get_report_files( - project_path=project.path, - project_name=project.name, - runfolder=runfolder - ) + return self.project_repository.get_report_files(project, checksums=runfolder.checksums) diff --git a/delivery/services/file_system_service.py b/delivery/services/file_system_service.py index d94822a..de2470c 100644 --- a/delivery/services/file_system_service.py +++ b/delivery/services/file_system_service.py @@ -100,14 +100,6 @@ def copy(source, dest): :param dest: :return: None """ - try: - FileSystemService.makedirs( - FileSystemService.dirname( - dest - ) - ) - except FileExistsError: - pass try: return shutil.copyfile(source, dest) except IsADirectoryError: diff --git a/delivery/services/organise_service.py b/delivery/services/organise_service.py index 25b70af..142fc9c 100644 --- a/delivery/services/organise_service.py +++ b/delivery/services/organise_service.py @@ -55,21 +55,13 @@ def organise_runfolder(self, runfolder_id, lanes, projects, force): # organise the projects and return a new Runfolder instance organised_projects = [] for project in projects_on_runfolder: - organised_projects.append( - self.organise_project( - runfolder, - project, - organised_projects_path, - lanes - ) - ) + organised_projects.append(self.organise_project(runfolder, project, organised_projects_path, lanes)) return Runfolder( runfolder.name, runfolder.path, projects=organised_projects, - checksums=runfolder.checksums - ) + checksums=runfolder.checksums) def check_previously_organised_project(self, project, organised_projects_path, force): organised_project_path = os.path.join(organised_projects_path, project.name) @@ -87,13 +79,7 @@ def check_previously_organised_project(self, project, organised_projects_path, f self.file_system_service.mkdir(organised_projects_backup_path) self.file_system_service.rename(organised_project_path, backup_path) - def organise_project( - self, - runfolder, - project, - organised_projects_path, - lanes - ): + def organise_project(self, runfolder, project, organised_projects_path, lanes): """ Organise a project on a runfolder into its own directory and into a standard structure. If the project has already been organised, a ProjectAlreadyOrganisedException will be raised. @@ -141,7 +127,7 @@ def organise_project( def organise_project_file(self, project_file, organised_project_path): """ - Find and copy the project-associated files to the organised project directory. + Find and symlink or copy the project-associated files to the organised project directory. :param project_file: a RunfolderFile instance representing the project-associated file before organisation diff --git a/tests/integration_tests/test_integration.py b/tests/integration_tests/test_integration.py index a5ec4d9..cbf68b4 100644 --- a/tests/integration_tests/test_integration.py +++ b/tests/integration_tests/test_integration.py @@ -90,7 +90,8 @@ def _verify_checksum(file_path, expected_checksum): MetadataService.hash_file(samplesheet_file)) for project_file in project.project_files: - relative_path = os.path.relpath(project_file.file_path, project_file.base_path) + project_file_base = os.path.dirname(project.project_files[0].file_path) + relative_path = os.path.relpath(project_file.file_path, project_file_base) organised_project_file_path = os.path.join(organised_path, relative_path) self.assertEqual( os.path.basename(organised_project_file_path), diff --git a/tests/resources/runfolders/readme/README.md b/tests/resources/runfolders/readme/README.md deleted file mode 120000 index d8c3d7e..0000000 --- a/tests/resources/runfolders/readme/README.md +++ /dev/null @@ -1 +0,0 @@ -../../readme/README.md \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 63168fd..e508516 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -20,10 +20,8 @@ def __init__(self): def spawn_callback(self, f, **args): f(**args) - class TestUtils: DUMMY_CONFIG = {"monitored_directory": "/foo"} - README_DIRECTORY = "/bar" class DummyConfig: @@ -31,7 +29,6 @@ class DummyConfig: def __getitem__(self, key): return TestUtils.DUMMY_CONFIG[key] - fake_directories = ["160930_ST-E00216_0111_BH37CWALXX", "160930_ST-E00216_0112_BH37CWALXX"] fake_projects = ["ABC_123", "DEF_456", "GHI_789"] @@ -123,14 +120,7 @@ def runfolder_project( runfolder_path=runfolder.path, runfolder_name=runfolder.name ) - project_files = project_report_files(project, next(report_type)) - project_files.append( - project_readme_file() - ) - project_files.extend( - project_metadata_files(project) - ) - project.project_files = project_files + project.project_files = project_report_files(project, next(report_type)) sample_names = sample_name_generator() # a straight-forward sample with files on one lane @@ -282,50 +272,6 @@ def project_report_files(project, report_type): ] -def project_readme_file(): - readme_file = os.path.join( - "tests", - "resources", - "readme", - "README.md" - ) - return RunfolderFile( - file_path=readme_file, - base_path=os.path.dirname(readme_file), - file_checksum=f"checksum-for-{readme_file}" - ) - - -def project_metadata_files(project, file_types=None): - file_suffixes = [ - "run", - "experiment" - ] - file_types = file_types or [ - "xml", - "json" - ] - metadata_path = os.path.join( - project.runfolder_path, - "metadata" - ) - metadata_files = [] - for file_type in file_types: - for file_suffix in file_suffixes: - metadata_file = os.path.join( - metadata_path, - f"{project.name}-{file_suffix}.{file_type}" - ) - metadata_files.append( - RunfolderFile( - file_path=metadata_file, - base_path=project.runfolder_path, - file_checksum=f"checksum-for-{metadata_file}" - ) - ) - return metadata_files - - _runfolder1 = Runfolder(name="160930_ST-E00216_0111_BH37CWALXX", path="/foo/160930_ST-E00216_0111_BH37CWALXX") @@ -360,6 +306,7 @@ def project_metadata_files(project, file_types=None): FAKE_RUNFOLDERS = [_runfolder1, _runfolder2] UNORGANISED_RUNFOLDER = unorganised_runfolder() +README_DIRECTORY = "/bar" def assert_eventually_equals(self, timeout, f, expected, delay=0.1): diff --git a/tests/unit_tests/repositories/test_project_repository.py b/tests/unit_tests/repositories/test_project_repository.py index aee7c49..8539879 100644 --- a/tests/unit_tests/repositories/test_project_repository.py +++ b/tests/unit_tests/repositories/test_project_repository.py @@ -13,7 +13,7 @@ from delivery.services.file_system_service import FileSystemService from delivery.services.metadata_service import MetadataService -from tests.test_utils import UNORGANISED_RUNFOLDER +from tests.test_utils import README_DIRECTORY, UNORGANISED_RUNFOLDER class TestGeneralProjectRepository(unittest.TestCase): @@ -41,12 +41,12 @@ def setUp(self) -> None: self.sample_repository = mock.create_autospec(RunfolderProjectBasedSampleRepository) self.filesystem_service = mock.create_autospec(FileSystemService) self.metadata_service = mock.create_autospec(MetadataService) - self.runfolder = UNORGANISED_RUNFOLDER self.project_repository = UnorganisedRunfolderProjectRepository( sample_repository=self.sample_repository, - readme_directory=self.runfolder.path, + readme_directory=README_DIRECTORY, filesystem_service=self.filesystem_service, metadata_service=self.metadata_service) + self.runfolder = UNORGANISED_RUNFOLDER def test_get_report_files(self):