Skip to content

Commit

Permalink
Merge pull request #59 from arteria-project/revert-54-bio-2426_metadata_delivery
Browse files Browse the repository at this point in the history

Revert "organize project metadata files (#54)"
  • Loading branch information
matrulda authored Oct 15, 2024
2 parents 4150f79 + 3b13544 commit 8a65cbb
Show file tree
Hide file tree
Showing 9 changed files with 23 additions and 150 deletions.
2 changes: 1 addition & 1 deletion delivery/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.2.2-rc1"
__version__ = "3.2.2-rc2"
68 changes: 10 additions & 58 deletions delivery/repositories/project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,18 @@ def project_from_dir(d):
project_name = os.path.basename(d)
project_files = []

# gather report files for the project from the runfolder
try:
project_files.extend(
self.get_report_files(
project_path,
project_name,
runfolder,
checksums=runfolder.checksums
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather the README to include with the project
try:
project_files.extend(
self.get_project_readme(
Expand All @@ -164,17 +163,6 @@ def project_from_dir(d):
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather metadata files for the project from the runfolder
try:
project_files.extend(
self.get_metadata_files(
project_name=project_name,
runfolder=runfolder
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

samples = self.sample_repository.get_samples(
project_path,
project_name,
Expand Down Expand Up @@ -210,12 +198,7 @@ def project_from_dir(d):
f"Did not find {self.PROJECTS_DIR} folder for: {runfolder.name}"
)

def get_report_files(
self,
project_path,
project_name,
runfolder
):
def get_report_files(self, project_path, project_name, runfolder, checksums=None):
"""
Gets the paths to files associated with the supplied project's MultiQC report. This report
is fetched from seqreports unless there is a MultiQC report directly under the project's
Expand All @@ -225,6 +208,8 @@ def get_report_files(
:param project_path: the path to the project folder
:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:return: a list of RunfolderFile objects representing project report files
:raises ProjectReportNotFoundException: if no MultiQC report was found for the project
"""
Expand Down Expand Up @@ -264,7 +249,7 @@ def get_report_files(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=report_path,
checksums=runfolder.checksums
checksums=checksums
)
)
except FileNotFoundError:
Expand All @@ -291,16 +276,19 @@ def get_project_readme(
self,
project_name,
runfolder,
checksums=None,
with_undetermined=False
):
"""
Get the README to be included with the project data set.
:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:param with_undetermined: if True, the README should refer to data that includes
undetermined reads
:return: a list containing a RunfolderFile object representing the README
:return: the path to the README file wrapped in a list
:raises ProjectReportNotFoundException: if the README was not found
"""
log.info(f"Organising README for {project_name}")
Expand All @@ -318,7 +306,7 @@ def get_project_readme(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=self.filesystem_service.dirname(readme_file),
checksums=runfolder.checksums
checksums=checksums
)
]
except FileNotFoundError:
Expand All @@ -327,42 +315,6 @@ def get_project_readme(
f"{project_name}"
)

def get_metadata_files(self, project_name, runfolder):
    """
    Collect the metadata files that should accompany the project on delivery.

    Every file found recursively below the "metadata" directory of the runfolder is
    considered, and those whose file name starts with the project name are kept.

    NOTE(review): matching is a plain prefix test on the file name, so a project whose
    name is a prefix of another project's name would also pick up that project's
    files - confirm that the naming scheme rules this out.

    :param project_name: the name of the project
    :param runfolder: a Runfolder instance representing the runfolder containing the project
    :return: a list of RunfolderFile objects representing the gathered metadata files
    :raises ProjectReportNotFoundException: if no metadata files matching the project name
    were found
    """
    log.info(f"Fetching metadata files for {project_name}")

    metadata_dir = os.path.join(runfolder.path, "metadata")
    metadata_files = []
    for candidate in self.filesystem_service.list_files_recursively(metadata_dir):
        # only files named after the project belong to it
        if not os.path.basename(candidate).startswith(project_name):
            continue
        metadata_files.append(
            RunfolderFile.create_object_from_path(
                file_path=candidate,
                runfolder_path=runfolder.path,
                filesystem_service=self.filesystem_service,
                metadata_service=self.metadata_service,
                base_path=runfolder.path,
                checksums=runfolder.checksums
            )
        )

    if metadata_files:
        return metadata_files

    raise ProjectReportNotFoundException(
        f"metadata files could not be found for {project_name}"
    )

def is_sample_in_project(self, project, sample_project, sample_id, sample_lane):
"""
Checks if a matching sample is present in the project.
Expand Down
6 changes: 1 addition & 5 deletions delivery/repositories/runfolder_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,4 @@ def get_project_report_files(self, runfolder, project):
:param project: an instance of Project
:return: a tuple with the path to the directory containing the report and a list of paths to the report files
"""
return self.project_repository.get_report_files(
project_path=project.path,
project_name=project.name,
runfolder=runfolder
)
return self.project_repository.get_report_files(project, checksums=runfolder.checksums)
8 changes: 0 additions & 8 deletions delivery/services/file_system_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,6 @@ def copy(source, dest):
:param dest:
:return: None
"""
try:
FileSystemService.makedirs(
FileSystemService.dirname(
dest
)
)
except FileExistsError:
pass
try:
return shutil.copyfile(source, dest)
except IsADirectoryError:
Expand Down
22 changes: 4 additions & 18 deletions delivery/services/organise_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,13 @@ def organise_runfolder(self, runfolder_id, lanes, projects, force):
# organise the projects and return a new Runfolder instance
organised_projects = []
for project in projects_on_runfolder:
organised_projects.append(
self.organise_project(
runfolder,
project,
organised_projects_path,
lanes
)
)
organised_projects.append(self.organise_project(runfolder, project, organised_projects_path, lanes))

return Runfolder(
runfolder.name,
runfolder.path,
projects=organised_projects,
checksums=runfolder.checksums
)
checksums=runfolder.checksums)

def check_previously_organised_project(self, project, organised_projects_path, force):
organised_project_path = os.path.join(organised_projects_path, project.name)
Expand All @@ -87,13 +79,7 @@ def check_previously_organised_project(self, project, organised_projects_path, f
self.file_system_service.mkdir(organised_projects_backup_path)
self.file_system_service.rename(organised_project_path, backup_path)

def organise_project(
self,
runfolder,
project,
organised_projects_path,
lanes
):
def organise_project(self, runfolder, project, organised_projects_path, lanes):
"""
Organise a project on a runfolder into its own directory and into a standard structure. If
the project has already been organised, a ProjectAlreadyOrganisedException will be raised.
Expand Down Expand Up @@ -141,7 +127,7 @@ def organise_project(

def organise_project_file(self, project_file, organised_project_path):
"""
Find and copy the project-associated files to the organised project directory.
Find and symlink or copy the project-associated files to the organised project directory.
:param project_file: a RunfolderFile instance representing the project-associated file
before organisation
Expand Down
3 changes: 2 additions & 1 deletion tests/integration_tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def _verify_checksum(file_path, expected_checksum):
MetadataService.hash_file(samplesheet_file))

for project_file in project.project_files:
relative_path = os.path.relpath(project_file.file_path, project_file.base_path)
project_file_base = os.path.dirname(project.project_files[0].file_path)
relative_path = os.path.relpath(project_file.file_path, project_file_base)
organised_project_file_path = os.path.join(organised_path, relative_path)
self.assertEqual(
os.path.basename(organised_project_file_path),
Expand Down
1 change: 0 additions & 1 deletion tests/resources/runfolders/readme/README.md

This file was deleted.

57 changes: 2 additions & 55 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,15 @@ def __init__(self):
def spawn_callback(self, f, **args):
f(**args)


class TestUtils:
    """
    Namespace for constants shared between the tests.
    """
    # configuration mapping that DummyConfig serves its values from
    DUMMY_CONFIG = dict(monitored_directory="/foo")
    # placeholder path for a README directory
    README_DIRECTORY = "/bar"


class DummyConfig:
    """
    Minimal subscriptable object whose values are backed by TestUtils.DUMMY_CONFIG.
    """

    def __getitem__(self, key):
        """
        Look up a configuration value.
        :param key: the configuration key to look up
        :return: the value stored under the key in TestUtils.DUMMY_CONFIG
        :raises KeyError: if the key is not present in TestUtils.DUMMY_CONFIG
        """
        config = TestUtils.DUMMY_CONFIG
        return config[key]


fake_directories = ["160930_ST-E00216_0111_BH37CWALXX",
"160930_ST-E00216_0112_BH37CWALXX"]
fake_projects = ["ABC_123", "DEF_456", "GHI_789"]
Expand Down Expand Up @@ -123,14 +120,7 @@ def runfolder_project(
runfolder_path=runfolder.path,
runfolder_name=runfolder.name
)
project_files = project_report_files(project, next(report_type))
project_files.append(
project_readme_file()
)
project_files.extend(
project_metadata_files(project)
)
project.project_files = project_files
project.project_files = project_report_files(project, next(report_type))
sample_names = sample_name_generator()

# a straight-forward sample with files on one lane
Expand Down Expand Up @@ -282,50 +272,6 @@ def project_report_files(project, report_type):
]


def project_readme_file():
    """
    Build a RunfolderFile describing the README test resource. Only the path is
    constructed here, the file itself is not read.
    :return: a RunfolderFile whose file_path is tests/resources/readme/README.md, whose
    base_path is the directory holding that file and whose checksum is a placeholder
    string derived from the file path
    """
    readme_path = os.path.join("tests", "resources", "readme", "README.md")
    readme_dir = os.path.dirname(readme_path)
    placeholder_checksum = f"checksum-for-{readme_path}"
    return RunfolderFile(
        file_path=readme_path,
        base_path=readme_dir,
        file_checksum=placeholder_checksum
    )


def project_metadata_files(project, file_types=None):
    """
    Build RunfolderFile objects for the metadata files expected for a project. The files
    are named "<project name>-<suffix>.<file type>", where suffix is "run" or
    "experiment", and are placed in the "metadata" directory below the project's runfolder.
    :param project: the project to build metadata files for, its name and runfolder_path
    attributes are used
    :param file_types: the file extensions to build files for, defaults to xml and json if
    omitted or empty
    :return: a list of RunfolderFile objects, ordered by file type and then by suffix, with
    the runfolder path as base_path and a placeholder checksum derived from the file path
    """
    suffixes = ("run", "experiment")
    if not file_types:
        file_types = ["xml", "json"]
    metadata_dir = os.path.join(project.runfolder_path, "metadata")

    def _as_runfolder_file(path):
        # wrap a metadata file path together with its placeholder checksum
        return RunfolderFile(
            file_path=path,
            base_path=project.runfolder_path,
            file_checksum=f"checksum-for-{path}"
        )

    return [
        _as_runfolder_file(
            os.path.join(metadata_dir, f"{project.name}-{suffix}.{file_type}")
        )
        for file_type in file_types
        for suffix in suffixes
    ]


_runfolder1 = Runfolder(name="160930_ST-E00216_0111_BH37CWALXX",
path="/foo/160930_ST-E00216_0111_BH37CWALXX")

Expand Down Expand Up @@ -360,6 +306,7 @@ def project_metadata_files(project, file_types=None):

FAKE_RUNFOLDERS = [_runfolder1, _runfolder2]
UNORGANISED_RUNFOLDER = unorganised_runfolder()
README_DIRECTORY = "/bar"


def assert_eventually_equals(self, timeout, f, expected, delay=0.1):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit_tests/repositories/test_project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from delivery.services.file_system_service import FileSystemService
from delivery.services.metadata_service import MetadataService

from tests.test_utils import UNORGANISED_RUNFOLDER
from tests.test_utils import README_DIRECTORY, UNORGANISED_RUNFOLDER


class TestGeneralProjectRepository(unittest.TestCase):
Expand Down Expand Up @@ -41,12 +41,12 @@ def setUp(self) -> None:
self.sample_repository = mock.create_autospec(RunfolderProjectBasedSampleRepository)
self.filesystem_service = mock.create_autospec(FileSystemService)
self.metadata_service = mock.create_autospec(MetadataService)
self.runfolder = UNORGANISED_RUNFOLDER
self.project_repository = UnorganisedRunfolderProjectRepository(
sample_repository=self.sample_repository,
readme_directory=self.runfolder.path,
readme_directory=README_DIRECTORY,
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service)
self.runfolder = UNORGANISED_RUNFOLDER

def test_get_report_files(self):

Expand Down

0 comments on commit 8a65cbb

Please sign in to comment.