Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "organize project metadata files (#54)" #59

Merged
merged 2 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 10 additions & 58 deletions delivery/repositories/project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,18 @@ def project_from_dir(d):
project_name = os.path.basename(d)
project_files = []

# gather report files for the project from the runfolder
try:
project_files.extend(
self.get_report_files(
project_path,
project_name,
runfolder,
checksums=runfolder.checksums
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather the README to include with the project
try:
project_files.extend(
self.get_project_readme(
Expand All @@ -164,17 +163,6 @@ def project_from_dir(d):
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather metadata files for the project from the runfolder
try:
project_files.extend(
self.get_metadata_files(
project_name=project_name,
runfolder=runfolder
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

samples = self.sample_repository.get_samples(
project_path,
project_name,
Expand Down Expand Up @@ -210,12 +198,7 @@ def project_from_dir(d):
f"Did not find {self.PROJECTS_DIR} folder for: {runfolder.name}"
)

def get_report_files(
self,
project_path,
project_name,
runfolder
):
def get_report_files(self, project_path, project_name, runfolder, checksums=None):
"""
Gets the paths to files associated with the supplied project's MultiQC report. This report
is fetched from seqreports unless there is a MultiQC report directly under the project's
Expand All @@ -225,6 +208,8 @@ def get_report_files(
:param project_path: the path to the project folder
:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:return: a list of RunfolderFile objects representing project report files
:raises ProjectReportNotFoundException: if no MultiQC report was found for the project
"""
Expand Down Expand Up @@ -264,7 +249,7 @@ def get_report_files(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=report_path,
checksums=runfolder.checksums
checksums=checksums
)
)
except FileNotFoundError:
Expand All @@ -291,16 +276,19 @@ def get_project_readme(
self,
project_name,
runfolder,
checksums=None,
with_undetermined=False
):
"""
Get the README to be included with the project data set.

:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:param with_undetermined: if True, the README should refer to data that includes
undetermined reads
:return: a list containing a RunfolderFile object representing the README
:return: the path to the README file wrapped in a list
:raises ProjectReportNotFoundException: if the README was not found
"""
log.info(f"Organising README for {project_name}")
Expand All @@ -318,7 +306,7 @@ def get_project_readme(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=self.filesystem_service.dirname(readme_file),
checksums=runfolder.checksums
checksums=checksums
)
]
except FileNotFoundError:
Expand All @@ -327,42 +315,6 @@ def get_project_readme(
f"{project_name}"
)

def get_metadata_files(
        self,
        project_name,
        runfolder
):
    """
    Collect the metadata files that should accompany the project on delivery.

    Files are listed recursively beneath the "metadata" directory of the runfolder and only
    those whose file name begins with the project name are kept. Each kept file is wrapped in
    a RunfolderFile with the runfolder path as its base path.

    :param project_name: the name of the project
    :param runfolder: a Runfolder instance representing the runfolder containing the project
    :return: a list of RunfolderFile objects representing the gathered metadata files
    :raises ProjectReportNotFoundException: if no metadata files matched the project name
    """
    log.info(f"Fetching metadata files for {project_name}")
    metadata_dir = os.path.join(
        runfolder.path,
        "metadata"
    )
    metadata_files = []
    for candidate in self.filesystem_service.list_files_recursively(metadata_dir):
        # only files named after the project belong to it
        if not os.path.basename(candidate).startswith(project_name):
            continue
        metadata_files.append(
            RunfolderFile.create_object_from_path(
                file_path=candidate,
                runfolder_path=runfolder.path,
                filesystem_service=self.filesystem_service,
                metadata_service=self.metadata_service,
                base_path=runfolder.path,
                checksums=runfolder.checksums
            )
        )
    if metadata_files:
        return metadata_files
    raise ProjectReportNotFoundException(
        f"metadata files could not be found for {project_name}"
    )

def is_sample_in_project(self, project, sample_project, sample_id, sample_lane):
"""
Checks if a matching sample is present in the project.
Expand Down
6 changes: 1 addition & 5 deletions delivery/repositories/runfolder_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,4 @@ def get_project_report_files(self, runfolder, project):
:param project: an instance of Project
:return: a tuple with the path to the directory containing the report and a list of paths to the report files
"""
return self.project_repository.get_report_files(
project_path=project.path,
project_name=project.name,
runfolder=runfolder
)
return self.project_repository.get_report_files(project, checksums=runfolder.checksums)
8 changes: 0 additions & 8 deletions delivery/services/file_system_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,6 @@ def copy(source, dest):
:param dest:
:return: None
"""
try:
FileSystemService.makedirs(
FileSystemService.dirname(
dest
)
)
except FileExistsError:
pass
try:
return shutil.copyfile(source, dest)
except IsADirectoryError:
Expand Down
22 changes: 4 additions & 18 deletions delivery/services/organise_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,13 @@ def organise_runfolder(self, runfolder_id, lanes, projects, force):
# organise the projects and return a new Runfolder instance
organised_projects = []
for project in projects_on_runfolder:
organised_projects.append(
self.organise_project(
runfolder,
project,
organised_projects_path,
lanes
)
)
organised_projects.append(self.organise_project(runfolder, project, organised_projects_path, lanes))

return Runfolder(
runfolder.name,
runfolder.path,
projects=organised_projects,
checksums=runfolder.checksums
)
checksums=runfolder.checksums)

def check_previously_organised_project(self, project, organised_projects_path, force):
organised_project_path = os.path.join(organised_projects_path, project.name)
Expand All @@ -87,13 +79,7 @@ def check_previously_organised_project(self, project, organised_projects_path, f
self.file_system_service.mkdir(organised_projects_backup_path)
self.file_system_service.rename(organised_project_path, backup_path)

def organise_project(
self,
runfolder,
project,
organised_projects_path,
lanes
):
def organise_project(self, runfolder, project, organised_projects_path, lanes):
"""
Organise a project on a runfolder into its own directory and into a standard structure. If
the project has already been organised, a ProjectAlreadyOrganisedException will be raised.
Expand Down Expand Up @@ -141,7 +127,7 @@ def organise_project(

def organise_project_file(self, project_file, organised_project_path):
"""
Find and copy the project-associated files to the organised project directory.
Find and symlink or copy the project-associated files to the organised project directory.

:param project_file: a RunfolderFile instance representing the project-associated file
before organisation
Expand Down
3 changes: 2 additions & 1 deletion tests/integration_tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def _verify_checksum(file_path, expected_checksum):
MetadataService.hash_file(samplesheet_file))

for project_file in project.project_files:
relative_path = os.path.relpath(project_file.file_path, project_file.base_path)
project_file_base = os.path.dirname(project.project_files[0].file_path)
relative_path = os.path.relpath(project_file.file_path, project_file_base)
organised_project_file_path = os.path.join(organised_path, relative_path)
self.assertEqual(
os.path.basename(organised_project_file_path),
Expand Down
1 change: 0 additions & 1 deletion tests/resources/runfolders/readme/README.md

This file was deleted.

57 changes: 2 additions & 55 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,15 @@ def __init__(self):
def spawn_callback(self, f, **args):
f(**args)


class TestUtils:
DUMMY_CONFIG = {"monitored_directory": "/foo"}
README_DIRECTORY = "/bar"


class DummyConfig:

def __getitem__(self, key):
return TestUtils.DUMMY_CONFIG[key]


fake_directories = ["160930_ST-E00216_0111_BH37CWALXX",
"160930_ST-E00216_0112_BH37CWALXX"]
fake_projects = ["ABC_123", "DEF_456", "GHI_789"]
Expand Down Expand Up @@ -123,14 +120,7 @@ def runfolder_project(
runfolder_path=runfolder.path,
runfolder_name=runfolder.name
)
project_files = project_report_files(project, next(report_type))
project_files.append(
project_readme_file()
)
project_files.extend(
project_metadata_files(project)
)
project.project_files = project_files
project.project_files = project_report_files(project, next(report_type))
sample_names = sample_name_generator()

# a straight-forward sample with files on one lane
Expand Down Expand Up @@ -282,50 +272,6 @@ def project_report_files(project, report_type):
]


def project_readme_file():
    """
    Build the RunfolderFile fixture for the README test resource.

    The file path is the relative path tests/resources/readme/README.md, the base path is the
    directory containing it, and the checksum is a placeholder string derived from the path.

    :return: a RunfolderFile instance representing the README
    """
    readme_dir = os.path.join("tests", "resources", "readme")
    readme_path = os.path.join(readme_dir, "README.md")
    placeholder_checksum = f"checksum-for-{readme_path}"
    return RunfolderFile(
        file_path=readme_path,
        base_path=os.path.dirname(readme_path),
        file_checksum=placeholder_checksum
    )


def project_metadata_files(project, file_types=None):
    """
    Build RunfolderFile fixtures for the metadata files of a project.

    One file is produced for every combination of file type and the suffixes "run" and
    "experiment", named "<project name>-<suffix>.<file type>" and located in the "metadata"
    directory beneath the project's runfolder path. The result is ordered by file type first
    and by suffix second. Each checksum is a placeholder string derived from the file path.

    :param project: the project to build metadata files for; its name and runfolder_path
    attributes are used
    :param file_types: the file extensions to generate; "xml" and "json" are used if this is
    None or empty
    :return: a list of RunfolderFile instances representing the metadata files
    """
    suffixes = ("run", "experiment")
    extensions = file_types or ["xml", "json"]
    metadata_dir = os.path.join(project.runfolder_path, "metadata")

    # file type is the outer loop so all suffixes of one type are adjacent in the result
    metadata_paths = [
        os.path.join(metadata_dir, f"{project.name}-{suffix}.{extension}")
        for extension in extensions
        for suffix in suffixes
    ]
    return [
        RunfolderFile(
            file_path=metadata_path,
            base_path=project.runfolder_path,
            file_checksum=f"checksum-for-{metadata_path}"
        )
        for metadata_path in metadata_paths
    ]


_runfolder1 = Runfolder(name="160930_ST-E00216_0111_BH37CWALXX",
path="/foo/160930_ST-E00216_0111_BH37CWALXX")

Expand Down Expand Up @@ -360,6 +306,7 @@ def project_metadata_files(project, file_types=None):

FAKE_RUNFOLDERS = [_runfolder1, _runfolder2]
UNORGANISED_RUNFOLDER = unorganised_runfolder()
README_DIRECTORY = "/bar"


def assert_eventually_equals(self, timeout, f, expected, delay=0.1):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit_tests/repositories/test_project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from delivery.services.file_system_service import FileSystemService
from delivery.services.metadata_service import MetadataService

from tests.test_utils import UNORGANISED_RUNFOLDER
from tests.test_utils import README_DIRECTORY, UNORGANISED_RUNFOLDER


class TestGeneralProjectRepository(unittest.TestCase):
Expand Down Expand Up @@ -41,12 +41,12 @@ def setUp(self) -> None:
self.sample_repository = mock.create_autospec(RunfolderProjectBasedSampleRepository)
self.filesystem_service = mock.create_autospec(FileSystemService)
self.metadata_service = mock.create_autospec(MetadataService)
self.runfolder = UNORGANISED_RUNFOLDER
self.project_repository = UnorganisedRunfolderProjectRepository(
sample_repository=self.sample_repository,
readme_directory=self.runfolder.path,
readme_directory=README_DIRECTORY,
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service)
self.runfolder = UNORGANISED_RUNFOLDER

def test_get_report_files(self):

Expand Down
Loading