diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c8f360e..61b6b84 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -18,6 +18,15 @@ jobs:
         python -m pip install --upgrade pip
         pip install wheel
         pip install -r requirements/dev .
+        npm install -g pajv
     - name: Launch tests
       run: |
         nosetests ./tests
+    - name: Lint config files
+      run: |
+        yamllint -c .yamllint.yml config/organise_configs/*.yml
+    - name: Validate config files
+      run: >
+        pajv
+        -s config/organise_configs/schema.json
+        -d config/organise_configs/*.yml
diff --git a/.yamllint.yml b/.yamllint.yml
new file mode 100644
index 0000000..ae48435
--- /dev/null
+++ b/.yamllint.yml
@@ -0,0 +1,6 @@
+extends: default
+
+rules:
+  line-length:
+    max: 80
+    level: warning
diff --git a/config/app.config b/config/app.config
index d11ff09..5877df6 100644
--- a/config/app.config
+++ b/config/app.config
@@ -10,4 +10,5 @@ staging_directory: /tmp/
 project_links_directory: /tmp/
 dds_conf:
   log_path: dds.log
+organise_config_dir: config/organise_configs/
 port: 9999
diff --git a/config/organise_config/organise_runfolder.yml b/config/organise_config/organise_runfolder.yml
deleted file mode 100644
index 78c1cb2..0000000
--- a/config/organise_config/organise_runfolder.yml
+++ /dev/null
@@ -1,43 +0,0 @@
----
-#Input: runfolder_name
-
-#Currently named runfolderpath and runfolder to harmonize with rnaseq config by Monika, but for specific runfolder delivery, technically, these could be "merged" to one.
-runfolderpath: /proj/ngi2016001/incoming
-runfolder: /
-organised: /Projects/
-
-#current path is /proj/ngi2016001/incoming//Projects///
-#The following is based on that we are to keep the same directory structure as above.
-files_to_organize: - - #The fastq files - - source: /Unaligned/* - destination: /(?P=projectid)//Sample_(?P=samplename)/ - options: - required: True - symlink: True - regexp: (?P[\w-]+)/Sample_(?P[\w-]+)/(?P=samplename)_S(?P\d+)_L(?P\d+)_R(?P\d)_001.fastq.gz - - #The MultiQC files - - source: /seqreports/project/* - destination: /(?P=projectid)// - options: - required: True - symlink: True - regexp: (?P[\w-]+)/(?P\w+)_(?P=projectid)_multiqc_report[\w.-]+ - - - -#what we are lacking, and what might need to be created outside the config is: -#1. checksums.md5 -#2. Encrypted samplesheet -#As far as I know, these don't exist prior to organization. - -# #hypothethical undetermined, needs an input of lane(s) connected to project -# #We will not include this at this point, as it requires additional input. Will maybe end up in its own config or something. -# - source: /Unaligned/Undetermined/Undetermined_S0_L_R_001.fastq.gz -# destination: /Unaligned//Undetermined/ -# options: -# required: False -# symlink: True - diff --git a/config/organise_configs/runfolder.yml b/config/organise_configs/runfolder.yml new file mode 100644 index 0000000..dee5d93 --- /dev/null +++ b/config/organise_configs/runfolder.yml @@ -0,0 +1,46 @@ +--- +# Input: runfolder_name + +# Currently named runfolderpath and runfolder to harmonize with rnaseq config +# by Monika, but for specific runfolder delivery, technically, these could be +# "merged" to one. +variables: + runfolderpath: /proj/ngi2016001/incoming + runfolder: "{runfolderpath}/{runfolder_name}" + organised: "{runfolder}/Projects/" + +# current path is +# /proj/ngi2016001/incoming//Projects/// The +# following is based on that we are to keep the same directory structure as +# above. 
+files_to_organise: + + # The fastq files + - source: "{runfolder}/Unaligned/*" + destination: "{organised}/{projectid}/{runfolder_name}/Sample_{samplename}/" + options: + required: true + link_type: softlink + filter: (?P[\w-]+)/Sample_(?P[\w-]+)/(?P=samplename)_S(?P\d+)_L(?P\d+)_R(?P\d)_001.fastq.gz # yamllint disable-line + + # The MultiQC files + - source: "{runfolder}/seqreports/project/*" + destination: "{organised}/{projectid}//" + options: + required: true + link_type: softlink + filter: (?P[\w-]+)/\w+_(?P=projectid)_multiqc_report[\w.-]+ + +# what we are lacking, and what might need to be created outside the config is: +# 1. checksums.md5 +# 2. Encrypted samplesheet +# As far as I know, these don't exist prior to organization. + +# hypothethical undetermined, needs an input of lane(s) connected to project +# We will not include this at this point, as it requires additional input. Will +# maybe end up in its own config or something. +# - source: /Unaligned/Undetermined/Undetermined_S0_L_R_001.fastq.gz +# destination: /Unaligned//Undetermined/ +# options: +# required: False +# symlink: True diff --git a/config/organise_configs/schema.json b/config/organise_configs/schema.json new file mode 100644 index 0000000..289ff9d --- /dev/null +++ b/config/organise_configs/schema.json @@ -0,0 +1,34 @@ +{ + "type": "object", + "required": ["variables", "files_to_organise"], + "properties": { + "variables": { + "type": "object", + "additionalProperties": {"type": "string"} + }, + "files_to_organise": { + "type": "array", + "items": { + "type": "object", + "required": ["source", "destination", "options"], + "properties": { + "source": {"type": "string"}, + "destination": {"type": "string"}, + "options": { + "type": "object", + "properties": { + "required": {"type": "boolean"}, + "link_type": { + "type": "string", + "enum": ["softlink", "hardlink", "copy"] + }, + "filter": {"type": "string"} + }, + "additionalProperties": false + } + } + } + } + }, + 
"additionalProperties": false
+}
diff --git a/delivery/app.py b/delivery/app.py
index 6b4aa7a..626d4d8 100644
--- a/delivery/app.py
+++ b/delivery/app.py
@@ -19,7 +19,11 @@
 from delivery.handlers.delivery_handlers import DeliverByStageIdHandler, DeliveryStatusHandler
 from delivery.handlers.staging_handlers import StagingRunfolderHandler, StagingHandler,\
     StageGeneralDirectoryHandler, StagingProjectRunfoldersHandler
-from delivery.handlers.organise_handlers import OrganiseRunfolderHandler
+from delivery.handlers.organise_handlers import \
+    OrganiseRunfolderHandler, \
+    OrganiseProjectHandler, \
+    OrganiseProjectAnalysisHandler, \
+    OrganiseRunfolderConfigHandler
 from delivery.repositories.runfolder_repository import FileSystemBasedRunfolderRepository, \
     FileSystemBasedUnorganisedRunfolderRepository
 
@@ -57,9 +61,22 @@ def routes(**kwargs):
         url(r"/api/1.0/project/([^/]+)/best_practice_samples$",
             BestPracticeProjectSampleHandler, name="best_practice_samples", kwargs=kwargs),
 
+        # I'm keeping this endpoint for backward compatibility but we should be
+        # able to remove it once we have updated our workflows to use the new
+        # endpoints /AC230510
         url(r"/api/1.0/organise/runfolder/([^/]+)", OrganiseRunfolderHandler,
             name="organise_runfolder", kwargs=kwargs),
 
+        url(r"/api/1.0/organise/delivery/runfolder/([^/]+)",
+            OrganiseRunfolderConfigHandler,
+            name="organise_runfolder_config", kwargs=kwargs),
+        url(r"/api/1.0/organise/delivery/project/([^/]+)/([^/]+)",
+            OrganiseProjectAnalysisHandler,
+            name="organise_project", kwargs=kwargs),
+        url(r"/api/1.0/organise/delivery/project/([^/]+)",
+            OrganiseProjectHandler,
+            name="organise_project_custom", kwargs=kwargs),
+
         url(r"/api/1.0/stage/project/runfolders/(.+)", StagingProjectRunfoldersHandler,
             name="stage_multiple_runfolders_one_project", kwargs=kwargs),
         url(r"/api/1.0/stage/runfolder/(.+)", StagingRunfolderHandler,
diff --git a/delivery/handlers/__init__.py b/delivery/handlers/__init__.py
index 3dc7c1c..4434f55 100644
---
a/delivery/handlers/__init__.py
+++ b/delivery/handlers/__init__.py
@@ -1,10 +1,42 @@
-
+import functools
+
+import jsonschema
 
 # Status codes
 
 OK = 200
 ACCEPTED = 202
 NO_CONTENT = 204
+BAD_REQUEST = 400
 FORBIDDEN = 403
 NOT_FOUND = 404
 INTERNAL_SERVER_ERROR = 500
+
+
+def exception_handler(postget_request):
+    """
+    Decorator for a handler's request method (e.g. `post`) that converts
+    known exceptions into HTTP error responses.
+
+    The exception message is written to the response body and the status
+    code is set to NOT_FOUND for FileNotFoundError, FORBIDDEN for
+    PermissionError, INTERNAL_SERVER_ERROR for RuntimeError and BAD_REQUEST
+    for jsonschema.ValidationError. Other exceptions are not caught.
+    """
+    @functools.wraps(postget_request)
+    def wrapper(self, *args, **kwargs):
+        try:
+            postget_request(self, *args, **kwargs)
+        except FileNotFoundError as file_not_found_error:
+            self.set_status(NOT_FOUND)
+            self.write(str(file_not_found_error))
+        except PermissionError as permission_error:
+            self.set_status(FORBIDDEN)
+            self.write(str(permission_error))
+        except RuntimeError as runtime_error:
+            self.set_status(INTERNAL_SERVER_ERROR)
+            self.write(str(runtime_error))
+        except jsonschema.ValidationError as validation_error:
+            self.set_status(BAD_REQUEST)
+            self.write(str(validation_error))
+    return wrapper
diff --git a/delivery/handlers/organise_handlers.py b/delivery/handlers/organise_handlers.py
index 1e05014..89eb743 100644
--- a/delivery/handlers/organise_handlers.py
+++ b/delivery/handlers/organise_handlers.py
@@ -1,10 +1,14 @@
 import logging
+import pathlib
+import yaml
+import json
+import jsonschema
 
 from arteria.web.handlers import BaseRestHandler
 
 from delivery.exceptions import ProjectsDirNotfoundException, ChecksumFileNotFoundException, FileNameParsingException, \
     SamplesheetNotFoundException, ProjectReportNotFoundException, ProjectAlreadyOrganisedException
-from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN
+from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN, exception_handler
 
 log = logging.getLogger(__name__)
 
@@ -81,3 +85,108 @@ def post(self, runfolder_id):
         except FileNameParsingException as e:
             log.error(str(e), exc_info=e)
             self.set_status(INTERNAL_SERVER_ERROR, reason=str(e))
+
+
+class OrganiseProjectAnalysisHandler(BaseOrganiseHandler):
+    """
+    Handler class for organizing a project after analysis in preparation for
+    staging and delivery.
+    """
+
+    def initialize(self, **kwargs):
+        self.config = kwargs["config"]
+        self.organise_service = kwargs["organise_service"]
+
+    @exception_handler
+    def post(self, analysis_pipeline, project):
+        organise_config_dir = pathlib.Path(self.config["organise_config_dir"])
+        organise_config_path = organise_config_dir / f"{analysis_pipeline}.yml"
+
+        project_path = pathlib.Path(self.config["general_project_directory"]) / project
+
+        # TODO once we have not found exceptions, use these here
+        if not organise_config_path.is_file():
+            raise FileNotFoundError(
+                f"Config file not found at {organise_config_path}")
+        with open(organise_config_path, 'r') as organise_config_file:
+            config = yaml.safe_load(organise_config_file)  # plain-data loader, works without libyaml
+        with open(organise_config_dir / "schema.json", 'r') as organise_config_schema:
+            schema = json.load(organise_config_schema)
+        jsonschema.validate(config, schema)
+
+        if not project_path.is_dir():
+            raise FileNotFoundError(
+                f"Project {project} not found at {project_path}")
+
+        self.organise_service.organise_with_config(
+            str(organise_config_path), str(project_path))
+
+
+class OrganiseProjectHandler(BaseOrganiseHandler):
+    """
+    Handler class for organizing a project from a custom config file.
+    """
+
+    def initialize(self, **kwargs):
+        self.config = kwargs["config"]
+        self.organise_service = kwargs["organise_service"]
+
+    @exception_handler
+    def post(self, project):
+        required_members = ["config"]
+        request_data = self.body_as_object(required_members=required_members)
+        organise_config_path = pathlib.Path(request_data["config"])
+        project_path = pathlib.Path(self.config["general_project_directory"]) / project
+
+        if not organise_config_path.is_file():
+            raise FileNotFoundError(
+                f"Config file not found at {organise_config_path}")
+        with open(organise_config_path, 'r') as organise_config_file:
+            config = yaml.safe_load(organise_config_file)  # path comes from the request body: never use a full-power loader
+        with open(pathlib.Path(self.config["organise_config_dir"]) / "schema.json", 'r') as organise_config_schema:
+            schema = json.load(organise_config_schema)
+        jsonschema.validate(config, schema)
+
+        if not project_path.is_dir():
+            raise FileNotFoundError(
+                f"Project {project} not found at {project_path}")
+
+        self.organise_service.organise_with_config(
+            str(organise_config_path), str(project_path))
+
+class OrganiseRunfolderConfigHandler(BaseOrganiseHandler):
+    """
+    Handler class for organizing a runfolder from a config file.
+    """
+
+    def initialize(self, **kwargs):
+        self.config = kwargs["config"]
+        self.organise_service = kwargs["organise_service"]
+
+    @exception_handler
+    def post(self, runfolder):
+        request_data = self.body_as_object()
+        organise_config_dir = pathlib.Path(self.config["organise_config_dir"])  # always needed: schema.json is read from here
+
+        try:
+            organise_config_path = pathlib.Path(request_data["config"])
+        except KeyError:  # no custom config in the request body, use the default one
+            organise_config_path = organise_config_dir / "runfolder.yml"
+
+        runfolder_path = pathlib.Path(self.config["runfolder_directory"]) / runfolder
+
+        if not organise_config_path.is_file():
+            raise FileNotFoundError(
+                f"Config file not found at {organise_config_path}")
+        with open(organise_config_path, 'r') as organise_config_file:
+            config = yaml.safe_load(organise_config_file)  # path may come from the request body: never use a full-power loader
+        with open(organise_config_dir / "schema.json", 'r') as organise_config_schema:
+            schema = json.load(organise_config_schema)
+        jsonschema.validate(config, schema)
+
+        if not runfolder_path.is_dir():
+            raise FileNotFoundError(
+                f"Runfolder {runfolder} not found at {runfolder_path}")
+
+        self.organise_service.organise_with_config(
+            str(organise_config_path), str(runfolder_path))
diff --git a/requirements/dev b/requirements/dev
index 77da1a7..6f96eec 100644
--- a/requirements/dev
+++ b/requirements/dev
@@ -1,4 +1,4 @@
 -r prod
 mock==4.0.3
 nose==1.3.7
-
+yamllint==1.32.0
diff --git a/requirements/prod b/requirements/prod
index fbec63a..ec2f1f4 100644
--- a/requirements/prod
+++ b/requirements/prod
@@ -5,4 +5,6 @@ sqlalchemy==1.4.35
 alembic==1.7.7
 enum34==1.1.10
 arteria==1.1.4
+jsonschema==4.19
 dds-cli
+PyYAML==6.0.1
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 89f8b88..730aca2 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,7 @@
 
 import csv
 import os
+import tempfile
 import time
 from collections import OrderedDict
 
@@ -20,14 +21,30 @@ def __init__(self):
     def spawn_callback(self, f, **args):
         f(**args)
 
-class TestUtils:
-    DUMMY_CONFIG = {"monitored_directory": "/foo"}
-
class DummyConfig: + def __init__(self): + # TODO add documentation + self.dummy_config = { + "db_connection_string": "sqlite:///my.db", + "alembic_path": "alembic/", + "runfolder_directory": None, + "general_project_directory": None, + "staging_directory": None, + "project_links_directory": None, + "dds_conf": {"log_path": "dds.log"}, + "organise_config_dir": None, + "port": 9999, + } + + for key in self.dummy_config: + if self.dummy_config[key] is None: + tempdir = tempfile.TemporaryDirectory() + self.__setattr__(f"_{key}", tempdir) + self.dummy_config[key] = tempdir.name def __getitem__(self, key): - return TestUtils.DUMMY_CONFIG[key] + return self.dummy_config[key] fake_directories = ["160930_ST-E00216_0111_BH37CWALXX", "160930_ST-E00216_0112_BH37CWALXX"] diff --git a/tests/unit_tests/handlers/test_delivery_organise_handlers.py b/tests/unit_tests/handlers/test_delivery_organise_handlers.py new file mode 100644 index 0000000..d4e8d63 --- /dev/null +++ b/tests/unit_tests/handlers/test_delivery_organise_handlers.py @@ -0,0 +1,163 @@ +from tornado.testing import * +from tornado.web import Application + +import tempfile +import mock +import pathlib + +from delivery.app import routes + +from tests.test_utils import DummyConfig + +class TestOrganiseHandlers(AsyncHTTPTestCase): + API_BASE = "/api/1.0" + + def get_app(self): + self.organise_service = mock.MagicMock() + self.config = DummyConfig() + + self.project = "CD-1234" + self.project_path = ( + pathlib.Path(self.config["general_project_directory"]) / self.project + ) + self.project_path.mkdir() + + self.project_config_path = ( + pathlib.Path(self.config["organise_config_dir"]) / "sarek.yml" + ) + self.project_config_path.touch() + + self.runfolder = "230721_A000_BHXX" + self.runfolder_path = ( + pathlib.Path(self.config["runfolder_directory"]) / self.runfolder + ) + self.runfolder_path.mkdir() + + self.runfolder_config_path = ( + pathlib.Path(self.config["organise_config_dir"]) / "runfolder.yml" + ) + 
self.runfolder_config_path.touch() + + return Application( + routes( + config=self.config, + organise_service=self.organise_service, + )) + + def test_project_analysis_handler(self): + analysis = "sarek" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{analysis}/{self.project}", + method="POST", body="", + ) + + self.assertEqual(response.code, 200) + self.organise_service.organise_with_config.assert_called_with( + str(self.project_config_path), str(self.project_path)) + + def test_project_analysis_handler_missing_project(self): + analysis = "sarek" + project = "AB-4567" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{analysis}/{project}", + method="POST", body="", + ) + + self.assertEqual(response.code, 404) + + def test_project_analysis_handler_missing_config_file(self): + analysis = "rnaseq" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{analysis}/{self.project}", + method="POST", body="" + ) + + self.assertEqual(response.code, 404) + + def test_project_handler(self): + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{self.project}", + method="POST", body=f'{{"config": "{self.project_config_path}"}}', + ) + + self.assertEqual(response.code, 200) + self.organise_service.organise_with_config.assert_called_with( + str(self.project_config_path), str(self.project_path)) + + def test_project_handler_missing_config(self): + project = "AB-4567" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{project}", + method="POST", body='', + ) + + self.assertEqual(response.code, 500) + + def test_project_handler_missing_project(self): + project = "AB-4567" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/project/{project}", + method="POST", body=f'{{"config": "{self.project_config_path}"}}', + ) + + self.assertEqual(response.code, 404) + + def test_project_handler_missing_config_file(self): + project_config_path = '/tmp/rnaseq.md' + + response 
= self.fetch( + f"{self.API_BASE}/organise/delivery/project/{self.project}", + method="POST", body=f'{{"config": "{project_config_path}"}}', + ) + + self.assertEqual(response.code, 404) + + def test_runfolder_handler_no_config(self): + response = self.fetch( + f"{self.API_BASE}/organise/delivery/runfolder/{self.runfolder}", + method="POST", body='{}', + ) + + self.assertEqual(response.code, 200) + self.organise_service.organise_with_config.assert_called_with( + str(self.runfolder_config_path), str(self.runfolder_path)) + + def test_runfolder_handler_config(self): + custom_runfolder_config_path = ( + pathlib.Path(self.config["organise_config_dir"]) / "custom_runfolder.yml" + ) + custom_runfolder_config_path.touch() + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/runfolder/{self.runfolder}", + method="POST", body=f'{{"config": "{custom_runfolder_config_path}"}}', + ) + + self.assertEqual(response.code, 200) + self.organise_service.organise_with_config.assert_called_with( + str(custom_runfolder_config_path), str(self.runfolder_path)) + + def test_runfolder_handler_missing_runfolder(self): + runfolder = "230721_fake_runfolder" + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/runfolder/{runfolder}", + method="POST", body='{}', + ) + + self.assertEqual(response.code, 404) + + def test_runfolder_handler_missing_config_file(self): + runfolder_config_path = '/tmp/fake_config.md' + + response = self.fetch( + f"{self.API_BASE}/organise/delivery/runfolder/{self.runfolder}", + method="POST", body=f'{{"config": "{runfolder_config_path}"}}', + ) + + self.assertEqual(response.code, 404)