From 1cc6ad34ac35851f3d10cdb184f5ee6e45a353ff Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 17 Jan 2017 13:41:41 +0100 Subject: [PATCH] Restrict output to specific dirs --- bcl2fastq/handlers/bcl2fastq_handlers.py | 9 +------ bcl2fastq/lib/bcl2fastq_utils.py | 23 ++++++++++++++++-- config/app.config | 9 +++---- tests/test_bcl2fastq_handlers.py | 31 +++++++++++++++--------- tests/test_utils.py | 5 ++-- 5 files changed, 49 insertions(+), 28 deletions(-) diff --git a/bcl2fastq/handlers/bcl2fastq_handlers.py b/bcl2fastq/handlers/bcl2fastq_handlers.py index 8f5ab41..787d1ea 100644 --- a/bcl2fastq/handlers/bcl2fastq_handlers.py +++ b/bcl2fastq/handlers/bcl2fastq_handlers.py @@ -119,14 +119,7 @@ def create_config_from_request(self, runfolder, request_body): bcl2fastq_version = request_data["bcl2fastq_version"] if "output" in request_data: - if self.config['allow_arbitrary_output_folder']: - output = request_data["output"] - else: - raise ArteriaUsageException("A output folder was specified by the request, but the configuration " - "does not allow arbitrary output folder to be specified.") - - if os.path.abspath(output) == os.path.abspath(runfolder_input): - raise ArteriaUsageException("The specified output path is the same as the input path. This is not allowed!") + output = request_data["output"] if "samplesheet" in request_data: samplesheet = request_data["samplesheet"] diff --git a/bcl2fastq/lib/bcl2fastq_utils.py b/bcl2fastq/lib/bcl2fastq_utils.py index df1d088..fe0e7f8 100644 --- a/bcl2fastq/lib/bcl2fastq_utils.py +++ b/bcl2fastq/lib/bcl2fastq_utils.py @@ -48,6 +48,8 @@ def __init__(self, :param nbr_of_cores: number of cores to run bcl2fastq with """ + self.general_config = general_config + self.runfolder_input = runfolder_input self.base_calls_input = runfolder_input + "/Data/Intensities/BaseCalls" @@ -297,12 +299,29 @@ def construct_command(self): """ raise NotImplementedError("Subclasses should implement this!") + def validate_output(self): + + def _parent_dir(d): + return os.path.abspath(os.path.join(d, os.path.pardir)) + + abs_path_of_allowed_dirs = map(os.path.abspath, self.config.general_config['allowed_output_folders']) + is_located_in_parent_dir = _parent_dir(self.config.output) in abs_path_of_allowed_dirs + + if is_located_in_parent_dir: + True + else: + error_string = "Invalid output directory {} was specified." \ + " Allowed dirs were: {}".format(self.config.output, + self.config.general_config['allowed_output_folders']) + log.error(error_string) + raise ArteriaUsageException(error_string) + def delete_output(self): """ - Delete the output directory if it exists + Delete the output directory if it exists and the output path is valid :return: None """ - if os.path.isdir(self.config.output): + if self.validate_output(): log.info("Found a directory at output path {}, will remove it.".format(self.config.output)) shutil.rmtree(self.config.output) diff --git a/config/app.config b/config/app.config index 2de35ec..51b0e28 100644 --- a/config/app.config +++ b/config/app.config @@ -35,9 +35,8 @@ default_output_path: /vagrant/runfolder_output/ bcl2fastq_logs_path: bcl2fastq_logs -# Setting this to True will allow the bcl2fastq service direct -# the output of bcl2fastq anywhere on the filesystem (restricted -# by normal permissions). If this is set to False (as per default) -# it will only write output to the specified `default_output_path` -allow_arbitrary_output_folder: False +# Only folders and child folder of the directories listed here will be valid as output +# directories. +allowed_output_folders: + - /vagrant/runfolder_output/ diff --git a/tests/test_bcl2fastq_handlers.py b/tests/test_bcl2fastq_handlers.py index eb38ba3..c19a4ca 100644 --- a/tests/test_bcl2fastq_handlers.py +++ b/tests/test_bcl2fastq_handlers.py @@ -32,9 +32,8 @@ def start_api_call_nbr(self): 8: "y*,7i,n*,y*", } - DUMMY_RUNNER_CONF = DummyRunnerConfig(output='/foo/bar') - dummy_config = DummyConfig() + DUMMY_RUNNER_CONF = DummyRunnerConfig(output='/foo/bar/runfolder', general_config = dummy_config) def get_app(self): return Application(routes(config=self.dummy_config)) @@ -94,22 +93,32 @@ def test_start_with_empty_body(self): def test_start_with_disallowed_output_specified(self): - self.dummy_config.DUMMY_CONFIG['allow_arbitrary_output_folder'] = False + # TODO Please note that this test is not very good, since the + # what we ideally want to test is the output specifed by the request, + # and that that gets rejected, however since the create_bcl2fastq_runner command + # is mocked away here and that is what returns the invalid output the tests is + # a bit misleading. In the future we probably want to refactor this. + # / JD 20170117 + runner_conf_with_invalid_output = DummyRunnerConfig(output='/not/foo/bar/runfolder', + general_config = self.dummy_config) + with mock.patch.object(os.path, 'isdir', return_value=True), \ + mock.patch.object(shutil, 'rmtree', return_value=None), \ + mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \ + mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner", + return_value=FakeRunner("2.15.2", runner_conf_with_invalid_output)), \ + mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None): - with mock.patch.object(os.path, 'isdir', return_value=True): - body = {'output': '/foo/bar'} + # This output dir is not allowed by the config + body = {'output': '/not/foo/bar'} response = self.fetch(self.API_BASE + "/start/150415_D00457_0091_AC6281ANXX", method="POST", body=json_encode(body)) + self.assertEqual(response.code, 500) - self.assertEqual(response.reason, "A output folder was specified by the request, " - "but the configuration does " - "not allow arbitrary output folder to be specified.") + self.assertEqual(response.reason, "Invalid output directory /not/foo/bar/runfolder was specified." + " Allowed dirs were: ['/foo/bar']") def test_start_with_allowed_output_specified(self): - - self.dummy_config.DUMMY_CONFIG['allow_arbitrary_output_folder'] = True - with mock.patch.object(os.path, 'isdir', return_value=True), \ mock.patch.object(shutil, 'rmtree', return_value=None), \ mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \ diff --git a/tests/test_utils.py b/tests/test_utils.py index 40d5750..fa09c76 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -54,14 +54,15 @@ class DummyConfig: "HiSeq 2000": {"bcl2fastq_version": "1.8.4"}, "NextSeq 500": {"bcl2fastq_version": "1.8.4"}}, "bcl2fastq_logs_path": "/tmp/", - "allow_arbitrary_output_folder": False} + "allowed_output_folders": ['/foo/bar']} def __getitem__(self, key): return self.DUMMY_CONFIG[key] class DummyRunnerConfig(Bcl2FastqConfig): - def __init__(self, output): + def __init__(self, output, general_config): self.output = output + self.general_config = general_config class FakeRunner(BCL2FastqRunner):