Skip to content

Commit

Permalink
Restrict output to specific dirs
Browse files Browse the repository at this point in the history
  • Loading branch information
Johan Dahlberg committed Jan 17, 2017
1 parent b8b1f6c commit 1cc6ad3
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 28 deletions.
9 changes: 1 addition & 8 deletions bcl2fastq/handlers/bcl2fastq_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,7 @@ def create_config_from_request(self, runfolder, request_body):
bcl2fastq_version = request_data["bcl2fastq_version"]

if "output" in request_data:
if self.config['allow_arbitrary_output_folder']:
output = request_data["output"]
else:
raise ArteriaUsageException("A output folder was specified by the request, but the configuration "
"does not allow arbitrary output folder to be specified.")

if os.path.abspath(output) == os.path.abspath(runfolder_input):
raise ArteriaUsageException("The specified output path is the same as the input path. This is not allowed!")
output = request_data["output"]

if "samplesheet" in request_data:
samplesheet = request_data["samplesheet"]
Expand Down
23 changes: 21 additions & 2 deletions bcl2fastq/lib/bcl2fastq_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def __init__(self,
:param nbr_of_cores: number of cores to run bcl2fastq with
"""

self.general_config = general_config

self.runfolder_input = runfolder_input
self.base_calls_input = runfolder_input + "/Data/Intensities/BaseCalls"

Expand Down Expand Up @@ -297,12 +299,29 @@ def construct_command(self):
"""
raise NotImplementedError("Subclasses should implement this!")

def validate_output(self):

def _parent_dir(d):
return os.path.abspath(os.path.join(d, os.path.pardir))

abs_path_of_allowed_dirs = map(os.path.abspath, self.config.general_config['allowed_output_folders'])
is_located_in_parent_dir = _parent_dir(self.config.output) in abs_path_of_allowed_dirs

if is_located_in_parent_dir:
True
else:
error_string = "Invalid output directory {} was specified." \
" Allowed dirs were: {}".format(self.config.output,
self.config.general_config['allowed_output_folders'])
log.error(error_string)
raise ArteriaUsageException(error_string)

def delete_output(self):
"""
Delete the output directory if it exists
Delete the output directory if it exists and the output path is valid
:return: None
"""
if os.path.isdir(self.config.output):
if self.validate_output():
log.info("Found a directory at output path {}, will remove it.".format(self.config.output))
shutil.rmtree(self.config.output)

Expand Down
9 changes: 4 additions & 5 deletions config/app.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ default_output_path: /vagrant/runfolder_output/

bcl2fastq_logs_path: bcl2fastq_logs

# Setting this to True will allow the bcl2fastq service direct
# the output of bcl2fastq anywhere on the filesystem (restricted
# by normal permissions). If this is set to False (as per default)
# it will only write output to the specified `default_output_path`
allow_arbitrary_output_folder: False
# Only folders and child folder of the directories listed here will be valid as output
# directories.
allowed_output_folders:
- /vagrant/runfolder_output/

31 changes: 20 additions & 11 deletions tests/test_bcl2fastq_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ def start_api_call_nbr(self):
8: "y*,7i,n*,y*",
}

DUMMY_RUNNER_CONF = DummyRunnerConfig(output='/foo/bar')

dummy_config = DummyConfig()
DUMMY_RUNNER_CONF = DummyRunnerConfig(output='/foo/bar/runfolder', general_config = dummy_config)

def get_app(self):
return Application(routes(config=self.dummy_config))
Expand Down Expand Up @@ -94,22 +93,32 @@ def test_start_with_empty_body(self):

def test_start_with_disallowed_output_specified(self):

self.dummy_config.DUMMY_CONFIG['allow_arbitrary_output_folder'] = False
# TODO Please note that this test is not very good, since the
# what we ideally want to test is the output specifed by the request,
# and that that gets rejected, however since the create_bcl2fastq_runner command
# is mocked away here and that is what returns the invalid output the tests is
# a bit misleading. In the future we probably want to refactor this.
# / JD 20170117
runner_conf_with_invalid_output = DummyRunnerConfig(output='/not/foo/bar/runfolder',
general_config = self.dummy_config)
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None), \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", runner_conf_with_invalid_output)), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None):

with mock.patch.object(os.path, 'isdir', return_value=True):
body = {'output': '/foo/bar'}
# This output dir is not allowed by the config
body = {'output': '/not/foo/bar'}
response = self.fetch(self.API_BASE + "/start/150415_D00457_0091_AC6281ANXX",
method="POST",
body=json_encode(body))

self.assertEqual(response.code, 500)
self.assertEqual(response.reason, "A output folder was specified by the request, "
"but the configuration does "
"not allow arbitrary output folder to be specified.")
self.assertEqual(response.reason, "Invalid output directory /not/foo/bar/runfolder was specified."
" Allowed dirs were: ['/foo/bar']")

def test_start_with_allowed_output_specified(self):

self.dummy_config.DUMMY_CONFIG['allow_arbitrary_output_folder'] = True

with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None), \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
Expand Down
5 changes: 3 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,15 @@ class DummyConfig:
"HiSeq 2000": {"bcl2fastq_version": "1.8.4"},
"NextSeq 500": {"bcl2fastq_version": "1.8.4"}},
"bcl2fastq_logs_path": "/tmp/",
"allow_arbitrary_output_folder": False}
"allowed_output_folders": ['/foo/bar']}

def __getitem__(self, key):
return self.DUMMY_CONFIG[key]

class DummyRunnerConfig(Bcl2FastqConfig):
def __init__(self, output):
def __init__(self, output, general_config):
self.output = output
self.general_config = general_config


class FakeRunner(BCL2FastqRunner):
Expand Down

0 comments on commit 1cc6ad3

Please sign in to comment.