From ddcf321d9848cdf94dbd3e5b6142b0123a5f4aeb Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Mon, 12 Feb 2018 16:28:35 +0100 Subject: [PATCH 1/4] Add option to create index read --- bcl2fastq/handlers/bcl2fastq_handlers.py | 6 ++++++ bcl2fastq/lib/bcl2fastq_utils.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/bcl2fastq/handlers/bcl2fastq_handlers.py b/bcl2fastq/handlers/bcl2fastq_handlers.py index 787d1ea..8a6aa6e 100644 --- a/bcl2fastq/handlers/bcl2fastq_handlers.py +++ b/bcl2fastq/handlers/bcl2fastq_handlers.py @@ -107,6 +107,7 @@ def create_config_from_request(self, runfolder, request_body): barcode_mismatches = "" tiles = "" use_base_mask = "" + create_indexes = False additional_args = "" runfolder_base_path = self.config["runfolder_path"] @@ -133,6 +134,10 @@ def create_config_from_request(self, runfolder, request_body): if "use_base_mask" in request_data: use_base_mask = request_data["use_base_mask"] + if "create_indexes" in request_data: + if request_data["create_indexes"] == "True": + create_indexes = True + if "additional_args" in request_data: additional_args = request_data["additional_args"] @@ -145,6 +150,7 @@ def create_config_from_request(self, runfolder, request_body): barcode_mismatches, tiles, use_base_mask, + create_indexes, additional_args) return config diff --git a/bcl2fastq/lib/bcl2fastq_utils.py b/bcl2fastq/lib/bcl2fastq_utils.py index 90f7df6..a661ec2 100644 --- a/bcl2fastq/lib/bcl2fastq_utils.py +++ b/bcl2fastq/lib/bcl2fastq_utils.py @@ -30,6 +30,7 @@ def __init__(self, barcode_mismatches=None, tiles=None, use_base_mask=None, + create_indexes=False, additional_args=None, nbr_of_cores=None): """ @@ -45,6 +46,7 @@ def __init__(self, :param barcode_mismatches: how many mismatches to allow in tag. :param tiles: tiles to include when running bcl2fastq :param use_base_mask: base mask to use + :param create_indexes: Create fastq files for indexes :param additional_args: this can be used to pass any other arguments to bcl2fastq :param nbr_of_cores: number of cores to run bcl2fastq with """ @@ -86,6 +88,7 @@ def __init__(self, # commandline passed. E.g. "--use-bases-mask 1:y*,6i,6i, y* --use-bases-mask y*,6i,6i, y* " self.use_base_mask = use_base_mask self.additional_args = additional_args + self.create_indexes = create_indexes # Nbr of cores to use will default to the number of cpus on the system. if nbr_of_cores: @@ -398,6 +401,9 @@ def construct_command(self): if self.config.tiles: commandline_collection.append("--tiles " + self.config.tiles) + if self.config.create_indexes: + commandline_collection.append("--create-fastq-for-index-reads") + if self.config.use_base_mask: # Note that for the base mask the "--use-bases-mask" must be included in the # commandline passed. From 8c737f2fbfed9eea9820413f75be33e99c15ebed Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Mon, 12 Feb 2018 16:51:35 +0100 Subject: [PATCH 2/4] Add support for NovaSeq and drop Illuminate dep --- bcl2fastq/lib/bcl2fastq_utils.py | 47 +++++++++++++++++++++++--------- config/app.config | 2 ++ requirements/prod | 1 - 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/bcl2fastq/lib/bcl2fastq_utils.py b/bcl2fastq/lib/bcl2fastq_utils.py index a661ec2..a7da486 100644 --- a/bcl2fastq/lib/bcl2fastq_utils.py +++ b/bcl2fastq/lib/bcl2fastq_utils.py @@ -6,7 +6,8 @@ import shutil import time -from illuminate.metadata import InteropMetadata + +import xmltodict from arteria.exceptions import ArteriaUsageException @@ -109,6 +110,12 @@ def write_samplesheet(samplesheet_string, new_samplesheet_file): with open(new_samplesheet_file, "w") as f: f.write(samplesheet_string) + @staticmethod + def runinfo_as_dict(runfolder): + runinfo_path = os.path.join(runfolder, "RunInfo.xml") + with open(runinfo_path) as f: + return xmltodict.parse(f.read()) + @staticmethod def get_bcl2fastq_version_from_run_parameters(runfolder, config): """ @@ -120,13 +127,20 @@ def get_bcl2fastq_version_from_run_parameters(runfolder, config): :return the version of bcl2fastq to use. """ - meta_data = InteropMetadata(runfolder) - model = meta_data.model + run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder) + instrument_name = run_info["RunInfo"]["Run"]["Instrument"] - current_config = config - version = current_config["machine_type"][model]["bcl2fastq_version"] + machine_type_mappings = {"M": "MiSeq", + "D": "HiSeq 2500", + "SN": "HiSeq 2000", + "ST": "HiSeq X", + "A": "NovaSeq", + "NS": "NextSeq 500", + "K": "HiSeq 4000"} - return version + for key, value in machine_type_mappings.items(): + if instrument_name.startswith(key): + return config["machine_type"][value]["bcl2fastq_version"] @staticmethod def get_length_of_indexes(runfolder): @@ -136,16 +150,23 @@ def get_length_of_indexes(runfolder): :return: a dict with the read number as key and the length of each index as value e.g.: {2: 7, 3: 8} """ - meta_data = InteropMetadata(runfolder) - index_read_info = filter(lambda x: x["is_index"], meta_data.read_config) - indexes_and_lengths = map(lambda x: (x["read_num"], x["cycles"]), index_read_info) - return dict(indexes_and_lengths) + + run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder) + reads = run_info["RunInfo"]["Run"]["Reads"]["Read"] + + index_lengths = {} + for read in reads: + if read['@IsIndexedRead'] == 'Y': + index_lengths[int(read['@Number'])] = int(read['@NumCycles']) + return index_lengths @staticmethod def is_single_read(runfolder): - meta_data = InteropMetadata(runfolder) - number_of_reads = filter(lambda x: not x["is_index"], meta_data.read_config) - return len(number_of_reads) < 2 + run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder) + reads = run_info["RunInfo"]["Run"]["Reads"]["Read"] + + nbr_of_reads = len(list(filter(lambda x: not x["@IsIndexedRead"] == 'Y', reads))) + return nbr_of_reads < 2 @staticmethod def get_bases_mask_per_lane_from_samplesheet(samplesheet, index_lengths, is_single_read): diff --git a/config/app.config b/config/app.config index 51b0e28..2d5f393 100644 --- a/config/app.config +++ b/config/app.config @@ -28,6 +28,8 @@ machine_type: bcl2fastq_version: 2.17.1 MiSeq: bcl2fastq_version: 2.17.1 + NovaSeq: + bcl2fastq_version: 2.17.1 runfolder_path: /vagrant/tiny-test-data/ diff --git a/requirements/prod b/requirements/prod index fa2db8e..514a1b3 100644 --- a/requirements/prod +++ b/requirements/prod @@ -2,6 +2,5 @@ jsonpickle==0.9.2 tornado==4.2.1 git+https://github.com/johandahlberg/localq.git@with_shell_true # Get from pip in future - localq git+https://github.com/arteria-project/arteria-core.git@v1.1.0#egg=arteria-core -illuminate==0.6.2 pandas==0.14.1 From e333f9ee409a0e33d596711ace2f531c99c7fc70 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Mon, 12 Feb 2018 17:22:23 +0100 Subject: [PATCH 3/4] Add xml2dict in reqs --- requirements/prod | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/prod b/requirements/prod index 514a1b3..3a76983 100644 --- a/requirements/prod +++ b/requirements/prod @@ -2,5 +2,6 @@ jsonpickle==0.9.2 tornado==4.2.1 git+https://github.com/johandahlberg/localq.git@with_shell_true # Get from pip in future - localq git+https://github.com/arteria-project/arteria-core.git@v1.1.0#egg=arteria-core +xml2dict pandas==0.14.1 From 44912b8ede89cee9af07ead88173d24121307b31 Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Tue, 13 Feb 2018 09:32:30 +0100 Subject: [PATCH 4/4] Correct xmltodict --- requirements/prod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/prod b/requirements/prod index 3a76983..dc11636 100644 --- a/requirements/prod +++ b/requirements/prod @@ -2,6 +2,6 @@ jsonpickle==0.9.2 tornado==4.2.1 git+https://github.com/johandahlberg/localq.git@with_shell_true # Get from pip in future - localq git+https://github.com/arteria-project/arteria-core.git@v1.1.0#egg=arteria-core -xml2dict +xmltodict pandas==0.14.1