diff --git a/bean/mapping/GuideEditCounter.py b/bean/mapping/GuideEditCounter.py index d7b2a1d..43e67c9 100755 --- a/bean/mapping/GuideEditCounter.py +++ b/bean/mapping/GuideEditCounter.py @@ -152,6 +152,7 @@ def __init__(self, **kwargs): if not self.objectify_allele: info(f"{self.name}: Storing allele as strings.") self.keep_intermediate = kwargs["keep_intermediate"] + self.skip_filtering = kwargs["skip_filtering"] self.id_number = random.randint(0, int(1e6)) self.semimatch = 0 self.bcmatch = 0 @@ -197,15 +198,24 @@ def _set_sgRNA_df(self): def check_filter_fastq(self): """Checks if the quality filtered fastq files already exists, and use them if the do.""" - self.filtered_R1_filename = self._jp( - os.path.basename(self.R1_filename).replace(".fastq", "").replace(".gz", "") - + "_filtered.fastq.gz" - ) - self.filtered_R2_filename = self._jp( - os.path.basename(self.R2_filename).replace(".fastq", "").replace(".gz", "") - + "_filtered.fastq.gz" - ) - self._check_names_filter_fastq() + if self.skip_filtering: + self.filtered_R1_filename = self.R1_filename + self.filtered_R2_filename = self.R2_filename + self.n_reads_after_filtering = self.n_total_reads + else: + self.filtered_R1_filename = self._jp( + os.path.basename(self.R1_filename) + .replace(".fastq", "") + .replace(".gz", "") + + "_filtered.fastq.gz" + ) + self.filtered_R2_filename = self._jp( + os.path.basename(self.R2_filename) + .replace(".fastq", "") + .replace(".gz", "") + + "_filtered.fastq.gz" + ) + self._check_names_filter_fastq() # if ( # path.exists(self.filtered_R1_filename) # and path.exists(self.filtered_R2_filename) diff --git a/bean/mapping/utils.py b/bean/mapping/utils.py index 934a3a3..fd823ff 100755 --- a/bean/mapping/utils.py +++ b/bean/mapping/utils.py @@ -161,6 +161,11 @@ def _get_input_parser(parser=None): help="Keep all the intermediate files", action="store_true", ) + parser.add_argument( + "--skip-filtering", + help="Skip the read filtering", + action="store_true", + ) 
parser.add_argument( "--qstart-R1", help="Start position of the read when filtering for quality score of the read 1", diff --git a/tests/test_count.py b/tests/test_count.py index 176084d..c2aeaaa 100755 --- a/tests/test_count.py +++ b/tests/test_count.py @@ -45,6 +45,19 @@ def test_count_samples(): @pytest.mark.order(105) +def test_count_samples_nofilter(): + cmd = "bean count-samples -i tests/data/sample_list.csv -b A -f tests/data/test_guide_info.csv -o tests/test_res/var/ -r --guide-start-seq=GGAAAGGACGAAACACCG --skip-filtering" + try: + subprocess.check_output( + cmd, + shell=True, + universal_newlines=True, + ) + except subprocess.CalledProcessError as exc: + raise exc + + +@pytest.mark.order(106) def test_count_samples_dual(): cmd = "bean count-samples -i tests/data/sample_list.csv -b A,C -f tests/data/test_guide_info.csv -o tests/test_res/var/ -r --guide-start-seq=GGAAAGGACGAAACACCG" try: @@ -57,7 +70,7 @@ def test_count_samples_dual(): raise exc -@pytest.mark.order(106) +@pytest.mark.order(107) def test_count_samples_bcstart(): cmd = "bean count-samples -i tests/data/sample_list.csv -b A -f tests/data/test_guide_info.csv -o tests/test_res/var2/ -r --barcode-start-seq=GGAA" try: @@ -95,7 +108,7 @@ def test_barcode_start_idx(): assert bc == "AGAA" -@pytest.mark.order(106) +@pytest.mark.order(108) def test_count_samples_tiling(): cmd = "bean count-samples -i tests/data/sample_list_tiling.csv -b A -f tests/data/test_guide_info_tiling_chrom.csv -o tests/test_res/tiling/ -r" try: @@ -108,7 +121,7 @@ def test_count_samples_tiling(): raise exc -@pytest.mark.order(107) +@pytest.mark.order(109) def test_count_chroms(): cmd = "bean count --R1 tests/data/test_tiling_R1.fastq --R2 tests/data/test_tiling_R2.fastq -b A -f tests/data/test_guide_info_tiling_chrom.csv -o tests/test_res/tiling_chrom/ -r" try: