From 582464db0e1a4c4f367706146e7a918eeebcdd9f Mon Sep 17 00:00:00 2001 From: Psy-Fer Date: Fri, 25 Oct 2024 16:00:31 +1100 Subject: [PATCH] Add --U2T and uracil to thymine conversion --- src/basecaller.py | 20 ++++++++++++++++++++ src/cli.py | 5 +++++ 2 files changed, 25 insertions(+) diff --git a/src/basecaller.py b/src/basecaller.py index f82a261..9d2a603 100644 --- a/src/basecaller.py +++ b/src/basecaller.py @@ -261,6 +261,14 @@ def get_reads(args, client, read_counter, sk, read_store): else: bcalled_read["header"] = "@{} parent_read_id={} model_version_id={} mean_qscore={}".format(bcalled_read["read_id"], bcalled_read["parent_read_id"], call['metadata'].get('model_version_id', model_id), bcalled_read["int_read_qscore"]) bcalled_read["sequence"] = call['datasets']['sequence'] + if args.U2T: + seq = [] + for i in bcalled_read["sequence"]: + if i == "U": + seq.append("T") + else: + seq.append(i) + bcalled_read["sequence"] = "".join(seq) if args.duplex: bcalled_read["duplex_parent"] = call['metadata']['is_duplex_parent'] bcalled_read["duplex_strand_1"] = call['metadata'].get('duplex_strand_1', None) @@ -289,6 +297,18 @@ def get_reads(args, client, read_counter, sk, read_store): bcalled_read["sam_record"] = "" skipped_list.append([read_id, "stage-1", "Failed to get sam_record/alignment_sam_record"]) continue + if len(bcalled_read["sam_record"]) > 0 and args.U2T: + splitrec = bcalled_read["sam_record"].split() + sam_seq = splitrec[9] + brec = "\t".join(splitrec[:9]) + arec = "\t".join(splitrec[10:]) + seq = [] + for i in sam_seq: + if i == "U": + seq.append("T") + else: + seq.append(i) + bcalled_read["sam_record"] = "\t".join([brec, seq, arec]) if args.do_read_splitting and not args.above_7310: bcalled_read["num_samples"] = None bcalled_read["trimmed_samples"] = None diff --git a/src/cli.py b/src/cli.py index a3bf79e..6ce31ab 100644 --- a/src/cli.py +++ b/src/cli.py @@ -25,6 +25,7 @@ def get_args(above_7310_flag, above_7412_flag): read_splitting = parser.add_argument_group("Read splitting Options") adapter_trimming = parser.add_argument_group("Adapter trimming Options") barcode_dmux = parser.add_argument_group("Barcode demultiplexing Options") + rna = parser.add_argument_group("RNA options") duplex = parser.add_argument_group("Duplex Options") # Args for the wrapper, and then probably best to just have free form args for basecaller @@ -91,6 +92,10 @@ def get_args(above_7310_flag, above_7412_flag): # barcode_dmux.add_argument("--min_score_barcode_mid", type=float, default=60.0, # help="Minimum score for mid barcodes to be detected") + # RNA + rna.add_argument("--U2T", action="store_true", + help="Convert Uracil (U) to Thymine (T) in direct RNA output") + # Duplex duplex.add_argument("--duplex", action="store_true", help="Turn on duplex calling - channel based - See README for information")