Skip to content

Commit

Permalink
Add --U2T and uracil to thymine conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
Psy-Fer committed Oct 25, 2024
1 parent 954b908 commit 582464d
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
20 changes: 20 additions & 0 deletions src/basecaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,14 @@ def get_reads(args, client, read_counter, sk, read_store):
else:
bcalled_read["header"] = "@{} parent_read_id={} model_version_id={} mean_qscore={}".format(bcalled_read["read_id"], bcalled_read["parent_read_id"], call['metadata'].get('model_version_id', model_id), bcalled_read["int_read_qscore"])
bcalled_read["sequence"] = call['datasets']['sequence']
if args.U2T:
seq = []
for i in bcalled_read["sequence"]:
if i == "U":
seq.append("T")
else:
seq.append(i)
bcalled_read["sequence"] = "".join(seq)
if args.duplex:
bcalled_read["duplex_parent"] = call['metadata']['is_duplex_parent']
bcalled_read["duplex_strand_1"] = call['metadata'].get('duplex_strand_1', None)
Expand Down Expand Up @@ -289,6 +297,18 @@ def get_reads(args, client, read_counter, sk, read_store):
bcalled_read["sam_record"] = ""
skipped_list.append([read_id, "stage-1", "Failed to get sam_record/alignment_sam_record"])
continue
if len(bcalled_read["sam_record"]) > 0 and args.U2T:
splitrec = bcalled_read["sam_record"].split()
sam_seq = splitrec[9]
brec = "\t".join(splitrec[:9])
arec = "\t".join(splitrec[10:])
seq = []
for i in sam_seq:
if i == "U":
seq.append("T")
else:
seq.append(i)
bcalled_read["sam_record"] = "\t".join([brec, seq, arec])
if args.do_read_splitting and not args.above_7310:
bcalled_read["num_samples"] = None
bcalled_read["trimmed_samples"] = None
Expand Down
5 changes: 5 additions & 0 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_args(above_7310_flag, above_7412_flag):
read_splitting = parser.add_argument_group("Read splitting Options")
adapter_trimming = parser.add_argument_group("Adapter trimming Options")
barcode_dmux = parser.add_argument_group("Barcode demultiplexing Options")
rna = parser.add_argument_group("RNA options")
duplex = parser.add_argument_group("Duplex Options")

# Args for the wrapper, and then probably best to just have free form args for basecaller
Expand Down Expand Up @@ -91,6 +92,10 @@ def get_args(above_7310_flag, above_7412_flag):
# barcode_dmux.add_argument("--min_score_barcode_mid", type=float, default=60.0,
# help="Minimum score for mid barcodes to be detected")

# RNA
rna.add_argument("--U2T", action="store_true",
help="Convert Uracil (U) to Thymine (T) in direct RNA output")

# Duplex
duplex.add_argument("--duplex", action="store_true",
help="Turn on duplex calling - channel based - See README for information")
Expand Down

0 comments on commit 582464d

Please sign in to comment.