From b4cfc67ecfb77d76bf273ac00fffaa05dc51374a Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Thu, 5 Oct 2023 15:22:50 -0700 Subject: [PATCH] Add two options to gff.bed() --- jcvi/formats/gff.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/jcvi/formats/gff.py b/jcvi/formats/gff.py index 9eb20d16..e5a9e201 100644 --- a/jcvi/formats/gff.py +++ b/jcvi/formats/gff.py @@ -497,7 +497,6 @@ def to_range(obj, score=None, id=None, strand=None): def main(): - actions = ( ("addparent", "merge sister features and infer their parent"), ("bed", "parse gff and produce bed file for particular feature type"), @@ -3022,6 +3021,18 @@ def bed(args): default="Parent", help="Parent gene key to group with --primary_only", ) + p.add_option( + "--add_chr", + default=False, + action="store_true", + help="Add `chr` prefix to seqid", + ) + p.add_option( + "--ensembl_cds", + default=False, + action="store_true", + help="Use transcript_name.exon_number as accn", + ) p.set_outfile() opts, args = p.parse_args(args) @@ -3036,6 +3047,8 @@ def bed(args): span = opts.span primary_only = opts.primary_only parent_key = opts.parent_key + add_chr = opts.add_chr + ensembl_cds = opts.ensembl_cds if opts.type: type = set(x.strip() for x in opts.type.split(",")) @@ -3072,6 +3085,13 @@ def bed(args): bl.accn = accn if span: bl.score = bl.span + if add_chr: + bl.seqid = "chr" + bl.seqid + if ensembl_cds: + bl.accn = "{0}.{1}".format( + g.get_attr("transcript_name"), g.get_attr("exon_number") + ) + b.append(bl) sorted = not opts.nosort @@ -3134,7 +3154,6 @@ def children(args): parents = set(opts.parents.split(",")) for feat in get_parents(gff_file, parents): - cc = [c.id for c in g.children(feat.id, 1)] if len(cc) <= 1: continue @@ -3513,7 +3532,6 @@ def get_coords(feature, site, fLen, seqlen, feat, children_list, gffdb): elif site in ["TrSS", "TrES"]: children = [] for c in gffdb.children(feat.id, 1): - if c.featuretype not in children_list: continue children.append((c.start, c.stop)) @@ -3686,7 +3704,6 @@ def bed12(args): fw = must_open(outfile, "w") for f in g.features_of_type(parent): - chrom = f.chrom chromStart = f.start - 1 chromEnd = f.stop @@ -3701,7 +3718,6 @@ def bed12(args): blocks = [] for c in g.children(name, 1): - cstart, cend = c.start - 1, c.stop if c.featuretype == block: