Skip to content

Commit

Permalink
Automatically set the type (CDS) and source (protein_coding) options when --ensembl_cds is given
Browse files: browse the repository at this point in the history
  • Loading branch information
Haibao Tang committed Oct 5, 2023
1 parent b4cfc67 commit b1db49b
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion jcvi/formats/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3049,11 +3049,13 @@ def bed(args):
parent_key = opts.parent_key
add_chr = opts.add_chr
ensembl_cds = opts.ensembl_cds

if opts.type:
type = set(x.strip() for x in opts.type.split(","))
if opts.source:
source = set(x.strip() for x in opts.source.split(","))
if ensembl_cds:
type = {"CDS"}
source = {"protein_coding"}

gff = Gff(
gffile,
Expand All @@ -3066,6 +3068,8 @@ def bed(args):
)
b = Bed()
seen_parents = set() # used with --primary_only
seen = set() # used with --ensembl_cds
skipped_identical_range = 0
skipped_non_primary = 0

for g in gff:
Expand All @@ -3091,6 +3095,11 @@ def bed(args):
bl.accn = "{0}.{1}".format(
g.get_attr("transcript_name"), g.get_attr("exon_number")
)
position = (bl.seqid, bl.start, bl.end)
if position in seen:
skipped_identical_range += 1
continue
seen.add(position)

b.append(bl)

Expand All @@ -3103,6 +3112,8 @@ def bed(args):
)
if primary_only:
logging.debug("Skipped non-primary: %d", skipped_non_primary)
if ensembl_cds:
logging.debug("Skipped due to identical range: %d", skipped_identical_range)


def make_index(gff_file):
Expand Down

0 comments on commit b1db49b

Please sign in to comment.