Skip to content

Commit

Permalink
Merge pull request #26 from TAMU-CPT/crr/interjacent-fix/i15
Browse files Browse the repository at this point in the history
i15 // intersect & adjacent fix
  • Loading branch information
curtisim0 authored Aug 30, 2024
2 parents d24ea14 + 6b68423 commit b286289
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 104 deletions.
138 changes: 48 additions & 90 deletions cpt_intersect_adj/intersect_and_adjacent.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,73 +84,40 @@ def intersect(a, b, window, stranding):
b_pos = []
tree_a = []
tree_b = []
if stranding == True:
for feat in rec_a_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue

for feat in rec_a_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue
interval = Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
if stranding:
if feat.strand > 0:
a_pos.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
a_pos.append(interval)
else:
a_neg.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
a_neg.append(interval)
else:
tree_a.append(interval)

for feat in rec_b_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue
for feat in rec_b_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue
interval = Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
if stranding:
if feat.strand > 0:
b_pos.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
b_pos.append(interval)
else:
b_neg.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
b_neg.append(interval)
else:
tree_b.append(interval)

else:
for feat in rec_a_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue
tree_a.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
for feat in rec_b_i.features:
if feat.type == "remark" or feat.type == "annotation":
continue
tree_b.append(
Interval(
int(feat.location.start) - int(window),
int(feat.location.end) + int(window),
feat.id,
)
)
if stranding:
# builds interval tree from Interval objects of form (start, end, id) for each feature
# tree_a = IntervalTree(list(treeFeatures_noRem(rec_a_i.features, window)))
# tree_b = IntervalTree(list(treeFeatures_noRem(rec_b_i.features, window)))
# else:
tree_a_pos = IntervalTree(a_pos)
tree_a_neg = IntervalTree(a_neg)
tree_b_pos = IntervalTree(b_pos)
Expand All @@ -163,75 +130,64 @@ def intersect(a, b, window, stranding):
rec_a_map = {f.id: f for f in rec_a_i.features}
rec_b_map = {f.id: f for f in rec_b_i.features}

rec_a_hits_in_b = []
rec_b_hits_in_a = []
rec_a_hits_in_b = {}
rec_b_hits_in_a = {}

for feature in rec_a_i.features:
# Save each feature in rec_a that overlaps a feature in rec_b
# hits = tree_b.find_range((int(feature.location.start), int(feature.location.end)))

if feature.type == "remark" or feature.type == "annotation":
continue

if stranding == False:
if not stranding:
hits = tree_b[
int(feature.location.start) : int(feature.location.end)
]

# feature id is saved in interval result.data, use map to get full feature
for hit in hits:
rec_a_hits_in_b.append(rec_b_map[hit.data])

rec_a_hits_in_b[hit.data] = rec_b_map[hit.data]
else:
if feature.strand > 0:
hits_pos = tree_b_pos[
hits = tree_b_pos[
int(feature.location.start) : int(feature.location.end)
]
for hit in hits_pos:
rec_a_hits_in_b.append(rec_b_map[hit.data])
else:
hits_neg = tree_b_neg[
hits = tree_b_neg[
int(feature.location.start) : int(feature.location.end)
]
for hit in hits_neg:
rec_a_hits_in_b.append(rec_b_map[hit.data])
for hit in hits:
rec_a_hits_in_b[hit.data] = rec_b_map[hit.data]

for feature in rec_b_i.features:
if feature.type == "remark" or feature.type == "annotation":
continue

if stranding == False:
if not stranding:
hits = tree_a[
int(feature.location.start) : int(feature.location.end)
]

# feature id is saved in interval result.data, use map to get full feature
for hit in hits:
rec_b_hits_in_a.append(rec_a_map[hit.data])

rec_b_hits_in_a[hit.data] = rec_a_map[hit.data]
else:
if feature.strand > 0:
hits_pos = tree_a_pos[
hits = tree_a_pos[
int(feature.location.start) : int(feature.location.end)
]
for hit in hits_pos:
rec_b_hits_in_a.append(rec_a_map[hit.data])
else:
hits_neg = tree_a_neg[
hits = tree_a_neg[
int(feature.location.start) : int(feature.location.end)
]
for hit in hits_neg:
rec_b_hits_in_a.append(rec_a_map[hit.data])
for hit in hits:
rec_b_hits_in_a[hit.data] = rec_a_map[hit.data]

# Remove duplicate features using sets
# Sort features by start position
rec_a_out.append(
SeqRecord(
rec_a[iterate].seq,
rec_a[iterate].id,
rec_a[iterate].name,
rec_a[iterate].description,
rec_a[iterate].dbxrefs,
sorted(set(rec_a_hits_in_b), key=lambda feat: feat.location.start),
sorted(
rec_a_hits_in_b.values(), key=lambda feat: feat.location.start
),
rec_a[iterate].annotations,
)
)
Expand All @@ -242,7 +198,9 @@ def intersect(a, b, window, stranding):
rec_b[iterate].name,
rec_b[iterate].description,
rec_b[iterate].dbxrefs,
sorted(set(rec_b_hits_in_a), key=lambda feat: feat.location.start),
sorted(
rec_b_hits_in_a.values(), key=lambda feat: feat.location.start
),
rec_b[iterate].annotations,
)
)
Expand Down
25 changes: 11 additions & 14 deletions cpt_intersect_adj/intersect_and_adjacent.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,20 @@
<description>Outputs nearby top-level GFF features from two GFF3 files</description>
<macros>
<import>macros.xml</import>

</macros>
<requirements>
<requirement type="package" version="3.8.13">python</requirement>
<requirement type="package" version="1.79">biopython</requirement>
<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
<expand macro="requirements">
<requirement type="package" version="3.0.2">intervaltree</requirement>
</requirements>
</expand>
<command detect_errors="aggressive"><![CDATA[
'$__tool_directory__/intersect_and_adjacent.py'
'$gff3_data_a'
'$gff3_data_b'
'$window'
'$stranding'
--oa '$oa'
--ob '$ob'
]]></command>
python '$__tool_directory__/intersect_and_adjacent.py'
'$gff3_data_a'
'$gff3_data_b'
$window
$stranding
--oa '$oa'
--ob '$ob'
]]>
</command>
<inputs>
<param label="GFF3 Annotations A" name="gff3_data_a" type="data" format="gff3"/>
<param label="GFF3 Annotations B" name="gff3_data_b" type="data" format="gff3"/>
Expand Down

0 comments on commit b286289

Please sign in to comment.