Skip to content

Commit

Permalink
implemented and tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
manulera committed Jan 22, 2024
1 parent 3ff817c commit f21effd
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 38 deletions.
18 changes: 16 additions & 2 deletions scripts/cutsite_pairs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -42,8 +42,22 @@
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
Expand Down
29 changes: 22 additions & 7 deletions src/pydna/dseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -1477,11 +1477,11 @@ def get_cutsites(self, *enzymes):
enzymes = _flatten(enzymes)
out = list()
for e in enzymes:
# Positions are 1-based, so we subtract 1 to get 0-based positions
# Positions of the cut on the watson strand. They are 1-based, so we subtract
# 1 to get 0-based positions
cuts_watson = [c - 1 for c in e.search(self, linear=(not self.circular))]
cuts_crick = [(c - e.ovhg) % len(self) for c in cuts_watson]

out += [((w, c), e) for w, c in zip(cuts_watson, cuts_crick)]
out += [((w, e.ovhg), e) for w in cuts_watson]

return sorted(out)

Expand Down Expand Up @@ -1517,17 +1517,32 @@ def right_end_position(self) -> tuple[int, int]:
return len(self) + self.watson_ovhg(), len(self)
return len(self), len(self) - self.watson_ovhg()

def get_cut_parameters(self, cut: tuple, is_left: bool):
"""For a given cut expressed as ((watson_cut, ovhg), enz), returns
a tuple (watson_cut, crick_cut, ovhg). The cut can be None if it
represents the left or right end of the sequence. That's what the
is_left parameter is for."""
if cut is not None:
watson, ovhg = cut[0]
crick = (watson - ovhg) % len(self)
return watson, crick, ovhg
if is_left:
return *self.left_end_position(), self.ovhg
return *self.right_end_position(), self.ovhg

def apply_cut(self, left_cut, right_cut):
if _cuts_overlap(left_cut, right_cut, len(self)):
raise ValueError("Cuts overlap")
left_watson, left_crick = left_cut[0] if left_cut is not None else self.left_end_position()
ovhg = left_cut[1].ovhg if left_cut is not None else self.ovhg
right_watson, right_crick = right_cut[0] if right_cut is not None else self.right_end_position()

left_watson, left_crick, ovhg_left = self.get_cut_parameters(left_cut, True)
right_watson, right_crick, _ = self.get_cut_parameters(right_cut, False)
print(left_watson, left_crick)
print(right_watson, right_crick)
return Dseq(
str(self[left_watson:right_watson]),
# The line below could be easier to understand as _rc(str(self[left_crick:right_crick])), but it does not preserve the case
str(self.reverse_complement()[len(self) - right_crick:len(self) - left_crick]),
ovhg=ovhg,
ovhg=ovhg_left,
)

def get_cutsite_pairs(self, cutsites):
Expand Down
20 changes: 12 additions & 8 deletions src/pydna/dseqrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,8 @@ def apply_cut(self, left_cut, right_cut):
# 000
# 2222
#
features = self.shifted(min(left_cut[0])).features
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(left_cut, True)
features = self.shifted(min(left_watson, left_crick)).features
# for f in features:
# print(f.id, f.location, _location_boundaries(f.location))
# Here, we have done what's shown below (* indicates the origin).
Expand All @@ -1374,8 +1375,8 @@ def apply_cut(self, left_cut, right_cut):
# 000
# 2222

features_need_transfer = [f for f in features if (_location_boundaries(f.location)[1] <= abs(left_cut[1].ovhg))]
features_need_transfer = [_shift_feature(f, -abs(left_cut[1].ovhg), len(self)) for f in features_need_transfer]
features_need_transfer = [f for f in features if (_location_boundaries(f.location)[1] <= abs(left_ovhg))]
features_need_transfer = [_shift_feature(f, -abs(left_ovhg), len(self)) for f in features_need_transfer]
# ^ ^^^^^^^^^
# Now we have shifted the features that end before the cut (0 and 1, but not 3), as if
# they referred to the below sequence (* indicates the origin):
Expand All @@ -1388,7 +1389,7 @@ def apply_cut(self, left_cut, right_cut):
# The features 0 and 1 would have the right location if the final sequence had the same length
# as the original one. However, the final product is longer because of the overhang.

features += [_shift_feature(f, abs(left_cut[1].ovhg), len(dseq)) for f in features_need_transfer]
features += [_shift_feature(f, abs(left_ovhg), len(dseq)) for f in features_need_transfer]
# ^ ^^^^^^^^^
# So we shift back by the same amount in the opposite direction, but this time we pass the
# length of the final product.
Expand All @@ -1398,11 +1399,14 @@ def apply_cut(self, left_cut, right_cut):
_location_boundaries(f.location)[1] <= len(dseq) and
_location_boundaries(f.location)[0] <= _location_boundaries(f.location)[1])]
else:
left_watson, left_crick = left_cut[0] if left_cut is not None else (0, 0)
right_watson, right_crick = right_cut[0] if right_cut is not None else (None, None)
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(left_cut, True)
right_watson, right_crick, right_ovhg = self.seq.get_cut_parameters(right_cut, False)

left_edge = left_crick if dseq.ovhg > 0 else left_watson
right_edge = right_watson if dseq.watson_ovhg() > 0 else right_crick
# left_watson, left_crick = left_cut[0] if left_cut is not None else (0, 0)
# right_watson, right_crick = right_cut[0] if right_cut is not None else (None, None)

left_edge = left_crick if left_ovhg > 0 else left_watson
right_edge = right_watson if right_ovhg > 0 else right_crick
features = self[left_edge:right_edge].features

return Dseqrecord(dseq, features=features)
Expand Down
12 changes: 7 additions & 5 deletions src/pydna/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import collections as _collections
import itertools as _itertools
from copy import deepcopy as _deepcopy
from typing import Union as _Union

import sys as _sys
import re
Expand Down Expand Up @@ -816,10 +817,11 @@ def cuts_overlap(left_cut, right_cut, seq_len):

# This block of code would not be necessary if the cuts were
# initially represented like this
(left_watson, _), enz1 = left_cut
(right_watson, _), enz2 = right_cut
left_crick = left_watson - enz1.ovhg
right_crick = right_watson - enz2.ovhg
(left_watson, left_ovhg), _ = left_cut
(right_watson, right_ovhg), _ = right_cut
# Position of the cut on the crick strands on the left and right
left_crick = left_watson - left_ovhg
right_crick = right_watson - right_ovhg
if left_crick >= seq_len:
left_crick -= seq_len
left_watson -= seq_len
Expand All @@ -832,7 +834,7 @@ def cuts_overlap(left_cut, right_cut, seq_len):
y = sorted([right_watson, right_crick])
return (x[1] > y[0]) != (y[1] < x[0])

def location_boundaries(loc: _sl|_cl):
def location_boundaries(loc: _Union[_sl,_cl]):

#TODO: pending on https://github.com/BjornFJohansson/pydna/pull/179
if loc.strand != 1:
Expand Down
28 changes: 14 additions & 14 deletions tests/test_module_dseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ def test_apply_cut():
assert seq.apply_cut(None, None) == seq

# A cut where one side is None leaves that side intact
EcoRI_cut = ((3, 7), type('DynamicClass', (), {'ovhg': -4})())
EcoRI_cut = ((3, -4), None)
assert seq.apply_cut(None, EcoRI_cut) == Dseq.from_full_sequence_and_overhangs('aaGAATT', watson_ovhg=-4, crick_ovhg=0)
assert seq.apply_cut(EcoRI_cut, None) == Dseq.from_full_sequence_and_overhangs('AATTCaa', watson_ovhg=0, crick_ovhg=-4)

Expand All @@ -844,28 +844,28 @@ def test_apply_cut():

# Two cuts extract a subsequence
seq = Dseq('aaGAATTCaaGAATTCaa', circular=True)
EcoRI_cut_2 = ((11, 15), type('DynamicClass', (), {'ovhg': -4})())
EcoRI_cut_2 = ((11, -4), None)
assert seq.apply_cut(EcoRI_cut, EcoRI_cut_2) == Dseq.from_full_sequence_and_overhangs('AATTCaaGAATT', watson_ovhg=-4, crick_ovhg=-4)

# Overlapping cuts should return an error
seq = Dseq('aaGAATTCaa', circular=True)
first_cuts = [
((3, 7), type('DynamicClass', (), {'ovhg': -4})()),
((7, 3), type('DynamicClass', (), {'ovhg': 4})()),
((3, -4), None),
((7, 4), None),
# Spanning the origin
((9, 8), type('DynamicClass', (), {'ovhg': -8})()),
((8, 9), type('DynamicClass', (), {'ovhg': 8})()),
((9, -8), None),
((8, 8), None),
]
overlapping_cuts = [
((4, 8), type('DynamicClass', (), {'ovhg': -4})()),
((2, 6), type('DynamicClass', (), {'ovhg': -4})()),
((2, 8), type('DynamicClass', (), {'ovhg': -4})()),
((8, 4), type('DynamicClass', (), {'ovhg': 4})()),
((6, 2), type('DynamicClass', (), {'ovhg': 4})()),
((8, 2), type('DynamicClass', (), {'ovhg': 4})()),
((4, -4), None),
((2, -4), None),
((2, -6), None),
((8, 4), None),
((6, 4), None),
((8, 6), None),
# Spanning the origin
((7, 6), type('DynamicClass', (), {'ovhg': -8})()),
((6, 7), type('DynamicClass', (), {'ovhg': 8})()),
((7, -8), None),
((6, 8), None),
]

for first_cut in first_cuts:
Expand Down
3 changes: 1 addition & 2 deletions tests/test_module_dseqrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -2243,8 +2243,7 @@ def test_apply_cut():

# Single cut case
for strand in [1, -1, None]:
for cut_coords, cut_ovhg in (((4, 7), -3), ((7, 4), 3)):
dummy_cut = (cut_coords, type('DynamicClass', (), {'ovhg': cut_ovhg})())
for dummy_cut in (((4, -3), None), ((7, 3), None)):
seq = Dseqrecord("acgtATGaatt", circular=True)
seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap'))
seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side'))
Expand Down

0 comments on commit f21effd

Please sign in to comment.