From b81644d5bea5ab8639cfa1f6602c37ba1924c8c8 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Fri, 19 Jan 2024 17:53:54 +0000 Subject: [PATCH] fixes #180 --- dummy.py | 50 --------------------------------- scripts/cutsite_pairs.ipynb | 33 ++++++++++++++++++++++ src/pydna/dseqrecord.py | 13 +++++---- src/pydna/utils.py | 10 +++++-- tests/test_module_dseqrecord.py | 24 ++++++++++++++++ 5 files changed, 73 insertions(+), 57 deletions(-) delete mode 100644 dummy.py diff --git a/dummy.py b/dummy.py deleted file mode 100644 index 34cb23e5..00000000 --- a/dummy.py +++ /dev/null @@ -1,50 +0,0 @@ -from pydna.dseqrecord import Dseqrecord -from Bio.SeqFeature import SeqFeature, SimpleLocation -from Bio.Restriction import EcoRI, PacI -from pydna.dseq import Dseq -# _shift_location(SimpleLocation(x_start, x_start + length), 0, len(first) - -seq = Dseqrecord("acgtTTTaatt", circular=True) -seq.features.append(SeqFeature(SimpleLocation(4, 7, 1), id='full_overlap')) -seq.features.append(SeqFeature(SimpleLocation(3, 7, 1), id='left_side')) -seq.features.append(SeqFeature(SimpleLocation(4, 8, 1), id='right_side')) -seq.features.append(SeqFeature(SimpleLocation(3, 10, 1), id='throughout')) -# print(*seq.features, sep='\n') -print('===') -dummy_cut = ((4, 7), type('DynamicClass', (), {'ovhg': -3})()) -open_seq = seq.apply_cut(dummy_cut, dummy_cut) - -print(*open_seq.features, sep='\n') -print(open_seq.seq.__repr__()) - - - - - - -# seq = Dseq('aaGAATTCaa', circular=True) - -# print('EcORI', EcoRI.ovhg, len(seq)) -# for shift in range(len(seq)): -# seq_shifted = seq.shifted(shift) -# cut_site = seq_shifted.get_cutsites(EcoRI)[0][0] -# print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1]) - -# seq = Dseq('ccTTAATTAAcc', circular=True) -# print('PacI', PacI.ovhg, len(seq)) -# for shift in range(len(seq)): -# seq_shifted = seq.shifted(shift) -# cut_site = seq_shifted.get_cutsites(PacI)[0][0] -# print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1]) - - -# seq = Dseq('TTAAccccTTAA', circular=True) -# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': 2})()) -# print(seq.apply_cut(custom_cut, custom_cut).__repr__()) - -# print() - -# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': -10})()) -# print(seq.apply_cut(custom_cut, custom_cut).__repr__()) - - diff --git a/scripts/cutsite_pairs.ipynb b/scripts/cutsite_pairs.ipynb index 264290f8..9b2705c8 100644 --- a/scripts/cutsite_pairs.ipynb +++ b/scripts/cutsite_pairs.ipynb @@ -6,6 +6,39 @@ "source": [ "# New cut implementation\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Coming soon\n", + "# seq = Dseq('aaGAATTCaa', circular=True)\n", + "\n", + "# print('EcORI', EcoRI.ovhg, len(seq))\n", + "# for shift in range(len(seq)):\n", + "# seq_shifted = seq.shifted(shift)\n", + "# cut_site = seq_shifted.get_cutsites(EcoRI)[0][0]\n", + "# print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1])\n", + "\n", + "# seq = Dseq('ccTTAATTAAcc', circular=True)\n", + "# print('PacI', PacI.ovhg, len(seq))\n", + "# for shift in range(len(seq)):\n", + "# seq_shifted = seq.shifted(shift)\n", + "# cut_site = seq_shifted.get_cutsites(PacI)[0][0]\n", + "# print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1])\n", + "\n", + "\n", + "# seq = Dseq('TTAAccccTTAA', circular=True)\n", + "# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': 2})())\n", + "# print(seq.apply_cut(custom_cut, custom_cut).__repr__())\n", + "\n", + "# print()\n", + "\n", + "# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': -10})())\n", + "# print(seq.apply_cut(custom_cut, custom_cut).__repr__())" + ] } ], "metadata": { diff --git a/src/pydna/dseqrecord.py b/src/pydna/dseqrecord.py index 571f8eb5..79b988cf 100644 --- a/src/pydna/dseqrecord.py +++ b/src/pydna/dseqrecord.py @@ -15,7 +15,7 @@ from Bio.Restriction import CommOnly from pydna.dseq import Dseq as _Dseq from pydna._pretty import pretty_str as _pretty_str -from pydna.utils import flatten as _flatten +from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries # from pydna.utils import memorize as _memorize from pydna.utils import rc as _rc @@ -901,6 +901,7 @@ def __getitem__(self, sl): # origin-spanning features should only be included after shifting # in cases where the slice comprises the entire sequence, but then # sl_start == sl_stop and the second condition is not met + # TODO: _location_boundaries answer.features = [f for f in answer.features if ( f.location.parts[-1].end <= answer.seq.length and f.location.parts[0].start <= f.location.parts[-1].end)] @@ -1361,6 +1362,8 @@ def apply_cut(self, left_cut, right_cut): # 2222 # features = self.shifted(min(left_cut[0])).features + # for f in features: + # print(f.id, f.location, _location_boundaries(f.location)) # Here, we have done what's shown below (* indicates the origin). # The features 0 and 2 have the right location for the final product: # @@ -1371,7 +1374,7 @@ def apply_cut(self, left_cut, right_cut): # 000 # 2222 - features_need_transfer = [f for f in features if (f.location.parts[-1].end <= abs(left_cut[1].ovhg))] + features_need_transfer = [f for f in features if (_location_boundaries(f.location)[1] <= abs(left_cut[1].ovhg))] features_need_transfer = [_shift_feature(f, -abs(left_cut[1].ovhg), len(self)) for f in features_need_transfer] # ^ ^^^^^^^^^ # Now we have shifted the features that end before the cut (0 and 1, but not 3), as if @@ -1389,11 +1392,11 @@ def apply_cut(self, left_cut, right_cut): # ^ ^^^^^^^^^ # So we shift back by the same amount in the opposite direction, but this time we pass the # length of the final product. - + # print(*features, sep='\n') # Features like 3 are removed here features = [f for f in features if ( - f.location.parts[-1].end <= len(dseq) and - f.location.parts[0].start <= f.location.parts[-1].end)] + _location_boundaries(f.location)[1] <= len(dseq) and + _location_boundaries(f.location)[0] <= _location_boundaries(f.location)[1])] else: left_watson, left_crick = left_cut[0] if left_cut is not None else (0, 0) right_watson, right_crick = right_cut[0] if right_cut is not None else (None, None) diff --git a/src/pydna/utils.py b/src/pydna/utils.py index 09c2fb63..a569c923 100644 --- a/src/pydna/utils.py +++ b/src/pydna/utils.py @@ -75,9 +75,7 @@ def shift_location(original_location, shift, lim): def shift_feature(feature, shift, lim): """Return a new feature with shifted location.""" # TODO: Missing tests - print(feature.location) new_location = shift_location(feature.location, shift, lim) - print(new_location) new_feature = _deepcopy(feature) new_feature.location = new_location return new_feature @@ -834,6 +832,14 @@ def cuts_overlap(left_cut, right_cut, seq_len): y = sorted([right_watson, right_crick]) return (x[1] > y[0]) != (y[1] < x[0]) +def location_boundaries(loc: _sl|_cl): + + #TODO: pending on https://github.com/BjornFJohansson/pydna/pull/179 + if loc.strand != 1: + return loc.parts[-1].start, loc.parts[0].end + else: + return loc.parts[0].start, loc.parts[-1].end + if __name__ == "__main__": cached = _os.getenv("pydna_cached_funcs", "") diff --git a/tests/test_module_dseqrecord.py b/tests/test_module_dseqrecord.py index 4fa0deda..fc058d5e 100644 --- a/tests/test_module_dseqrecord.py +++ b/tests/test_module_dseqrecord.py @@ -2237,6 +2237,30 @@ def test_assemble_YEp24PGK_XK(): assert YEp24PGK_XK_correct.cseguid() == "t9fs_9UvEuD-Ankyy8XEr1hD5DQ" assert eq(YEp24PGK_XK, YEp24PGK_XK_correct) +def test_apply_cut(): + from pydna.dseqrecord import Dseqrecord + from Bio.SeqFeature import SeqFeature, SimpleLocation + + # Single cut case + for strand in [1, -1, None]: + for cut_coords, cut_ovhg in (((4, 7), -3), ((7, 4), 3)): + dummy_cut = (cut_coords, type('DynamicClass', (), {'ovhg': cut_ovhg})()) + seq = Dseqrecord("acgtATGaatt", circular=True) + seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap')) + seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side')) + seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side')) + seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout')) + open_seq = seq.apply_cut(dummy_cut, dummy_cut) + assert len(open_seq.features) == 4 + new_locs = [str(f.location) for f in open_seq.features] + if strand == 1: + assert new_locs == ['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)'] + elif strand == -1: + # TODO: change the join{[11:14](-), [10:11](-)} case? + assert new_locs == ['[0:3](-)', '[0:4](-)', '[11:14](-)', 'join{[11:14](-), [10:11](-)}'] + if strand == None: + # TODO: pending on https://github.com/BjornFJohansson/pydna/pull/179 + assert new_locs == ['[0:3]', '[0:4]', '[11:14]', 'join{[11:14], [10:11]}'] if __name__ == "__main__": args = [