fixes #180

pydna-group · Jan 19, 2024 · b81644d · b81644d
1 parent 18f03ff
commit b81644d
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 57 deletions.
diff --git a/dummy.py b/dummy.py
diff --git a/scripts/cutsite_pairs.ipynb b/scripts/cutsite_pairs.ipynb
@@ -6,6 +6,39 @@
    "source": [
     "# New cut implementation\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Coming soon\n",
+    "# seq = Dseq('aaGAATTCaa', circular=True)\n",
+    "\n",
+    "# print('EcORI', EcoRI.ovhg, len(seq))\n",
+    "# for shift in range(len(seq)):\n",
+    "#     seq_shifted = seq.shifted(shift)\n",
+    "#     cut_site = seq_shifted.get_cutsites(EcoRI)[0][0]\n",
+    "#     print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1])\n",
+    "\n",
+    "# seq = Dseq('ccTTAATTAAcc', circular=True)\n",
+    "# print('PacI', PacI.ovhg, len(seq))\n",
+    "# for shift in range(len(seq)):\n",
+    "#     seq_shifted = seq.shifted(shift)\n",
+    "#     cut_site = seq_shifted.get_cutsites(PacI)[0][0]\n",
+    "#     print(shift, seq_shifted, cut_site, cut_site[0] - cut_site[1])\n",
+    "\n",
+    "\n",
+    "# seq = Dseq('TTAAccccTTAA', circular=True)\n",
+    "# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': 2})())\n",
+    "# print(seq.apply_cut(custom_cut, custom_cut).__repr__())\n",
+    "\n",
+    "# print()\n",
+    "\n",
+    "# custom_cut = ((1, 11), type('DynamicClass', (), {'ovhg': -10})())\n",
+    "# print(seq.apply_cut(custom_cut, custom_cut).__repr__())"
+   ]
   }
  ],
  "metadata": {

diff --git a/src/pydna/dseqrecord.py b/src/pydna/dseqrecord.py
@@ -15,7 +15,7 @@
 from Bio.Restriction import CommOnly
 from pydna.dseq import Dseq as _Dseq
 from pydna._pretty import pretty_str as _pretty_str
-from pydna.utils import flatten as _flatten
+from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries
 
 # from pydna.utils import memorize as _memorize
 from pydna.utils import rc as _rc
@@ -901,6 +901,7 @@ def __getitem__(self, sl):
             # origin-spanning features should only be included after shifting
             # in cases where the slice comprises the entire sequence, but then
             # sl_start == sl_stop and the second condition is not met
+            # TODO: use _location_boundaries for the start/end checks below, as done in apply_cut
             answer.features = [f for f in answer.features if (
                                f.location.parts[-1].end <= answer.seq.length and
                                f.location.parts[0].start <= f.location.parts[-1].end)]
@@ -1361,6 +1362,8 @@ def apply_cut(self, left_cut, right_cut):
                 #     2222
                 #
                 features = self.shifted(min(left_cut[0])).features
+                # for f in features:
+                #     print(f.id, f.location, _location_boundaries(f.location))
                 # Here, we have done what's shown below (* indicates the origin).
                 # The features 0 and 2 have the right location for the final product:
                 #
@@ -1371,7 +1374,7 @@ def apply_cut(self, left_cut, right_cut):
                 #      000
                 #      2222
 
-                features_need_transfer = [f for f in features if (f.location.parts[-1].end <= abs(left_cut[1].ovhg))]
+                features_need_transfer = [f for f in features if (_location_boundaries(f.location)[1] <= abs(left_cut[1].ovhg))]
                 features_need_transfer = [_shift_feature(f, -abs(left_cut[1].ovhg), len(self)) for f in features_need_transfer]
                 #                                           ^                       ^^^^^^^^^
                 # Now we have shifted the features that end before the cut (0 and 1, but not 3), as if
@@ -1389,11 +1392,11 @@ def apply_cut(self, left_cut, right_cut):
                 #                             ^                       ^^^^^^^^^
                 # So we shift back by the same amount in the opposite direction, but this time we pass the 
                 # length of the final product.
-
+                # print(*features, sep='\n')
                 # Features like 3 are removed here
                 features = [f for f in features if (
-                               f.location.parts[-1].end <= len(dseq) and
-                               f.location.parts[0].start <= f.location.parts[-1].end)]
+                               _location_boundaries(f.location)[1] <= len(dseq) and
+                               _location_boundaries(f.location)[0] <= _location_boundaries(f.location)[1])]
         else:
             left_watson, left_crick = left_cut[0] if left_cut is not None else (0, 0)
             right_watson, right_crick = right_cut[0] if right_cut is not None else (None, None)

diff --git a/src/pydna/utils.py b/src/pydna/utils.py
@@ -75,9 +75,7 @@ def shift_location(original_location, shift, lim):
 def shift_feature(feature, shift, lim):
     """Return a new feature with shifted location."""
     # TODO: Missing tests
-    print(feature.location)
     new_location = shift_location(feature.location, shift, lim)
-    print(new_location)
     new_feature = _deepcopy(feature)
     new_feature.location = new_location
     return new_feature
@@ -834,6 +832,14 @@ def cuts_overlap(left_cut, right_cut, seq_len):
     y = sorted([right_watson, right_crick])
     return (x[1] > y[0]) != (y[1] < x[0])
 
+def location_boundaries(loc: _sl|_cl):
+    """Return a ``(start, end)`` pair taken from the outermost parts of ``loc``.
+
+    When ``loc.strand`` is 1, the start of the first part and the end of the
+    last part are returned. For any other strand value (including ``None``),
+    the start of the last part and the end of the first part are returned.
+    """
+    # TODO: pending on https://github.com/BjornFJohansson/pydna/pull/179
+    first_part, last_part = loc.parts[0], loc.parts[-1]
+    if loc.strand == 1:
+        return first_part.start, last_part.end
+    return last_part.start, first_part.end
+
 
 if __name__ == "__main__":
     cached = _os.getenv("pydna_cached_funcs", "")

diff --git a/tests/test_module_dseqrecord.py b/tests/test_module_dseqrecord.py
@@ -2237,6 +2237,30 @@ def test_assemble_YEp24PGK_XK():
     assert YEp24PGK_XK_correct.cseguid() == "t9fs_9UvEuD-Ankyy8XEr1hD5DQ"
     assert eq(YEp24PGK_XK, YEp24PGK_XK_correct)
 
+def test_apply_cut():
+    """Check feature locations after opening a circular record with one cut.
+
+    The same cut is passed as both the left and the right cut to
+    ``apply_cut``. Both overhang orientations are exercised, with the
+    features placed on strand 1, -1 and None in turn.
+    """
+    from pydna.dseqrecord import Dseqrecord
+    from Bio.SeqFeature import SeqFeature, SimpleLocation
+
+    # Single cut case
+    for strand in [1, -1, None]:
+        for cut_coords, cut_ovhg in (((4, 7), -3), ((7, 4), 3)):
+            # Stand-in for the second element of a cut: an object exposing only ``ovhg``
+            dummy_cut = (cut_coords, type('DynamicClass', (), {'ovhg': cut_ovhg})())
+            seq = Dseqrecord("acgtATGaatt", circular=True)
+            seq.features.append(SeqFeature(SimpleLocation(4, 7,  strand), id='full_overlap'))
+            seq.features.append(SeqFeature(SimpleLocation(3, 7,  strand), id='left_side'))
+            seq.features.append(SeqFeature(SimpleLocation(4, 8,  strand), id='right_side'))
+            seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout'))
+            open_seq = seq.apply_cut(dummy_cut, dummy_cut)
+            assert len(open_seq.features) == 4
+            new_locs = [str(f.location) for f in open_seq.features]
+            # The strand values are mutually exclusive, so use a single if/elif chain
+            if strand == 1:
+                assert new_locs == ['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)']
+            elif strand == -1:
+                # TODO: change the join{[11:14](-), [10:11](-)} case?
+                assert new_locs == ['[0:3](-)', '[0:4](-)', '[11:14](-)', 'join{[11:14](-), [10:11](-)}']
+            elif strand is None:
+                # TODO: pending on https://github.com/BjornFJohansson/pydna/pull/179
+                assert new_locs == ['[0:3]', '[0:4]', '[11:14]', 'join{[11:14], [10:11]}']
 
 if __name__ == "__main__":
     args = [