pydna-group · manulera · Jul 15, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/src/pydna/dseqrecord.py b/src/pydna/dseqrecord.py
@@ -482,15 +482,12 @@ def format(self, f="gb"):
 
         """
 
-        s = super().format(f).strip()
-
-        if f in ("genbank", "gb"):
-            if self.circular:
-                return _pretty_str(s[:55] + "circular" + s[63:])
-            else:
-                return _pretty_str(s[:55] + "linear  " + s[63:])
+        record = _copy.deepcopy(self)
+        if f in ("genbank", "gb") and self.circular:
+            record.annotations["topology"] = "circular"
         else:
-            return _pretty_str(s).strip()
+            record.annotations["topology"] = "linear"
+        return _SeqRecord.format(record, f).strip()
 
     def write(self, filename=None, f="gb"):
         """Writes the Dseqrecord to a file using the format f, which must
@@ -1114,7 +1111,7 @@ def orfs_to_features(self, minsize=300):
                         _SimpleLocation(x, y, strand=strand),
                         type="CDS",
                         qualifiers={
-                            "note": f"{y-x}bp {(y-x)//3}aa",
+                            "note": f"{y - x}bp {(y - x) // 3}aa",
                             "checksum": [orf.seguid() + " (DNA)", prt.seguid() + " (protein)"],
                             "codon_start": 1,
                             "transl_table": 11,
@@ -1153,8 +1150,8 @@ def figure(self, feature=0, highlight="\x1b[48;5;11m", plain="\x1b[0m"):
 
         ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
 
-        w = f"{self.seq.ovhg*chr(32)}{self.seq.watson}{-ovhg*chr(32)}"
-        c = f"{-self.seq.ovhg*chr(32)}{self.seq.crick[::-1]}{ovhg*chr(32)}"
+        w = f"{self.seq.ovhg * chr(32)}{self.seq.watson}{-ovhg * chr(32)}"
+        c = f"{-self.seq.ovhg * chr(32)}{self.seq.crick[::-1]}{ovhg * chr(32)}"
 
         if strand == 1:
             s1, s2 = w, c

diff --git a/tests/format_sequences.ipynb b/tests/format_sequences.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -31,7 +31,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -85,18 +85,7 @@
     {
      "data": {
       "text/plain": [
-       "LOCUS       .                          3 bp    DNA     linear   UNK 20-DEC-2016\n",
-       "DEFINITION  .\n",
-       "ACCESSION   <unknown id>\n",
-       "VERSION     <unknown id>\n",
-       "KEYWORDS    .\n",
-       "SOURCE      .\n",
-       "  ORGANISM  .\n",
-       "            .\n",
-       "FEATURES             Location/Qualifiers\n",
-       "ORIGIN\n",
-       "        1 aaa\n",
-       "//"
+       "'LOCUS       .                          3 bp    DNA     linear   UNK 20-DEC-2016\\nDEFINITION  .\\nACCESSION   <unknown id>\\nVERSION     <unknown id>\\nKEYWORDS    .\\nSOURCE      .\\n  ORGANISM  .\\n            .\\nFEATURES             Location/Qualifiers\\nORIGIN\\n        1 aaa\\n//'"
       ]
      },
      "execution_count": 6,
@@ -118,18 +107,7 @@
     {
      "data": {
       "text/plain": [
-       "LOCUS       name                       3 bp    DNA     linear   UNK 20-DEC-2016\n",
-       "DEFINITION  description.\n",
-       "ACCESSION   id\n",
-       "VERSION     id\n",
-       "KEYWORDS    .\n",
-       "SOURCE      .\n",
-       "  ORGANISM  .\n",
-       "            .\n",
-       "FEATURES             Location/Qualifiers\n",
-       "ORIGIN\n",
-       "        1 aaa\n",
-       "//"
+       "'LOCUS       name                       3 bp    DNA     linear   UNK 20-DEC-2016\\nDEFINITION  description.\\nACCESSION   id\\nVERSION     id\\nKEYWORDS    .\\nSOURCE      .\\n  ORGANISM  .\\n            .\\nFEATURES             Location/Qualifiers\\nORIGIN\\n        1 aaa\\n//'"
       ]
      },
      "execution_count": 7,
@@ -151,18 +129,7 @@
     {
      "data": {
       "text/plain": [
-       "LOCUS       abc                        3 bp    DNA     linear   UNK 20-DEC-2016\n",
-       "DEFINITION  abc.\n",
-       "ACCESSION   abc\n",
-       "VERSION     abc\n",
-       "KEYWORDS    .\n",
-       "SOURCE      .\n",
-       "  ORGANISM  .\n",
-       "            .\n",
-       "FEATURES             Location/Qualifiers\n",
-       "ORIGIN\n",
-       "        1 aaa\n",
-       "//"
+       "'LOCUS       abc                        3 bp    DNA     linear   UNK 20-DEC-2016\\nDEFINITION  abc.\\nACCESSION   abc\\nVERSION     abc\\nKEYWORDS    .\\nSOURCE      .\\n  ORGANISM  .\\n            .\\nFEATURES             Location/Qualifiers\\nORIGIN\\n        1 aaa\\n//'"
       ]
      },
      "execution_count": 8,
@@ -214,7 +181,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,

diff --git a/tests/test_module_dseqrecord.py b/tests/test_module_dseqrecord.py
@@ -21,6 +21,7 @@ def test_orfs():
     for orf, ln in zip(s.orfs(1002), lens):
         assert len(orf) == ln
 
+
 def test_cas9():
     from pydna.dseqrecord import Dseqrecord
 
@@ -434,6 +435,15 @@ def test_format():
     s = Dseqrecord("GGATCC", circular=False)
     s.format("fasta")
 
+    # A name that overflows the padded LOCUS name column must still give a well-formed LOCUS line
+    s.name = "A" * 45
+    genbank_str = s.format("genbank")
+    locus_line = genbank_str.split("\n")[0]
+    assert (
+        locus_line
+        == "LOCUS       AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 6 bp    DNA     linear   UNK 01-JAN-1980"
+    )
+
 
 def test_write():
     from unittest.mock import patch
@@ -673,7 +683,9 @@ def test_cut_add():
 
     a = Dseqrecord("GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC").seq
     b = a.cut(BamHI)[1]
-    c = Dseqrecord("nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn").seq
+    c = Dseqrecord(
+        "nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn"
+    ).seq
     f, d, l = c.cut((EcoRI, PstI))
 
     pUC19 = read("pUC19.gb")
@@ -808,9 +820,9 @@ def test_Dseqrecord_cutting_adding_2():
     for enz in enzymes:
         for f in a:
             b, c, d = f.cut(enz)
-            #print(b.seq.__repr__())
-            #print(c.seq.__repr__())
-            #print(d.seq.__repr__())
+            # print(b.seq.__repr__())
+            # print(c.seq.__repr__())
+            # print(d.seq.__repr__())
             e = b + c + d
             assert str(e.seq).lower() == str(f.seq).lower()
 
@@ -2264,26 +2276,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature:
     # Single cut case, check that features are transmitted correctly.
     for strand in [1, -1, None]:
         seq = Dseqrecord("acgtATGaatt", circular=True)
-        seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap'))
-        seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side'))
-        seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side'))
-        seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout'))
+        seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap"))
+        seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side"))
+        seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side"))
+        seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout"))
         for shift in range(len(seq)):
             seq_shifted = seq.shifted(shift)
-            cut_feature = find_feature_by_id(seq_shifted, 'full_overlap')
+            cut_feature = find_feature_by_id(seq_shifted, "full_overlap")
             start, end = _location_boundaries(cut_feature.location)
             # Cut leaving + and - overhangs in the feature full_overlap
             for dummy_cut in (((start, -3), None), ((end, 3), None)):
                 open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut)
                 assert len(open_seq.features) == 4
                 new_locs = sorted(str(f.location) for f in open_seq.features)
-                assert str(open_seq.seq) == 'ATGaattacgtATG'
+                assert str(open_seq.seq) == "ATGaattacgtATG"
                 if strand == 1:
-                    assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)'])
+                    assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"])
                 elif strand == -1:
-                    assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)'])
+                    assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"])
                 if strand == None:
-                    assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]'])
+                    assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"])
 
 
 def test_apply_cut():
@@ -2298,26 +2310,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature:
     # Single cut case, check that features are transmitted correctly.
     for strand in [1, -1, None]:
         seq = Dseqrecord("acgtATGaatt", circular=True)
-        seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap'))
-        seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side'))
-        seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side'))
-        seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout'))
+        seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap"))
+        seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side"))
+        seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side"))
+        seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout"))
         for shift in range(len(seq)):
             seq_shifted = seq.shifted(shift)
-            cut_feature = find_feature_by_id(seq_shifted, 'full_overlap')
+            cut_feature = find_feature_by_id(seq_shifted, "full_overlap")
             start, end = _location_boundaries(cut_feature.location)
             # Cut leaving + and - overhangs in the feature full_overlap
             for dummy_cut in (((start, -3), None), ((end, 3), None)):
                 open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut)
                 assert len(open_seq.features) == 4
                 new_locs = sorted(str(f.location) for f in open_seq.features)
-                assert str(open_seq.seq) == 'ATGaattacgtATG'
+                assert str(open_seq.seq) == "ATGaattacgtATG"
                 if strand == 1:
-                    assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)'])
+                    assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"])
                 elif strand == -1:
-                    assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)'])
+                    assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"])
                 if strand == None:
-                    assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]'])
+                    assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"])
 
 
 if __name__ == "__main__":