From bfddf0cb23dd5c326ad56b4907b52089fd6733d7 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 9 Jul 2024 10:13:19 +0100 Subject: [PATCH 1/2] closes #238 --- src/pydna/dseqrecord.py | 19 +++++------ tests/test_module_dseqrecord.py | 56 ++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/pydna/dseqrecord.py b/src/pydna/dseqrecord.py index e3ffaecd..cfeb2815 100644 --- a/src/pydna/dseqrecord.py +++ b/src/pydna/dseqrecord.py @@ -482,15 +482,12 @@ def format(self, f="gb"): """ - s = super().format(f).strip() - - if f in ("genbank", "gb"): - if self.circular: - return _pretty_str(s[:55] + "circular" + s[63:]) - else: - return _pretty_str(s[:55] + "linear " + s[63:]) + record = _copy.deepcopy(self) + if f in ("genbank", "gb") and self.circular: + record.annotations["topology"] = "circular" else: - return _pretty_str(s).strip() + record.annotations["topology"] = "linear" + return _SeqRecord.format(record, f).strip() def write(self, filename=None, f="gb"): """Writes the Dseqrecord to a file using the format f, which must @@ -1114,7 +1111,7 @@ def orfs_to_features(self, minsize=300): _SimpleLocation(x, y, strand=strand), type="CDS", qualifiers={ - "note": f"{y-x}bp {(y-x)//3}aa", + "note": f"{y - x}bp {(y - x) // 3}aa", "checksum": [orf.seguid() + " (DNA)", prt.seguid() + " (protein)"], "codon_start": 1, "transl_table": 11, @@ -1153,8 +1150,8 @@ def figure(self, feature=0, highlight="\x1b[48;5;11m", plain="\x1b[0m"): ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick) - w = f"{self.seq.ovhg*chr(32)}{self.seq.watson}{-ovhg*chr(32)}" - c = f"{-self.seq.ovhg*chr(32)}{self.seq.crick[::-1]}{ovhg*chr(32)}" + w = f"{self.seq.ovhg * chr(32)}{self.seq.watson}{-ovhg * chr(32)}" + c = f"{-self.seq.ovhg * chr(32)}{self.seq.crick[::-1]}{ovhg * chr(32)}" if strand == 1: s1, s2 = w, c diff --git a/tests/test_module_dseqrecord.py b/tests/test_module_dseqrecord.py index a2e9b60b..6cf61f11 100644 --- a/tests/test_module_dseqrecord.py +++ b/tests/test_module_dseqrecord.py @@ -21,6 +21,7 @@ def test_orfs(): for orf, ln in zip(s.orfs(1002), lens): assert len(orf) == ln + def test_cas9(): from pydna.dseqrecord import Dseqrecord @@ -434,6 +435,15 @@ def test_format(): s = Dseqrecord("GGATCC", circular=False) s.format("fasta") + # A long sequence name is properly handled + s.name = "A" * 45 + genbank_str = s.format("genbank") + locus_line = genbank_str.split("\n")[0] + assert ( + locus_line + == "LOCUS AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 6 bp DNA linear UNK 01-JAN-1980" + ) + def test_write(): from unittest.mock import patch @@ -673,7 +683,9 @@ def test_cut_add(): a = Dseqrecord("GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC").seq b = a.cut(BamHI)[1] - c = Dseqrecord("nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn").seq + c = Dseqrecord( + "nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn" + ).seq f, d, l = c.cut((EcoRI, PstI)) pUC19 = read("pUC19.gb") @@ -808,9 +820,9 @@ def test_Dseqrecord_cutting_adding_2(): for enz in enzymes: for f in a: b, c, d = f.cut(enz) - #print(b.seq.__repr__()) - #print(c.seq.__repr__()) - #print(d.seq.__repr__()) + # print(b.seq.__repr__()) + # print(c.seq.__repr__()) + # print(d.seq.__repr__()) e = b + c + d assert str(e.seq).lower() == str(f.seq).lower() @@ -2264,26 +2276,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature: # Single cut case, check that features are transmitted correctly. for strand in [1, -1, None]: seq = Dseqrecord("acgtATGaatt", circular=True) - seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap')) - seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side')) - seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side')) - seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout')) + seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap")) + seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side")) + seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side")) + seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout")) for shift in range(len(seq)): seq_shifted = seq.shifted(shift) - cut_feature = find_feature_by_id(seq_shifted, 'full_overlap') + cut_feature = find_feature_by_id(seq_shifted, "full_overlap") start, end = _location_boundaries(cut_feature.location) # Cut leaving + and - overhangs in the feature full_overlap for dummy_cut in (((start, -3), None), ((end, 3), None)): open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut) assert len(open_seq.features) == 4 new_locs = sorted(str(f.location) for f in open_seq.features) - assert str(open_seq.seq) == 'ATGaattacgtATG' + assert str(open_seq.seq) == "ATGaattacgtATG" if strand == 1: - assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)']) + assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"]) elif strand == -1: - assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)']) + assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"]) if strand == None: - assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]']) + assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"]) def test_apply_cut(): @@ -2298,26 +2310,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature: # Single cut case, check that features are transmitted correctly. for strand in [1, -1, None]: seq = Dseqrecord("acgtATGaatt", circular=True) - seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap')) - seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side')) - seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side')) - seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout')) + seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap")) + seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side")) + seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side")) + seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout")) for shift in range(len(seq)): seq_shifted = seq.shifted(shift) - cut_feature = find_feature_by_id(seq_shifted, 'full_overlap') + cut_feature = find_feature_by_id(seq_shifted, "full_overlap") start, end = _location_boundaries(cut_feature.location) # Cut leaving + and - overhangs in the feature full_overlap for dummy_cut in (((start, -3), None), ((end, 3), None)): open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut) assert len(open_seq.features) == 4 new_locs = sorted(str(f.location) for f in open_seq.features) - assert str(open_seq.seq) == 'ATGaattacgtATG' + assert str(open_seq.seq) == "ATGaattacgtATG" if strand == 1: - assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)']) + assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"]) elif strand == -1: - assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)']) + assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"]) if strand == None: - assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]']) + assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"]) if __name__ == "__main__": From ce62d6a999a7b0cb98235203573010108d6fc6be Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 9 Jul 2024 10:18:22 +0100 Subject: [PATCH 2/2] fix notebook --- tests/format_sequences.ipynb | 51 +++++++----------------------------- 1 file changed, 9 insertions(+), 42 deletions(-) diff --git a/tests/format_sequences.ipynb b/tests/format_sequences.ipynb index ebaeb8b4..f4ec3a4c 100755 --- a/tests/format_sequences.ipynb +++ b/tests/format_sequences.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -85,18 +85,7 @@ { "data": { "text/plain": [ - "LOCUS . 3 bp DNA linear UNK 20-DEC-2016\n", - "DEFINITION .\n", - "ACCESSION \n", - "VERSION \n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - "ORIGIN\n", - " 1 aaa\n", - "//" + "'LOCUS . 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION .\\nACCESSION \\nVERSION \\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'" ] }, "execution_count": 6, @@ -118,18 +107,7 @@ { "data": { "text/plain": [ - "LOCUS name 3 bp DNA linear UNK 20-DEC-2016\n", - "DEFINITION description.\n", - "ACCESSION id\n", - "VERSION id\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - "ORIGIN\n", - " 1 aaa\n", - "//" + "'LOCUS name 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION description.\\nACCESSION id\\nVERSION id\\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'" ] }, "execution_count": 7, @@ -151,18 +129,7 @@ { "data": { "text/plain": [ - "LOCUS abc 3 bp DNA linear UNK 20-DEC-2016\n", - "DEFINITION abc.\n", - "ACCESSION abc\n", - "VERSION abc\n", - "KEYWORDS .\n", - "SOURCE .\n", - " ORGANISM .\n", - " .\n", - "FEATURES Location/Qualifiers\n", - "ORIGIN\n", - " 1 aaa\n", - "//" + "'LOCUS abc 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION abc.\\nACCESSION abc\\nVERSION abc\\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'" ] }, "execution_count": 8, @@ -214,7 +181,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.3" } }, "nbformat": 4,