Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 238 #239

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions src/pydna/dseqrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,15 +482,12 @@ def format(self, f="gb"):

"""

s = super().format(f).strip()

if f in ("genbank", "gb"):
if self.circular:
return _pretty_str(s[:55] + "circular" + s[63:])
else:
return _pretty_str(s[:55] + "linear " + s[63:])
record = _copy.deepcopy(self)
if f in ("genbank", "gb") and self.circular:
record.annotations["topology"] = "circular"
else:
return _pretty_str(s).strip()
record.annotations["topology"] = "linear"
return _SeqRecord.format(record, f).strip()

def write(self, filename=None, f="gb"):
"""Writes the Dseqrecord to a file using the format f, which must
Expand Down Expand Up @@ -1114,7 +1111,7 @@ def orfs_to_features(self, minsize=300):
_SimpleLocation(x, y, strand=strand),
type="CDS",
qualifiers={
"note": f"{y-x}bp {(y-x)//3}aa",
"note": f"{y - x}bp {(y - x) // 3}aa",
"checksum": [orf.seguid() + " (DNA)", prt.seguid() + " (protein)"],
"codon_start": 1,
"transl_table": 11,
Expand Down Expand Up @@ -1153,8 +1150,8 @@ def figure(self, feature=0, highlight="\x1b[48;5;11m", plain="\x1b[0m"):

ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)

w = f"{self.seq.ovhg*chr(32)}{self.seq.watson}{-ovhg*chr(32)}"
c = f"{-self.seq.ovhg*chr(32)}{self.seq.crick[::-1]}{ovhg*chr(32)}"
w = f"{self.seq.ovhg * chr(32)}{self.seq.watson}{-ovhg * chr(32)}"
c = f"{-self.seq.ovhg * chr(32)}{self.seq.crick[::-1]}{ovhg * chr(32)}"

if strand == 1:
s1, s2 = w, c
Expand Down
51 changes: 9 additions & 42 deletions tests/format_sequences.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -85,18 +85,7 @@
{
"data": {
"text/plain": [
"LOCUS . 3 bp DNA linear UNK 20-DEC-2016\n",
"DEFINITION .\n",
"ACCESSION <unknown id>\n",
"VERSION <unknown id>\n",
"KEYWORDS .\n",
"SOURCE .\n",
" ORGANISM .\n",
" .\n",
"FEATURES Location/Qualifiers\n",
"ORIGIN\n",
" 1 aaa\n",
"//"
"'LOCUS . 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION .\\nACCESSION <unknown id>\\nVERSION <unknown id>\\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'"
]
},
"execution_count": 6,
Expand All @@ -118,18 +107,7 @@
{
"data": {
"text/plain": [
"LOCUS name 3 bp DNA linear UNK 20-DEC-2016\n",
"DEFINITION description.\n",
"ACCESSION id\n",
"VERSION id\n",
"KEYWORDS .\n",
"SOURCE .\n",
" ORGANISM .\n",
" .\n",
"FEATURES Location/Qualifiers\n",
"ORIGIN\n",
" 1 aaa\n",
"//"
"'LOCUS name 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION description.\\nACCESSION id\\nVERSION id\\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'"
]
},
"execution_count": 7,
Expand All @@ -151,18 +129,7 @@
{
"data": {
"text/plain": [
"LOCUS abc 3 bp DNA linear UNK 20-DEC-2016\n",
"DEFINITION abc.\n",
"ACCESSION abc\n",
"VERSION abc\n",
"KEYWORDS .\n",
"SOURCE .\n",
" ORGANISM .\n",
" .\n",
"FEATURES Location/Qualifiers\n",
"ORIGIN\n",
" 1 aaa\n",
"//"
"'LOCUS abc 3 bp DNA linear UNK 20-DEC-2016\\nDEFINITION abc.\\nACCESSION abc\\nVERSION abc\\nKEYWORDS .\\nSOURCE .\\n ORGANISM .\\n .\\nFEATURES Location/Qualifiers\\nORIGIN\\n 1 aaa\\n//'"
]
},
"execution_count": 8,
Expand Down Expand Up @@ -214,7 +181,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
56 changes: 34 additions & 22 deletions tests/test_module_dseqrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_orfs():
for orf, ln in zip(s.orfs(1002), lens):
assert len(orf) == ln


def test_cas9():
from pydna.dseqrecord import Dseqrecord

Expand Down Expand Up @@ -434,6 +435,15 @@ def test_format():
s = Dseqrecord("GGATCC", circular=False)
s.format("fasta")

# A long sequence name is properly handled
s.name = "A" * 45
genbank_str = s.format("genbank")
locus_line = genbank_str.split("\n")[0]
assert (
locus_line
== "LOCUS AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 6 bp DNA linear UNK 01-JAN-1980"
)


def test_write():
from unittest.mock import patch
Expand Down Expand Up @@ -673,7 +683,9 @@ def test_cut_add():

a = Dseqrecord("GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC").seq
b = a.cut(BamHI)[1]
c = Dseqrecord("nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn").seq
c = Dseqrecord(
"nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn"
).seq
f, d, l = c.cut((EcoRI, PstI))

pUC19 = read("pUC19.gb")
Expand Down Expand Up @@ -808,9 +820,9 @@ def test_Dseqrecord_cutting_adding_2():
for enz in enzymes:
for f in a:
b, c, d = f.cut(enz)
#print(b.seq.__repr__())
#print(c.seq.__repr__())
#print(d.seq.__repr__())
# print(b.seq.__repr__())
# print(c.seq.__repr__())
# print(d.seq.__repr__())
e = b + c + d
assert str(e.seq).lower() == str(f.seq).lower()

Expand Down Expand Up @@ -2264,26 +2276,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature:
# Single cut case, check that features are transmitted correctly.
for strand in [1, -1, None]:
seq = Dseqrecord("acgtATGaatt", circular=True)
seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap'))
seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side'))
seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side'))
seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout'))
seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap"))
seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side"))
seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side"))
seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout"))
for shift in range(len(seq)):
seq_shifted = seq.shifted(shift)
cut_feature = find_feature_by_id(seq_shifted, 'full_overlap')
cut_feature = find_feature_by_id(seq_shifted, "full_overlap")
start, end = _location_boundaries(cut_feature.location)
# Cut leaving + and - overhangs in the feature full_overlap
for dummy_cut in (((start, -3), None), ((end, 3), None)):
open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut)
assert len(open_seq.features) == 4
new_locs = sorted(str(f.location) for f in open_seq.features)
assert str(open_seq.seq) == 'ATGaattacgtATG'
assert str(open_seq.seq) == "ATGaattacgtATG"
if strand == 1:
assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)'])
assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"])
elif strand == -1:
assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)'])
assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"])
if strand == None:
assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]'])
assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"])


def test_apply_cut():
Expand All @@ -2298,26 +2310,26 @@ def find_feature_by_id(f: Dseqrecord, id: str) -> SeqFeature:
# Single cut case, check that features are transmitted correctly.
for strand in [1, -1, None]:
seq = Dseqrecord("acgtATGaatt", circular=True)
seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id='full_overlap'))
seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id='left_side'))
seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id='right_side'))
seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id='throughout'))
seq.features.append(SeqFeature(SimpleLocation(4, 7, strand), id="full_overlap"))
seq.features.append(SeqFeature(SimpleLocation(3, 7, strand), id="left_side"))
seq.features.append(SeqFeature(SimpleLocation(4, 8, strand), id="right_side"))
seq.features.append(SeqFeature(SimpleLocation(3, 10, strand), id="throughout"))
for shift in range(len(seq)):
seq_shifted = seq.shifted(shift)
cut_feature = find_feature_by_id(seq_shifted, 'full_overlap')
cut_feature = find_feature_by_id(seq_shifted, "full_overlap")
start, end = _location_boundaries(cut_feature.location)
# Cut leaving + and - overhangs in the feature full_overlap
for dummy_cut in (((start, -3), None), ((end, 3), None)):
open_seq = seq_shifted.apply_cut(dummy_cut, dummy_cut)
assert len(open_seq.features) == 4
new_locs = sorted(str(f.location) for f in open_seq.features)
assert str(open_seq.seq) == 'ATGaattacgtATG'
assert str(open_seq.seq) == "ATGaattacgtATG"
if strand == 1:
assert new_locs == sorted(['[0:3](+)', '[0:4](+)', '[11:14](+)', '[10:14](+)'])
assert new_locs == sorted(["[0:3](+)", "[0:4](+)", "[11:14](+)", "[10:14](+)"])
elif strand == -1:
assert new_locs == sorted(['[0:3](-)', '[0:4](-)', '[11:14](-)', '[10:14](-)'])
assert new_locs == sorted(["[0:3](-)", "[0:4](-)", "[11:14](-)", "[10:14](-)"])
if strand == None:
assert new_locs == sorted(['[0:3]', '[0:4]', '[11:14]', '[10:14]'])
assert new_locs == sorted(["[0:3]", "[0:4]", "[11:14]", "[10:14]"])


if __name__ == "__main__":
Expand Down
Loading