diff --git a/skbio/sequence/_dna.py b/skbio/sequence/_dna.py index ba0d85727..6ff9c0333 100644 --- a/skbio/sequence/_dna.py +++ b/skbio/sequence/_dna.py @@ -61,13 +61,13 @@ class DNA(GrammaredSequence, NucleotideMixin): +-----+-----------+ |Code |Nucleobase | +=====+===========+ - |`A` |Adenine | + |``A``|Adenine | +-----+-----------+ - |`C` |Cytosine | + |``C``|Cytosine | +-----+-----------+ - |`G` |Guanine | + |``G``|Guanine | +-----+-----------+ - |`T` |Thymine | + |``T``|Thymine | +-----+-----------+ And the following 11 degenerate characters, each of which representing 2-4 @@ -76,33 +76,33 @@ class DNA(GrammaredSequence, NucleotideMixin): +-----+-------------+-----------+ |Code |Nucleobases |Meaning | +=====+=============+===========+ - |`R` |A or G |Purine | + |``R``|A or G |Purine | +-----+-------------+-----------+ - |`Y` |C or T |Pyrimidine | + |``Y``|C or T |Pyrimidine | +-----+-------------+-----------+ - |`S` |G or C |Strong | + |``S``|G or C |Strong | +-----+-------------+-----------+ - |`W` |A or T |Weak | + |``W``|A or T |Weak | +-----+-------------+-----------+ - |`K` |G or T |Keto | + |``K``|G or T |Keto | +-----+-------------+-----------+ - |`M` |A or C |Amino | + |``M``|A or C |Amino | +-----+-------------+-----------+ - |`B` |C, G or T |Not A | + |``B``|C, G or T |Not A | +-----+-------------+-----------+ - |`D` |A, G or T |Not C | + |``D``|A, G or T |Not C | +-----+-------------+-----------+ - |`H` |A, C or T |Not G | + |``H``|A, C or T |Not G | +-----+-------------+-----------+ - |`V` |A, C or G |Not T | + |``V``|A, C or G |Not T | +-----+-------------+-----------+ - |`N` |A, C, G or T |Any | + |``N``|A, C, G or T |Any | +-----+-------------+-----------+ - Plus two gap characters: `-` and `.`. + Plus two gap characters: ``-`` and ``.``. Characters other than the above 17 are not allowed. If you intend to use - additional characters to represent non-canonical nucleobases, such as `I` + additional characters to represent non-canonical nucleobases, such as ``I`` (Inosine), you may create a custom alphabet using ``GrammaredSequence``. Directly modifying the alphabet of ``DNA`` may break methods that rely on the IUPAC alphabet. diff --git a/skbio/sequence/_protein.py b/skbio/sequence/_protein.py index bd9dec484..02ecea9b4 100644 --- a/skbio/sequence/_protein.py +++ b/skbio/sequence/_protein.py @@ -59,45 +59,45 @@ class Protein(GrammaredSequence): +-----+---------+--------------+ |Code |3-letter |Amino acid | +=====+=========+==============+ - |`A` |Ala |Alanine | + |``A``|Ala |Alanine | +-----+---------+--------------+ - |`C` |Cys |Cysteine | + |``C``|Cys |Cysteine | +-----+---------+--------------+ - |`D` |Asp |Aspartic acid | + |``D``|Asp |Aspartic acid | +-----+---------+--------------+ - |`E` |Glu |Glutamic acid | + |``E``|Glu |Glutamic acid | +-----+---------+--------------+ - |`F` |Phe |Phenylalanine | + |``F``|Phe |Phenylalanine | +-----+---------+--------------+ - |`G` |Gly |Glycine | + |``G``|Gly |Glycine | +-----+---------+--------------+ - |`H` |His |Histidine | + |``H``|His |Histidine | +-----+---------+--------------+ - |`I` |Ile |Isoleucine | + |``I``|Ile |Isoleucine | +-----+---------+--------------+ - |`K` |Lys |Lysine | + |``K``|Lys |Lysine | +-----+---------+--------------+ - |`L` |Leu |Leucine | + |``L``|Leu |Leucine | +-----+---------+--------------+ - |`M` |Met |Methionine | + |``M``|Met |Methionine | +-----+---------+--------------+ - |`N` |Asn |Asparagine | + |``N``|Asn |Asparagine | +-----+---------+--------------+ - |`P` |Pro |Proline | + |``P``|Pro |Proline | +-----+---------+--------------+ - |`Q` |Gln |Glutamine | + |``Q``|Gln |Glutamine | +-----+---------+--------------+ - |`R` |Arg |Arginine | + |``R``|Arg |Arginine | +-----+---------+--------------+ - |`S` |Ser |Serine | + |``S``|Ser |Serine | +-----+---------+--------------+ - |`T` |Thr |Threonine | + |``T``|Thr |Threonine | +-----+---------+--------------+ - |`V` |Val |Valine | + |``V``|Val |Valine | +-----+---------+--------------+ - |`W` |Trp |Tryptophan | + |``W``|Trp |Tryptophan | +-----+---------+--------------+ - |`Y` |Tyr |Tyrosine | + |``Y``|Tyr |Tyrosine | +-----+---------+--------------+ And the following four degenerate characters, each of which representing @@ -106,27 +106,27 @@ class Protein(GrammaredSequence): +-----+---------+------------+ |Code |3-letter |Amino acids | +=====+=========+============+ - |`B` |Asx |D or N | + |``B``|Asx |D or N | +-----+---------+------------+ - |`Z` |Glx |E or Q | + |``Z``|Glx |E or Q | +-----+---------+------------+ - |`J` |Xle |I or L | + |``J``|Xle |I or L | +-----+---------+------------+ - |`X` |Xaa |All 20 | + |``X``|Xaa |All 20 | +-----+---------+------------+ - Plus one stop character: `*` (Ter), and two gap characters: `-` and `.`. + Plus one stop character: ``*`` (Ter), and two gap characters: ``-`` and ``.``. Characters other than the above 27 are not allowed. If you intend to use - additional characters to represent non-canonical amino acids, such as `U` - (Sec, Selenocysteine) and `O` (Pyl, Pyrrolysine), you may create a custom + additional characters to represent non-canonical amino acids, such as ``U`` + (Sec, Selenocysteine) and ``O`` (Pyl, Pyrrolysine), you may create a custom alphabet using ``GrammaredSequence``. Directly modifying the alphabet of ``Protein`` may break functions that rely on the IUPAC alphabet. It should be noted that some functions do not support certain characters. - For example, the BLOSUM and PAM substitution matrices do not support `J` + For example, the BLOSUM and PAM substitution matrices do not support ``J`` (Xle). In such circumstances, unsupported characters will be replaced with - `X` to represent any of the canonical amino acids. + ``X`` to represent any of the canonical amino acids. References ---------- diff --git a/skbio/sequence/_rna.py b/skbio/sequence/_rna.py index 8c3fd02fc..c0c546cc0 100644 --- a/skbio/sequence/_rna.py +++ b/skbio/sequence/_rna.py @@ -60,19 +60,19 @@ class RNA(GrammaredSequence, NucleotideMixin): +-----+-----------+ |Code |Nucleobase | +=====+===========+ - |`A` |Adenine | + |``A``|Adenine | +-----+-----------+ - |`C` |Cytosine | + |``C``|Cytosine | +-----+-----------+ - |`G` |Guanine | + |``G``|Guanine | +-----+-----------+ - |`U` |Uracil | + |``U``|Uracil | +-----+-----------+ - Plus 11 degenerate characters: `R`, `Y`, `S`, `W`, `K`, `M`, `B`, `D`, `H`, - `V` and `N`, and two gap characters: `-` and `.`. The definitions of - degenerate characters are provided in ``DNA``, in which `T` should be - replaced with `U` for RNA sequences. + Plus 11 degenerate characters: ``R``, ``Y``, ``S``, ``W``, ``K``, ``M``, ``B``, + ``D``, ``H``, ``V`` and ``N``, and two gap characters: ``-`` and ``.``. The + definitions of degenerate characters are provided in ``DNA``, in which ``T`` should + be replaced with ``U`` for RNA sequences. Characters other than the above 17 are not allowed. To include additional characters, you may create a custom alphabet using ``GrammaredSequence``.